In [1]:
!pip3 install --user numpy pandas scipy matplotlib
import pandas as pd
import numpy as np
import scipy as sp
import matplotlib.pyplot as plt
from matplotlib import colors as mcolors
# import plotly.plotly as py
# import plotly.figure_factory as ff
# from plotly.graph_objs import *



In [10]:
import plotly
import plotly.graph_objs as go
# Enable plotly's offline rendering inside the notebook. Per the plotly docs,
# connected=True loads plotly.js from the online CDN instead of embedding it.
plotly.offline.init_notebook_mode(connected=True)
# Last expression of the cell: shows the installed plotly version as cell output.
plotly.__version__

'3.1.0'

In [118]:
import math
import os
import time

Use `plotly.offline.iplot()` when working offline in a Jupyter Notebook to display the plot in the notebook.

In [2]:
# Read the CSV that is later handed to draw_scatter(data). Judging by the file
# name this holds Q1 load-phase times grouped by tool -- TODO confirm the columns.
data = pd.read_csv('diagrams/Q1-GroupBy-Tool-Time-01-Load.csv')

In [101]:
# Phase label -> numbered suffix; draw_scatter uses it to build the file name
# of the saved plot ('plot-<query>-<suffix>.png').
phase_names = {
    'Load': '01-Load',
    'Initial': '02-Initial',
    'Update': '03-Update',
}

In [160]:
def draw_scatter(df, query='Q1', phase='Load', download_file=True, minY=0, maxY=3,
                 *, timeout=60, **kwargs):
    """Plot mean execution time per tool against change-set size on log-log axes.

    One line trace is drawn per distinct value of the ``Tool`` column, in order
    of first appearance, with the prefix ``'jastadd-ttc18-'`` stripped from the
    legend entry.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Tool`` and ``ChangeSet`` and either ``mean``
        (plotted exactly as given) or ``value`` (averaged per Tool/ChangeSet
        pair, yielding one point per pair, ordered by ChangeSet).
    query : str
        Query label such as ``'Q1'``. The title uses ``query[1:]``, the file
        name uses the full label.
    phase : str
        Phase label shown in the title. When ``download_file`` is true it must
        also be a key of the module-level ``phase_names``.
    download_file : bool
        If true, ask plotly to export the figure as PNG via the browser, wait
        for ``~/Downloads/temp-plot.png`` to appear and move it to
        ``diagrams/plot-<query>-<phase suffix>.png``. If false, only display
        the figure inline.
    minY, maxY : int
        Lowest and highest decade (log10 exponent) of the y axis.
    timeout : float or None
        Maximum number of seconds to wait for the exported image. ``None``
        waits indefinitely.
    **kwargs
        Accepted for call compatibility; currently ignored.

    Raises
    ------
    TimeoutError
        If the exported image does not show up within ``timeout`` seconds.
    KeyError
        If ``download_file`` is true and ``phase`` is not in ``phase_names``.
    ValueError
        From ``math.log10`` if the smallest or largest ChangeSet is not positive.
    """
    # y axis (execution time): ticks at 1..9 times every decade, but only the
    # decade itself (every 9th tick) gets a label.
    tval = [minor * 10 ** major for major in range(minY, maxY + 1) for minor in range(1, 10)]
    ttxt = [v if i % 9 == 0 else "" for i, v in enumerate(tval)]
    yaxis = dict(type='log', range=[minY, maxY], tickvals=tval, ticktext=ttxt,
                 title='Time (ms)', exponentformat='power')

    # x axis (change-set size): pad the log10 range slightly on both sides.
    cs_min = df.ChangeSet.min()
    cs_max = df.ChangeSet.max()
    minX = math.log10(cs_min) - 0.1
    maxX = math.log10(cs_max) + 0.1
    # Powers of two up to the largest change set; never fewer than the six
    # ticks (1..32) that were used before. The comparison is False for NaN
    # (empty frame), which then falls back to the six default ticks.
    n_ticks = max(6, int(cs_max).bit_length()) if cs_max >= 1 else 6
    xaxis = dict(type='log', title='ChangeSet',
                 tickvals=[2 ** i for i in range(n_ticks)], range=[minX, maxX])

    # Remember the tool order of the input before aggregating, because groupby
    # sorts its keys and would otherwise reorder the legend.
    tool_order = df['Tool'].unique()
    if 'mean' not in df.columns:
        # Collapse to one row per (Tool, ChangeSet). groupby sorts the keys, so
        # each tool's points end up in ascending ChangeSet order and the line
        # is drawn left to right without duplicate points.
        df = (df.groupby(['Tool', 'ChangeSet'])['value']
                .mean()
                .rename('mean')
                .reset_index())

    traces = []
    for toolname in tool_order:
        sub = df[df['Tool'] == toolname]
        traces.append(go.Scatter(x=sub['ChangeSet'], y=sub['mean'],
                                 name=toolname.replace('jastadd-ttc18-', '')))

    layout = go.Layout(title='LiveContest Query {} {}'.format(query[1:], phase),
                       yaxis=yaxis,
                       xaxis=xaxis,
                       showlegend=True)
    fig = go.Figure(data=traces, layout=layout)

    if not download_file:
        plotly.offline.iplot(fig)
        return

    extension = 'png'
    source_filename = 'temp-plot'
    target_dir = 'diagrams'
    target_path = os.path.join(
        target_dir, 'plot-{}-{}.{}'.format(query, phase_names[phase], extension))
    source_path = os.path.join(os.path.expanduser('~/Downloads'),
                               source_filename + '.' + extension)
    print('Source:', source_path, ', Target: ', target_path)

    # Remove a stale export first so that the existence check below can only
    # succeed for the image produced by this call.
    if os.path.exists(source_path):
        os.remove(source_path)
    os.makedirs(target_dir, exist_ok=True)

    plotly.offline.iplot(fig, filename=source_filename, image=extension)

    # The export is performed asynchronously by the browser, so poll for the
    # file -- but give up after `timeout` seconds instead of hanging the kernel.
    deadline = None if timeout is None else time.monotonic() + timeout
    while not os.path.exists(source_path):
        if deadline is not None and time.monotonic() >= deadline:
            raise TimeoutError(
                'Timed out after {} s waiting for {}'.format(timeout, source_path))
        time.sleep(1)

    # os.replace overwrites an existing target on every platform, so the cell
    # can be re-run to regenerate a plot.
    os.replace(source_path, target_path)

In [None]:
# Plot the frame read from the CSV with draw_scatter's default settings spelled
# out: Q1 / Load, exporting the figure to a PNG file.
draw_scatter(data, query='Q1', phase='Load', download_file=True)

In [123]:
def _to_numeric_if_possible(column):
    """Return ``column`` converted by ``pd.to_numeric``, or unchanged if that fails.

    Replaces ``pd.to_numeric(..., errors='ignore')``, which pandas deprecated in 2.2.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Keep only the "Time" measurements, convert every column that can be parsed
# as numbers, and add `value` = MetricValue / 10**6 (presumably nanoseconds to
# milliseconds, matching the 'Time (ms)' axis title -- TODO confirm the unit).
output = (
    pd.read_csv('output/output.csv', sep=';')
    .query('MetricName == "Time"')
    .apply(_to_numeric_if_possible)
    .assign(value=lambda frame: frame.MetricValue / 10**6)
)
output.head()

Unnamed: 0,Tool,View,ChangeSet,RunIndex,Iteration,PhaseName,MetricName,MetricValue,value
0,jastadd-ttc18-xml-inc,Q1,1,0,0,Initialization,Time,124618,0.124618
2,jastadd-ttc18-xml-inc,Q1,1,0,0,Load,Time,262347766,262.347766
4,jastadd-ttc18-xml-inc,Q1,1,0,0,Initial,Time,26318967,26.318967
7,jastadd-ttc18-xml-inc,Q1,1,0,1,Update,Time,7426371,7.426371
10,jastadd-ttc18-xml-inc,Q1,1,0,2,Update,Time,6635210,6.63521


In [126]:
# Y-axis bounds as whole decades: the smallest measurement is rounded down and
# the largest rounded up on a log10 scale.
minY, maxY = (
    math.floor(math.log10(output.value.min())),
    math.ceil(math.log10(output.value.max())),
)
print(minY, maxY)

-3 4


In [161]:
# Show one plot per (query, phase) combination inline, without exporting a PNG.
# Currently restricted to Q1 / Update; for the full sweep iterate over
# ('Q1', 'Q2') and ('Load', 'Initial', 'Update') instead.
for query in ['Q1']:
    for phase in ['Update']:
        new_data = output[(output['View'] == query) & (output['PhaseName'] == phase)]
        draw_scatter(new_data, query=query, phase=phase, download_file=False,
                     minY=minY, maxY=maxY)