In [1]:
import pandas as pd
from bokeh.layouts import Row, Column, gridplot
from bokeh.plotting import figure, show, output_notebook
from toolz import partition_all
# Direct Bokeh output to the notebook so later show() calls render inline.
output_notebook()

In [2]:
def comparison_plot(part, axis_type='linear'):
    """Plot rate against thread count for one benchmark.

    Parameters
    ----------
    part : pandas.DataFrame
        Rows belonging to a single benchmark. The columns ``collection``,
        ``name``, ``unit``, ``threads`` and ``rate`` are read; the title and
        the y-axis label are taken from the first row.
    axis_type : str, optional
        Forwarded to ``figure`` as both ``x_axis_type`` and ``y_axis_type``.

    Returns
    -------
    The figure object created by ``figure``, with a line and circle markers
    for ``rate`` versus ``threads`` and one x tick per ``threads`` value.
    """
    row = part.iloc[0]
    # str.title() capitalises the letter following a digit ("100ms" ->
    # "100Ms", "1s" -> "1S"); the replacements restore the unit spelling.
    title = (row['collection'] + ': ' + row['name']).title().replace('0M', '0m').replace('1S', '1s')

    fig = figure(title=title, sizing_mode='scale_width', x_axis_type=axis_type, y_axis_type=axis_type)
    fig.line(x=part.threads, y=part.rate)
    fig.circle(x=part.threads, y=part.rate)
    fig.xaxis.axis_label = 'threads'
    fig.yaxis.axis_label = row['unit']

    if axis_type != 'log':
        # Anchor both axes at the origin. Skipped for log axes, where zero
        # has no position and the range start must stay positive.
        fig.x_range.start = 0
        fig.y_range.start = 0

    fig.xaxis.ticker = part.threads

    return fig

In [3]:
def scaling_plot(part, axis_type='linear'):
    """Plot rate against core count for one benchmark, with an ideal-scaling line.

    Parameters
    ----------
    part : pandas.DataFrame
        Rows belonging to a single benchmark. The columns ``collection``,
        ``name``, ``unit``, ``n`` and ``rate`` are read; the title and the
        y-axis label are taken from the first row.
    axis_type : str, optional
        Forwarded to ``figure`` as both ``x_axis_type`` and ``y_axis_type``.

    Returns
    -------
    The figure object created by ``figure``. It holds the measured curve
    (line plus circle markers), a dashed gray reference line extrapolating
    the per-core rate measured at the smallest ``n``, and one x tick per
    ``n`` value.
    """
    row = part.iloc[0]
    # str.title() capitalises the letter following a digit ("100ms" ->
    # "100Ms", "1s" -> "1S"); the replacements restore the unit spelling.
    title = (row['collection'] + ': ' + row['name']).title().replace('0M', '0m').replace('1S', '1s')
    log_axes = axis_type == 'log'

    fig = figure(title=title, sizing_mode='scale_width', x_axis_type=axis_type, y_axis_type=axis_type)
    fig.line(x=part.n, y=part.rate)
    fig.circle(x=part.n, y=part.rate)
    fig.xaxis.axis_label = 'cores'
    fig.yaxis.axis_label = row['unit']

    if not log_axes:
        # Anchor both axes at the origin. Skipped for log axes, where zero
        # has no position and the range start must stay positive.
        fig.x_range.start = 0
        fig.y_range.start = 0

    # Perfect-scaling reference: the rate per core at the smallest core count,
    # extended linearly up to the largest core count.
    mn = part.n.min()
    mx = part.n.max()
    slope = part[part.n == mn].iloc[0]['rate'] / mn
    # On linear axes the reference starts at the origin; on log axes it starts
    # at the first measured point because (0, 0) cannot be drawn.
    x0 = mn if log_axes else 0
    fig.line(x=[x0, mx], y=[slope * x0, slope * mx], color='gray', line_dash='dashed')
    # Cap the y-axis at the best measured rate, so the reference line may run
    # off the top of the plot.
    fig.y_range.end = part.rate.max()

    fig.xaxis.ticker = part.n

    return fig

In [4]:
# Load the scaling measurements from CSV and preview the first five rows.
csv_path = 'scaling-data-cores.csv'
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,collection,name,n,workers,threads,unit,duration,rate
0,arrays,blockwise 100ms tasks,4,1,4,MB/s,0.253265,505.399674
1,arrays,blockwise 100ms tasks,8,2,4,MB/s,0.371806,688.322911
2,arrays,blockwise 100ms tasks,16,4,4,MB/s,0.30956,1653.958862
3,arrays,blockwise 100ms tasks,32,8,4,MB/s,0.476441,2149.000511
4,arrays,blockwise 100ms tasks,64,16,4,MB/s,0.634127,3229.637466


In [5]:
# Build one scaling figure per (collection, name) benchmark. The result is a
# Series of figures indexed by those two keys.
# The column subset is given as a list: indexing a GroupBy with a bare tuple
# of several column names raises in pandas >= 2.0. The grouping keys are
# selected explicitly because scaling_plot reads them from each group.
df2 = (
    df.groupby(['collection', 'name'])
    [['collection', 'name', 'n', 'rate', 'unit', 'threads']]
    .apply(scaling_plot)
)
df2.head()

collection  name                        
arrays      blockwise 100ms tasks           Figure(id='1002', ...)
            create random                   Figure(id='1053', ...)
            elementwise computation         Figure(id='1104', ...)
            nearest neighbor 100ms tasks    Figure(id='1155', ...)
            nearest neighbor fast tasks     Figure(id='1206', ...)
dtype: object

In [6]:
from toolz import partition_all

# Task benchmarks to display, in reading order.
names = [
    'task map 1s tasks',
    'task map 100ms tasks',
    'task map fast tasks',
    'tree reduction 100ms tasks',
    'tree reduction fast tasks',
    'sequential',
    'nearest neighbor 100ms tasks',
    'nearest neighbor fast tasks',
    'dynamic tree reduction 100ms tasks',
    'dynamic tree reduction fast tasks',
]

# Pick the matching figures from the 'tasks' collection and lay them out
# three per row.
L = df2.loc['tasks'].loc[names].tolist()
grid = [figures_in_row for figures_in_row in partition_all(3, L)]
show(gridplot(grid, sizing_mode='scale_width'))

In [7]:
from toolz import partition_all

# Array benchmarks to display, in reading order.
names = [
    'create random',
    'blockwise 100ms tasks',
    'elementwise computation',
    'reduction',
    'reduction along axis',
    'random access',
    'transpose addition',
    'rechunk large',
    'nearest neighbor fast tasks',
    'nearest neighbor 100ms tasks',
]

# Pick the matching figures from the 'arrays' collection and lay them out
# three per row.
L = df2.loc['arrays'].loc[names].tolist()
grid = [figures_in_row for figures_in_row in partition_all(3, L)]
show(gridplot(grid, sizing_mode='scale_width'))

In [8]:
# Every figure in the 'dataframes' collection is shown, so no name filter is
# applied here. The figures are laid out three per row.
L = df2.loc['dataframes'].values.tolist()
grid = list(partition_all(3, L))
show(gridplot(grid, sizing_mode='scale_width'))