In [1]:
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import sys
import itertools
from collections import namedtuple
from pathlib import Path

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

%load_ext autoreload
%autoreload 1
%aimport analyze

# Apply the rcParams overrides stored in plots/style.json to matplotlib's
# global configuration, so they affect every figure created afterwards.
with open('plots/style.json') as f:
    mpl.rcParams.update(json.load(f))

In [2]:
# Load the Summit results; printing the analysis shows its version/system banner.
summit = analyze.Analysis('results/summit')
print(summit)
# One representative run, used by the following cells only for its 'system'
# and 'input' metadata. The trailing 0 is presumably the instance index —
# confirm against analyze.Analysis.load_results.
some_results = summit.load_results(('testem3-flat','orange','gpu'), 0)

Analysis for Celeritas v0.1.4 on summit


In [3]:
# Multiprocessor count recorded for the device in the sample run.
# NOTE(review): not referenced by any other visible cell.
mp_per_gpu = some_results['system']['device']['multiprocessor_count']

In [4]:
# Number of primaries generated per event, read from the sample run's input.
inp = some_results['input']
primaries_per_event = inp['primary_gen_options']['primaries_per_event']
def calc_event_rate(summary):
    """Convert a per-primary timing summary into an event-rate summary.

    Inverts ``summary['avg_time_per_primary']`` with
    ``analyze.inverse_summary`` and then divides the resulting ``mean`` and
    ``std`` columns by the module-level ``primaries_per_event``.

    Returns the frame produced by ``analyze.inverse_summary`` with those two
    columns rescaled.
    """
    rate = analyze.inverse_summary(summary['avg_time_per_primary'])
    # Both statistics scale by the same constant factor.
    for stat in ('mean', 'std'):
        rate[stat] /= primaries_per_event
    return rate

In [5]:
# 'stderr' entry of every failure record; missing entries become 0 so the
# truthiness check below skips them.
failures = summit.failures()['stderr'].fillna(0)

for key, lines in failures.items():
    print("="*78)
    print(key)
    if lines:
        # Show only the last three stderr lines.
        # NOTE(review): the join separator puts " - " at the END of each line
        # (before the newline) rather than at the start — confirm intended.
        print("   " + " - \n".join(lines[-3:]))

('cms2018+field+msc', 'vecgeom', 'cpu', 0)
('cms2018+field+msc', 'vecgeom', 'cpu', 2)
('cms2018+field+msc', 'vecgeom', 'cpu', 4)
('cms2018+field+msc', 'vecgeom', 'cpu', 6)
('cms2018+field+msc', 'vecgeom', 'cpu', 8)
('cms2018+field+msc', 'vecgeom', 'cpu', 10)
('simple-cms+field+msc', 'orange', 'cpu', 2)
('simple-cms+field+msc', 'orange', 'cpu', 4)
('simple-cms+field+msc', 'orange', 'cpu', 6)
('simple-cms+field+msc', 'orange', 'cpu', 8)
('simple-cms+field+msc', 'vecgeom', 'cpu', 0)
('simple-cms+field+msc', 'vecgeom', 'cpu', 2)
('simple-cms+field+msc', 'vecgeom', 'cpu', 4)
('simple-cms+field+msc', 'vecgeom', 'cpu', 6)
('simple-cms+field', 'orange', 'cpu', 3)
('testem3-flat+field', 'orange', 'cpu', 0)
('testem3-flat+field', 'orange', 'cpu', 1)
ERROR:  One or more process (first noticed rank 0) terminated with signal 6
('testem3-flat+field', 'orange', 'cpu', 2)
('testem3-flat+field', 'orange', 'cpu', 4)
('testem3-flat+field', 'orange', 'cpu', 5)
ERROR:  One or more process (first noticed ra

In [6]:
# Number of failure records per (problem, geo), split into one column per
# arch; combinations without any record show as 0.
failures.groupby(['problem', 'geo', 'arch']).count().unstack().fillna(0)

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018+field+msc,vecgeom,6.0,0.0
simple-cms+field,orange,1.0,0.0
simple-cms+field+msc,orange,4.0,0.0
simple-cms+field+msc,vecgeom,4.0,0.0
testem3-flat+field,orange,8.0,8.0
testem3-flat+msc,orange,10.0,12.0


In [7]:
# Summarize the selected result columns with analyze.summarize_instances
# (later cells read 'count'/'mean'/'std' sub-columns from the result).
# NOTE(review): the Crusher summary further down first drops rows flagged by
# `crusher.invalid`; confirm whether the same filter is wanted for Summit.
summed = analyze.summarize_instances(summit.result[[
    'avg_steps_per_primary',
    'avg_time_per_primary',
    'avg_time_per_step',
    'num_steps',
    'total_time',
    'unconverged',
    'slot_occupancy']])
#summed.xs('mean', axis=1, level=1).to_csv('summit.csv')

In [8]:
# NOTE(review): scratch check of dict.setdefault (inserts the default and
# returns it). `a` is not used by any other visible cell, so this cell looks
# like leftover experimentation and is a candidate for removal.
a = {}
a.setdefault('foo', 0)

0

In [9]:
# num_primaries of GPU instance 0 for every (problem, geo); NaN marks a
# configuration with no recorded value.
summit.result['num_primaries'].xs(('gpu', 0), level=('arch', 'instance'))

problem               geo    
cms2018+field+msc     vecgeom    9100.0
cms2018               vecgeom    9100.0
simple-cms+field+msc  orange     9100.0
                      vecgeom    9100.0
simple-cms+field      orange     9100.0
simple-cms+msc        orange     9100.0
testem15+field+msc    orange     9100.0
                      vecgeom    9100.0
testem15+field        orange     9100.0
testem15              orange     9100.0
testem3-flat+field    orange     9100.0
testem3-flat+msc      orange        NaN
testem3-flat          orange     9100.0
                      vecgeom    9100.0
Name: num_primaries, dtype: float64

In [10]:
# Mean total_time per (problem, geo), with the innermost index level (arch)
# pivoted into columns.
summed[('total_time', 'mean')].unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,107.025782,11.821177
cms2018+field+msc,vecgeom,1131.321589,64.939373
simple-cms+field,orange,307.741126,9.713818
simple-cms+field+msc,orange,1029.322475,16.968421
simple-cms+field+msc,vecgeom,1045.083569,7.181184
simple-cms+msc,orange,77.541897,3.570815
testem15,orange,54.209993,2.615673
testem15+field,orange,62.112445,2.299341
testem15+field+msc,orange,77.068047,2.584099
testem15+field+msc,vecgeom,74.976561,2.69679


In [11]:
# Per-primary time statistics for the testem3-flat problem only.
summed['avg_time_per_primary'].xs('testem3-flat', level='problem')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std
geo,arch,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
orange,cpu,6.0,0.023879,8.1e-05
orange,gpu,6.0,0.000427,3.7e-05
vecgeom,cpu,6.0,0.022833,0.000157
vecgeom,gpu,6.0,0.000462,3.2e-05


In [12]:
# Keep only the configurations whose mean 'unconverged' value is positive.
unconv = summed['unconverged']
unconv[unconv['mean'] > 0]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cms2018+field+msc,vecgeom,cpu,6.0,1.666667,1.36626
cms2018+field+msc,vecgeom,gpu,6.0,6.0,2.828427
simple-cms+field,orange,cpu,6.0,0.166667,0.408248
simple-cms+field,orange,gpu,6.0,0.666667,1.632993
simple-cms+field+msc,orange,cpu,6.0,0.833333,0.752773
simple-cms+field+msc,orange,gpu,6.0,0.333333,0.516398
simple-cms+field+msc,vecgeom,cpu,6.0,0.833333,0.752773


In [13]:
# Problem names, their abbreviations (used as x tick labels further down),
# and each problem's integer x position in the comparison plot.
problems = summit.problems()
problem_to_abbr = summit.problem_to_abbr(problems)
p_to_i = {name: position for position, name in enumerate(problems)}

In [14]:
# Ratio of CPU to GPU total time as computed by analyze.get_cpugpu_ratio
# (plotted below as the speedup).
speedup = analyze.get_cpugpu_ratio(summed['total_time'])
fig, ax = plt.subplots()
summit.plot_results(ax, speedup)
ax.set_ylabel("Speedup (7-CPU / 1-GPU wall time)")
# Anchor the y axis at zero and leave the upper limit automatic.
ax.set_ylim([0, None])
analyze.annotate_metadata(ax, summit);
fig.savefig('plots/speedups.pdf', transparent=True)
# Close the current figure once it has been written to disk.
plt.close()

In [15]:
# Two stacked log-scale panels: per-step rate on top, per-primary rate below.
fig, axes = plt.subplots(nrows=2, figsize=(4,4), subplot_kw=dict(yscale='log'))
for (ax, q) in zip(axes, ['step', 'primary']):
    # Invert the time-per-<q> summary so the panel shows a rate.
    summit.plot_results(ax, analyze.inverse_summary(summed['avg_time_per_' + q]))
    ax.set_ylabel(q + ' per sec')
    ax.legend()
# NOTE: `ax` stays bound to the last panel of this figure after the loop.
fig.savefig('plots/steps-vs-primaries.png', dpi=300)
plt.close()

In [16]:
# Event-rate summary for every summarized Summit configuration.
event_rate = calc_event_rate(summed)

In [17]:
# Event rates for the testem3-flat problem only.
event_rate.xs('testem3-flat', level='problem')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std
geo,arch,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
orange,cpu,6.0,0.032214,0.000109
orange,gpu,6.0,1.802425,0.154971
vecgeom,cpu,6.0,0.03369,0.000231
vecgeom,gpu,6.0,1.665989,0.113981


In [18]:
# Two stacked panels: event rate (log scale, 3/4 of the height) above slot
# occupancy (1/4 of the height).
(fig, (time_ax, occ_ax)) = plt.subplots(
    nrows=2, figsize=(4, 4),
    gridspec_kw=dict(height_ratios=[3, 1])
)
time_ax.set_yscale('log')
summit.plot_results(time_ax, event_rate)
time_ax.set_ylabel(r"Event rate [1/s]")
time_ax.legend()
# Hide the upper panel's x tick labels; the lower panel keeps its own.
time_ax.set_xticklabels([])
summit.plot_results(occ_ax, summed['slot_occupancy'])
occ_ax.set_ylabel("Slot occupancy")
# Annotate an axes that belongs to THIS figure. The previous code passed the
# stale global `ax` left over from an earlier cell's loop, so the annotation
# landed on an already-closed figure and never reached the saved PDF.
analyze.annotate_metadata(occ_ax, summit)
fig.savefig('plots/rate-occupancy.pdf', transparent=True)
# Close this specific figure rather than whichever one is "current".
plt.close(fig)

In [19]:
# Speedup table formatted to one decimal place; rows containing NaN are
# dropped first.
# NOTE(review): DataFrame.applymap is deprecated as of pandas 2.1 in favor of
# DataFrame.map — switch once the minimum supported pandas version allows it.
speedup.dropna().applymap("{:.1f}".format)

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,9.1,0.3
cms2018+field+msc,vecgeom,17.4,7.6
simple-cms+field,orange,31.7,68.0
simple-cms+field+msc,orange,60.7,58.3
simple-cms+field+msc,vecgeom,145.5,105.2
simple-cms+msc,orange,21.7,1.5
testem15,orange,20.7,0.1
testem15+field,orange,27.0,2.6
testem15+field+msc,orange,29.8,3.8
testem15+field+msc,vecgeom,27.8,3.0


## Action fraction pie charts


In [20]:
# Mean values from summit.action_times(), transposed so that the
# (problem, geo, ...) levels selected in the next cell become columns, with
# rows sorted by index.
mean_action_times = (
    summit.action_times()
    .xs('mean', axis=1, level=1)
    .T
    .sort_index()
)

In [21]:
# One pie chart of mean action times per remaining column (named `arch`
# below) for cms2018/vecgeom, with and without the "+field+msc" suffix.
for ext in ["", "+field+msc"]:
    prob_geo = ('cms2018'+ext, 'vecgeom')
    # NOTE(review): dropna() removes every row that is NaN in ANY column, so
    # a row present for only one arch is left out of all pies — confirm intended.
    temp = mean_action_times.xs(prob_geo, axis=1, level=('problem', 'geo')).dropna()

    for (arch, series) in temp.items():
        (fig, ax) = plt.subplots(figsize=(4, 4))
        ax.pie(series, labels=series.index, autopct='%1.1f%%', pctdistance=0.85)
        # Equal axis scaling.
        ax.axis('equal')
        name = prob_geo + (arch,)
        slashname = "/".join(name)
        # Small grey provenance note anchored at the lower-right of the figure.
        fig.text(
            0.98, 0.02, f"{slashname}\n{summit.version} on {summit.system}",
            va='bottom', ha='right',
            fontstyle='italic', color=(0.5,)*3, size='xx-small',
            zorder=-100
        )
        # File name uses dashes because the slash form is not a valid file stem.
        dashname = "-".join(name)
        fig.savefig(f'plots/{dashname}.pdf', transparent=True)
        # Close each figure as soon as it is saved.
        plt.close()

### Plot per-step timing on GPU

In [22]:
# GPU/vecgeom results for the two cms2018 variants (second argument 0 is
# presumably the instance index — confirm in analyze).
cms = [summit.load_results((p, 'vecgeom', 'gpu'), 0)
       for p in ['cms2018', 'cms2018+field+msc']]

# One two-panel figure per plotting helper, one panel per cms2018 variant.
for plot, label in [(analyze.plot_counts, 'counts'),
                    (analyze.plot_accum_time, 'time')]:
    (fig, axes) = plt.subplots(ncols=2, figsize=(8, 2))
    
    for (i, ax, data) in zip(itertools.count(), axes, cms):
        objs = plot(ax, data)
        analyze.annotate_metadata(ax, data['_metadata'])
        # Clear one y label per panel: the 'oax' label on the left panel and
        # the 'ax' label on the right one ('oax' is presumably a secondary
        # axis created by the helper — confirm in analyze).
        if i == 0:
            objs['oax'].set_ylabel(None)
        elif i == 1:
            objs['ax'].set_ylabel(None)
    fig.savefig(f'plots/cms-{label}.pdf', transparent=True)
    plt.close()

## Crusher

In [23]:
# Load the Crusher results and print the analysis banner (version and system).
crusher = analyze.Analysis('results/crusher')
print(crusher)

Analysis for Celeritas v0.1.4+374bda2d on crusher


In [24]:
# 'stderr' entry of every Crusher failure record for the 'orange' geometry;
# missing entries become 0 so the truthiness check below skips them.
# NOTE: this rebinds `failures`, replacing the Summit series of the same name.
failures = crusher.failures().xs('orange', level='geo')['stderr'].fillna(0)

for key, lines in failures.items():
    print("="*78)
    print(key)
    if lines:
        # Show only the last three stderr lines.
        # NOTE(review): the join separator puts " - " at the END of each line
        # (before the newline) rather than at the start — confirm intended.
        print("   " + " - \n".join(lines[-3:]))

('simple-cms+field+msc', 'cpu', 0)
('simple-cms+field+msc', 'cpu', 2)
('simple-cms+field+msc', 'cpu', 4)
('simple-cms+field+msc', 'cpu', 6)
('simple-cms+field+msc', 'cpu', 8)
('simple-cms+field+msc', 'cpu', 10)
('simple-cms+field+msc', 'cpu', 12)
('simple-cms+field+msc', 'cpu', 14)
('simple-cms+field+msc', 'gpu', 0)
('simple-cms+field+msc', 'gpu', 2)
('simple-cms+field+msc', 'gpu', 4)
('simple-cms+field+msc', 'gpu', 6)
('simple-cms+field+msc', 'gpu', 8)
('simple-cms+field+msc', 'gpu', 10)
('simple-cms+field+msc', 'gpu', 12)
('simple-cms+field+msc', 'gpu', 14)
('simple-cms+field', 'cpu', 0)
('simple-cms+field', 'cpu', 2)
('simple-cms+field', 'cpu', 4)
('simple-cms+field', 'cpu', 6)
('simple-cms+field', 'cpu', 8)
('simple-cms+field', 'cpu', 10)
('simple-cms+field', 'cpu', 12)
('simple-cms+field', 'cpu', 14)
('simple-cms+field', 'gpu', 0)
('simple-cms+field', 'gpu', 2)
('simple-cms+field', 'gpu', 3)
:0:rocdevice.cpp            :2614: 556111167842 us: 21116: [tid:0x7fffd4457700] Device::ca

In [25]:
# Number of Crusher failure records per problem, one column per arch.
failures.groupby(['problem', 'arch']).count().unstack()

arch,cpu,gpu
problem,Unnamed: 1_level_1,Unnamed: 2_level_1
simple-cms+field,8,11
simple-cms+field+msc,8,8
simple-cms+msc,8,8
testem15,8,8
testem15+field,8,8
testem15+field+msc,8,8
testem3-flat,8,8
testem3-flat+field,11,13
testem3-flat+msc,16,16


In [26]:
# Summarize the Crusher results over the same columns as the Summit summary,
# after dropping the rows selected by `crusher.invalid` (used here as a
# boolean row mask).
csum = analyze.summarize_instances(crusher.result[~crusher.invalid][[
    'avg_steps_per_primary',
    'avg_time_per_primary',
    'avg_time_per_step',
    'num_steps',
    'total_time',
    'unconverged',
    'slot_occupancy',
]])

In [27]:
# Mean Crusher total_time per (problem, geo), with the innermost index level
# (arch) pivoted into columns.
csum[('total_time', 'mean')].unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,214.099983,224.384868
simple-cms+field+msc,orange,490.961898,819.333431
simple-cms+msc,orange,83.966397,2.308912
testem15,orange,72.154267,1.71926
testem15+field,orange,70.418587,1.846559
testem15+field+msc,orange,90.417174,2.14447
testem3-flat,orange,93.115427,3.320142
testem3-flat+field,orange,103.390999,5.363526


In [28]:
# Relative spread (std / mean) of every summarized quantity.
rel_err = csum.xs('std', axis=1, level=1) / csum.xs('mean', axis=1, level=1)
# Flag entries whose relative spread exceeds 2%.
high_err = rel_err > 0.02
# Mask unflagged entries to NaN, then drop rows and columns that are all NaN.
rel_err[high_err].dropna(how='all').dropna(how='all', axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,avg_time_per_primary,avg_time_per_step,total_time,unconverged,slot_occupancy
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
simple-cms+field,orange,cpu,1.671232,1.669579,1.671232,2.828427,0.348094
simple-cms+field,orange,gpu,,,,0.037162,
simple-cms+field+msc,orange,cpu,1.199033,1.198704,1.199033,1.380131,0.675084
simple-cms+msc,orange,cpu,0.249874,0.249841,0.249874,,
simple-cms+msc,orange,gpu,,,,,0.033633
testem15,orange,cpu,0.278915,0.278906,0.278915,,
testem15,orange,gpu,,,,,0.020387
testem15+field,orange,cpu,0.168191,0.168273,0.168191,,
testem15+field,orange,gpu,,,,,0.026928
testem15+field+msc,orange,cpu,0.128287,0.128307,0.128287,,


In [29]:
# Ratio of CPU to GPU total time on Crusher, as computed by
# analyze.get_cpugpu_ratio.
analyze.get_cpugpu_ratio(csum['total_time'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,0.954164,1.594646
simple-cms+field+msc,orange,0.599221,0.718492
simple-cms+msc,orange,36.36621,9.100757
testem15,orange,41.96822,11.71031
testem15+field,orange,38.135027,6.422899
testem15+field+msc,orange,42.162954,5.414966
testem3-flat,orange,28.045614,7.392742
testem3-flat+field,orange,19.276684,7.553659


In [30]:
# Total-time statistics on Crusher; its index is reused below to select the
# matching Summit rows.
crusher_times = csum['total_time']
crusher_times

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
simple-cms+field,orange,cpu,8.0,214.099983,357.810659
simple-cms+field,orange,gpu,5.0,224.384868,1.721269
simple-cms+field+msc,orange,cpu,8.0,490.961898,588.679425
simple-cms+field+msc,orange,gpu,8.0,819.333431,3.995812
simple-cms+msc,orange,cpu,8.0,83.966397,20.981037
simple-cms+msc,orange,gpu,8.0,2.308912,0.031784
testem15,orange,cpu,8.0,72.154267,20.124918
testem15,orange,gpu,8.0,1.71926,0.013645
testem15+field,orange,cpu,8.0,70.418587,11.843777
testem15+field,orange,gpu,8.0,1.846559,0.016393


In [31]:
# NOTE(review): calc_event_rate divides by `primaries_per_event`, which was
# read from a Summit run; confirm the Crusher runs used the same value.
crusher_rates = calc_event_rate(csum)
# Restrict the Summit summary to the configurations present in the Crusher table.
summit_rates = calc_event_rate(summed.loc[crusher_times.index])

# (machine, arch) -> count shown in the comparison legend as "N CPU" / "N GPU".
counts = {
    ('summit', 'cpu'): 7,
    ('summit', 'gpu'): 1,
    ('crusher', 'cpu'): 8,
    ('crusher', 'gpu'): 1,
}

In [32]:
# Crusher-to-Summit ratio of mean event rates (values above 1 mean the
# Crusher rate is higher), one column per arch.
(crusher_rates['mean'] / summit_rates['mean']).unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,1.437371,0.043291
simple-cms+field+msc,orange,2.096542,0.02071
simple-cms+msc,orange,0.923487,1.546536
testem15,orange,0.751307,1.521395
testem15+field,orange,0.882046,1.245203
testem15+field+msc,orange,0.852361,1.205006
testem3-flat,orange,1.050401,1.169726
testem3-flat+field,orange,1.377536,0.743494


In [33]:
# Event rate per problem for both machines and both architectures on a single
# log-scale axis.
fig, ax = plt.subplots()
ax.set_yscale('log')
# Each machine gets its own colour and a small horizontal offset so its
# markers do not sit on top of the other machine's at the same problem.
for offset, color, machine, rates in [(-0.05, '#7A954F', 'Summit', summit_rates),
                                      (0.05, '#BC5544', 'Crusher', crusher_rates)]:
    for arch in ['cpu', 'gpu']:
        summary = rates.xs(arch, level='arch')
        # x position of every row: the problem's integer slot plus the offset.
        index = np.array([p_to_i[p]
                          for p in summary.index.get_level_values('problem')], dtype=float)
        index += offset

        mark = analyze.ARCH_SHAPES[arch]
        count = counts[(machine.lower(), arch)]
        # Error bars only (fmt='none'); the markers come from the scatter call.
        ax.errorbar(index, summary['mean'], summary['std'],
                    capsize=0, fmt='none', ecolor=(0.2,)*3)
        ax.scatter(index, summary['mean'], c=color, marker=mark,
                   label=f"{machine} ({count} {arch.upper()})")
xax = ax.get_xaxis()
xax.set_ticks(np.arange(len(problems)))
# Relies on problem_to_abbr preserving the order of `problems`.
xax.set_ticklabels(list(problem_to_abbr.values()), rotation=90)
ax.grid()
ax.set_axisbelow(True)
ax.legend()
ax.set_ylabel(r"Event rate [1/s]")
# NOTE(review): only the Summit analysis is passed for annotation although the
# plot also shows Crusher data — confirm that is the intended label.
analyze.annotate_metadata(ax, summit)
fig.savefig('plots/crusher-vs-summit.pdf')
# Close this specific figure rather than whichever one is "current".
plt.close(fig)