In [1]:
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import sys
import itertools
from collections import namedtuple
from pathlib import Path

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

%load_ext autoreload
%autoreload 1
%aimport analyze

# Overlay the project's plot style on matplotlib's current rcParams
with open('plots/style.json') as style_file:
    style_overrides = json.load(style_file)
mpl.rcParams.update(style_overrides)

## Wildstyle failures

In [2]:
# Build the Analysis object for the results/wildstyle directory and print its
# one-line description (code version and system name)
ws = analyze.Analysis('results/wildstyle')
print(ws)

Analysis for Celeritas v0.3.0-dev-5+0dcab606 on wildstyle


In [3]:
# Table of failure messages, one entry per failed run instance; shown as a
# DataFrame for rich display
ftab = analyze.make_failure_table(ws.failures())
ftab.to_frame()

Unnamed: 0,Failure
cms2018+field+msc/vecgeom+cpu (0),internal assertion failed: `mfp > 0` at `Along...
cms2018+field+msc/vecgeom+cpu (1),internal assertion failed: `step_limit->action...
cms2018+field+msc/vecgeom+gpu (0),`celeritas: internal assertion failed: succeeded`
cms2018+field+msc/vecgeom+gpu (1),`celeritas: internal assertion failed: succeeded`
cms2018/vecgeom+cpu (0),internal assertion failed: `speed > 0` at `Alo...
cms2018/vecgeom+cpu (1),internal assertion failed: `speed > 0` at `Alo...
cms2018/vecgeom+gpu (0),`celeritas: internal assertion failed: speed > 0`
cms2018/vecgeom+gpu (1),`celeritas: internal assertion failed: speed > 0`
simple-cms+field+msc/orange+cpu (0),internal assertion failed: `p.distance < local...
simple-cms+field+msc/orange+cpu (1),internal assertion failed: `init.volume` at `O...


In [4]:
# Names of the failed runs whose failure message contains 'is_soft'
soft_failures = [label for (label, message) in ftab.items()
                 if 'is_soft' in message]
print("\n".join(soft_failures))

simple-cms+field+msc/orange+gpu (0)
simple-cms+field+msc/orange+gpu (1)
simple-cms+field/orange+gpu (0)
simple-cms+field/orange+gpu (1)
simple-cms+msc/orange+gpu (0)
simple-cms+msc/orange+gpu (1)
testem15+field+msc/orange+gpu (0)
testem15+field+msc/orange+gpu (1)
testem15+field/orange+gpu (0)
testem15+field/orange+gpu (1)
testem15/orange+gpu (0)
testem15/orange+gpu (1)
testem3-flat+field/orange+gpu (0)
testem3-flat+field/orange+gpu (1)
testem3-flat+msc/orange+gpu (0)
testem3-flat+msc/orange+gpu (1)
testem3-flat/orange+gpu (0)
testem3-flat/orange+gpu (1)


In [5]:
# Summarize the 'unconverged' result column across run instances.
# NOTE: `summed` is rebound to the Summit summary further down the notebook.
summed = analyze.summarize_instances(ws.result[['unconverged']])

Unconverged tracks (from runs that didn't fail):

In [6]:
# Mean unconverged-track count per run; show only the runs where it is nonzero
unconv = summed[('unconverged', 'mean')]
has_unconverged = unconv > 0
unconv[has_unconverged].unstack(level='arch')

Unnamed: 0_level_0,arch,gpu
problem,geo,Unnamed: 2_level_1
simple-cms+field+msc,vecgeom,1.0


## Summit results

In [7]:
summit = analyze.Analysis('results/summit')
print(summit)

# Summary statistics use only the successful runs; rows with no data at all
# are discarded before summarizing
successful_results = summit.result[summit.successful].dropna(how='all')
summed = analyze.summarize_instances(successful_results)

Analysis for Celeritas v0.3.0-dev-18+865fc2ff on summit


In [8]:
# Load the detailed results for the testem3-flat/orange/gpu run (second
# argument 0: presumably the instance index -- TODO confirm) and show the
# device properties recorded with it
deets = summit.load_results(('testem3-flat','orange','gpu'), 0)
deets['system']['device']

{'capability_major': 7,
 'capability_minor': 0,
 'clock_rate': 1530000,
 'default_block_size': 256,
 'device_id': 0,
 'eu_per_cu': 1,
 'max_blocks_per_grid': 2147483647,
 'max_blocks_per_multiprocessor': 32,
 'max_cache_size': 6291456,
 'max_threads_per_block': 1024,
 'max_threads_per_cu': 2048,
 'memory_clock_rate': 877000,
 'multiprocessor_count': 80,
 'name': 'Tesla V100-SXM2-16GB',
 'platform': 'cuda',
 'regs_per_block': 65536,
 'regs_per_multiprocessor': 65536,
 'shared_mem_per_block': 49152,
 'threads_per_warp': 32,
 'total_const_mem': 65536,
 'total_global_mem': 16911433728}

### Failures

Average number of unconverged tracks:

In [9]:
# Mean unconverged-track count per run on Summit; show only the nonzero ones
unconv_summary = analyze.summarize_instances(summit.result['unconverged'])
unconv = unconv_summary['mean']
has_unconverged = unconv > 0
unconv[has_unconverged].unstack(level='arch')

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018+field+msc,vecgeom,1.166667,11.666667
simple-cms+field,orange,,0.166667
simple-cms+field+msc,orange,0.5,0.166667
simple-cms+field+msc,vecgeom,0.5,0.333333


### Timing tables

In [10]:
fig, (run_ax, setup_ax) = plt.subplots(
    nrows=2,
    gridspec_kw={'height_ratios': [3, 1]},
    subplot_kw={'yscale': 'log'},
)

# Top panel: total run time, with its x tick labels hidden
summit.plot_results(run_ax, summed['total_time'])
run_ax.legend()
run_ax.set_ylabel('Run [s]')
run_ax.tick_params(labelbottom=False)

# Bottom panel: setup time
summit.plot_results(setup_ax, summed['setup_time'])
setup_ax.set_ylabel('Setup [s]')

analyze.annotate_metadata(run_ax, summit)
fig.savefig('plots/timing.pdf', transparent=True)
plt.close()

In [11]:
def float_fmt_transform(digits):
    """Return a formatter that renders numbers with a fixed number of decimals.

    Parameters
    ----------
    digits : int
        Number of digits to show after the decimal point.

    Returns
    -------
    callable
        Function mapping a scalar to its fixed-point string. Missing values
        (NaN, None, pd.NA) are rendered as "---".
    """
    template = "{{:.{}f}}".format(digits)

    def transform(val):
        # pd.isna also recognizes None and pd.NA, which np.isnan rejects
        # with a TypeError
        if pd.isna(val):
            return "---"
        return template.format(val)

    return transform

# Mean total run time with one row per (problem, geo) and one column per arch;
# shown with two decimals
times = summed[('total_time', 'mean')].unstack()
times.style.format(float_fmt_transform(2))

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,125.88,14.28
cms2018+field+msc,vecgeom,373.05,---
simple-cms+field,orange,87.38,6.34
simple-cms+field+msc,orange,107.29,8.42
simple-cms+field+msc,vecgeom,108.48,9.45
simple-cms+msc,orange,84.05,3.86
testem15,orange,59.76,2.64
testem15+field,orange,76.45,2.65
testem15+field+msc,orange,93.21,3.02
testem15+field+msc,vecgeom,91.74,3.05


In [12]:
# Event rates for the Summit runs; display the mean rate of the
# simple-cms+field+msc problem by geometry (rows) and architecture (columns)
event_rate = analyze.calc_event_rate(summit, summed)
event_rate['mean'].xs('simple-cms+field+msc', level='problem').unstack('arch')

arch,cpu,gpu
geo,Unnamed: 1_level_1,Unnamed: 2_level_1
orange,0.065242,0.830912
vecgeom,0.06453,0.740715


In [13]:
# CPU/GPU ratio of total run time (mean and std columns); rows containing a
# missing value are hidden and numbers are shown with one decimal
speedup = analyze.get_cpugpu_ratio(summed['total_time'])
speedup.dropna().style.format(float_fmt_transform(1))

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,8.8,0.1
simple-cms+field,orange,13.8,1.3
simple-cms+field+msc,orange,12.7,1.0
simple-cms+field+msc,vecgeom,11.5,2.9
simple-cms+msc,orange,21.8,0.2
testem15,orange,22.6,0.2
testem15+field,orange,28.8,3.6
testem15+field+msc,orange,30.8,3.2
testem15+field+msc,vecgeom,30.1,3.0
testem3-flat,orange,23.9,0.2


### Plots

In [14]:
problems = summit.problems()
problem_to_abbr = summit.problem_to_abbr(problems)
# Integer position of each problem, used as its x coordinate when plotting
p_to_i = {name: position for (position, name) in enumerate(problems)}

In [15]:
(fig, ax) = plt.subplots()
summit.plot_results(ax, speedup)
ax.set_ylabel("Speedup (7-CPU / 1-GPU wall time)")
# Anchor the y axis at zero and leave the upper limit automatic
ax.set_ylim(bottom=0)
analyze.annotate_metadata(ax, summit)
fig.savefig('plots/speedups.pdf', transparent=True)
plt.close()

In [16]:
fig, axes = plt.subplots(nrows=2, figsize=(4, 4), subplot_kw={'yscale': 'log'})
bottom_ax = axes[-1]
for (ax, quantity) in zip(axes, ['step', 'primary']):
    # The summary holds time per step/primary; invert it to plot a rate
    rate = analyze.inverse_summary(summed['avg_time_per_' + quantity])
    summit.plot_results(ax, rate)
    ax.set_ylabel(quantity + ' per sec')
    # Only the bottom panel keeps its x tick labels
    if ax is not bottom_ax:
        ax.tick_params(labelbottom=False)
    ax.legend()
fig.savefig('plots/steps-vs-primaries.png', dpi=300)
plt.close()

In [17]:
(fig, (time_ax, occ_ax)) = plt.subplots(
    nrows=2, figsize=(4, 4),
    gridspec_kw=dict(height_ratios=[3, 1])
)

# Top panel: event rate on a log scale, with its x tick labels hidden
time_ax.set_yscale('log')
summit.plot_results(time_ax, event_rate)
time_ax.set_ylabel(r"Event rate [1/s]")
# Start the y axis at half of the smallest mean rate; upper limit stays automatic
time_ax.set_ylim([0.5 * event_rate['mean'].min(), None])
time_ax.legend()
time_ax.tick_params(labelbottom=False)

# Bottom panel: slot occupancy
summit.plot_results(occ_ax, summed['slot_occupancy'])
occ_ax.set_ylabel("Slot occupancy")

# Annotate an axes that belongs to THIS figure; a bare `ax` here would be
# whatever axes object an earlier cell happened to leave in the namespace
analyze.annotate_metadata(time_ax, summit)
fig.savefig('plots/rate-occupancy.pdf', transparent=True)
plt.close()

## Action fraction pie charts


In [18]:
# Mean total run time per run.
# NOTE(review): selecting a single column presumably yields a Series, for
# which .T is a no-op -- confirm before removing it.
avg_time = summed[('total_time', 'mean')].T
# Mean time per action, transposed so that actions are rows and runs are columns
mean_action_times = summit.action_times().xs('mean', axis=1, level=1).T

In [19]:
# Divide each run's mean action times by that run's mean total time, drop the
# runs (columns) that have no data at all, and show two decimals
mean_action_times.divide(avg_time, axis=1).dropna(how='all', axis=1).style.format(float_fmt_transform(2))

problem,cms2018,cms2018,cms2018+field+msc,simple-cms+field,simple-cms+field,simple-cms+field+msc,simple-cms+field+msc,simple-cms+field+msc,simple-cms+field+msc,simple-cms+msc,simple-cms+msc,testem15,testem15+field,testem15+field+msc,testem15+field+msc,testem3-flat,testem3-flat,testem3-flat+field,testem3-flat+msc
geo,vecgeom,vecgeom,vecgeom,orange,orange,orange,orange,vecgeom,vecgeom,orange,orange,orange,orange,orange,vecgeom,orange,vecgeom,orange,orange
arch,cpu,gpu,cpu,cpu,gpu,cpu,gpu,cpu,gpu,cpu,gpu,cpu,cpu,cpu,cpu,cpu,cpu,cpu,cpu
action_times,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3
along-step-uniform-msc,---,---,0.69,0.41,1.13,0.52,1.10,0.52,1.42,---,---,---,0.36,0.47,0.46,---,---,0.52,---
annihil-2-gamma,0.00,0.01,0.00,0.00,0.01,0.00,0.01,0.00,0.01,0.00,0.01,0.01,0.00,0.00,0.00,0.01,0.01,0.00,0.00
brems-rel,0.00,0.01,0.00,0.00,0.01,0.00,0.01,0.00,0.01,0.00,0.01,0.01,0.00,0.00,0.00,0.01,0.01,0.00,0.00
brems-sb,0.04,0.02,0.01,0.04,0.02,0.03,0.02,0.03,0.02,0.04,0.03,0.07,0.05,0.04,0.05,0.05,0.05,0.03,0.03
conv-bethe-heitler,0.01,0.01,0.00,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01
geo-boundary,0.12,0.18,0.07,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.00,0.00,0.06,0.10,0.04,0.03
ioni-moller-bhabha,0.00,0.01,0.00,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.00,0.00,0.00,0.01,0.01,0.00,0.00
photoel-livermore,0.03,0.02,0.01,0.02,0.02,0.02,0.01,0.02,0.02,0.02,0.03,0.04,0.03,0.02,0.02,0.03,0.03,0.02,0.02
physics-discrete-select,0.04,0.02,0.02,0.04,0.03,0.03,0.02,0.03,0.02,0.04,0.04,0.08,0.06,0.05,0.05,0.04,0.04,0.02,0.02
pre-step,0.26,0.12,0.13,0.27,0.16,0.22,0.12,0.22,0.11,0.28,0.23,0.35,0.28,0.23,0.23,0.40,0.43,0.24,0.28


In [20]:
# Problem name attached to each column of the action-time table
mean_action_times.columns.get_level_values('problem')

Index(['cms2018', 'cms2018', 'cms2018+field+msc', 'cms2018+field+msc',
       'simple-cms+field', 'simple-cms+field', 'simple-cms+field+msc',
       'simple-cms+field+msc', 'simple-cms+field+msc', 'simple-cms+field+msc',
       'simple-cms+msc', 'simple-cms+msc', 'testem15', 'testem15',
       'testem15+field', 'testem15+field', 'testem15+field+msc',
       'testem15+field+msc', 'testem15+field+msc', 'testem15+field+msc',
       'testem3-flat', 'testem3-flat', 'testem3-flat', 'testem3-flat',
       'testem3-flat+field', 'testem3-flat+field', 'testem3-flat+msc',
       'testem3-flat+msc'],
      dtype='object', name='problem')

In [21]:
geo = 'vecgeom'
available_problems = mean_action_times.columns.get_level_values('problem')

for prob in ["cms2018", "cms2018+field+msc"]:
    if prob not in available_problems:
        print("Missing problem:", prob)
        continue

    # One column per architecture; actions with a missing value are dropped
    per_arch = mean_action_times.xs(
        (prob, geo), axis=1, level=('problem', 'geo')
    ).dropna()

    # One pie chart (and one output file) per architecture
    for (arch, series) in per_arch.items():
        name = (prob, geo, arch)
        slashname = "/".join(name)
        dashname = "-".join(name)

        (fig, ax) = plt.subplots(figsize=(4, 4))
        ax.pie(series, labels=series.index, autopct='%1.1f%%', pctdistance=0.85)
        ax.axis('equal')
        # Small gray caption in the lower-right corner identifying the run
        fig.text(
            0.98, 0.02, f"{slashname}\n{summit.version} on {summit.system}",
            va='bottom', ha='right',
            fontstyle='italic', color=(0.5,)*3, size='xx-small',
            zorder=-100
        )
        fig.savefig(f'plots/actions-{dashname}.pdf', transparent=True)
        plt.close()

### Plot per-step timing on GPU

In [22]:
cms = [summit.load_results((problem, 'vecgeom', 'gpu'), 0)
       for problem in ('cms2018', 'cms2018+field+msc')]

# One two-panel figure per plotting function, keyed by the output file label
plotters = {
    'counts': analyze.plot_counts,
    'time': analyze.plot_accum_time,
}

for (label, plot) in plotters.items():
    (fig, axes) = plt.subplots(ncols=2, figsize=(8, 2))

    for (i, (ax, data)) in enumerate(zip(axes, cms)):
        objs = plot(ax, data)
        analyze.annotate_metadata(ax, data['_metadata'])
        # Clear one y label per panel: objs['oax'] on the first panel,
        # objs['ax'] on the second
        if i == 0:
            objs['oax'].set_ylabel(None)
        elif i == 1:
            objs['ax'].set_ylabel(None)
    fig.savefig(f'plots/cms-{label}.pdf', transparent=True)
    plt.close()

## Crusher

In [23]:
# Build the Analysis object for the results/crusher directory and print its
# one-line description (code version and system name)
crusher = analyze.Analysis('results/crusher')
print(crusher)

Analysis for Celeritas v0.3.0-dev-4+1954a8a2 on crusher


In [24]:
# VecGeom failures aren't really failures; just missing capability
# so only the ORANGE rows are kept. fillna(1) replaces missing entries so that
# count() below includes every row rather than only the non-null ones.
failures = crusher.failures().xs('orange', level='geo').fillna(1)
# Number of failure rows per problem (rows) and architecture (columns)
failures.groupby(['problem', 'arch']).count().unstack()

failure,stderr,stderr,stdout,stdout
arch,cpu,gpu,cpu,gpu
problem,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
simple-cms+field,2.0,,2.0,
simple-cms+field+msc,5.0,,5.0,
testem3-flat+field,5.0,8.0,5.0,8.0
testem3-flat+msc,7.0,8.0,7.0,8.0


In [25]:
# Summarize the Crusher results selected by `crusher.successful`, discarding
# rows that contain no data at all
csum = analyze.summarize_instances(crusher.result[crusher.successful].dropna(how='all'))

In [26]:
# Mean total run time on Crusher: (problem, geo) rows, one column per arch
csum[('total_time', 'mean')].unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,87.244826,8.200846
simple-cms+field+msc,orange,76.413252,12.333417
simple-cms+msc,orange,103.52194,2.655218
testem15,orange,88.516368,2.091297
testem15+field,orange,106.883987,2.376601
testem15+field+msc,orange,84.427658,2.712454
testem3-flat,orange,116.987989,3.902967
testem3-flat+field,orange,123.670175,
testem3-flat+msc,orange,128.854735,


In [27]:
# Relative spread (std / mean) of every summarized quantity
csum_std = csum.xs('std', axis=1, level=1)
csum_mean = csum.xs('mean', axis=1, level=1)
rel_err = csum_std / csum_mean

# Show only the entries above 2%, hiding rows and columns with nothing to show
high_err = rel_err > 0.02
rel_err[high_err].dropna(how='all').dropna(how='all', axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,avg_time_per_primary,avg_time_per_step,num_step_iters,pre_emptying_time,setup_time,slot_occupancy,total_time
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
simple-cms+field,orange,cpu,0.492626,0.492413,,0.527449,0.033776,,0.492626
simple-cms+field,orange,gpu,0.071036,0.070929,,0.046211,0.39417,,0.071036
simple-cms+field+msc,orange,cpu,0.304102,0.303822,,0.051567,0.179283,,0.304102
simple-cms+field+msc,orange,gpu,0.09618,0.095665,0.14565,0.078943,,0.125854,0.09618
simple-cms+msc,orange,cpu,0.471922,0.472096,,0.757852,0.571732,,0.471922
simple-cms+msc,orange,gpu,,,0.034525,,0.035021,0.033759,
testem15,orange,cpu,0.502677,0.50245,,0.571536,0.565115,,0.502677
testem15,orange,gpu,,,,,0.038337,,
testem15+field,orange,cpu,0.487951,0.487929,,0.640903,,,0.487951
testem15+field,orange,gpu,,,,,0.029455,,


In [28]:
# CPU/GPU ratio of total run time on Crusher (mean and std columns)
analyze.get_cpugpu_ratio(csum['total_time'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,10.638515,5.295017
simple-cms+field+msc,orange,6.195627,1.976092
simple-cms+msc,orange,38.988115,18.403161
testem15,orange,42.326054,21.277025
testem15+field,orange,44.97347,21.945325
testem15+field+msc,orange,31.125929,6.678695
testem3-flat,orange,29.974117,8.959984
testem3-flat+field,orange,,
testem3-flat+msc,orange,,


In [29]:
# Total-time statistics (count, mean, std) per Crusher run; the index of this
# table is reused later to pick the matching Summit runs
crusher_times = csum['total_time']
crusher_times

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
simple-cms+field,orange,cpu,6.0,87.244826,42.979077
simple-cms+field,orange,gpu,6.0,8.200846,0.582559
simple-cms+field+msc,orange,cpu,3.0,76.413252,23.237435
simple-cms+field+msc,orange,gpu,5.0,12.333417,1.186228
simple-cms+msc,orange,cpu,8.0,103.52194,48.854251
simple-cms+msc,orange,gpu,8.0,2.655218,0.025546
testem15,orange,cpu,8.0,88.516368,44.495182
testem15,orange,gpu,8.0,2.091297,0.00836
testem15+field,orange,cpu,8.0,106.883987,52.154121
testem15+field,orange,gpu,8.0,2.376601,0.007726


In [30]:
# Event rates on both machines; the Summit summary is restricted to the runs
# that also appear in the Crusher total-time table
crusher_rates = analyze.calc_event_rate(crusher, csum)
summit_rates = analyze.calc_event_rate(summit, summed.loc[crusher_times.index])

# Count shown in the comparison plot legend for each (machine, arch),
# e.g. "Summit (7 CPU)"
counts = {
    ('summit', 'cpu'): 7,
    ('summit', 'gpu'): 1,
    ('crusher', 'cpu'): 8,
    ('crusher', 'gpu'): 1,
}

In [31]:
# Ratio of Crusher to Summit mean event rate (above 1: Crusher's rate is higher)
(crusher_rates['mean'] / summit_rates['mean']).unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,1.00158,0.77256
simple-cms+field+msc,orange,1.40411,0.683061
simple-cms+msc,orange,0.811916,1.45518
testem15,orange,0.675135,1.263542
testem15+field,orange,0.715235,1.116394
testem15+field+msc,orange,1.104037,1.114626
testem3-flat,orange,0.88102,1.104065
testem3-flat+field,orange,1.403397,
testem3-flat+msc,orange,1.38752,


In [32]:
fig, ax = plt.subplots()
ax.set_yscale('log')

# Each machine is shifted slightly left/right of the problem's tick position
# so that its markers do not sit on top of the other machine's
for offset, color, machine, rates in [(-0.05, '#7A954F', 'Summit', summit_rates),
                                      (0.05, '#BC5544', 'Crusher', crusher_rates)]:
    for arch in ['cpu', 'gpu']:
        summary = rates.xs(arch, level='arch')
        index = np.array([p_to_i[p]
                          for p in summary.index.get_level_values('problem')], dtype=float)
        index += offset

        mark = analyze.ARCH_SHAPES[arch]
        count = counts[(machine.lower(), arch)]
        # Error bars only (fmt='none'); the markers come from the scatter call
        ax.errorbar(index, summary['mean'], summary['std'],
                    capsize=0, fmt='none', ecolor=(0.2,)*3)
        ax.scatter(index, summary['mean'], c=color, marker=mark,
                   label=f"{machine} ({count} {arch.upper()})")

# One tick per problem, labeled with the problem's abbreviation
xax = ax.get_xaxis()
xax.set_ticks(np.arange(len(problems)))
xax.set_ticklabels(list(problem_to_abbr.values()), rotation=90)
ax.grid()
ax.set_axisbelow(True)
ax.legend()
ax.set_ylabel(r"Event rate [1/s]")
analyze.annotate_metadata(ax, summit)
# Transparent background, consistent with the other PDF figures in this notebook
fig.savefig('plots/crusher-vs-summit.pdf', transparent=True)
plt.close()