In [1]:
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import sys
import itertools
from collections import namedtuple
from pathlib import Path

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

%load_ext autoreload
%autoreload 1
%aimport analyze

# Apply the shared plot style: the JSON document is passed straight to
# matplotlib's rcParams as a set of overrides.
style_overrides = json.loads(Path('plots/style.json').read_text())
mpl.rcParams.update(style_overrides)

## Wildstyle failures

In [2]:
# Build the analysis object from the 'results/wildstyle' results path.
ws = analyze.Analysis('results/wildstyle')
# The printed form names the code version and the system (see saved output).
print(ws)

Analysis for Celeritas v0.2.0 on wildstyle


In [3]:
# Collect the failures reported by the wildstyle analysis into a table.
ftab = analyze.make_failure_table(ws.failures())
# Display as a DataFrame; in the saved output each row is labeled by
# problem/geometry+arch and carries the failure message.
ftab.to_frame()

Unnamed: 0,Failure
cms2018+field+msc/vecgeom+cpu (0),internal assertion failed: `mfp > 0` at `Along...
cms2018+field+msc/vecgeom+cpu (1),internal assertion failed: `mfp > 0` at `Along...
cms2018+field+msc/vecgeom+gpu (0),`celeritas: internal assertion failed: succeeded`
cms2018+field+msc/vecgeom+gpu (1),`celeritas: internal assertion failed: succeeded`
cms2018/vecgeom+cpu (0),internal assertion failed: `speed > 0` at `Alo...
cms2018/vecgeom+cpu (1),internal assertion failed: `speed > 0` at `Alo...
cms2018/vecgeom+gpu (0),`celeritas: internal assertion failed: speed > 0`
cms2018/vecgeom+gpu (1),`celeritas: internal assertion failed: speed > 0`
simple-cms+field+msc/orange+gpu (1),`celeritas: internal assertion failed: is_soft...
simple-cms+field+msc/vecgeom+cpu (0),internal assertion failed: `step_limit->action...


In [4]:
# Summarize the 'unconverged' column of the wildstyle results; the next cell
# addresses the summary as ('unconverged', 'mean').
# NOTE(review): `summed` is rebound to the Summit summary in a later cell, so
# this value is only valid until that cell runs.
summed = analyze.summarize_instances(ws.result[['unconverged']])

Unconverged tracks (from runs that didn't fail):

In [5]:
# Mean of the unconverged-track summary for each configuration.
unconv = summed[('unconverged', 'mean')]
# Show only entries with a mean above zero, with the 'arch' index level
# pivoted into columns.
unconv[unconv > 0].unstack('arch') 

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field+msc,orange,0.5,2.0


## Summit results

In [6]:
summit = analyze.Analysis('results/summit')
print(summit)

# Keep only the rows selected by `summit.successful`, discard rows that are
# entirely NaN, then summarize what is left.
summit_ok = summit.result[summit.successful].dropna(how='all')
summed = analyze.summarize_instances(summit_ok)

Analysis for Celeritas v0.2.0 on summit


In [7]:
# Load the detailed results of one run: testem3-flat / orange / gpu, with
# second argument 0 (presumably the instance number -- confirm).
deets = summit.load_results(('testem3-flat','orange','gpu'), 0)
# Show the device properties recorded under 'system' for that run.
deets['system']['device']

{'capability_major': 7,
 'capability_minor': 0,
 'clock_rate': 1530000,
 'default_block_size': 256,
 'device_id': 0,
 'eu_per_cu': 1,
 'max_blocks_per_grid': 2147483647,
 'max_blocks_per_multiprocessor': 32,
 'max_cache_size': 6291456,
 'max_threads_per_block': 1024,
 'max_threads_per_cu': 2048,
 'memory_clock_rate': 877000,
 'multiprocessor_count': 80,
 'name': 'Tesla V100-SXM2-16GB',
 'platform': 'cuda',
 'regs_per_block': 65536,
 'regs_per_multiprocessor': 65536,
 'shared_mem_per_block': 49152,
 'threads_per_warp': 32,
 'total_const_mem': 65536,
 'total_global_mem': 16911433728}

### Failures

Average number of unconverged tracks:

In [8]:
# Mean unconverged count per configuration on Summit. Unlike the timing
# summary, no filtering by `summit.successful` is applied here.
unconv = analyze.summarize_instances(summit.result['unconverged'])['mean']
# Keep means above zero only; pivot the 'arch' index level into columns.
unconv[unconv > 0].unstack('arch') 

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018+field+msc,vecgeom,1.5,9.666667
simple-cms+field,orange,,0.666667
simple-cms+field+msc,orange,1.0,0.333333
simple-cms+field+msc,vecgeom,0.5,0.166667


### Timing tables

In [9]:
# Two stacked log-scale panels: run time on top (three times as tall) and
# setup time underneath.
fig, (run_ax, setup_ax) = plt.subplots(
    nrows=2,
    gridspec_kw={'height_ratios': [3, 1]},
    subplot_kw={'yscale': 'log'},
)

summit.plot_results(run_ax, summed['total_time'])
run_ax.legend()
run_ax.set_ylabel('Run [s]')
# Hide the x tick labels on the upper panel.
run_ax.tick_params(labelbottom=False)

summit.plot_results(setup_ax, summed['setup_time'])
setup_ax.set_ylabel('Setup [s]')

analyze.annotate_metadata(run_ax, summit)
fig.savefig('plots/timing.pdf', transparent=True)
# The figure is only needed on disk; close it so it is not rendered inline.
plt.close(fig)

In [10]:
def float_fmt_transform(digits):
    """Build a formatter that renders numbers with a fixed number of decimals.

    Args:
        digits: number of digits to show after the decimal point.

    Returns:
        A one-argument function that maps a numeric value to its
        fixed-point string, or to ``"---"`` when the value is NaN (so that
        missing table entries read as dashes).
    """
    # Bind the template's ``format`` method once, under a name that does not
    # shadow the ``format`` builtin.
    render = "{{:.{}f}}".format(digits).format

    def transform(val):
        if np.isnan(val):
            return "---"
        return render(val)

    return transform

# Mean total run time, with the innermost index level (arch, per the saved
# output) unstacked into columns.
times = summed[('total_time', 'mean')].unstack()
# Render with two decimals; NaN entries are shown as "---".
times.style.format(float_fmt_transform(2))

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,99.41,10.78
simple-cms+field,orange,72.46,5.82
simple-cms+field+msc,orange,90.77,8.50
simple-cms+field+msc,vecgeom,89.42,8.64
simple-cms+msc,orange,71.51,3.14
testem15,orange,47.8,2.44
testem15+field,orange,61.43,2.39
testem15+field+msc,orange,76.51,2.77
testem15+field+msc,vecgeom,74.55,2.70
testem3-flat,orange,87.85,3.39


In [11]:
# Event-rate summary for the Summit results, as computed by
# analyze.calc_event_rate.
event_rate = analyze.calc_event_rate(summit, summed)
# Mean rate for a single problem, with geometry as rows and arch as columns.
event_rate['mean'].xs('simple-cms+field+msc', level='problem').unstack('arch')

arch,cpu,gpu
geo,Unnamed: 1_level_1,Unnamed: 2_level_1
orange,0.07712,0.823527
vecgeom,0.078286,0.810254


In [12]:
# CPU/GPU ratio of total run time; plotted further down as the speedup.
speedup = analyze.get_cpugpu_ratio(summed['total_time'])
# Drop rows with missing values and show one decimal.
speedup.dropna().style.format(float_fmt_transform(1))

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,9.2,0.2
simple-cms+field,orange,12.5,1.2
simple-cms+field+msc,orange,10.7,2.6
simple-cms+field+msc,vecgeom,10.3,4.6
simple-cms+msc,orange,22.8,2.1
testem15,orange,19.6,0.2
testem15+field,orange,25.7,3.3
testem15+field+msc,orange,27.6,3.1
testem15+field+msc,vecgeom,27.6,3.0
testem3-flat,orange,25.9,2.1


### Plots

In [13]:
problems = summit.problems()
problem_to_abbr = summit.problem_to_abbr(problems)
# Position of each problem within `problems`; used later to place markers
# along the x axis.
p_to_i = {prob_name: pos for pos, prob_name in enumerate(problems)}

In [14]:
# Speedup of each configuration on a linear axis anchored at zero.
fig, ax = plt.subplots()
summit.plot_results(ax, speedup)
ax.set_ylabel("Speedup (7-CPU / 1-GPU wall time)")
# Pin the lower limit at zero and leave the upper limit automatic.
ax.set_ylim(bottom=0)
analyze.annotate_metadata(ax, summit)
fig.savefig('plots/speedups.pdf', transparent=True)
plt.close(fig)

In [15]:
fig, axes = plt.subplots(
    nrows=2, figsize=(4, 4), subplot_kw={'yscale': 'log'})
# One log-scale panel per quantity: 'step' on top, 'primary' below. Each
# panel plots analyze.inverse_summary of the matching avg_time_per_* summary
# (presumably turning time per item into items per second -- confirm).
for ax, q in zip(axes, ('step', 'primary')):
    per_second = analyze.inverse_summary(summed['avg_time_per_' + q])
    summit.plot_results(ax, per_second)
    ax.set_ylabel(q + ' per sec')
    ax.legend()
fig.savefig('plots/steps-vs-primaries.png', dpi=300)
plt.close(fig)

In [16]:
# Event rate (top panel, log scale, three times as tall) above slot occupancy
# (bottom panel) for the Summit runs.
(fig, (time_ax, occ_ax)) = plt.subplots(
    nrows=2, figsize=(4, 4),
    gridspec_kw=dict(height_ratios=[3, 1])
)
time_ax.set_yscale('log')
summit.plot_results(time_ax, event_rate)
time_ax.set_ylabel(r"Event rate [1/s]")
# Start the y axis at half of the smallest mean rate; leave the top automatic.
time_ax.set_ylim([0.5 * event_rate['mean'].min(), None])
time_ax.legend()
# Hide the x tick labels on the upper panel.
time_ax.tick_params(labelbottom=False)
summit.plot_results(occ_ax, summed['slot_occupancy'])
occ_ax.set_ylabel("Slot occupancy")
# Annotate the top panel of *this* figure (same placement as the timing
# figure). A bare `ax` here would be a leftover global from another cell,
# pointing at an axes whose figure has already been closed.
analyze.annotate_metadata(time_ax, summit)
fig.savefig('plots/rate-occupancy.pdf', transparent=True)
plt.close(fig)

## Action fraction pie charts


In [17]:
# Mean total run time per configuration.
# NOTE(review): this selection looks like a Series, for which `.T` is a
# no-op -- confirm.
avg_time = summed[('total_time', 'mean')].T
# Mean action times: select the 'mean' entry of column level 1, then
# transpose so that actions are rows and configurations are columns (as in
# the table displayed by the next cell).
mean_action_times = summit.action_times().xs('mean', axis=1, level=1).T

In [18]:
# Mean time of each action divided by the mean total run time of the same
# configuration; columns with no data at all are dropped.
# NOTE(review): some GPU entries in the saved output exceed 1 -- worth
# confirming what the per-action times measure relative to total_time.
(
    mean_action_times
    .divide(avg_time, axis=1)
    .dropna(how='all', axis=1)
    .style.format(float_fmt_transform(2))
)

problem,cms2018,cms2018,simple-cms+field,simple-cms+field,simple-cms+field+msc,simple-cms+field+msc,simple-cms+field+msc,simple-cms+field+msc,simple-cms+msc,simple-cms+msc,testem15,testem15+field,testem15+field+msc,testem15+field+msc,testem3-flat,testem3-flat,testem3-flat+field,testem3-flat+msc
geo,vecgeom,vecgeom,orange,orange,orange,orange,vecgeom,vecgeom,orange,orange,orange,orange,orange,vecgeom,orange,vecgeom,orange,orange
arch,cpu,gpu,cpu,gpu,cpu,gpu,cpu,gpu,cpu,gpu,cpu,cpu,cpu,cpu,cpu,cpu,cpu,cpu
action_times,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3
along-step-uniform-msc,---,---,0.42,1.23,0.54,1.12,0.53,1.11,---,---,---,0.37,0.48,0.47,---,---,0.53,---
brems-rel,0.00,0.01,0.00,0.01,0.00,0.01,0.00,0.01,0.01,0.01,0.01,0.01,0.00,0.00,0.01,0.01,0.00,0.00
brems-sb,0.04,0.02,0.04,0.02,0.03,0.02,0.03,0.02,0.04,0.03,0.08,0.06,0.05,0.05,0.05,0.05,0.03,0.03
conv-bethe-heitler,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01
geo-boundary,0.11,0.18,0.01,0.01,0.00,0.01,0.00,0.01,0.01,0.01,0.01,0.01,0.00,0.00,0.06,0.10,0.04,0.03
ioni-moller-bhabha,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.01,0.02,0.01,0.01,0.00,0.00,0.01,0.01,0.00,0.00
msc-urban,---,---,---,---,0.00,0.00,0.00,0.00,0.00,0.00,---,---,0.00,0.00,---,---,---,0.00
photoel-livermore,0.03,0.02,0.02,0.02,0.02,0.01,0.02,0.01,0.02,0.03,0.04,0.03,0.02,0.02,0.03,0.03,0.02,0.02
physics-discrete-select,0.05,0.02,0.04,0.02,0.03,0.02,0.03,0.02,0.04,0.04,0.08,0.06,0.05,0.05,0.03,0.04,0.02,0.02
pre-step,0.26,0.13,0.26,0.14,0.21,0.10,0.22,0.09,0.27,0.23,0.35,0.27,0.22,0.22,0.40,0.42,0.23,0.27


In [19]:
# List the 'problem' label of every column in the action-time table.
mean_action_times.columns.get_level_values('problem')

Index(['cms2018', 'cms2018', 'cms2018+field+msc', 'simple-cms+field',
       'simple-cms+field', 'simple-cms+field+msc', 'simple-cms+field+msc',
       'simple-cms+field+msc', 'simple-cms+field+msc', 'simple-cms+msc',
       'simple-cms+msc', 'testem15', 'testem15', 'testem15+field',
       'testem15+field', 'testem15+field+msc', 'testem15+field+msc',
       'testem15+field+msc', 'testem15+field+msc', 'testem3-flat',
       'testem3-flat', 'testem3-flat', 'testem3-flat', 'testem3-flat+field',
       'testem3-flat+field', 'testem3-flat+msc'],
      dtype='object', name='problem')

In [20]:
# Pie charts of mean per-action time for the CMS problems (VecGeom geometry
# only): one PDF per architecture that has data.
geo = 'vecgeom'
available_problems = mean_action_times.columns.get_level_values('problem')

for prob in ["cms2018", "cms2018+field+msc"]:
    if prob not in available_problems:
        print("Missing problem:", prob)
        continue

    # Columns of `temp` are the architectures for this problem/geometry;
    # actions with a missing value in any of them are dropped.
    temp = mean_action_times.xs(
        (prob, geo), axis=1, level=('problem', 'geo')
    ).dropna()

    for arch, series in temp.items():
        fig, ax = plt.subplots(figsize=(4, 4))
        ax.pie(series, labels=series.index, autopct='%1.1f%%', pctdistance=0.85)
        ax.axis('equal')

        name = (prob, geo, arch)
        slashname = "/".join(name)
        dashname = "-".join(name)

        # Small grey caption in the lower-right corner identifying the run.
        fig.text(
            0.98, 0.02, f"{slashname}\n{summit.version} on {summit.system}",
            va='bottom', ha='right',
            fontstyle='italic', color=(0.5, 0.5, 0.5), size='xx-small',
            zorder=-100
        )
        fig.savefig(f'plots/actions-{dashname}.pdf', transparent=True)
        plt.close(fig)

### Plot per-step timing on GPU

In [21]:
# Detailed results (second argument 0) of both CMS problems with VecGeom on
# the GPU.
cms = [
    summit.load_results((p, 'vecgeom', 'gpu'), 0)
    for p in ['cms2018', 'cms2018+field+msc']
]

# One two-panel figure per plotting helper; panels follow the order of `cms`.
plotters = [
    (analyze.plot_counts, 'counts'),
    (analyze.plot_accum_time, 'time'),
]
# Which returned axes loses its y label in each panel: 'oax' on the left,
# 'ax' on the right (presumably so each label appears once per figure --
# confirm against the plotting helpers).
unlabeled = {0: 'oax', 1: 'ax'}

for plot, label in plotters:
    fig, axes = plt.subplots(ncols=2, figsize=(8, 2))

    for i, (ax, data) in enumerate(zip(axes, cms)):
        objs = plot(ax, data)
        analyze.annotate_metadata(ax, data['_metadata'])
        key = unlabeled.get(i)
        if key is not None:
            objs[key].set_ylabel(None)

    fig.savefig(f'plots/cms-{label}.pdf', transparent=True)
    plt.close(fig)

## Crusher

In [22]:
# Build the analysis object from the 'results/crusher' results path.
crusher = analyze.Analysis('results/crusher')
# The printed form names the code version and the system (see saved output).
print(crusher)

Analysis for Celeritas v0.2.0-1+49ccc7c8 on crusher


In [23]:
# VecGeom failures aren't really failures; just missing capability, so only
# the 'orange' geometry is examined. NaN entries are replaced by 1 so that
# count() below includes every row.
failures = crusher.failures().xs('orange', level='geo').fillna(1)
# Number of failure records per (problem, arch), with arch pivoted to columns.
failures.groupby(['problem', 'arch']).count().unstack()

failure,stderr,stderr,stdout,stdout
arch,cpu,gpu,cpu,gpu
problem,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
simple-cms+field+msc,2.0,,2.0,
testem3-flat+field,4.0,4.0,4.0,4.0
testem3-flat+msc,7.0,8.0,7.0,8.0


In [24]:
# Keep only the rows selected by `crusher.successful`, discard rows that are
# entirely NaN, then summarize what is left.
crusher_ok = crusher.result[crusher.successful].dropna(how='all')
csum = analyze.summarize_instances(crusher_ok)

In [25]:
# Mean total run time on Crusher, with the innermost index level (arch, per
# the saved output) unstacked into columns.
csum[('total_time', 'mean')].unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,86.364525,7.079069
simple-cms+field+msc,orange,88.769386,10.412078
simple-cms+msc,orange,83.134969,2.17532
testem15,orange,89.462248,1.660482
testem15+field,orange,59.999452,1.850357
testem15+field+msc,orange,89.137889,2.165253
testem3-flat,orange,107.263641,3.244948
testem3-flat+field,orange,140.217632,6.640344
testem3-flat+msc,orange,174.890197,


In [26]:
# Relative spread (std / mean) of every summarized quantity on Crusher.
rel_err = csum.xs('std', axis=1, level=1) / csum.xs('mean', axis=1, level=1)
# Flag entries whose relative spread exceeds 2%.
high_err = rel_err > 0.02
# Mask unflagged entries to NaN, then drop the rows and columns in which
# nothing was flagged.
rel_err[high_err].dropna(how='all').dropna(how='all', axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,avg_time_per_primary,avg_time_per_step,num_step_iters,pre_emptying_time,setup_time,slot_occupancy,total_time
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
simple-cms+field,orange,cpu,0.189675,0.189687,,0.478345,0.856062,,0.189675
simple-cms+field,orange,gpu,0.109828,0.109866,0.039383,0.294876,0.030434,0.037875,0.109828
simple-cms+field+msc,orange,cpu,0.24273,0.242599,,0.53376,0.049217,,0.24273
simple-cms+field+msc,orange,gpu,0.382798,0.382813,0.297543,0.033697,,0.232652,0.382798
simple-cms+msc,orange,cpu,0.172376,0.172437,,0.431574,0.884673,,0.172376
simple-cms+msc,orange,gpu,,,0.034899,,0.036296,0.035145,
testem15,orange,cpu,0.422068,0.422085,,0.42624,0.775743,,0.422068
testem15,orange,gpu,,,0.026858,,0.043046,0.026928,
testem15+field,orange,cpu,0.321016,0.321043,,0.454078,,,0.321016
testem15+field,orange,gpu,,,0.026858,,0.032466,0.026928,


In [27]:
# CPU/GPU ratio of total run time on Crusher (mean and std per configuration).
analyze.get_cpugpu_ratio(csum['total_time'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,12.199984,2.673957
simple-cms+field+msc,orange,8.525616,3.864392
simple-cms+msc,orange,38.217353,6.601693
testem15,orange,53.877265,22.741743
testem15+field,orange,32.425875,10.413711
testem15+field+msc,orange,41.16742,6.786948
testem3-flat,orange,33.055577,17.828039
testem3-flat+field,orange,21.11602,7.360462
testem3-flat+msc,orange,,


In [28]:
# Total-time summary (count / mean / std per configuration) on Crusher. Its
# index is reused in the next cell to select the matching Summit rows.
crusher_times = csum['total_time']
crusher_times

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
simple-cms+field,orange,cpu,8.0,86.364525,16.381155
simple-cms+field,orange,gpu,7.0,7.079069,0.777478
simple-cms+field+msc,orange,cpu,6.0,88.769386,21.546988
simple-cms+field+msc,orange,gpu,5.0,10.412078,3.985723
simple-cms+msc,orange,cpu,8.0,83.134969,14.330453
simple-cms+msc,orange,gpu,8.0,2.17532,0.024413
testem15,orange,cpu,8.0,89.462248,37.759161
testem15,orange,gpu,8.0,1.660482,0.008983
testem15+field,orange,cpu,8.0,59.999452,19.260794
testem15+field,orange,gpu,8.0,1.850357,0.017432


In [29]:
# Event rates on Crusher, and on Summit restricted to the configurations
# present in the Crusher total-time index so the two can be compared.
crusher_rates = analyze.calc_event_rate(crusher, csum)
summit_rates = analyze.calc_event_rate(summit, summed.loc[crusher_times.index])

# Number shown in the comparison plot's legend for each (machine, arch) pair
# -- presumably the count of CPU cores / GPUs used per run; confirm.
counts = {
    ('summit', 'cpu'): 7,
    ('summit', 'gpu'): 1,
    ('crusher', 'cpu'): 8,
    ('crusher', 'gpu'): 1,
}

In [30]:
# Ratio of mean event rates, Crusher over Summit (values above 1 mean the
# Crusher rate is higher), with arch pivoted to columns.
(crusher_rates['mean'] / summit_rates['mean']).unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,0.839008,0.821999
simple-cms+field+msc,orange,1.022506,0.816362
simple-cms+msc,orange,0.860179,1.444362
testem15,orange,0.534269,1.471178
testem15+field,orange,1.023794,1.291316
testem15+field+msc,orange,0.858386,1.27825
testem3-flat,orange,0.819027,1.044006
testem3-flat+field,orange,1.07345,0.859865
testem3-flat+msc,orange,0.921367,


In [31]:
# Event rate of every configuration on both machines, on one log-scale axis.
fig, ax = plt.subplots()
ax.set_yscale('log')

# (x offset, marker color, display name, event-rate summary) per machine; the
# opposite offsets keep the two machines' markers from sitting on top of
# each other.
machines = [
    (-0.05, '#7A954F', 'Summit', summit_rates),
    (0.05, '#BC5544', 'Crusher', crusher_rates),
]
for offset, color, machine, rates in machines:
    for arch in ['cpu', 'gpu']:
        summary = rates.xs(arch, level='arch')
        # x position: the problem's slot in `problems`, nudged by the offset.
        slots = [p_to_i[p] for p in summary.index.get_level_values('problem')]
        index = np.array(slots, dtype=float) + offset

        mark = analyze.ARCH_SHAPES[arch]
        count = counts[(machine.lower(), arch)]
        # Upper-case form is what appears in the legend label.
        arch = arch.upper()
        # Error bars only (no markers); the markers come from scatter().
        ax.errorbar(index, summary['mean'], summary['std'],
                    capsize=0, fmt='none', ecolor=(0.2, 0.2, 0.2))
        scat = ax.scatter(index, summary['mean'], c=color, marker=mark,
                          label=f"{machine} ({count} {arch})")

# One tick per problem, labeled with its abbreviation.
xax = ax.xaxis
xax.set_ticks(np.arange(len(problems)))
xax.set_ticklabels(list(problem_to_abbr.values()), rotation=90)
grid = ax.grid()
ax.set_axisbelow(True)
ax.legend()
ax.set_ylabel(r"Event rate [1/s]")
analyze.annotate_metadata(ax, summit)
fig.savefig('plots/crusher-vs-summit.pdf')
plt.close(fig)