In [1]:
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import sys
import itertools
from collections import namedtuple
from pathlib import Path

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

%load_ext autoreload
%autoreload 1
%aimport analyze

# Apply the shared plot style: rcParams overrides stored as JSON.
with open('plots/style.json') as style_file:
    style_overrides = json.load(style_file)
mpl.rcParams.update(style_overrides)

## Wildstyle failures

In [2]:
# Load the results stored under results/wildstyle; printing the Analysis
# object gives a one-line summary (code version and system name).
ws = analyze.Analysis('results/wildstyle')
print(ws)

Analysis for Celeritas v0.3.0-dev.116+27141e37 on wildstyle


In [3]:
# Build a table from the failures recorded in the wildstyle results and
# display it as a DataFrame.
ftab = analyze.make_failure_table(ws.failures())
ftab.to_frame()

Unnamed: 0,Failure


In [4]:
# List the keys of every failure-table entry whose value contains 'is_soft'.
soft_failures = [key for (key, value) in ftab.items() if 'is_soft' in value]
print("\n".join(soft_failures))




In [5]:
# Summarize the 'unconverged' column of the wildstyle results.
# NOTE(review): `summed` is rebound to the Summit summary in a later cell, so
# its meaning depends on which cell ran last.
summed = analyze.summarize_instances(ws.result[['unconverged']])

Unconverged tracks (from runs that didn't fail):

In [6]:
# Keep only entries whose mean 'unconverged' value is positive and pivot the
# 'arch' index level into columns.
unconv = summed[('unconverged', 'mean')]
unconv[unconv > 0].unstack('arch') 

Unnamed: 0_level_0,arch
problem,geo


## Summit results

In [7]:
# Load the Summit results, then summarize only the rows selected by
# `summit.successful`, dropping rows that are entirely NaN.
# NOTE: this rebinds `summed`, which held the wildstyle summary until now.
summit = analyze.Analysis('results/summit')
print(summit)
summed = analyze.summarize_instances(summit.result[summit.successful].dropna(how='all'))

Analysis for Celeritas v0.3.0-dev.117+f5cc679d on summit


In [8]:
# Load one stored result for the (problem, geo, arch) key and show the device
# properties it recorded.
# NOTE(review): the second argument (0) is presumably the instance index — confirm.
deets = summit.load_results(('testem3-flat','orange','gpu'), 0)
deets['system']['device']

{'capability_major': 7,
 'capability_minor': 0,
 'clock_rate': 1530000,
 'default_block_size': 256,
 'device_id': 0,
 'eu_per_cu': 1,
 'max_blocks_per_grid': 2147483647,
 'max_blocks_per_multiprocessor': 32,
 'max_cache_size': 6291456,
 'max_threads_per_block': 1024,
 'max_threads_per_cu': 2048,
 'memory_clock_rate': 877000,
 'multiprocessor_count': 80,
 'name': 'Tesla V100-SXM2-16GB',
 'platform': 'cuda',
 'regs_per_block': 65536,
 'regs_per_multiprocessor': 65536,
 'shared_mem_per_block': 49152,
 'threads_per_warp': 32,
 'total_const_mem': 65536,
 'total_global_mem': 16911433728}

### Failures

Average number of unconverged tracks:

In [9]:
# Mean of the summarized 'unconverged' column for the Summit results; only
# positive entries are shown, with the 'arch' index level as columns.
unconv = analyze.summarize_instances(summit.result['unconverged'])['mean']
unconv[unconv > 0].unstack('arch') 

Unnamed: 0_level_0,arch
problem,geo


### Timing tables

In [10]:
# Two stacked log-scale panels: run time on top (three times taller), setup
# time underneath. The figure is written to disk and closed, not displayed.
fig, (run_ax, setup_ax) = plt.subplots(
    nrows=2,
    gridspec_kw={'height_ratios': [3, 1]},
    subplot_kw={'yscale': 'log'},
)

# Upper panel: total run time, with its x tick labels hidden.
summit.plot_results(run_ax, summed['total_time'])
run_ax.legend()
run_ax.set_ylabel('Run [s]')
run_ax.tick_params(labelbottom=False)

# Lower panel: setup time.
summit.plot_results(setup_ax, summed['setup_time'])
setup_ax.set_ylabel('Setup [s]')

analyze.annotate_metadata(run_ax, summit)
fig.savefig('plots/timing.pdf', transparent=True)
plt.close()

In [11]:
def float_fmt_transform(digits):
    """Build a cell formatter that renders floats with a fixed precision.

    Parameters
    ----------
    digits : int
        Number of digits to show after the decimal point.

    Returns
    -------
    callable
        Function of one value returning ``"---"`` when the value is NaN and
        the fixed-point string with ``digits`` decimals otherwise.
    """
    # Render the precision into the template once, e.g. "{:.2f}" for digits=2.
    template = "{{:.{}f}}".format(digits)

    def transform(val):
        return "---" if np.isnan(val) else template.format(val)

    return transform

# Mean total run time with the last index level moved into columns, shown
# with two decimals and "---" for missing entries.
times = summed[('total_time', 'mean')].unstack()
times.style.format(float_fmt_transform(2))

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,129.73,14.33
cms2018+field+msc,vecgeom,299.19,50.48
simple-cms+field,orange,84.46,4.64
simple-cms+field+msc,orange,103.12,4.98
simple-cms+field+msc,vecgeom,101.12,4.81
simple-cms+msc,orange,90.39,2.71
testem15,orange,64.37,2.0
testem15+field,orange,75.71,2.13
testem15+field+msc,orange,93.37,2.41
testem15+msc,vecgeom,78.33,2.13


In [12]:
# Compute event rates from the Summit summary, then show the mean rate for a
# single problem with architectures as columns.
event_rate = analyze.calc_event_rate(summit, summed)
event_rate['mean'].xs('simple-cms+field+msc', level='problem').unstack('arch')

arch,cpu,gpu
geo,Unnamed: 1_level_1,Unnamed: 2_level_1
orange,0.067884,1.406446
vecgeom,0.069227,1.455049


In [13]:
# CPU-to-GPU ratio derived from the summarized total run time; rows with
# missing values are dropped and numbers are shown to one decimal.
speedup = analyze.get_cpugpu_ratio(summed['total_time'])
speedup.dropna().style.format(float_fmt_transform(1))

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,9.1,0.1
cms2018+field+msc,vecgeom,5.9,0.1
simple-cms+field,orange,18.2,1.3
simple-cms+field+msc,orange,20.7,1.3
simple-cms+field+msc,vecgeom,21.0,1.0
simple-cms+msc,orange,33.3,0.3
testem15,orange,32.2,0.4
testem15+field,orange,35.6,0.4
testem15+field+msc,orange,38.7,1.0
testem15+msc,vecgeom,36.8,0.6


In [14]:
# Descriptive statistics of the speedup after scaling by 7.
# NOTE(review): 7 looks like the number of CPU cores paired with each GPU on
# Summit (the speedup plot is labeled "7-CPU / 1-GPU") — confirm, and consider
# replacing the literal with a named constant.
(speedup.dropna() * 7).describe()

Unnamed: 0,mean,std
count,16.0,16.0
mean,175.894903,3.551876
std,70.961377,2.934587
min,41.485608,0.403778
25%,140.609274,1.303592
50%,179.743612,2.669346
75%,236.355545,4.703273
max,270.994783,9.274282


In [15]:
# Fraction of each instance's total time spent in geometry-related actions.
# An action counts as geometry when its label starts with 'along-step-' or
# 'geo-'.
valid_action_times = summit.result['action_times'][summit.valid]
action_times_inst = analyze.unstack_subdict(valid_action_times).T
total_time_inst = summit.result['total_time']
geo_actions = [
    lab
    for lab in action_times_inst.index
    if lab.startswith(('along-step-', 'geo-'))
]
geo_time_inst = action_times_inst.loc[geo_actions].sum()
geo_frac_inst = geo_time_inst / total_time_inst
geo_frac = analyze.summarize_instances(geo_frac_inst)

In [16]:
# Mean geometry time fraction with architectures as columns.
geo_frac['mean'].unstack('arch')

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,0.453428,0.788791
cms2018+field+msc,vecgeom,0.703379,0.920562
simple-cms+field,orange,0.357892,0.611616
simple-cms+field+msc,orange,0.478964,0.635619
simple-cms+field+msc,vecgeom,0.472926,0.69112
simple-cms+msc,orange,0.404616,0.334149
testem15,orange,0.206237,0.0
testem15+field,orange,0.323664,0.0
testem15+field+msc,orange,0.441175,0.0
testem15+msc,vecgeom,0.352365,0.0


### Plots

In [17]:
# Problem names, their abbreviations, and each problem's ordinal position
# (the position is used as the x coordinate in the machine-comparison plot).
problems = summit.problems()
problem_to_abbr = summit.problem_to_abbr(problems)
p_to_i = {name: position for (position, name) in enumerate(problems)}

In [18]:
# Speedup per problem on a linear axis whose lower limit is pinned to zero;
# the figure is saved to disk and closed rather than shown inline.
(fig, ax) = plt.subplots()
summit.plot_results(ax, speedup)
ax.set_ylim(bottom=0)
ax.set_ylabel("Speedup (7-CPU / 1-GPU wall time)")
analyze.annotate_metadata(ax, summit)
fig.savefig('plots/speedups.pdf', transparent=True)
plt.close()

In [19]:
# One log-scale panel per quantity, plotting the inverse of the summarized
# average time per step / per primary.
(fig, axes) = plt.subplots(
    nrows=2,
    figsize=(4, 4),
    subplot_kw={'yscale': 'log'},
)
for (ax, q) in zip(axes, ['step', 'primary']):
    rate = analyze.inverse_summary(summed[f'avg_time_per_{q}'])
    summit.plot_results(ax, rate)
    ax.set_ylabel(f'{q} per sec')
    # Only the bottom panel keeps its x tick labels.
    if ax is not axes[-1]:
        ax.tick_params(labelbottom=False)
    ax.legend()
fig.savefig('plots/steps-vs-primaries.png', dpi=300)
plt.close()

In [20]:
# Two stacked panels: event throughput (log scale) on top, percentage of time
# spent in geometry underneath.
(fig, (time_ax, geo_ax)) = plt.subplots(
    nrows=2, figsize=(4, 4),
    gridspec_kw=dict(height_ratios=[3, 1])
)
time_ax.set_yscale('log')
summit.plot_results(time_ax, event_rate)
time_ax.set_ylabel(r"Throughput [event/s]")
# Start the axis at half the smallest mean rate; the upper limit stays automatic.
time_ax.set_ylim([0.5 * event_rate['mean'].min(), None])
time_ax.legend()
time_ax.tick_params(labelbottom=False)
summit.plot_results(geo_ax, geo_frac * 100)
geo_ax.set_ylabel("Geometry [%]")
geo_ax.set_ylim([0, 100])
# Annotate an axes that belongs to THIS figure. A bare `ax` is not defined in
# this cell and would resolve to an axes of an earlier, already-closed figure,
# leaving the saved PDF without its metadata label.
analyze.annotate_metadata(time_ax, summit)
fig.savefig('plots/throughput-geo.pdf', transparent=True)
plt.close(fig)

## Action fraction pie charts

In [22]:
# Mean total time, and the 'mean' slice of the per-action times transposed so
# that actions form the index (the pie-chart cell reads them from there).
# NOTE(review): `avg_time` is only referenced by commented-out code in the
# visible notebook.
avg_time = summed[('total_time', 'mean')].T
mean_action_times = summit.action_times().xs('mean', axis=1, level=1).T

In [23]:
# mean_action_times.divide(avg_time, axis=1).dropna(how='all', axis=1).style.format(float_fmt_transform(2))

In [24]:
# Sort key (group, tiebreak) for actions that need a fixed place in the pie
# charts. Groups not listed here are assigned by get_action_priority:
#   0 -> any 'along-step*' action, 3 -> everything else.
ACTION_PRIORITY = {
    "geo-boundary": (0, "zzz"),
    "extend-from-secondaries": (1, "ex"),
    "initialize-tracks": (1, "init"),
    "pre-step": (2, ""),
    "physics-discrete-select": (2, "b"),
}


def get_action_priority(k):
    """Return the ``(group, tiebreak)`` sort key for action label ``k``.

    Labels starting with ``'along-step'`` go in group 0 and are ordered by
    their own name. Labels listed in ``ACTION_PRIORITY`` use the key stored
    there. Every other label goes in group 3, ordered by name.
    """
    if k.startswith('along-step'):
        return (0, k)
    return ACTION_PRIORITY.get(k, (3, k))

In [25]:
# Alias for the Analysis object the rest of this cell reads from (it is used
# for the figure annotation text below).
results = summit
def autopct_format(pctvalue):
    """Format a pie-wedge percentage, leaving wedges below 2% unlabeled.

    Returns an empty string when ``pctvalue < 2``; otherwise the value with
    one decimal followed by ``%``.
    """
    show_label = not (pctvalue < 2)
    return "{:1.1f}%".format(pctvalue) if show_label else ""

# Pie chart of the mean per-action time for every architecture of the two CMS
# problems; one PDF is written per (problem, geo, arch) combination.
geo = 'vecgeom'
for prob in ["cms2018", "cms2018+field+msc"]:
    if prob not in mean_action_times.columns.get_level_values('problem'):
        print("Missing problem:", prob)
        continue
    # Rows are actions; the columns left after the cross-section are iterated
    # as (arch, series) below. Rows containing a missing value are dropped.
    temp = mean_action_times.xs((prob, geo), axis=1, level=('problem', 'geo')).dropna()
    actions = list(temp.index)
    priorities = [get_action_priority(a) for a in actions]
    # Order wedges by (priority key, action name).
    (priorities, actions) = zip(*(sorted(zip(priorities, actions))))
    # Wedges are offset from the center in proportion to their priority group;
    # group 3 wedges get no text label.
    explode = [0.05 * p[0] for p in priorities]
    labels = [a if p[0] < 3 else "" for (p, a) in zip(priorities, actions)]
    for (arch, series) in temp.items():
        (fig, ax) = plt.subplots(figsize=(4, 4))
        (patches, labeltext, pctlabels) = ax.pie(
            [series[t] for t in actions], labels=labels, explode=explode,
            autopct=autopct_format, pctdistance=1.25, labeldistance=0.2, rotatelabels=True
        )
        ax.axis('equal')
        name = (prob, geo, arch)
        slashname = "/".join(name)
        # Small grey provenance note in the bottom-right corner of the figure.
        fig.text(
            0.98, 0.02, f"{slashname}\n{results.version} on {results.system}",
            va='bottom', ha='right',
            fontstyle='italic', color=(0.5,)*3, size='xx-small',
            zorder=-100
        )
        dashname = "-".join(name)
        fig.savefig(f'plots/actions-{dashname}.pdf', transparent=True)
        # Close this specific figure so figures do not accumulate in the loop.
        plt.close(fig)

## Plot per-step timing on GPU

In [26]:
# Load one stored GPU/VecGeom result for each of the two CMS problems.
cms_problems = ['cms2018', 'cms2018+field+msc']
cms = [summit.load_results((name, 'vecgeom', 'gpu'), 0) for name in cms_problems]

# One two-panel figure per plotting helper, one panel per CMS problem.
plotters = [(analyze.plot_counts, 'counts'),
            (analyze.plot_accum_time, 'time')]
for (plot, label) in plotters:
    (fig, axes) = plt.subplots(ncols=2, figsize=(8, 2))

    for (i, (ax, data)) in enumerate(zip(axes, cms)):
        objs = plot(ax, data)
        analyze.annotate_metadata(ax, data['_metadata'])
        # First panel clears the y label of objs['oax'], second panel that of
        # objs['ax'].
        # NOTE(review): 'oax' is presumably a secondary axis — confirm.
        if i == 0:
            objs['oax'].set_ylabel(None)
        elif i == 1:
            objs['ax'].set_ylabel(None)
    fig.savefig(f'plots/cms-{label}.pdf', transparent=True)
    plt.close()

## Crusher

In [27]:
# Load the results stored under results/crusher and print their summary line.
crusher = analyze.Analysis('results/crusher')
print(crusher)

Analysis for Celeritas v0.3.0-dev.117+f5cc679d on crusher


In [28]:
# VecGeom failures aren't really failures; just missing capability
#failures = crusher.failures().xs('orange', level='geo').fillna(1)
#failures.groupby(['problem', 'arch']).count().unstack()

In [29]:
# Summarize the Crusher rows selected by `crusher.successful`, dropping rows
# that are entirely NaN (same recipe as the Summit summary).
csum = analyze.summarize_instances(crusher.result[crusher.successful].dropna(how='all'))

In [30]:
# Mean total run time on Crusher with the last index level moved into columns.
csum[('total_time', 'mean')].unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,98.954807,14.064661
simple-cms+field+msc,orange,109.093269,14.881994
simple-cms+msc,orange,106.332851,11.272708
testem15,orange,140.628369,11.871341
testem15+field,orange,108.869614,10.109528
testem15+field+msc,orange,116.423839,8.969689
testem3-flat,orange,170.201814,11.199139
testem3-flat+field,orange,159.12834,17.950579
testem3-flat+field+msc,orange,238.647082,22.97132
testem3-flat+msc,orange,224.768816,13.440013


In [31]:
# Relative spread (std / mean) of every summarized quantity. Show only the
# entries above 2%, dropping rows and columns that have none.
rel_err = csum.xs('std', axis=1, level=1) / csum.xs('mean', axis=1, level=1)
high_err = rel_err > 0.02
rel_err[high_err].dropna(how='all').dropna(how='all', axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,avg_time_per_primary,avg_time_per_step,num_step_iters,pre_emptying_time,setup_time,slot_occupancy,total_time
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
simple-cms+field,orange,cpu,0.442522,0.442683,,0.469665,0.054429,,0.442522
simple-cms+field,orange,gpu,0.051673,0.051697,0.024009,0.422947,0.10189,0.023444,0.051673
simple-cms+field+msc,orange,cpu,0.257028,0.257086,,0.402627,0.047063,,0.257028
simple-cms+field+msc,orange,gpu,0.080388,0.080313,,0.372378,0.036127,,0.080388
simple-cms+msc,orange,cpu,0.296687,0.296639,,0.394499,0.051353,,0.296687
simple-cms+msc,orange,gpu,0.02644,0.026768,,,0.052661,,0.02644
testem15,orange,cpu,0.330846,0.330857,,0.383106,0.126688,,0.330846
testem15,orange,gpu,0.404589,0.404684,,,0.236952,,0.404589
testem15+field,orange,cpu,0.487872,0.487856,,0.462544,0.035888,,0.487872
testem15+field,orange,gpu,0.022983,0.022933,,,0.07095,,0.022983


In [32]:
# CPU-to-GPU ratio derived from the summarized Crusher total run time.
analyze.get_cpugpu_ratio(csum['total_time'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,7.035705,3.134609
simple-cms+field+msc,orange,7.330554,1.974159
simple-cms+msc,orange,9.432769,2.809669
testem15,orange,11.846039,6.191194
testem15+field,orange,10.769011,5.259726
testem15+field+msc,orange,12.979697,2.623419
testem3-flat,orange,15.197759,5.159678
testem3-flat+field,orange,8.864802,2.221936
testem3-flat+field+msc,orange,10.388915,1.943088
testem3-flat+msc,orange,16.723853,4.957123


In [33]:
# Summary columns of the Crusher total run time; the index of this table is
# reused later to select the matching Summit entries.
crusher_times = csum['total_time']
crusher_times

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
simple-cms+field,orange,cpu,8.0,98.954807,43.789691
simple-cms+field,orange,gpu,8.0,14.064661,0.72676
simple-cms+field+msc,orange,cpu,8.0,109.093269,28.040012
simple-cms+field+msc,orange,gpu,8.0,14.881994,1.196327
simple-cms+msc,orange,cpu,8.0,106.332851,31.547546
simple-cms+msc,orange,gpu,8.0,11.272708,0.298056
testem15,orange,cpu,8.0,140.628369,46.526291
testem15,orange,gpu,8.0,11.871341,4.803009
testem15+field,orange,cpu,8.0,108.869614,53.114443
testem15+field,orange,gpu,8.0,10.109528,0.232349


In [34]:
# Event rates for both machines. The Summit summary is restricted to the
# index entries present in the Crusher timing table so the two line up.
crusher_rates = analyze.calc_event_rate(crusher, csum)
summit_rates = analyze.calc_event_rate(summit, summed.loc[crusher_times.index])

# Count shown in the comparison plot's legend for each (machine, arch) pair,
# e.g. "Summit (7 CPU)".
# NOTE(review): presumably the number of CPU cores / GPUs used per run — confirm.
counts = {
    ('summit', 'cpu'): 7,
    ('summit', 'gpu'): 1,
    ('crusher', 'cpu'): 8,
    ('crusher', 'gpu'): 1,
}

In [35]:
# Ratio of mean event rates, Crusher over Summit, with the last index level
# moved into columns.
(crusher_rates['mean'] / summit_rates['mean']).unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,0.853539,0.330091
simple-cms+field+msc,orange,0.945224,0.334437
simple-cms+msc,orange,0.85006,0.240521
testem15,orange,0.457756,0.168422
testem15+field,orange,0.695424,0.210296
testem15+field+msc,orange,0.80195,0.268874
testem3-flat,orange,0.656456,0.341607
testem3-flat+field,orange,0.866813,0.36455
testem3-flat+field+msc,orange,0.928901,0.432579
testem3-flat+msc,orange,0.860286,0.410512


In [36]:
# Event rate per problem for both machines and both architectures, on a
# log-scale y axis. Each machine has its own color and a small sideways
# offset so its markers do not overlap the other machine's.
fig, ax = plt.subplots()
ax.set_yscale('log')
for offset, color, machine, rates in [(-0.05, '#7A954F', 'Summit', summit_rates),
                                      (0.05, '#BC5544', 'Crusher', crusher_rates)]:
    for arch in ['cpu', 'gpu']:
        summary = rates.xs(arch, level='arch')
        # x position = the problem's ordinal position, shifted per machine.
        index = np.array([p_to_i[p]
                          for p in summary.index.get_level_values('problem')], dtype=float)
        index += offset

        mark = analyze.ARCH_SHAPES[arch]
        count = counts[(machine.lower(), arch)]
        # Error bars only (fmt='none'); the markers come from scatter below.
        ax.errorbar(index, summary['mean'], summary['std'],
                    capsize=0, fmt='none', ecolor=(0.2,)*3)
        # Upper-case the architecture for the legend only, leaving the loop
        # variable untouched.
        ax.scatter(index, summary['mean'], c=color, marker=mark,
                   label=f"{machine} ({count} {arch.upper()})")
xax = ax.get_xaxis()
xax.set_ticks(np.arange(len(problems)))
xax.set_ticklabels(list(problem_to_abbr.values()), rotation=90)
ax.grid()
# Draw the grid underneath the markers.
ax.set_axisbelow(True)
ax.legend()
ax.set_ylabel(r"Event rate [1/s]")
analyze.annotate_metadata(ax, summit)
fig.savefig('plots/crusher-vs-summit.pdf')
plt.close(fig)