In [1]:
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import sys
import itertools
from collections import namedtuple
from pathlib import Path

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

%load_ext autoreload
%autoreload 1
%aimport analyze

# Apply the shared plot style: every key in plots/style.json overrides the
# matplotlib rcParam of the same name.
style_overrides = json.loads(Path('plots/style.json').read_text())
mpl.rcParams.update(style_overrides)

## Wildstyle failures

In [2]:
# Load the results stored under results/wildstyle and print the analysis
# description (code version and system name).
ws = analyze.Analysis('results/wildstyle')
print(ws)

Analysis for Celeritas v0.3.1-rc.2 on wildstyle


In [3]:
# Build a table of the failed wildstyle runs and display it as a DataFrame.
ftab = analyze.make_failure_table(ws.failures())
ftab.to_frame()

Unnamed: 0,Failure
testem3-flat+field+msc/orange+cpu (1),precondition failed: `this->make_lsa().boundar...


In [4]:
# Print the key of every failure-table entry whose value contains 'is_soft',
# one key per line.
soft_failures = [label for (label, message) in ftab.items()
                 if 'is_soft' in message]
print("\n".join(soft_failures))




In [5]:
# Summarize the 'unconverged' column of the wildstyle results; the following
# cell reads the ('unconverged', 'mean') column of this summary.
# NOTE(review): `summed` is rebound to the Summit summary in a later cell, so
# running cells out of order changes which data the later cells see.
summed = analyze.summarize_instances(ws.result[['unconverged']])

Unconverged tracks (from runs that didn't fail):

In [6]:
# Mean number of unconverged tracks; keep only the nonzero entries and show
# one column per architecture.
unconv = summed[('unconverged', 'mean')]
unconv.loc[unconv > 0].unstack('arch')

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018+field+msc,vecgeom,,2.0
testem3-flat+field+msc,orange,1.0,3.5


## Summit results

In [7]:
# Load the Summit results and print the analysis description.
summit = analyze.Analysis('results/summit')
print(summit)

# Summary statistics are built from the successful runs only; rows that are
# entirely NaN after the selection are dropped first.
summit_ok = summit.result[summit.successful].dropna(how='all')
summed = analyze.summarize_instances(summit_ok)

Analysis for Celeritas v0.3.1-rc.2.4+192408abf on summit


In [8]:
# Load one Summit GPU run (testem3-flat / orange) and show the device
# properties stored under its 'system' -> 'device' entry.
# NOTE(review): the second argument (0) is presumably the instance index — confirm.
deets = summit.load_results(('testem3-flat','orange','gpu'), 0)
deets['system']['device']

{'capability_major': 7,
 'capability_minor': 0,
 'clock_rate': 1530000,
 'default_block_size': 256,
 'device_id': 0,
 'eu_per_cu': 1,
 'max_blocks_per_grid': 2147483647,
 'max_blocks_per_multiprocessor': 32,
 'max_cache_size': 6291456,
 'max_threads_per_block': 1024,
 'max_threads_per_cu': 2048,
 'memory_clock_rate': 877000,
 'multiprocessor_count': 80,
 'name': 'Tesla V100-SXM2-16GB',
 'platform': 'cuda',
 'regs_per_block': 65536,
 'regs_per_multiprocessor': 65536,
 'shared_mem_per_block': 49152,
 'threads_per_warp': 32,
 'total_const_mem': 65536,
 'total_global_mem': 16911433728}

### Failures

Average number of unconverged tracks:

In [9]:
# Mean unconverged-track count per Summit run; show only the nonzero entries,
# one column per architecture.
unconv_stats = analyze.summarize_instances(summit.result['unconverged'])
unconv = unconv_stats['mean']
unconv.loc[unconv > 0].unstack('arch')

Unnamed: 0_level_0,arch
problem,geo


### Timing tables

In [10]:
# Two stacked log-scale panels sharing one figure: run time (3/4 of the
# height) above setup time (1/4 of the height).
fig, (run_ax, setup_ax) = plt.subplots(
    nrows=2,
    gridspec_kw={'height_ratios': [3, 1]},
    subplot_kw={'yscale': 'log'},
)

# Upper panel: total run time, with the legend and no x tick labels
summit.plot_results(run_ax, summed['total_time'])
run_ax.legend()
run_ax.set_ylabel('Run [s]')
run_ax.tick_params(labelbottom=False)

# Lower panel: setup time
summit.plot_results(setup_ax, summed['setup_time'])
setup_ax.set_ylabel('Setup [s]')

analyze.annotate_metadata(run_ax, summit)
fig.savefig('plots/timing.pdf', transparent=True)
plt.close()

In [11]:
def float_fmt_transform(digits):
    """Build a cell formatter that renders numbers with a fixed precision.

    Parameters
    ----------
    digits : int
        Number of digits after the decimal point.

    Returns
    -------
    callable
        A function mapping one scalar to a string: missing values (NaN,
        ``None``, ``pd.NA``, ``NaT``) become ``"---"`` and everything else is
        formatted as a fixed-point number with ``digits`` decimals.
    """
    # Build the template once, e.g. digits=2 -> "{:.2f}". Kept under its own
    # name so the built-in ``format`` is not shadowed.
    template = "{{:.{}f}}".format(digits)

    def transform(val):
        # pd.isna accepts every missing-value marker pandas can hand to a
        # Styler formatter; np.isnan raises TypeError on None and pd.NA.
        if pd.isna(val):
            return "---"
        return template.format(val)

    return transform

# Mean total run time with the last index level unstacked into columns,
# rendered with two decimals ('---' for NaN entries).
times = summed[('total_time', 'mean')].unstack()
times.style.format(float_fmt_transform(2))

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,125.99,13.49
cms2018+field+msc,vecgeom,280.62,42.04
simple-cms+field,orange,77.51,3.39
simple-cms+field+msc,orange,103.77,3.74
simple-cms+field+msc,vecgeom,102.2,3.75
simple-cms+msc,orange,91.09,3.64
testem15,orange,62.25,2.76
testem15+field,orange,72.34,2.77
testem15+field+msc,orange,92.27,3.18
testem15+field+msc,vecgeom,89.53,2.77


In [12]:
# Event rate for every summarized run, then the mean rate of one problem
# broken out by geometry (rows) and architecture (columns).
event_rate = analyze.calc_event_rate(summit, summed)
event_rate['mean'].xs('simple-cms+field+msc', level='problem').unstack('arch')

arch,cpu,gpu
geo,Unnamed: 1_level_1,Unnamed: 2_level_1
orange,0.067457,1.870279
vecgeom,0.068496,1.868454


In [13]:
# CPU/GPU ratio of total run time; rows containing missing values are dropped
# before display and numbers are shown with one decimal.
speedup = analyze.get_cpugpu_ratio(summed['total_time'])
speedup.dropna().style.format(float_fmt_transform(1))

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,9.3,0.1
cms2018+field+msc,vecgeom,6.7,0.1
simple-cms+field,orange,22.9,2.0
simple-cms+field+msc,orange,27.7,2.0
simple-cms+field+msc,vecgeom,27.3,1.6
simple-cms+msc,orange,25.0,0.1
testem15,orange,22.5,2.4
testem15+field,orange,26.2,2.8
testem15+field+msc,orange,29.0,2.6
testem15+field+msc,vecgeom,32.3,1.9


In [14]:
# Descriptive statistics of the speedup table scaled by 7.
# NOTE(review): 7 presumably matches the "7-CPU" in the speedup plot label,
# i.e. this rescales to a single-CPU-core equivalent — confirm. The 'std'
# column is scaled by the same factor.
(speedup.dropna() * 7).describe()

Unnamed: 0,mean,std
count,16.0,16.0
mean,164.155764,9.115157
std,51.141882,6.571193
min,46.721346,0.371418
25%,156.629167,2.195847
50%,177.920038,9.608961
75%,192.489749,13.845149
max,226.239632,19.329842


In [15]:
# Fraction of the total run time spent in geometry-related actions.
# Per-instance action times, transposed so that each action label is a row.
action_times_inst = analyze.unstack_subdict(
    summit.result['action_times'][summit.valid]
).T
total_time_inst = summit.result['total_time']

# An action counts as geometry when its label starts with one of these prefixes
geo_actions = [lab for lab in action_times_inst.index
               if lab.startswith(('along-step-', 'geo-'))]

# Sum the geometry actions per instance, normalize by that instance's total
# time, then summarize over instances.
geo_time_inst = action_times_inst.loc[geo_actions].sum()
geo_frac_inst = geo_time_inst / total_time_inst
geo_frac = analyze.summarize_instances(geo_frac_inst)

In [16]:
# Mean geometry fraction, one column per architecture.
geo_frac['mean'].unstack('arch')

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,0.454435,0.753648
cms2018+field+msc,vecgeom,0.69153,0.900786
simple-cms+field,orange,0.331382,0.255983
simple-cms+field+msc,orange,0.499504,0.351508
simple-cms+field+msc,vecgeom,0.490888,0.387711
simple-cms+msc,orange,0.430237,0.259241
testem15,orange,0.19643,0.0
testem15+field,orange,0.312785,0.0
testem15+field+msc,orange,0.463695,0.0
testem15+field+msc,vecgeom,0.445265,0.0


### Plots

In [17]:
# Problem names, their abbreviations, and the position of each problem in
# `problems` (used as an x coordinate by the comparison plot).
problems = summit.problems()
problem_to_abbr = summit.problem_to_abbr(problems)
p_to_i = {name: slot for (slot, name) in enumerate(problems)}

In [18]:
# Speedup per problem, with the y axis anchored at zero.
(fig, ax) = plt.subplots()
summit.plot_results(ax, speedup)
ax.set_ylabel("Speedup (7-CPU / 1-GPU wall time)")
ax.set_ylim(bottom=0)
analyze.annotate_metadata(ax, summit)
fig.savefig('plots/speedups.pdf', transparent=True)
plt.close()

In [19]:
# One log-scale panel per quantity, showing the inverse of the average time
# per step / per primary.
fig, axes = plt.subplots(nrows=2, figsize=(4, 4), subplot_kw={'yscale': 'log'})
bottom_ax = axes[-1]
for (ax, q) in zip(axes, ['step', 'primary']):
    rate = analyze.inverse_summary(summed[f'avg_time_per_{q}'])
    summit.plot_results(ax, rate)
    ax.set_ylabel(f'{q} per sec')
    # Only the bottom panel keeps its x tick labels
    if ax is not bottom_ax:
        ax.tick_params(labelbottom=False)
    ax.legend()
fig.savefig('plots/steps-vs-primaries.png', dpi=300)
plt.close()

In [20]:
# Throughput (log scale, 3/4 of the height) above the geometry percentage
# (linear scale, 1/4 of the height).
fig, (time_ax, geo_ax) = plt.subplots(
    nrows=2, figsize=(4, 4), gridspec_kw={'height_ratios': [3, 1]})

# Upper panel: event rate, with the lower limit at half the smallest mean rate
time_ax.set_yscale('log')
summit.plot_results(time_ax, event_rate)
time_ax.set_ylabel(r"Throughput [event/s]")
lowest_rate = event_rate['mean'].min()
time_ax.set_ylim(bottom=0.5 * lowest_rate)
time_ax.legend()
time_ax.tick_params(labelbottom=False)
analyze.annotate_metadata(time_ax, summit)

# Lower panel: geometry fraction expressed in percent
summit.plot_results(geo_ax, geo_frac * 100)
geo_ax.set_ylabel("Geometry [%]")
geo_ax.set_ylim(0, 100)

fig.savefig('plots/throughput-geo.pdf', transparent=True)
plt.close()

## Action fraction pie charts

In [21]:
# Mean total time per run, and the mean time of each action (transposed so
# that the actions are the rows).
# NOTE(review): in the visible cells `avg_time` is referenced only by the
# commented-out cell that follows — confirm whether it is still needed.
avg_time = summed[('total_time', 'mean')].T
mean_action_times = summit.action_times().xs('mean', axis=1, level=1).T

In [22]:
# mean_action_times.divide(avg_time, axis=1).dropna(how='all', axis=1).style.format(float_fmt_transform(2))

In [23]:
# Sort keys for the actions that need a fixed place in the ordering.
# Each value is (group, tie-breaker); groups are:
#   0: along-step (handled in get_action_priority) and geo-boundary
#   1: track initialization
#   2: pre-step and discrete-physics selection
#   3: every other action (handled in get_action_priority)
ACTION_PRIORITY = {
    "geo-boundary": (0, "zzz"),
    "extend-from-secondaries": (1, "ex"),
    "initialize-tracks": (1, "init"),
    "pre-step": (2, ""),
    "physics-discrete-select": (2, "b"),
}


def get_action_priority(k):
    """Return the ``(group, tie-breaker)`` sort key for action label ``k``.

    Labels starting with ``'along-step'`` sort into group 0 under their own
    name, labels listed in ``ACTION_PRIORITY`` use the key stored there, and
    all remaining labels sort into group 3 under their own name.
    """
    if k.startswith('along-step'):
        return (0, k)
    return ACTION_PRIORITY.get(k, (3, k))

In [24]:
# Alias for the analysis whose action times are drawn by the pie-chart loop
# later in this cell.
results = summit
def autopct_format(pctvalue):
    """Format a pie-wedge percentage, leaving wedges under 2% unlabeled.

    Returns an empty string when ``pctvalue`` is below 2, otherwise the value
    with one decimal followed by a percent sign.
    """
    return "" if pctvalue < 2 else f"{pctvalue:1.1f}%"

# Draw one pie chart of mean action times per architecture for each CMS
# problem, and save each chart to plots/actions-<problem>-<geo>-<arch>.pdf.
for prob in ["cms2018", "cms2018+field+msc"]:
    geo = 'vecgeom'
    if prob not in mean_action_times.columns.get_level_values('problem'):
        print("Missing problem:", prob)
        continue
    # Keep only the actions that have a value for every remaining column
    temp = mean_action_times.xs((prob, geo), axis=1, level=('problem', 'geo')).dropna()
    if temp.empty:
        # Without this guard the zip(*...) unpacking below raises ValueError
        print("No action times for:", prob, geo)
        continue

    # Order the wedges by priority group, then by tie-breaker
    actions = list(temp.index)
    priorities = [get_action_priority(a) for a in actions]
    (priorities, actions) = zip(*(sorted(zip(priorities, actions))))
    # Higher-numbered groups are pulled further out of the pie
    explode = [0.05 * p[0] for p in priorities]
    # Actions in group 3 (everything without a fixed priority) get no label
    labels = [a if p[0] < 3 else "" for (p, a) in zip(priorities, actions)]

    for (arch, series) in temp.items():
        (fig, ax) = plt.subplots(figsize=(4, 4))
        ax.pie(
            [series[t] for t in actions], labels=labels, explode=explode,
            autopct=autopct_format, pctdistance=1.25, labeldistance=0.2, rotatelabels=True
        )
        ax.axis('equal')
        name = (prob, geo, arch)
        slashname = "/".join(name)
        # Small grey provenance note in the lower-right corner of the figure
        fig.text(
            0.98, 0.02, f"{slashname}\n{results.version} on {results.system}",
            va='bottom', ha='right',
            fontstyle='italic', color=(0.5,)*3, size='xx-small',
            zorder=-100
        )
        dashname = "-".join(name)
        fig.savefig(f'plots/actions-{dashname}.pdf', transparent=True)
        # Close this specific figure so figures do not accumulate in the loop
        plt.close(fig)

## Plot per-step timing on GPU

In [25]:
# Load one GPU run for each CMS problem, then draw a two-panel figure (one
# panel per problem) for each plotting function.
cms_problems = ['cms2018', 'cms2018+field+msc']
cms = [summit.load_results((p, 'vecgeom', 'gpu'), 0) for p in cms_problems]

plotters = [(analyze.plot_counts, 'counts'),
            (analyze.plot_accum_time, 'time')]
# Which entry of the plotter's return value loses its y label, by panel index:
# 'oax' on the first panel and 'ax' on the second.
unlabeled = {0: 'oax', 1: 'ax'}

for plot, label in plotters:
    (fig, axes) = plt.subplots(ncols=2, figsize=(8, 2))

    for (i, (ax, data)) in enumerate(zip(axes, cms)):
        objs = plot(ax, data)
        analyze.annotate_metadata(ax, data['_metadata'])
        if i in unlabeled:
            objs[unlabeled[i]].set_ylabel(None)
    fig.savefig(f'plots/cms-{label}.pdf', transparent=True)
    plt.close()

## Crusher

In [26]:
# Load the results stored under results/crusher and print the analysis
# description (code version and system name).
crusher = analyze.Analysis('results/crusher')
print(crusher)

Analysis for Celeritas v0.3.1-rc.2 on crusher


In [27]:
# VecGeom failures aren't really failures; just missing capability
#failures = crusher.failures().xs('orange', level='geo').fillna(1)
#failures.groupby(['problem', 'arch']).count().unstack()

In [28]:
# Summary statistics over the successful Crusher runs; rows that are entirely
# NaN after the selection are dropped first.
csum = analyze.summarize_instances(crusher.result[crusher.successful].dropna(how='all'))

In [29]:
# Mean total run time on Crusher, with the last index level unstacked into columns.
csum[('total_time', 'mean')].unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,95.791686,16.507036
simple-cms+field+msc,orange,124.784283,17.397853
simple-cms+msc,orange,150.650056,11.929869
testem15,orange,87.425035,12.051599
testem15+field,orange,109.056734,13.21814
testem15+field+msc,orange,118.768646,13.727174
testem3-flat,orange,169.542208,14.494695
testem3-flat+field,orange,116.058669,25.713278
testem3-flat+field+msc,orange,192.273562,41.235603
testem3-flat+msc,orange,211.340956,16.474185


In [30]:
# Relative spread (std / mean) of every summarized quantity on Crusher.
csum_std = csum.xs('std', axis=1, level=1)
csum_mean = csum.xs('mean', axis=1, level=1)
rel_err = csum_std / csum_mean

# Show only the entries above 2%, hiding rows and columns with none
high_err = rel_err > 0.02
rel_err.where(high_err).dropna(how='all').dropna(axis=1, how='all')

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,avg_time_per_primary,avg_time_per_step,num_step_iters,pre_emptying_time,setup_time,slot_occupancy,total_time
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
simple-cms+field,orange,cpu,0.202785,0.202755,,0.319013,,,0.202785
simple-cms+field,orange,gpu,0.031871,0.03194,,0.162729,0.122977,,0.031871
simple-cms+field+msc,orange,cpu,0.302894,0.303094,,0.41542,,,0.302894
simple-cms+field+msc,orange,gpu,,,,0.175177,0.041673,,
simple-cms+msc,orange,cpu,0.438676,0.438627,,0.649024,,,0.438676
simple-cms+msc,orange,gpu,,,,,0.139656,,
testem15,orange,cpu,0.339934,0.339934,,0.364978,0.025206,,0.339934
testem15,orange,gpu,0.039989,0.039938,,,0.210578,,0.039989
testem15+field,orange,cpu,0.425735,0.425643,,0.544824,,,0.425735
testem15+field,orange,gpu,0.032709,0.032657,,,0.147402,,0.032709


In [31]:
# CPU/GPU ratio of total run time on Crusher.
analyze.get_cpugpu_ratio(csum['total_time'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,5.803082,1.191223
simple-cms+field+msc,orange,7.172396,2.175962
simple-cms+msc,orange,12.627972,5.541508
testem15,orange,7.254227,2.482962
testem15+field,orange,8.250536,3.522894
testem15+field+msc,orange,8.652083,3.003982
testem3-flat,orange,11.696845,3.701199
testem3-flat+field,orange,4.51357,1.935122
testem3-flat+field+msc,orange,4.662805,1.492901
testem3-flat+msc,orange,12.828614,5.90263


In [32]:
# Count/mean/std summary of total run time on Crusher; its index is used in
# the next cell to select the matching rows of the Summit summary.
crusher_times = csum['total_time']
crusher_times

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
simple-cms+field,orange,cpu,8.0,95.791686,19.425101
simple-cms+field,orange,gpu,8.0,16.507036,0.526101
simple-cms+field+msc,orange,cpu,8.0,124.784283,37.79636
simple-cms+field+msc,orange,gpu,8.0,17.397853,0.298798
simple-cms+msc,orange,cpu,8.0,150.650056,66.08657
simple-cms+msc,orange,gpu,8.0,11.929869,0.137755
testem15,orange,cpu,8.0,87.425035,29.718733
testem15,orange,gpu,8.0,12.051599,0.481934
testem15+field,orange,cpu,8.0,109.056734,46.429279
testem15+field,orange,gpu,8.0,13.21814,0.432353


In [33]:
# Event rates on both machines. The Summit summary is restricted to the rows
# present in the Crusher timing table so the two can be compared row by row.
crusher_rates = analyze.calc_event_rate(crusher, csum)
summit_rates = analyze.calc_event_rate(summit, summed.loc[crusher_times.index])

# Number quoted in the plot legend for each (machine, arch) pair.
# NOTE(review): presumably the CPU cores / GPUs used per run — confirm.
counts = {
    ('summit', 'cpu'): 7,
    ('summit', 'gpu'): 1,
    ('crusher', 'cpu'): 8,
    ('crusher', 'gpu'): 1,
}

In [34]:
# Ratio of the Crusher mean event rate to the Summit mean event rate
# (values below 1 mean a lower event rate on Crusher).
(crusher_rates['mean'] / summit_rates['mean']).unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,0.809149,0.205418
simple-cms+field+msc,orange,0.831598,0.215127
simple-cms+msc,orange,0.604628,0.305198
testem15,orange,0.712082,0.229149
testem15+field,orange,0.663284,0.209192
testem15+field+msc,orange,0.776889,0.231501
testem3-flat,orange,0.624791,0.309055
testem3-flat+field,orange,1.063331,0.219407
testem3-flat+field+msc,orange,1.124989,0.191286
testem3-flat+msc,orange,0.905856,0.361064


In [35]:
# Compare the Summit and Crusher event rates per problem on one log-scale
# axis: one color per machine, one marker shape per architecture.
fig, ax = plt.subplots()
ax.set_yscale('log')
for offset, color, machine, rates in [(-0.05, '#7A954F', 'Summit', summit_rates),
                                      (0.05, '#BC5544', 'Crusher', crusher_rates)]:
    for arch in ['cpu', 'gpu']:
        summary = rates.xs(arch, level='arch')
        # x position = problem index, nudged sideways per machine so the two
        # machines' markers do not sit on top of each other
        index = np.array([p_to_i[p]
                          for p in summary.index.get_level_values('problem')], dtype=float)
        index += offset

        mark = analyze.ARCH_SHAPES[arch]
        count = counts[(machine.lower(), arch)]
        # Error bars only (fmt='none'); the markers come from scatter below
        ax.errorbar(index, summary['mean'], summary['std'],
                    capsize=0, fmt='none', ecolor=(0.2,)*3)
        # Upper-case the architecture for the label only, leaving the loop
        # variable untouched
        ax.scatter(index, summary['mean'], c=color, marker=mark,
                   label=f"{machine} ({count} {arch.upper()})")
xax = ax.get_xaxis()
xax.set_ticks(np.arange(len(problems)))
# NOTE(review): assumes problem_to_abbr iterates in the same order as
# `problems`, so label i belongs to tick i — confirm.
xax.set_ticklabels(list(problem_to_abbr.values()), rotation=90)
ax.grid()
ax.set_axisbelow(True)
ax.legend()
ax.set_ylabel(r"Event rate [1/s]")
analyze.annotate_metadata(ax, summit)
# transparent=True matches every other PDF figure saved by this notebook
fig.savefig('plots/crusher-vs-summit.pdf', transparent=True)
plt.close(fig)