In [1]:
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import sys
import itertools
from collections import namedtuple
from pathlib import Path

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

%load_ext autoreload
%autoreload 1
%aimport analyze

# Read the rcParams overrides stored in plots/style.json and apply them to
# matplotlib's global configuration.
with open('plots/style.json') as style_file:
    style_overrides = json.load(style_file)
mpl.rcParams.update(style_overrides)

## Wildstyle failures

In [2]:
# Build the analysis object for the results/wildstyle directory; printing it
# shows which code version and machine the results belong to.
ws = analyze.Analysis('results/wildstyle')
print(ws)

Analysis for Celeritas v0.3.2-rc.1 on wildstyle


In [3]:
# Turn the wildstyle failure records into a table and display it as a frame.
# NOTE(review): to_frame()/items() suggest ftab is a pandas Series — confirm
# against analyze.make_failure_table.
ftab = analyze.make_failure_table(ws.failures())
ftab.to_frame()

Unnamed: 0,Failure


In [4]:
# Print, one per line, the key of every failure entry whose value contains
# the substring/element 'is_soft'.
soft_failure_keys = [key for (key, value) in ftab.items() if 'is_soft' in value]
print("\n".join(soft_failure_keys))




In [5]:
# Summarize the 'unconverged' column of the wildstyle results.
# NOTE(review): summarize_instances presumably aggregates repeated instances
# into per-quantity statistics such as 'mean' — confirm in analyze.py.
# `summed` is rebound to the Summit summary in a later cell.
summed = analyze.summarize_instances(ws.result[['unconverged']])

Unconverged tracks (from runs that didn't fail):

In [6]:
# Keep only the entries with a nonzero mean count of unconverged tracks and
# spread the 'arch' index level into columns for display.
unconv = summed[('unconverged', 'mean')]
unconv.loc[unconv > 0].unstack('arch')

Unnamed: 0_level_0,arch
problem,geo


## Summit results

In [2]:
# Build the analysis object for the results/summit directory and print its
# description (code version and machine).
summit = analyze.Analysis('results/summit')
print(summit)
# Summarize only the rows selected by `summit.successful`; rows in which every
# value is missing are dropped before summarizing.
summed = analyze.summarize_instances(summit.result[summit.successful].dropna(how='all'))

Analysis for Celeritas v0.3.2 on summit


In [3]:
# Load the detailed output of one GPU run and display the device description
# recorded in its 'system' section.
# NOTE(review): the trailing 0 presumably selects the first instance — confirm.
deets = summit.load_results(('testem3-flat','orange','gpu'), 0)
deets['system']['device']

{'capability_major': 7,
 'capability_minor': 0,
 'clock_rate': 1530000,
 'default_block_size': 256,
 'device_id': 0,
 'eu_per_cu': 1,
 'max_blocks_per_grid': 2147483647,
 'max_blocks_per_multiprocessor': 32,
 'max_cache_size': 6291456,
 'max_threads_per_block': 1024,
 'max_threads_per_cu': 2048,
 'memory_clock_rate': 877000,
 'multiprocessor_count': 80,
 'name': 'Tesla V100-SXM2-16GB',
 'platform': 'cuda',
 'regs_per_block': 65536,
 'regs_per_multiprocessor': 65536,
 'shared_mem_per_block': 49152,
 'threads_per_warp': 32,
 'total_const_mem': 65536,
 'total_global_mem': 16911433728}

### Failures

Average number of unconverged tracks:

In [4]:
# Mean number of unconverged tracks on Summit; display only the nonzero
# entries with the 'arch' index level spread into columns.
unconv_summary = analyze.summarize_instances(summit.result['unconverged'])
unconv = unconv_summary['mean']
unconv.loc[unconv > 0].unstack('arch')

Unnamed: 0_level_0,arch
problem,geo


### Timing tables

In [5]:
# Two stacked log-scale panels: total run time (tall) above setup time (short).
fig, (run_ax, setup_ax) = plt.subplots(
    nrows=2,
    gridspec_kw={'height_ratios': [3, 1]},
    subplot_kw={'yscale': 'log'},
)

# Upper panel: total run time, with a legend.
summit.plot_results(run_ax, summed['total_time'])
run_ax.legend()
run_ax.set_ylabel('Run [s]')
# Hide the upper panel's x tick labels; only the lower panel shows them.
run_ax.tick_params(labelbottom=False)

# Lower panel: setup time.
summit.plot_results(setup_ax, summed['setup_time'])
setup_ax.set_ylabel('Setup [s]')

analyze.annotate_metadata(run_ax, summit)
fig.tight_layout()
fig.savefig('plots/timing.pdf', transparent=True)
plt.close(fig)

In [3]:
# Mean total run time with the innermost index level unstacked into columns
# (the rendered table shows one column per architecture).
times = summed[('total_time', 'mean')].unstack()
# NOTE(review): float_fmt_transform(2) appears to render two decimals and a
# dash for missing values, judging by the displayed table — confirm.
times.style.format(analyze.float_fmt_transform(2))

Unnamed: 0_level_0,arch,cpu,gpu,gpu+sync
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
cms2018,vecgeom,125.77,13.11,—
cms2018+field+msc,vecgeom,281.30,41.81,42.23
simple-cms+field,orange,77.35,3.12,—
simple-cms+field+msc,orange,103.73,3.57,—
simple-cms+field+msc,vecgeom,101.88,3.66,—
simple-cms+msc,orange,91.51,3.46,—
testem15,orange,61.66,2.76,—
testem15+field,orange,72.11,2.83,3.06
testem15+field,vecgeom,—,—,2.59
testem15+field+msc,orange,92.72,2.89,—


In [4]:
# Event rate statistics for the Summit runs; plotted later as
# "Throughput [event/s]".
event_rate = analyze.calc_event_rate(summit, summed)

In [7]:
# Mean event rate for the 'testem3-flat+field+msc' problem only, arranged with
# the 'arch' index level as columns.
testem3 = event_rate['mean'].xs('testem3-flat+field+msc', level='problem').unstack('arch')

In [9]:
# Express every event rate relative to the vecgeom CPU rate (which becomes 1).
vecgeom_cpu_rate = testem3.loc['vecgeom', 'cpu']
testem3 / vecgeom_cpu_rate

arch,cpu,gpu,gpu+sync
geo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
orange,1.064366,28.589785,28.558007
vecgeom,1.0,15.481465,15.326692


In [10]:
# CPU-to-GPU ratio computed from the total run time summary (the rendered
# table has 'mean' and 'std' columns per problem/geometry).
speedup = analyze.get_cpugpu_ratio(summed['total_time'])
# Display only the rows without missing values.
# NOTE(review): float_fmt_transform(1) appears to render one decimal — confirm.
speedup.dropna().style.format(analyze.float_fmt_transform(1))

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,9.6,0.1
cms2018+field+msc,vecgeom,6.7,0.1
simple-cms+field,orange,24.8,2.3
simple-cms+field+msc,orange,29.0,1.9
simple-cms+field+msc,vecgeom,27.8,1.6
simple-cms+msc,orange,26.5,0.2
testem15,orange,22.4,2.4
testem15+field,orange,25.5,2.6
testem15+field+msc,orange,32.1,3.2
testem15+field+msc,vecgeom,31.0,2.1


In [12]:
# Summary statistics of the mean speedup, one column per geometry.
speedup['mean'].unstack('geo').describe()

geo,orange,vecgeom
count,10.0,6.0
mean,26.812873,19.372603
std,3.761416,10.16251
min,22.36267,6.727459
25%,24.369168,11.067115
50%,25.983358,20.525778
75%,28.48716,27.276708
max,33.730414,31.015356


In [10]:
# Write the CPU/GPU speedup summary to results/summit/speedup.md as a
# Markdown table with one row per (problem, geometry) entry of `speedup`.
headers = ["Problem", "Geometry", "Speedup"]

# Cell text for the table body: [problem label, geometry, formatted speedup].
# The problem label is written only on the first row of each run of identical
# problem names; the following rows leave that cell blank.
speedup_out = np.full((len(speedup), len(headers)), "", dtype=object)
_abbrev = summit.problem_to_abbr()
prev_prob = None
for (i, ((prob, geo), row)) in enumerate(speedup.iterrows()):
    if prob != prev_prob:
        abbr = _abbrev[prob]
        speedup_out[i, 0] = f"{prob} [{abbr}]"
    speedup_out[i, 1] = geo
    # The row values are unpacked positionally into the two placeholders
    # (the displayed speedup table has the columns 'mean' and 'std').
    speedup_out[i, 2] = "{:.1f}× (±{:.1f})".format(*row)
    prev_prob = prob

# Pad every column to its widest cell, header included, so the raw Markdown
# lines up; the first two columns are left-aligned, the last right-aligned.
widths = np.vectorize(len)(np.concatenate([speedup_out, [headers]], axis=0))
col_widths = np.max(widths, axis=0)
fmt = f"| {{:<{col_widths[0]}}} | {{:<{col_widths[1]}}} | {{:>{col_widths[2]}}} |\n".format

# The cells contain non-ASCII characters ("×", "±"), so pin the encoding
# instead of relying on the platform default.
with open("results/summit/speedup.md", "w", encoding="utf-8") as f:
    f.write(fmt(*headers))
    f.write(fmt(*["-"*w for w in col_widths]))
    for out_row in speedup_out:
        f.write(fmt(*out_row))

In [11]:
# The speedup plot labels the ratio as "7-CPU / 1-GPU wall time"; multiplying
# by 7 presumably converts it to a single-CPU : single-GPU equivalence
# (TODO confirm the intended interpretation).
cpus_per_gpu = 7
equivalence = speedup['mean'].dropna() * cpus_per_gpu
print("CPU:GPU equivalence: {min:.0f}× to {max:.0f}×".format(
    min=equivalence.min(), max=equivalence.max()))

CPU:GPU equivalence: 47× to 236×


In [12]:
# Fraction of each instance's total time that is spent in actions whose label
# starts with 'along-step-' or 'geo-' (treated here as the geometry routines).
valid_action_times = summit.result['action_times'][summit.valid]
action_times_inst = analyze.unstack_subdict(valid_action_times).T
total_time_inst = summit.result['total_time']
geo_actions = [lab for lab in action_times_inst.index
               if lab.startswith(('along-step-', 'geo-'))]
geo_time_inst = action_times_inst.loc[geo_actions].sum()
geo_frac_inst = geo_time_inst / total_time_inst
geo_frac = analyze.summarize_instances(geo_frac_inst)

In [14]:
# Mean geometry time fraction, showing only the 'cpu' and 'gpu+sync' columns.
geo_frac['mean'].unstack('arch')[['cpu', 'gpu+sync']]

Unnamed: 0_level_0,arch,cpu,gpu+sync
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,0.456218,
cms2018+field+msc,vecgeom,0.691245,0.902085
simple-cms+field,orange,0.329535,
simple-cms+field+msc,orange,0.498907,
simple-cms+field+msc,vecgeom,0.492256,
simple-cms+msc,orange,0.42977,
testem15,orange,0.197602,
testem15+field,orange,0.313485,
testem15+field+msc,orange,0.463364,
testem15+field+msc,vecgeom,0.447223,


### Plots

In [15]:
# Problem names, their abbreviations, and each problem's position in the list
# (used as the x coordinate when plotting per-problem values).
problems = summit.problems()
problem_to_abbr = summit.problem_to_abbr(problems)
p_to_i = {name: position for (position, name) in enumerate(problems)}

In [16]:
# Plot the CPU/GPU speedup per problem and save it both as a PDF (transparent
# background) and as a PNG (opaque background, 150 dpi).
(fig, ax) = plt.subplots()
summit.plot_results(ax, speedup)
ax.set_ylabel("Speedup (7-CPU / 1-GPU wall time)")
# Anchor the y axis at zero and let matplotlib choose the upper limit.
ax.set_ylim(bottom=0)
analyze.annotate_metadata(ax, summit)
fig.tight_layout()
for (path, options) in [
        ('plots/speedups.pdf', dict(transparent=True)),
        ('results/summit/speedup.png', dict(transparent=False, dpi=150))]:
    fig.savefig(path, **options)
plt.close(fig)

In [17]:
# Two stacked log-scale panels built from the inverse of the average time per
# step and per primary, respectively.
fig, axes = plt.subplots(nrows=2, figsize=(4,4), subplot_kw=dict(yscale='log'))
bottom_ax = axes[-1]
for (ax, q) in zip(axes, ['step', 'primary']):
    rate_summary = analyze.inverse_summary(summed['avg_time_per_' + q])
    summit.plot_results(ax, rate_summary)
    ax.set_ylabel(q + ' per sec')
    # Only the bottom panel keeps its x tick labels.
    if ax is not bottom_ax:
        ax.tick_params(labelbottom=False)
    ax.legend()
fig.tight_layout()
fig.savefig('plots/steps-vs-primaries.pdf')
plt.close(fig)

In [18]:
# Event throughput (tall, log-scale panel) above the percentage of time spent
# in geometry (short, linear panel).
fig, (time_ax, geo_ax) = plt.subplots(
    nrows=2,
    gridspec_kw={'height_ratios': [3, 1]},
)

# Upper panel: throughput.
time_ax.set_yscale('log')
summit.plot_results(time_ax, event_rate)
time_ax.set_ylabel(r"Throughput [event/s]")
# Start the y axis at half of the smallest mean rate; the upper limit is
# left to matplotlib.
lowest_rate = event_rate['mean'].min()
time_ax.set_ylim([0.5 * lowest_rate, None])
time_ax.legend()
time_ax.tick_params(labelbottom=False)
analyze.annotate_metadata(time_ax, summit)

# Lower panel: geometry fraction converted to percent.
summit.plot_results(geo_ax, geo_frac * 100)
geo_ax.set_ylabel("Geometry [%]")
geo_ax.set_ylim([0, 100])

fig.tight_layout()
fig.savefig('plots/throughput-geo.pdf', transparent=True)
plt.close(fig)

## Action fraction pie charts

In [3]:
# Mean total time per entry of the Summit summary.
# NOTE(review): avg_time is not referenced by any later cell visible here, and
# .T has no effect if this selection is a Series — confirm whether it is needed.
avg_time = summed[('total_time', 'mean')].T
# Keep only the 'mean' sub-column of every action time, then transpose so the
# problem/geo levels end up on the columns (the pie-chart cell selects them
# with xs(..., axis=1)).
mean_action_times = summit.action_times().xs('mean', axis=1, level=1).T

In [5]:
# Save one pie chart of mean action times for every available
# (problem, geometry, architecture) combination of the selected problems.

# The version/system labels are the same for every chart: look them up once
# instead of on every loop iteration.
md = {k: getattr(summit, k) for k in ["version", "system"]}

for (prob, geo) in itertools.product(
        ["testem15+field", "testem3-flat+field+msc", "cms2018+field+msc"],
        ["vecgeom", "orange"]):
    try:
        plot_times = mean_action_times.xs(
            (prob, geo), axis=1, level=('problem', 'geo')
        ).dropna(axis=1, how='all')
    except KeyError:
        # This problem/geometry pair is not present in the results at all.
        plot_times = pd.DataFrame()
    if plot_times.empty:
        # Either absent, or present but with every column entirely missing.
        print("Missing problem/geo:", prob, geo)
        continue
    pieplot = analyze.PiePlotter(plot_times)

    # One figure per entry of pieplot.times (used as the architecture label).
    for arch in pieplot.times:
        (fig, ax) = plt.subplots(figsize=(3, 3), subplot_kw=dict(aspect="equal"),
                                 layout="constrained")
        pieplot(ax, arch)
        name = (prob, geo, arch)
        slashname = "/".join(name)
        # Small gray provenance note in the lower-right corner of the figure.
        fig.text(
            0.98, 0.1, f"{slashname}\n{md['version']} on {md['system']}",
            va='bottom', ha='right',
            fontstyle='italic', color=(0.75,)*4, size='xx-small',
        )

        dashname = "-".join(name)
        fig.savefig(f'plots/actions-{dashname}.pdf', transparent=True)
        # Close this specific figure so figures do not accumulate in the loop.
        plt.close(fig)

Missing problem/geo: cms2018+field+msc orange


## Plot per-step timing on GPU

In [30]:
# For each selected problem (vecgeom geometry, GPU run), save a two-panel
# per-step figure and a separate time-per-step figure.
for p in ['cms2018', 'cms2018+field+msc']:
    # NOTE(review): the trailing 0 presumably selects the first instance — confirm.
    data = summit.load_results((p, 'vecgeom', 'gpu'), 0)

    (fig, axes) = plt.subplots(nrows=2, figsize=(3, 4), sharex=True)
    plotters = [analyze.plot_counts, analyze.plot_accum_time_inv]
    for (i, (ax, plot)) in enumerate(zip(axes, plotters)):
        plot(ax, data)
        analyze.annotate_metadata(ax, data['_metadata'])
        if i == 0:
            # The panels share the x axis: label only the bottom one.
            ax.set_xlabel(None)
    # Tighten the layout BEFORE saving; doing it afterwards leaves the saved
    # file with the untightened layout.
    fig.tight_layout()
    fig.savefig(f'plots/per-step-{p}.pdf', transparent=True)
    plt.close(fig)

    (fig, ax) = plt.subplots(figsize=(3, 2))
    analyze.plot_time_per_step(ax, data)
    fig.tight_layout()
    fig.savefig(f'plots/time-per-step-{p}.pdf', transparent=True)
    plt.close(fig)

In [31]:
# NOTE(review): convenience shell command, presumably meant to open the
# working directory in a file browser; it is platform-dependent and not
# required by any later cell.
!open .

## Crusher

In [27]:
# Build the analysis object for the results/crusher directory and print its
# description (code version and machine).
crusher = analyze.Analysis('results/crusher')
print(crusher)

Analysis for Celeritas v0.3.2-rc.1 on crusher


In [28]:
# VecGeom failures aren't really failures; just missing capability
#failures = crusher.failures().xs('orange', level='geo').fillna(1)
#failures.groupby(['problem', 'arch']).count().unstack()

In [29]:
# Summarize the Crusher rows selected by `crusher.successful`; rows in which
# every value is missing are dropped first.
csum = analyze.summarize_instances(crusher.result[crusher.successful].dropna(how='all'))

In [30]:
# Mean total time on Crusher with the innermost index level unstacked into
# columns (the rendered table shows one column per architecture).
csum[('total_time', 'mean')].unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,69.481841,21.3523
simple-cms+field+msc,orange,86.764187,18.147253
simple-cms+msc,orange,132.17943,15.268424
testem15,orange,85.156364,46.524777
testem15+field,orange,103.600624,18.58346
testem15+field+msc,orange,108.385038,15.38735
testem3-flat,orange,144.221822,15.395753
testem3-flat+field,orange,156.642303,26.384439
testem3-flat+field+msc,orange,215.270396,31.032743
testem3-flat+msc,orange,171.683129,17.14904


In [31]:
# Relative spread (std / mean) of every summarized quantity on Crusher.
csum_std = csum.xs('std', axis=1, level=1)
csum_mean = csum.xs('mean', axis=1, level=1)
rel_err = csum_std / csum_mean
# Flag entries whose relative spread exceeds 2%, then display only the rows
# and columns that contain at least one flagged entry.
high_err = rel_err > 0.02
rel_err.where(high_err).dropna(how='all').dropna(how='all', axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,avg_time_per_primary,avg_time_per_step,num_step_iters,pre_emptying_time,setup_time,slot_occupancy,total_time
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
simple-cms+field,orange,cpu,0.275322,0.275406,,0.058877,,,0.275322
simple-cms+field,orange,gpu,0.042716,0.042619,,0.369525,0.028559,,0.042716
simple-cms+field+msc,orange,cpu,0.323856,0.323853,,0.028052,,,0.323856
simple-cms+field+msc,orange,gpu,,,,0.142175,0.067381,,
simple-cms+msc,orange,cpu,0.312771,0.312517,,0.479313,,,0.312771
simple-cms+msc,orange,gpu,0.029446,0.02962,0.023591,,0.106972,0.023168,0.029446
testem15,orange,cpu,0.492642,0.492691,,0.664846,,,0.492642
testem15,orange,gpu,0.057418,0.057406,,,0.185061,,0.057418
testem15+field,orange,cpu,0.341683,0.34171,,0.538967,,,0.341683
testem15+field,orange,gpu,0.052608,0.052572,,,0.076118,,0.052608


In [32]:
# CPU-to-GPU ratio computed from the Crusher total time summary.
analyze.get_cpugpu_ratio(csum['total_time'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,3.254068,0.906636
simple-cms+field+msc,orange,4.78112,1.550076
simple-cms+msc,orange,8.657045,2.719646
testem15,orange,1.830344,0.907808
testem15+field,orange,5.574883,1.927291
testem15+field+msc,orange,7.043775,2.617844
testem3-flat,orange,9.367637,3.372142
testem3-flat+field,orange,5.93692,2.75278
testem3-flat+field+msc,orange,6.936879,2.139982
testem3-flat+msc,orange,10.011239,3.600818


In [33]:
# Total time statistics on Crusher; its index is reused by a later cell to
# select the matching rows of the Summit summary.
crusher_times = csum['total_time']
crusher_times

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
simple-cms+field,orange,cpu,8.0,69.481841,19.129892
simple-cms+field,orange,gpu,8.0,21.3523,0.912084
simple-cms+field+msc,orange,cpu,8.0,86.764187,28.099084
simple-cms+field+msc,orange,gpu,8.0,18.147253,0.274085
simple-cms+msc,orange,cpu,8.0,132.17943,41.341899
simple-cms+msc,orange,gpu,8.0,15.268424,0.449586
testem15,orange,cpu,8.0,85.156364,41.951578
testem15,orange,gpu,8.0,46.524777,2.671343
testem15+field,orange,cpu,8.0,103.600624,35.398604
testem15+field,orange,gpu,8.0,18.58346,0.977644


In [34]:
# Event rates for both machines. The Summit summary is restricted to the
# entries that also appear in the Crusher results so the two can be compared
# row by row.
crusher_rates = analyze.calc_event_rate(crusher, csum)
summit_rates = analyze.calc_event_rate(summit, summed.loc[crusher_times.index])

# Count shown next to each (machine, architecture) in the comparison plot's
# legend, e.g. "Summit (7 CPU)".
counts = {
    ('summit', 'cpu'): 7,
    ('summit', 'gpu'): 1,
    ('crusher', 'cpu'): 8,
    ('crusher', 'gpu'): 1,
}

In [35]:
# Ratio of the Crusher mean event rate to the Summit mean event rate
# (values above 1 mean the Crusher rate is higher).
(crusher_rates['mean'] / summit_rates['mean']).unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,1.113305,0.149299
simple-cms+field+msc,orange,1.195534,0.200784
simple-cms+msc,orange,0.692312,0.218815
testem15,orange,0.724107,0.062677
testem15+field,orange,0.695997,0.146652
testem15+field+msc,orange,0.855457,0.187407
testem3-flat,orange,0.732642,0.271122
testem3-flat+field,orange,0.788517,0.20533
testem3-flat+field+msc,orange,1.004549,0.250785
testem3-flat+msc,orange,1.109988,0.323611


In [36]:
# Compare the Summit and Crusher event rates per problem on one log-scale axis.
fig, ax = plt.subplots()
ax.set_yscale('log')

# (x offset, marker color, legend name, event-rate table) for each machine.
# The small opposite offsets keep the two machines' markers from overlapping.
machines = [(-0.05, '#7A954F', 'Summit', summit_rates),
            (0.05, '#BC5544', 'Crusher', crusher_rates)]
for (offset, color, machine, rates) in machines:
    for arch in ['cpu', 'gpu']:
        summary = rates.xs(arch, level='arch')
        # x position: the problem's index in `problems`, shifted per machine.
        index = np.array([p_to_i[p]
                          for p in summary.index.get_level_values('problem')], dtype=float)
        index += offset

        count = counts[(machine.lower(), arch)]
        ax.errorbar(index, summary['mean'], summary['std'],
                    capsize=0, fmt='none', ecolor=(0.2,)*3)
        ax.scatter(index, summary['mean'], c=color, marker=analyze.ARCH_SHAPES[arch],
                   label=f"{machine} ({count} {arch.upper()})")

xax = ax.get_xaxis()
xax.set_ticks(np.arange(len(problems)))
# Look the labels up by problem name so they are guaranteed to line up with
# the p_to_i tick positions, whatever order the abbreviation mapping is in.
xax.set_ticklabels([problem_to_abbr[p] for p in problems], rotation=90)
ax.grid()
ax.set_axisbelow(True)
ax.legend()
ax.set_ylabel(r"Event rate [1/s]")
analyze.annotate_metadata(ax, summit)
fig.tight_layout()
fig.savefig('plots/crusher-vs-summit.pdf')
plt.close(fig)