In [1]:
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import sys
import itertools
from collections import namedtuple
from pathlib import Path

# Render figures inline in the notebook, using the high-resolution 'retina' format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

# Autoreload mode 1 reloads only modules registered with %aimport, so edits to
# the local `analyze` module are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 1
%aimport analyze

# Apply the matplotlib rcParams overrides stored in plots/style.json.
with open('plots/style.json') as f:
    mpl.rcParams.update(json.load(f))

In [2]:
# Load the benchmark results stored under results/summit.
summit = analyze.Analysis('results/summit')
print(summit)
# Keep one result (testem3-flat / orange / gpu, instance 0) around; the next
# cells read system and input settings from it.
some_results = summit.load_results(('testem3-flat','orange','gpu'), 0)

Analysis for Celeritas v0.1.4 on summit


In [3]:
# Multiprocessor count recorded for the device in the loaded result's system info.
mp_per_gpu = some_results['system']['device']['multiprocessor_count']

In [4]:
# Input block of the loaded result, and the number of primaries that make up
# one event. calc_event_rate reads `primaries_per_event` as a notebook global.
inp = some_results['input']
primaries_per_event = inp['primary_gen_options']['primaries_per_event']
def calc_event_rate(summary):
    """Convert a per-primary timing summary into an event-rate summary.

    Parameters
    ----------
    summary
        Table indexable with ``'avg_time_per_primary'``; that entry is passed
        to ``analyze.inverse_summary``.

    Returns
    -------
    The inverted summary with its ``'mean'`` and ``'std'`` columns divided by
    the notebook-level ``primaries_per_event``.

    Notes
    -----
    ``analyze.inverse_summary`` presumably returns the reciprocal of the mean
    with a correspondingly propagated std -- confirm in ``analyze``.
    """
    rate = analyze.inverse_summary(summary['avg_time_per_primary'])
    # Rescale from "per primary" to "per event".
    for column in ('mean', 'std'):
        rate[column] /= primaries_per_event
    return rate

In [5]:
# 'stderr' entry of every record returned by summit.failures(); missing
# entries become 0 so they are falsy in the loop below.
failures = summit.failures()['stderr'].fillna(0)

for key, lines in failures.items():
    # Separator rule followed by the run key, each on its own line.
    print("=" * 78, key, sep="\n")
    if not lines:
        continue
    # Show at most the last three stderr lines.
    print("   " + " - \n".join(lines[-3:]))

('cms2018+field+msc', 'vecgeom', 'cpu', 0)
('cms2018+field+msc', 'vecgeom', 'cpu', 3)
('cms2018+field+msc', 'vecgeom', 'cpu', 5)
('simple-cms+field+msc', 'orange', 'cpu', 2)
('testem3-flat+field', 'orange', 'cpu', 0)
('testem3-flat+field', 'orange', 'cpu', 1)
ERROR:  One or more process (first noticed rank 0) terminated with signal 6
('testem3-flat+field', 'orange', 'cpu', 2)
('testem3-flat+field', 'orange', 'cpu', 3)
ERROR:  One or more process (first noticed rank 0) terminated with signal 6
('testem3-flat+field', 'orange', 'cpu', 5)
('testem3-flat+field', 'orange', 'cpu', 6)
ERROR:  One or more process (first noticed rank 0) terminated with signal 6
('testem3-flat+field', 'orange', 'gpu', 4)
('testem3-flat+field', 'orange', 'gpu', 5)
   celeritas: cuda error: an illegal memory access was encountered - 
[PID=117887, TID=-1][ 7/15]> /sw/summit/gcc/11.2.0-0/lib64/libgcc_s.so.1(_Unwind_RaiseException+0x3e8) [0x20000489c3f8] - 
ERROR:  One or more process (first noticed rank 0) terminated

In [6]:
# Count failure records per (problem, geo), one column per arch; combinations
# that never appear are shown as 0.
(
    failures
    .groupby(['problem', 'geo', 'arch'])
    .count()
    .unstack()
    .fillna(0)
)

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018+field+msc,vecgeom,3.0,0.0
simple-cms+field+msc,orange,1.0,0.0
testem3-flat+field,orange,6.0,2.0
testem3-flat+msc,orange,12.0,12.0


In [7]:
# Result columns handed to analyze.summarize_instances.
summit_summary_columns = [
    'avg_steps_per_primary',
    'avg_time_per_primary',
    'avg_time_per_step',
    'num_steps',
    'total_time',
    'unconverged',
    'slot_occupancy',
]
summed = analyze.summarize_instances(summit.result[summit_summary_columns])
#summed.xs('mean', axis=1, level=1).to_csv('summit.csv')

In [8]:
# NOTE(review): scratch cell -- `a` is not used by any other visible cell and
# the cell is unrelated to the analysis; candidate for removal.
a = {}
a.setdefault('foo', 0)

0

In [9]:
# Number of primaries for GPU instance 0 of every (problem, geo) pair.
summit.result['num_primaries'].xs(('gpu', 0), level=('arch', 'instance'))

problem               geo    
cms2018+field+msc     vecgeom    9100.0
cms2018               vecgeom    9100.0
simple-cms+field+msc  orange     9100.0
                      vecgeom    9100.0
simple-cms+field      orange     9100.0
simple-cms+msc        orange     9100.0
testem15+field+msc    orange     9100.0
                      vecgeom    9100.0
testem15+field        orange     9100.0
testem15              orange     9100.0
testem3-flat+field    orange     9100.0
testem3-flat+msc      orange        NaN
testem3-flat          orange     9100.0
                      vecgeom    9100.0
Name: num_primaries, dtype: float64

In [10]:
# Mean total time per (problem, geo), with the innermost index level (arch)
# moved to columns.
summed[('total_time', 'mean')].unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,96.847177,10.68253
cms2018+field+msc,vecgeom,697.069497,58.936256
simple-cms+field,orange,67.727103,9.431944
simple-cms+field+msc,orange,336.812132,14.71363
simple-cms+field+msc,vecgeom,86.150169,7.80324
simple-cms+msc,orange,73.585561,3.621243
testem15,orange,51.146285,2.51127
testem15+field,orange,57.532788,2.189932
testem15+field+msc,orange,71.797592,2.49357
testem15+field+msc,vecgeom,69.52407,2.590964


In [11]:
# Per-primary time summary restricted to the 'testem3-flat' problem.
summed['avg_time_per_primary'].xs('testem3-flat', level='problem')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std
geo,arch,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
orange,cpu,6.0,0.022527,0.000139
orange,gpu,6.0,0.000392,2.7e-05
vecgeom,cpu,6.0,0.020905,0.000207
vecgeom,gpu,6.0,0.000427,3.3e-05


In [12]:
# Show only the configurations whose mean 'unconverged' value is above zero.
unconv = summed['unconverged']
unconv[unconv['mean'] > 0]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cms2018+field+msc,vecgeom,cpu,6.0,0.5,0.547723
cms2018+field+msc,vecgeom,gpu,6.0,3.666667,1.032796
simple-cms+field,orange,gpu,6.0,0.166667,0.408248
simple-cms+field+msc,orange,cpu,6.0,0.333333,0.816497
simple-cms+field+msc,orange,gpu,6.0,0.333333,0.516398


In [13]:
problems = summit.problems()
problem_to_abbr = summit.problem_to_abbr(problems)
# Map each problem to its position in `problems` (used later as an x coordinate).
p_to_i = {name: position for position, name in enumerate(problems)}



In [14]:
# CPU/GPU ratio of the total-time summary.
speedup = analyze.get_cpugpu_ratio(summed['total_time'])

(fig, ax) = plt.subplots()
summit.plot_results(ax, speedup)
ax.set_ylabel("Speedup (7-CPU / 1-GPU wall time)")
# Anchor the y axis at zero and leave the upper limit automatic.
ax.set_ylim(bottom=0)
analyze.annotate_metadata(ax, summit)
fig.savefig('plots/speedups.pdf', transparent=True)
plt.close()



In [15]:
# Two stacked log-scale panels: inverted per-step and per-primary timings.
(fig, axes) = plt.subplots(
    nrows=2,
    figsize=(4, 4),
    subplot_kw={'yscale': 'log'},
)
for (ax, q) in zip(axes, ['step', 'primary']):
    throughput = analyze.inverse_summary(summed[f'avg_time_per_{q}'])
    summit.plot_results(ax, throughput)
    ax.set_ylabel(f'{q} per sec')
    ax.legend()
fig.savefig('plots/steps-vs-primaries.png', dpi=300)
plt.close()



In [16]:
# Event-rate summary for every configuration in the Summit summary table.
event_rate = calc_event_rate(summed)

In [17]:
# Event-rate summary restricted to the 'testem3-flat' problem.
event_rate.xs('testem3-flat', level='problem')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std
geo,arch,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
orange,cpu,6.0,0.034148,0.000211
orange,gpu,6.0,1.959849,0.132673
vecgeom,cpu,6.0,0.036796,0.000365
vecgeom,gpu,6.0,1.801984,0.137285


In [19]:
# Two stacked panels: event rate (log scale, 3/4 of the height) above
# slot occupancy (1/4 of the height).
(fig, (time_ax, occ_ax)) = plt.subplots(
    nrows=2, figsize=(4, 4),
    gridspec_kw=dict(height_ratios=[3, 1])
)
time_ax.set_yscale('log')
summit.plot_results(time_ax, event_rate)
time_ax.set_ylabel(r"Event rate [1/s]")
time_ax.legend()
# Suppress the x tick labels on the upper panel.
time_ax.set_xticklabels([])
summit.plot_results(occ_ax, summed['slot_occupancy'])
occ_ax.set_ylabel("Slot occupancy")
# Annotate an axes that belongs to this figure. A bare `ax` here would be a
# leftover from an earlier cell whose figure is already closed, so the stamp
# would never reach the saved PDF.
analyze.annotate_metadata(occ_ax, summit)
fig.savefig('plots/rate-occupancy.pdf', transparent=True)
plt.close()



In [20]:
# Show the speedup table with one decimal place, omitting rows that contain
# missing values.
# NOTE(review): DataFrame.applymap is deprecated as of pandas 2.1 (renamed to
# DataFrame.map); switch once the minimum supported pandas version allows it.
speedup.dropna().applymap("{:.1f}".format)

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,9.1,0.3
cms2018+field+msc,vecgeom,11.8,8.2
simple-cms+field,orange,7.2,7.7
simple-cms+field+msc,orange,22.9,46.0
simple-cms+field+msc,vecgeom,11.0,3.1
simple-cms+msc,orange,20.3,1.7
testem15,orange,20.4,0.3
testem15+field,orange,26.3,3.8
testem15+field+msc,orange,28.8,3.6
testem15+field+msc,vecgeom,26.8,2.9


## Action fraction pie charts


In [21]:
# 'mean' entries of the action-time table, transposed and sorted by index.
mean_action_times = (
    summit.action_times()
    .xs('mean', axis=1, level=1)
    .T
    .sort_index()
)

In [22]:
# One pie chart per architecture, for the plain and the field+msc CMS problems.
for problem in ('cms2018', 'cms2018+field+msc'):
    selection = (problem, 'vecgeom')
    per_arch = mean_action_times.xs(
        selection, axis=1, level=('problem', 'geo')
    ).dropna()

    for (arch, times) in per_arch.items():
        (fig, ax) = plt.subplots(figsize=(4, 4))
        ax.pie(times, labels=times.index, autopct='%1.1f%%', pctdistance=0.85)
        ax.axis('equal')

        parts = (*selection, arch)
        # Small grey provenance stamp in the lower-right corner of the figure.
        stamp = "{}\n{} on {}".format("/".join(parts), summit.version, summit.system)
        fig.text(
            0.98, 0.02, stamp,
            va='bottom', ha='right',
            fontstyle='italic', color=(0.5, 0.5, 0.5), size='xx-small',
            zorder=-100
        )
        fig.savefig('plots/{}.pdf'.format("-".join(parts)), transparent=True)
        plt.close()

### Plot per-step timing on GPU

In [23]:
# Instance-0 GPU results for the two CMS problems, in left-to-right plot order.
cms = [
    summit.load_results((problem, 'vecgeom', 'gpu'), 0)
    for problem in ('cms2018', 'cms2018+field+msc')
]

# One two-panel figure per plotting routine; the key names the output file.
plotters = {
    'counts': analyze.plot_counts,
    'time': analyze.plot_accum_time,
}
for label, plot in plotters.items():
    (fig, axes) = plt.subplots(ncols=2, figsize=(8, 2))

    for (i, (ax, data)) in enumerate(zip(axes, cms)):
        objs = plot(ax, data)
        analyze.annotate_metadata(ax, data['_metadata'])
        # Left panel clears the y label of objs['oax'], right panel that of
        # objs['ax'] (presumably a twin axis and the main axis -- confirm in
        # analyze).
        objs['oax' if i == 0 else 'ax'].set_ylabel(None)
    fig.savefig(f'plots/cms-{label}.pdf', transparent=True)
    plt.close()

## Crusher

In [24]:
# Load the benchmark results stored under results/crusher.
crusher = analyze.Analysis('results/crusher')
print(crusher)

Analysis for Celeritas v0.1.4 on crusher


In [25]:
# 'stderr' entry of every 'orange'-geometry record returned by
# crusher.failures(); missing entries become 0 so they are falsy below.
# Note that this rebinds the `failures` name used for Summit earlier.
failures = crusher.failures().xs('orange', level='geo')['stderr'].fillna(0)

for key, lines in failures.items():
    # Separator rule followed by the run key, each on its own line.
    print("=" * 78, key, sep="\n")
    if not lines:
        continue
    # Show at most the last three stderr lines.
    print("   " + " - \n".join(lines[-3:]))

('simple-cms+field+msc', 'cpu', 0)
('simple-cms+field+msc', 'cpu', 2)
('simple-cms+field+msc', 'cpu', 4)
('simple-cms+field+msc', 'cpu', 5)
('simple-cms+field+msc', 'gpu', 0)
:0:rocdevice.cpp            :2614: 3507402658180 us: 107364: [tid:0x7fffd04ad700] Device::callbackQueue aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 - 
srun: error: crusher093: task 0: Aborted
('simple-cms+field', 'cpu', 0)
('simple-cms+field', 'cpu', 2)
('simple-cms+field', 'cpu', 4)
('simple-cms+field', 'cpu', 6)
('simple-cms+field', 'cpu', 8)
('simple-cms+field', 'cpu', 10)
('simple-cms+field', 'cpu', 12)
('simple-cms+field', 'cpu', 14)
('testem3-flat+field', 'cpu', 2)
('testem3-flat+field', 'cpu', 3)
srun: error: crusher121: task 0: Segmentation fault (core dumped)
('testem3-flat+field', 'cpu', 5)
('testem3-flat+field', 'cpu', 6)
srun: error: crusher121: task 0: Segmentation fault
('testem3-flat+field', 'cpu'

In [28]:
# Count failure records per problem, one column per arch.
(
    failures
    .groupby(['problem', 'arch'])
    .count()
    .unstack()
)

arch,cpu,gpu
problem,Unnamed: 1_level_1,Unnamed: 2_level_1
simple-cms+field,8.0,
simple-cms+field+msc,4.0,1.0
testem3-flat+field,6.0,8.0
testem3-flat+msc,14.0,16.0


In [29]:
# Result columns handed to analyze.summarize_instances.
crusher_summary_columns = [
    'avg_steps_per_primary',
    'avg_time_per_primary',
    'avg_time_per_step',
    'num_steps',
    'total_time',
    'unconverged',
    'slot_occupancy',
]
# Drop the rows flagged by crusher.invalid before summarizing.
crusher_valid = crusher.result[~crusher.invalid]
csum = analyze.summarize_instances(crusher_valid[crusher_summary_columns])

In [30]:
# Mean total time per (problem, geo), with the innermost index level (arch)
# moved to columns.
csum[('total_time', 'mean')].unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,337.671725,225.296983
simple-cms+field+msc,orange,49.518809,
simple-cms+msc,orange,47.82684,2.256435
testem15,orange,34.571178,1.703626
testem15+field,orange,38.395822,1.82751
testem15+field+msc,orange,43.890736,2.133515
testem3-flat,orange,56.468168,3.312281
testem3-flat+field,orange,74.357735,5.160903
testem3-flat+msc,orange,87.382646,


In [31]:
# Relative spread (std / mean) of every summarized quantity.
rel_err = csum.xs('std', axis=1, level=1) / csum.xs('mean', axis=1, level=1)
# Flag entries whose relative spread exceeds 2%.
# NOTE(review): 0.02 is an unexplained threshold; consider naming it.
high_err = rel_err > 0.02
# Show only flagged entries, dropping rows and columns that have none.
rel_err[high_err].dropna(how='all').dropna(how='all', axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,avg_time_per_primary,avg_time_per_step,total_time,unconverged,slot_occupancy
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
simple-cms+field,orange,cpu,1.584988,1.584513,1.584988,1.85164,0.519781
simple-cms+field,orange,gpu,,,,0.039575,
simple-cms+field+msc,orange,cpu,0.065543,0.065546,0.065543,,
simple-cms+msc,orange,cpu,0.333632,0.333711,0.333632,,
simple-cms+msc,orange,gpu,,,,,0.032215
testem15,orange,cpu,0.287103,0.287142,0.287103,,
testem15+field,orange,cpu,0.339854,0.339912,0.339854,,
testem15+field+msc,orange,cpu,0.331431,0.331426,0.331431,,
testem3-flat,orange,cpu,0.284728,0.284898,0.284728,,
testem3-flat+field,orange,cpu,0.160357,0.160302,0.160357,,


In [32]:
# CPU/GPU ratio of the Crusher total-time summary.
analyze.get_cpugpu_ratio(csum['total_time'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,1.498785,2.375568
simple-cms+field+msc,orange,,
simple-cms+msc,orange,21.195749,7.074387
testem15,orange,20.292699,5.82744
testem15+field,orange,21.009914,7.140897
testem15+field+msc,orange,20.572035,6.819173
testem3-flat,orange,17.048119,4.854843
testem3-flat+field,orange,14.407891,2.320423
testem3-flat+msc,orange,,


In [33]:
# Total-time summary for Crusher; its index is reused below to select the
# matching Summit rows.
crusher_times = csum['total_time']
crusher_times

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
simple-cms+field,orange,cpu,8.0,337.671725,535.205634
simple-cms+field,orange,gpu,8.0,225.296983,1.12102
simple-cms+field+msc,orange,cpu,5.0,49.518809,3.245599
simple-cms+msc,orange,cpu,8.0,47.82684,15.956561
simple-cms+msc,orange,gpu,8.0,2.256435,0.021221
testem15,orange,cpu,8.0,34.571178,9.925499
testem15,orange,gpu,8.0,1.703626,0.01049
testem15+field,orange,cpu,8.0,38.395822,13.048956
testem15+field,orange,gpu,8.0,1.82751,0.008074
testem15+field+msc,orange,cpu,8.0,43.890736,14.546768


In [34]:
crusher_rates = calc_event_rate(csum)
# Restrict the Summit summary to the configurations present in the Crusher
# total-time table before computing its event rates.
summit_rates = calc_event_rate(summed.loc[crusher_times.index])

# Per-(machine, arch) unit counts shown in the legend of the comparison plot
# (e.g. "Summit (7 CPU)").
counts = {
    ('summit', 'cpu'): 7,
    ('summit', 'gpu'): 1,
    ('crusher', 'cpu'): 8,
    ('crusher', 'gpu'): 1,
}

In [35]:
# Ratio of mean event rates, Crusher over Summit, per (problem, geo) with one
# column per arch.
(crusher_rates['mean'] / summit_rates['mean']).unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,0.200571,0.041864
simple-cms+field+msc,orange,6.801701,
simple-cms+msc,orange,1.538583,1.604851
testem15,orange,1.479449,1.474073
testem15+field,orange,1.498413,1.198315
testem15+field+msc,orange,1.635826,1.168762
testem3-flat,orange,1.633999,1.078322
testem3-flat+field,orange,1.710334,0.699125
testem3-flat+msc,orange,,


In [37]:
# Compare event rates on the two machines on a shared log-scale axis, with one
# scatter series (plus error bars) per (machine, architecture) pair.
fig, ax = plt.subplots()
ax.set_yscale('log')

machine_styles = [
    # (x offset, marker color, legend name, event-rate summary)
    (-0.05, '#7A954F', 'Summit', summit_rates),
    (0.05, '#BC5544', 'Crusher', crusher_rates),
]
for offset, color, machine, rates in machine_styles:
    for arch in ['cpu', 'gpu']:
        summary = rates.xs(arch, level='arch')
        problem_names = summary.index.get_level_values('problem')
        # x position of each problem, nudged sideways per machine so the two
        # machines' markers do not sit on top of each other.
        index = np.array([p_to_i[p] for p in problem_names], dtype=float) + offset

        mark = analyze.ARCH_SHAPES[arch]
        count = counts[(machine.lower(), arch)]
        ax.errorbar(index, summary['mean'], summary['std'],
                    capsize=0, fmt='none', ecolor=(0.2,)*3)
        # The loop variable is left untouched; upper-casing happens only in
        # the legend label.
        ax.scatter(index, summary['mean'], c=color, marker=mark,
                   label=f"{machine} ({count} {arch.upper()})")

xax = ax.get_xaxis()
xax.set_ticks(np.arange(len(problems)))
xax.set_ticklabels(list(problem_to_abbr.values()), rotation=90)
ax.grid()
# Draw the grid underneath the markers.
ax.set_axisbelow(True)
ax.legend()
ax.set_ylabel(r"Event rate [1/s]")
analyze.annotate_metadata(ax, summit)
# NOTE(review): unlike the other PDF exports in this notebook, this one is
# saved without transparent=True -- confirm whether that is intentional.
fig.savefig('plots/crusher-vs-summit.pdf')
plt.close()