In [1]:
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import re
import sys
import itertools
from collections import namedtuple
from pathlib import Path

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

%load_ext autoreload
%autoreload 1
%aimport analyze

with open('plots/style.json') as f:
    mpl.rcParams.update(json.load(f))

In [2]:
summit = analyze.Analysis('results/summit')
print(summit)

Analysis for Celeritas v0.1.3+609057e6 or v0.1.2+fb3f5506 on summit


In [3]:
inp = summit.load_results(('testem3-flat','orange','gpu'), 0)['input']
primaries_per_event = inp['primary_gen_options']['primaries_per_event']
def calc_event_rate(summary):
    """Turn a per-primary timing summary into an event-rate summary.

    The ``'avg_time_per_primary'`` entry of ``summary`` is passed through
    ``analyze.inverse_summary`` (presumably yielding primaries per second --
    confirm against ``analyze``), and the resulting ``'mean'`` and ``'std'``
    columns are divided by the notebook-level ``primaries_per_event``.

    Returns the rescaled frame produced by ``analyze.inverse_summary``.
    """
    rate = analyze.inverse_summary(summary['avg_time_per_primary'])
    # Same scale factor for the value and its uncertainty.
    for column in ('mean', 'std'):
        rate[column] /= primaries_per_event
    return rate

In [4]:
summit.result.columns

Index(['action_times', 'active_hwm', 'avg_steps_per_primary',
       'avg_time_per_primary', 'avg_time_per_step', 'emptying_step',
       'num_primaries', 'num_step_iters', 'num_steps', 'pre_emptying_time',
       'queue_hwm', 'slot_occupancy', 'total_time', 'unconverged', 'exception',
       'failure'],
      dtype='object')

In [5]:
# Print the tail (last three entries) of the captured stderr for every failed
# Summit run. `Series.items()` replaces `iteritems()`, which was deprecated in
# pandas 1.5 and removed in pandas 2.0; it yields the same (index, value) pairs.
for key, lines in summit.failures()['stderr'].items():
    print("="*78)
    print(key)
    print("   " + " - \n".join(lines[-3:]))

('cms2018+field+msc', 'vecgeom', 'cpu', 1)
   terminate called after throwing an instance of 'celeritas::RuntimeError' - 
  what():  celeritas: runtime error: track started outside the geometry at {nan,nan,nan} - 
6 ### address: 0x1b7400019564, ERROR:  One or more process (first noticed rank 0) terminated with signal 6
('cms2018+field+msc', 'vecgeom', 'cpu', 2)
   terminate called after throwing an instance of 'celeritas::RuntimeError' - 
  what():  celeritas: runtime error: track started outside the geometry at {nan,nan,nan} - 
ERROR:  One or more process (first noticed rank 0) terminated with signal 6
('cms2018+field+msc', 'vecgeom', 'cpu', 3)
   terminate called after throwing an instance of 'celeritas::RuntimeError' - 
  what():  celeritas: runtime error: track started outside the geometry at {nan,nan,nan} - 
ERROR:  One or more process (first noticed rank 0) terminated with signal 6
('cms2018+field+msc', 'vecgeom', 'cpu', 4)
   terminate called after throwing an instance of 'celer

In [6]:
summit.failures()['stderr'].groupby(['problem', 'geo', 'arch']).count().unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018+field+msc,vecgeom,4.0,
simple-cms+field+msc,orange,,1.0
testem3-flat+field,orange,2.0,4.0
testem3-flat+msc,orange,,1.0


In [7]:
summed = analyze.summarize_instances(summit.result[[
    'avg_steps_per_primary',
    'avg_time_per_primary',
    'avg_time_per_step',
    'num_steps',
    'total_time',
    'unconverged',
    'slot_occupancy']])
#summed.xs('mean', axis=1, level=1).to_csv('summit.csv')

In [44]:
# NOTE(review): this looks like a leftover scratch cell -- its execution count
# (44) is out of sequence and `a` is not used by any other cell shown here.
# Confirm and delete so the notebook survives Restart & Run All cleanly.
a = {}
a.setdefault('foo', 0)

0

In [43]:
summit.result['num_primaries'].xs(('gpu', 0), level=('arch', 'instance'))

problem               geo    
cms2018+field+msc     vecgeom    9100.0
cms2018               vecgeom    9100.0
simple-cms+field+msc  orange     9100.0
                      vecgeom    9100.0
simple-cms+field      orange     9100.0
simple-cms+msc        orange     9100.0
testem15+field+msc    orange     9100.0
                      vecgeom    9100.0
testem15+field        orange     9100.0
testem15              orange     9100.0
testem3-flat+field    orange     9100.0
testem3-flat+msc      orange     9100.0
testem3-flat          orange     9100.0
                      vecgeom    9100.0
Name: num_primaries, dtype: float64

In [8]:
summed[('total_time', 'mean')].unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,97.41865,10.4962
cms2018+field+msc,vecgeom,270.417427,63.749321
simple-cms+field,orange,69.291619,9.760228
simple-cms+field+msc,orange,90.273689,15.803793
simple-cms+field+msc,vecgeom,87.970383,15.538213
simple-cms+msc,orange,79.307975,3.552672
testem15,orange,51.653605,2.455199
testem15+field,orange,58.153961,2.185751
testem15+field+msc,orange,74.631013,2.618991
testem15+field+msc,vecgeom,72.374354,2.457692


In [9]:
summed['avg_time_per_primary'].xs('testem3-flat', level='problem')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std
geo,arch,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
orange,cpu,6.0,0.022929,0.000628
orange,gpu,6.0,0.000399,3.3e-05
vecgeom,cpu,6.0,0.021264,7e-05
vecgeom,gpu,6.0,0.000422,3.3e-05


In [10]:
#rel_err = summed.xs('std', axis=1, level=1) / summed.xs('mean', axis=1, level=1)
#high_err = rel_err > 0.05
#rel_err[high_err].dropna(how='all').dropna(how='all', axis=1)

In [11]:
unconv = summed['unconverged']
unconv[unconv['mean'] > 0]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cms2018+field+msc,vecgeom,gpu,6.0,97231.0,207.113495


In [12]:
summit.result.xs('cms2018+field+msc', level='problem')[['avg_steps_per_primary', 'slot_occupancy']]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,avg_steps_per_primary,slot_occupancy
geo,arch,instance,Unnamed: 3_level_1,Unnamed: 4_level_1
vecgeom,cpu,0,108719.910889,0.758465
vecgeom,cpu,1,,
vecgeom,cpu,2,,
vecgeom,cpu,3,,
vecgeom,cpu,4,,
vecgeom,gpu,0,64479.341099,0.273232
vecgeom,gpu,1,64364.29044,0.272745
vecgeom,gpu,2,64329.304505,0.272597
vecgeom,gpu,3,64320.830989,0.272561
vecgeom,gpu,4,64391.286374,0.272859


In [13]:
# Lookup tables shared by the plotting cells below.
problems = summit.problems()
# Position of each problem along the x axis, in the order returned above.
p_to_i = {name: position for position, name in enumerate(problems)}
# Marker colour per geometry and marker shape per architecture.
g_to_color = {
    'orange': '#F6A75E',
    'vecgeom': '#5785B7',
}
arch_to_shape = {
    'gpu': 'x',
    'cpu': 'o',
}

In [14]:
# Short tick labels for each problem; insertion order fixes the x-axis order.
problem_to_abbr = {
    'testem15': 'A',
    'testem15+field': 'AF',
    'testem15+field+msc': 'AFM',
    'simple-cms+msc': 'B',
    'simple-cms+field': 'BF',
    'simple-cms+field+msc': 'BFM',
    'testem3-flat': 'C',
    'testem3-flat+field': 'CF',
    'testem3-flat+msc': 'CM',
    'cms2018': 'Z',
    'cms2018+field+msc': 'ZFM',
}
# TODO (original note was truncated): once the results are rerun, revert to
# deriving the labels from `problems` with the commented-out line below
# instead of overriding `problems` and `p_to_i` from this table.
# problem_abbr = [problem_to_abbr[k] for k in problems]
problems = list(problem_to_abbr)
p_to_i = {name: position for position, name in enumerate(problems)}
problem_abbr = [problem_to_abbr[name] for name in problems]
print('\n'.join(f"{abbr}\t{name}" for name, abbr in problem_to_abbr.items()))

A	testem15
AF	testem15+field
AFM	testem15+field+msc
B	simple-cms+msc
BF	simple-cms+field
BFM	simple-cms+field+msc
C	testem3-flat
CF	testem3-flat+field
CM	testem3-flat+msc
Z	cms2018
ZFM	cms2018+field+msc


In [15]:
def plot_results(ax, summary):
    """Scatter a mean/std summary against the problem axis, with error bars.

    Relies on the notebook-level ``p_to_i``, ``g_to_color``, ``arch_to_shape``,
    ``problems`` and ``problem_abbr`` defined in earlier cells.

    Parameters
    ----------
    ax
        Matplotlib axes to draw on.
    summary
        Frame with ``'mean'`` and ``'std'`` columns whose index has
        ``'problem'`` and ``'geo'`` levels and, optionally, an ``'arch'``
        level.

    Returns
    -------
    The scatter handle of the last series drawn (the GPU series when the
    index has an ``'arch'`` level, otherwise the only series).
    """
    geo = summary.index.get_level_values('geo')
    index = np.array([p_to_i[p]
                      for p in summary.index.get_level_values('problem')], dtype=float)
    # Shift the geometries horizontally so their markers do not overlap.
    index += np.where(geo == 'orange', 0.1, -0.05)
    color = np.array([g_to_color[g] for g in geo])

    if 'arch' in summary.index.names:
        # One series per architecture: (legend label, row mask, marker).
        arch_level = summary.index.get_level_values('arch')
        slc_mark = [(a.upper(), arch_level == a, arch_to_shape[a])
                    for a in ['cpu', 'gpu']]
    else:
        # No architecture level: a single unlabeled series with square markers.
        slc_mark = [(None, slice(None), 's')]

    for lab, slc, mark in slc_mark:
        temp_idx = index[slc]
        temp_sum = summary.loc[slc]
        ax.errorbar(temp_idx, temp_sum['mean'], temp_sum['std'],
                    capsize=0, fmt='none', ecolor=(0.2,)*3)
        scat = ax.scatter(temp_idx, temp_sum['mean'], c=color[slc], marker=mark,
                          label=lab)

    # One tick per problem, labelled with its abbreviation.
    xax = ax.get_xaxis()
    xax.set_ticks(np.arange(len(problems)))
    xax.set_ticklabels(problem_abbr, rotation=90)
    ax.grid()
    # Keep grid lines behind the markers.
    ax.set_axisbelow(True)
    return scat

In [16]:
speedup = analyze.get_cpugpu_ratio(summed['total_time'])
fig, ax = plt.subplots()
plot_results(ax, speedup)
ax.set_ylabel("Speedup (7-CPU / 1-GPU wall time)")
analyze.annotate_metadata(ax, summit);
fig.savefig('plots/speedups.pdf', transparent=True)
plt.close()

In [17]:
fig, axes = plt.subplots(nrows=2, figsize=(4,4), subplot_kw=dict(yscale='log'))
for (ax, q) in zip(axes, ['step', 'primary']):
    plot_results(ax, analyze.inverse_summary(summed['avg_time_per_' + q]))
    ax.set_ylabel(q + ' per sec')
    ax.legend()
fig.savefig('plots/steps-vs-primaries.png', dpi=300)
plt.close()

In [18]:
event_rate = calc_event_rate(summed)

In [19]:
event_rate.xs('testem3-flat', level='problem')

Unnamed: 0_level_0,Unnamed: 1_level_0,count,mean,std
geo,arch,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
orange,cpu,6.0,0.033548,0.000919
orange,gpu,6.0,1.92977,0.161997
vecgeom,cpu,6.0,0.036176,0.00012
vecgeom,gpu,6.0,1.82424,0.143885


In [20]:
# Two stacked panels: event rate (log scale) on top, slot occupancy below.
(fig, (time_ax, occ_ax)) = plt.subplots(
    nrows=2, figsize=(4, 4),
    gridspec_kw=dict(height_ratios=[3, 1])
)
time_ax.set_yscale('log')
plot_results(time_ax, event_rate)
time_ax.set_ylabel(r"Event rate [1/s]")
time_ax.legend()
# Hide the upper panel's tick labels; the lower panel carries the
# problem abbreviations set by plot_results.
time_ax.set_xticklabels([])
plot_results(occ_ax, summed['slot_occupancy'])
occ_ax.set_ylabel("Slot occupancy")
# Annotate an axes that belongs to *this* figure. The original passed `ax`,
# a stale variable left over from an earlier cell whose figure was already
# closed, so the saved PDF never received the metadata annotation.
analyze.annotate_metadata(occ_ax, summit)
fig.savefig('plots/rate-occupancy.pdf', transparent=True)
plt.close()

In [21]:
# Render every remaining cell with one decimal place. Column-wise `Series.map`
# is used instead of `DataFrame.applymap`, which newer pandas releases
# deprecate; this form behaves the same on old and new versions.
speedup.dropna().apply(lambda column: column.map("{:.1f}".format))

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
cms2018,vecgeom,9.3,0.1
simple-cms+field,orange,7.1,1.8
simple-cms+field+msc,orange,5.7,1.7
simple-cms+field+msc,vecgeom,5.7,1.1
simple-cms+msc,orange,22.3,2.0
testem15,orange,21.0,0.1
testem15+field,orange,26.6,4.2
testem15+field+msc,orange,28.5,3.4
testem15+field+msc,vecgeom,29.4,3.3
testem3-flat,orange,25.9,2.3


## Action fraction pie charts


In [22]:
# Take the 'mean' entries of the action-time summary, transpose them, and
# sort the rows by index label.
mean_action_times = (
    summit.action_times()
    .xs('mean', axis=1, level=1)
    .T
    .sort_index()
)

In [23]:
# Save one pie chart per remaining column (looped over as `arch`) of the
# action-time table for the cms2018/vecgeom problem, without and with the
# "+field+msc" suffix.
for ext in ["", "+field+msc"]:
    prob_geo = ('cms2018'+ext, 'vecgeom')
    temp = mean_action_times.xs(prob_geo, axis=1, level=('problem', 'geo')).dropna()

    # `DataFrame.items()` replaces `iteritems()`, which was deprecated in
    # pandas 1.5 and removed in pandas 2.0; it yields (column label, Series).
    for (arch, series) in temp.items():
        (fig, ax) = plt.subplots(figsize=(4, 4))
        ax.pie(series, labels=series.index, autopct='%1.1f%%', pctdistance=0.85)
        ax.axis('equal')
        name = prob_geo + (arch,)
        slashname = "/".join(name)
        # Small grey provenance note in the lower-right corner of the figure.
        fig.text(
            0.98, 0.02, f"{slashname}\n{summit.version} on {summit.system}",
            va='bottom', ha='right',
            fontstyle='italic', color=(0.5,)*3, size='xx-small',
            zorder=-100
        )
        dashname = "-".join(name)
        fig.savefig(f'plots/{dashname}.pdf', transparent=True)
        # Close explicitly so figures do not accumulate across iterations.
        plt.close(fig)

### Plot per-step timing on GPU

In [24]:
# Load result 0 of the GPU/VecGeom run for cms2018, without and with field+msc.
cms = [summit.load_results((p, 'vecgeom', 'gpu'), 0)
       for p in ['cms2018', 'cms2018+field+msc']]

# One two-panel figure per plotting helper; `label` names the output file.
for plot, label in [(analyze.plot_counts, 'counts'),
                    (analyze.plot_accum_time, 'time')]:
    (fig, axes) = plt.subplots(ncols=2, figsize=(8, 2))

    for i, (ax, data) in enumerate(zip(axes, cms)):
        objs = plot(ax, data)
        analyze.annotate_metadata(ax, data['_metadata'])
        # Clear one y-label per panel: the 'oax' entry on the first panel and
        # the 'ax' entry on the second.
        objs['oax' if i == 0 else 'ax'].set_ylabel(None)
    fig.savefig(f'plots/cms-{label}.pdf', transparent=True)
    plt.close()

## Crusher

In [25]:
crusher = analyze.Analysis('results/crusher')
print(crusher)

Analysis for Celeritas v0.1.3 on crusher


In [26]:
crusher_fail = crusher.failures().xs('orange', level='geo')

In [27]:
# Print the tail (last three entries) of the captured stderr for every failed
# Crusher run. `Series.items()` replaces `iteritems()`, which was deprecated in
# pandas 1.5 and removed in pandas 2.0; it yields the same (index, value) pairs.
for key, lines in crusher_fail['stderr'].items():
    print("="*78)
    print(key)
    print("   " + " - \n".join(lines[-3:]))

('simple-cms+field+msc', 'cpu', 0)
srun: error: crusher093: task 0: Exited with exit code 139
('simple-cms+field+msc', 'cpu', 4)
srun: error: crusher093: task 0: Exited with exit code 139
('simple-cms+field+msc', 'cpu', 5)
srun: error: crusher093: task 0: Exited with exit code 139
('simple-cms+field+msc', 'cpu', 6)
srun: error: crusher093: task 0: Exited with exit code 139
('simple-cms+field+msc', 'gpu', 0)
:0:rocdevice.cpp            :2614: 3507402658180 us: 107364: [tid:0x7fffd04ad700] Device::callbackQueue aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 - 
srun: error: crusher093: task 0: Aborted
('simple-cms+field+msc', 'gpu', 1)
:0:rocdevice.cpp            :2614: 3507402134202 us: 107362: [tid:0x7fffd04ad700] Device::callbackQueue aborting with error : HSA_STATUS_ERROR_MEMORY_APERTURE_VIOLATION: The agent attempted to access memory beyond the largest legal address. code: 0x29 - 
sru

In [28]:
crusher_fail['stderr'].groupby(['problem', 'arch']).count().unstack()

arch,cpu,gpu
problem,Unnamed: 1_level_1,Unnamed: 2_level_1
simple-cms+field+msc,4,8
testem3-flat+field,3,4


In [29]:
csum = analyze.summarize_instances(crusher.result[~crusher.invalid][[
    'avg_steps_per_primary',
    'avg_time_per_primary',
    'avg_time_per_step',
    'num_steps',
    'total_time',
    'unconverged',
    'slot_occupancy',
]])

In [30]:
csum[('total_time', 'mean')].unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,34.551608,15.079075
simple-cms+field+msc,orange,43.321737,
simple-cms+msc,orange,55.586648,3.49878
testem15,orange,24.370339,1.581837
testem15+field,orange,27.77194,1.678191
testem15+field+msc,orange,34.133479,1.980615
testem3-flat,orange,42.766508,3.147096
testem3-flat+field,orange,61.483369,4.275649
testem3-flat+msc,orange,91.91643,7.045224


In [31]:
rel_err = csum.xs('std', axis=1, level=1) / csum.xs('mean', axis=1, level=1)
high_err = rel_err > 0.02
rel_err[high_err].dropna(how='all').dropna(how='all', axis=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,avg_time_per_primary,avg_time_per_step,total_time,unconverged
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
simple-cms+field,orange,gpu,0.24513,0.24505,0.24513,
simple-cms+field+msc,orange,cpu,0.030877,0.030836,0.030877,
simple-cms+msc,orange,cpu,,,,0.063717
simple-cms+msc,orange,gpu,,,,0.047361


In [32]:
analyze.get_cpugpu_ratio(csum['total_time'])

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,2.291361,0.561929
simple-cms+field+msc,orange,,
simple-cms+msc,orange,15.887439,0.259167
testem15,orange,15.406354,0.057004
testem15+field,orange,16.548736,0.115434
testem15+field+msc,orange,17.233782,0.139494
testem3-flat,orange,13.589198,0.086494
testem3-flat+field,orange,14.379892,0.065346
testem3-flat+msc,orange,13.04663,0.111741


In [33]:
crusher_times = csum['total_time']
crusher_times

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,count,mean,std
problem,geo,arch,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
simple-cms+field,orange,cpu,8.0,34.551608,0.251363
simple-cms+field,orange,gpu,8.0,15.079075,3.696333
simple-cms+field+msc,orange,cpu,4.0,43.321737,1.337638
simple-cms+msc,orange,cpu,8.0,55.586648,0.872394
simple-cms+msc,orange,gpu,8.0,3.49878,0.015565
testem15,orange,cpu,8.0,24.370339,0.059719
testem15,orange,gpu,8.0,1.581837,0.004385
testem15+field,orange,cpu,8.0,27.77194,0.043435
testem15+field,orange,gpu,8.0,1.678191,0.011408
testem15+field+msc,orange,cpu,8.0,34.133479,0.054409


In [34]:
# Event-rate summaries for both machines. The Summit summary is first
# restricted to the rows present in `crusher_times` so the two frames share
# the same index.
# NOTE(review): calc_event_rate divides by `primaries_per_event`, which was
# read from a Summit input -- confirm the Crusher runs used the same value.
crusher_rates = calc_event_rate(csum)
summit_rates = calc_event_rate(summed.loc[crusher_times.index])

# Count shown in the legend label for each (machine, arch) pair -- presumably
# the number of CPU cores / GPUs used per run; confirm against the run setup.
counts = {
    ('summit', 'cpu'): 7,
    ('summit', 'gpu'): 1,
    ('crusher', 'cpu'): 8,
    ('crusher', 'gpu'): 1,
}

In [35]:
(crusher_rates['mean'] / summit_rates['mean']).unstack()

Unnamed: 0_level_0,arch,cpu,gpu
problem,geo,Unnamed: 2_level_1,Unnamed: 3_level_1
simple-cms+field,orange,2.005453,0.64727
simple-cms+field+msc,orange,2.083797,
simple-cms+msc,orange,1.426745,1.015403
testem15,orange,2.119528,1.552119
testem15+field,orange,2.093983,1.302445
testem15+field+msc,orange,2.186446,1.322312
testem3-flat,orange,2.196039,1.15261
testem3-flat+field,orange,2.3014,0.84345
testem3-flat+msc,orange,2.161342,0.946883


In [36]:
# Compare event rates on Summit and Crusher: one colour per machine, one
# marker shape per architecture, on a log-scale y axis.
fig, ax = plt.subplots()
ax.set_yscale('log')
# Each machine gets a small horizontal offset so its points sit side by side
# with the other machine's at every problem tick.
for offset, color, machine, rates in [(-0.05, '#7A954F', 'Summit', summit_rates),
                                      (0.05, '#BC5544', 'Crusher', crusher_rates)]:
    for arch in ['cpu', 'gpu']:
        summary = rates.xs(arch, level='arch')
        # x position = problem position from p_to_i plus the machine offset.
        index = np.array([p_to_i[p]
                          for p in summary.index.get_level_values('problem')], dtype=float)
        index += offset
    
        mark = arch_to_shape[arch]
        count = counts[(machine.lower(), arch)]
        # Upper-cased only for the legend label; the lookups above need the
        # lower-case value.
        arch = arch.upper()
        ax.errorbar(index, summary['mean'], summary['std'],
                    capsize=0, fmt='none', ecolor=(0.2,)*3)
        scat = ax.scatter(index, summary['mean'], c=color, marker=mark,
                         label=f"{machine} ({count} {arch})")    
# One tick per problem, labelled with its abbreviation.
xax = ax.get_xaxis()
xax.set_ticks(np.arange(len(problems)))
xax.set_ticklabels(problem_abbr, rotation=90)
grid = ax.grid()
# Keep grid lines behind the markers.
ax.set_axisbelow(True)
ax.legend()
ax.set_ylabel(r"Event rate [1/s]")
# NOTE(review): only the Summit analysis is passed to annotate_metadata even
# though the figure also shows Crusher data -- confirm this is intended.
analyze.annotate_metadata(ax, summit)
# NOTE(review): unlike the other PDF exports in this notebook, this one does
# not pass transparent=True -- confirm this is intended.
fig.savefig('plots/crusher-vs-summit.pdf')
plt.close()