In [None]:
import sys
sys.path += [".."]
import hepaccelerate

In [None]:
import json
import pandas
import matplotlib
import pickle

# Default figure size (matplotlib interprets figsize in inches).
fig_width = 5
fig_height = 4

# Global matplotlib style overrides applied to every figure in this notebook.
params = {
          #'backend': 'notebook',
          # The preamble must be a single string: list values were deprecated
          # in Matplotlib 3.3 and are rejected by newer releases.
          'text.latex.preamble': r'\usepackage{gensymb}',
          'axes.labelsize': 12,
          'axes.titlesize': 12,
          'font.size': 10,
          'text.usetex': False,
          'figure.figsize': [fig_width,fig_height],
          'font.family': 'serif',
          'image.cmap': "CMRmap",
}

matplotlib.rcParams.update(params)

import matplotlib.pyplot as plt
import itertools
import numpy as np

In [None]:
# Read the raw kernel benchmark log, one record per line. The context manager
# guarantees the file handle is closed even if reading fails.
with open("../data/kernel_benchmarks.txt") as kernel_file:
    ss = kernel_file.readlines()

In [None]:
# Decode each non-blank line as one JSON record. Lines returned by readlines()
# keep their trailing newline, so a plain length check never filters anything;
# test the stripped content instead so blank/trailing lines are skipped rather
# than crashing json.loads.
dd = [json.loads(line) for line in ss if line.strip()]

In [None]:
# Turn the parsed records into a table, discard the "use_avx" column,
# and show the result.
df = pandas.DataFrame.from_dict(dd).drop(columns=["use_avx"])
df

In [None]:
# Mean of every remaining column over all rows that share the same
# (use_cuda, num_threads) combination; the result is indexed by that pair.
ms = df.groupby(["use_cuda", "num_threads"]).mean()
ms

In [None]:
# Standard deviation of every remaining column within each
# (use_cuda, num_threads) group; used later as the error bars.
es = df.groupby(["use_cuda", "num_threads"]).std()
es

In [None]:
# Columns that are excluded from the per-kernel speedup comparison.
excluded = ("memory_transfer", "memsize", "num_events", "max_in_offsets")

# Remaining columns, in alphabetical order.
cols = [name for name in sorted(ms.columns) if name not in excluded]

# Ratio of the last row of the grouped means to the first row
# (presumably GPU vs. a single CPU thread -- depends on the index ordering).
vals = [ms[name].values[-1] / ms[name].values[0] for name in cols]

# Ratio of the last row to the second-to-last row
# (presumably GPU vs. the largest CPU thread count -- TODO confirm).
vals2 = [ms[name].values[-1] / ms[name].values[-2] for name in cols]

In [None]:
# Horizontal bar chart of the per-kernel speedup ratios computed in `vals`.
bar_positions = range(len(vals))

# Event count and memory size (converted from bytes to MB) are taken from
# the first row of the benchmark table for the title.
first_num_events = df["num_events"].values[0]
first_memsize_mb = df["memsize"].values[0]/1024/1024
speedup_title = "Individual physics kernel speedups:\n{0:.2E} events, {1:.0f} MB".format(
    first_num_events, first_memsize_mb
)

plt.figure(figsize=(6,3))
plt.barh(bar_positions, vals, 0.7, color="orange")
plt.yticks(bar_positions, cols, fontsize=12)
plt.xlabel("CPU-thread to GPU speedup", fontsize=12)
plt.title(speedup_title, fontsize=14)
plt.tight_layout()

# Save both a vector and a raster version for the paper.
plt.savefig("../paper/plots/kernel_speedup.pdf", bbox_inches="tight")
plt.savefig("../paper/plots/kernel_speedup.png", bbox_inches="tight")

In [None]:
# Using multiple CPU threads for the same kernel
# plt.figure(figsize=(6,5))
# plt.barh(range(len(vals2)), vals2)
# plt.yticks(range(len(vals2)), cols, fontsize=12)
# plt.xlabel("GPU to CPU-thread speedup", fontsize=12)
# plt.title("Kernels: {0:.2E} events, {1:.0f} MB".format(df["num_events"].values[0], df["memsize"].values[0]/1024/1024), fontsize=14)
# plt.tight_layout()
# plt.xlim(0,39)
# plt.savefig("../paper/plots/kernel_speedup2.pdf", bbox_inches="tight")
# plt.savefig("../paper/plots/kernel_speedup2.png", bbox_inches="tight")

In [None]:
# Grouped bar chart: for every kernel column, the grouped mean of each
# configuration is shown relative to the first configuration, with the
# grouped standard deviation as error bars.
plt.figure(figsize=(6,5))
plt.set_cmap('CMRmap')

# Distinct num_threads values from the grouped index. One extra x slot is
# reserved after them for the GPU entry; this assumes the grouped table has
# exactly len(ncores) + 1 rows -- TODO confirm against the benchmark data.
ncores = list(ms.index.levels[1])
xs = np.arange(len(ncores) + 1)

# Columns that are excluded from the plot.
skipped_cols = ["memory_transfer", "memsize", "num_events", "max_in_offsets"]

# Horizontal offset of the current kernel's bars inside each group;
# shifted after every plotted kernel so the bars sit side by side.
n = -0.23

for icol, col in enumerate(sorted(ms.columns)):
    if col in skipped_cols:
        continue

    # Normalise to the first row so that its bar has height 1.
    norm = ms[col].values[0]
    ratio = ms[col].values[-1] / norm

    color = plt.cm.hsv(icol / len(ms.columns))
    # "\\pm" is escaped explicitly: a bare "\p" is an invalid escape sequence,
    # and a raw string cannot be used because "\n" must remain a newline.
    label = col + ":\n1t: ${0:.0f} \\pm {1:.0f}$ MHz, GPU {2:.1f}x".format(
        ms[col].values[0], es[col].values[0], ratio
    )
    plt.bar(xs + n, ms[col].values/norm, width=0.1, label=label, color=color)
    plt.errorbar(xs + n, ms[col].values/norm, es[col].values/norm, lw=0, elinewidth=1, ms=0, color="black")
    n += 0.12

# Dashed reference line at height == thread count for every thread-count
# group. Iterating over ncores (instead of a fixed count of 6) avoids an
# IndexError when fewer thread counts were benchmarked.
for i, nthreads in enumerate(ncores):
    kw = {}
    if i == 0:
        # Only label the first segment so the legend gets a single entry.
        kw["label"] = "linear scaling with CPU threads"
    plt.plot([i-0.3, i+0.5], [nthreads, nthreads], color="gray", lw=1, ls="--", **kw)

plt.legend(frameon=False, fontsize=10, ncol=1, loc=2)
#plt.yscale("log")
plt.ylim(0,200)
#plt.axhline(1, color="black", lw=1)
plt.xticks(xs, ["{0}t".format(x) for x in ncores] + ["1 GPU"], fontsize=12, rotation=90)
plt.yticks(fontsize=12)
plt.xlabel("CPU threads / GPUs", fontsize=12)
plt.ylabel("speedup over single thread", fontsize=12)
plt.title("Kernels: {0:.2E} events, {1:.0f} MB".format(df["num_events"].values[0], df["memsize"].values[0]/1024/1024), fontsize=14)
plt.tight_layout()
plt.savefig("../paper/plots/kernel_benchmarks.pdf", bbox_inches="tight")
plt.savefig("../paper/plots/kernel_benchmarks.png", bbox_inches="tight")

# Full analysis benchmarks

In [None]:
# Read the full-analysis benchmark log, one measurement per line. The context
# manager guarantees the file handle is closed even if reading fails.
with open("../data/analysis_benchmarks.txt") as analysis_file:
    ls = analysis_file.readlines()

In [None]:
# Parse every analysis benchmark line into a {device, njec, speed} record.
# Whitespace-separated layout used below: the first token carries the scenario
# name, the second the event count, and the second-to-last the speed.
dds = []
for l in ls:
    spl = l.strip().split()
    if not spl:
        # Skip blank lines instead of failing on the missing fields.
        continue

    # NOTE: `nev` is intentionally left in the notebook namespace; later cells
    # read the value from the last parsed line.
    nev = int(spl[1])
    spd = float(spl[-2])

    # The part of the first token before ":" is split on "_"; pieces 1 and 2
    # are the device name and a token whose first four characters are dropped
    # (presumably an "njec" prefix -- TODO confirm) before parsing the integer.
    scenario = spl[0].split(":")[0].split("_")[1:3]
    scenario[1] = int(scenario[1].split(".")[0][4:])

    # Use a dedicated name for the row so the kernel-benchmark list `dd`
    # from the earlier cell is not overwritten.
    record = {"device": scenario[0], "njec": scenario[1], "speed": spd}
    dds.append(record)
df = pandas.DataFrame(dds)

In [None]:
# Display the parsed analysis benchmark table (device, njec, speed).
df

In [None]:
# Divide the measured speed by the number of parallel workers of each device
# to obtain a per-worker speed in the new "speed_thread" column.
workers_per_device = (
    ("cpu", 24),  # 24 parallel CPU threads
    ("gpu", 16),  # 8 GPUs, 2 streams per GPU
)
for device_name, n_workers in workers_per_device:
    on_device = df["device"] == device_name
    df.loc[on_device, "speed_thread"] = df.loc[on_device, "speed"] / n_workers

In [None]:
# Time needed by one worker to process 1e9 events: 1e9 / speed_thread,
# divided by 3600. The column name says hours, which assumes the speed is
# expressed in events per second -- TODO confirm against the benchmark output.
df["time_per_billion_events_hours"] = (1e9 / df["speed_thread"]) / 3600

In [None]:
# Average the analysis measurements per (device, njec) scenario.
# NOTE: this rebinds `ms`, replacing the kernel-benchmark means computed
# earlier in the notebook.
gdf = df.groupby(["device", "njec"])
ms = gdf.mean()

In [None]:
# Display the per-(device, njec) averages.
ms

In [None]:
# Element-wise ratio of per-worker GPU speed to per-worker CPU speed.
# The rows are paired purely by position, so this relies on the GPU and CPU
# rows appearing in the same order and number.
gpu_speed_thread = df.loc[df["device"] == "gpu", "speed_thread"].values
cpu_speed_thread = df.loc[df["device"] == "cpu", "speed_thread"].values
gpu_speed_thread / cpu_speed_thread

In [None]:
# `nev` is the event count left over from the last line handled by the
# parsing loop; this assumes every benchmark line reports the same count
# -- TODO confirm.
# nev / speed is divided by 60; the plot below labels the result as minutes,
# which presumes the speed is in events per second.
# vals1 uses the per-worker speed, vals2 the total speed.
# NOTE: this rebinds `vals2`, replacing the kernel speedup list from earlier.
vals1 = nev/df["speed_thread"].values / 60
vals2 = nev/df["speed"].values / 60

In [None]:
# Horizontal bar chart comparing the total analysis runtime of two scenarios.
plt.figure(figsize=(5,4))
plt.set_cmap('CMRmap')
#plt.suptitle("Total runtime for {0:.2E} events on one workstation:\n 28-thread E5-2697 v3, 8x GTX 1080".format(nev), fontsize=14, y=1.00, va="bottom")

# plt.subplot(1,2,1)
# plt.title("IO-dominated workflow,\n" + "speedup {0:.1f}x".format(vals2[0]/vals2[2]), fontsize=12)
# plt.bar(range(2), [vals2[0], vals2[2]])
# plt.ylabel("Runtime (hours)", fontsize=12)
# plt.xticks([0, 1], ["CPU only", "CPU+GPU"], fontsize=12)

#plt.subplot(1,2,2)

# Entries 1 and 3 of `vals2` are plotted as the "CPU only" and "CPU+8xGPU"
# bars respectively; this depends on the row order of the benchmark table.
cpu_runtime = vals2[1]
gpu_runtime = vals2[3]

runtime_title = (
    "Analysis runtime on a multi-GPU system:\n{0:.2E} events,".format(nev) +
    "\nGPU speedup {0:.1f}x".format(cpu_runtime/gpu_runtime)
)
plt.title(runtime_title, fontsize=14)

plt.barh(range(2), [cpu_runtime, gpu_runtime], 0.7, color="orange")
plt.xlabel("Runtime (minutes)", fontsize=12)

bar_labels = ["CPU only\n(24 threads)", "CPU+8xGPU\n(16 streams)"]
plt.yticks([0, 1], bar_labels, fontsize=12)

plt.tight_layout()

# Save both a vector and a raster version for the paper.
plt.savefig("../paper/plots/analysis_benchmark.pdf", bbox_inches="tight")
plt.savefig("../paper/plots/analysis_benchmark.png", bbox_inches="tight")