### Setup

In [None]:
import colorsys
import os
import platform
import shlex
import shutil
import subprocess
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib import rcParams as rc

# Number of timed repetitions and warm-up repetitions handed to the benchmark binaries.
Ngpu_runs = 10 #Set to 100 for more accurate results, much smaller standard deviation.
Ncpu_runs = 10 #Set to 100 for more accurate results, much smaller standard deviation.
Ncpu_warmup = 5 #Warmup caches and branch predictor.
Ngpu_warmup = 1 #No branch prediction on GPU, but if prefetching is disabled, the first run will fetch data.
#Change this number if the simulation is taking too long.
#Setting this number to -1 will reduce the batch sizes by 1 power of 2
OFFSET_BS = 0

hostname = platform.node()

# Benchmark result filenames.
# os.getcwd() is already absolute, so the paths are joined instead of being
# prefixed with an extra '/'. The trailing '' keeps the final separator that
# the plain string concatenations below (and in later cells) rely on.
path = os.path.join(os.getcwd(), 'output', hostname, '')
buildpath = os.path.join(os.getcwd(), 'build', '')
fname_base = f'{path}base.csv'
fname_one_gpu_v0 = f'{path}one_gpu_v0.csv'
fname_one_gpu_v1 = f'{path}one_gpu_v1.csv'
fname_multi_gpu_v0 = f'{path}multi_gpu_v0.csv'
fname_multi_gpu_v1 = f'{path}multi_gpu_v1.csv'
fname_multi_gpu_weak = f'{path}multi_gpu_weak.csv'
fname_single_gpu_bs = f'{path}single_gpu_bs.csv'
fname_base_pipeline = f'{path}base_pipeline.csv'
fname_full_pipeline = f'{path}full_pipeline.csv'


# Run the command and capture its output
# Count GPUs by listing them with `nvidia-smi -L` and counting the output lines.
if shutil.which('nvidia-smi') is not None:
    try:
        output = subprocess.check_output(['nvidia-smi', '-L'])
    except (OSError, subprocess.CalledProcessError):
        # nvidia-smi exists but could not be executed or exited with an error:
        # treat the machine as having no usable GPU instead of aborting the cell.
        output = b''

    # Convert the byte string to a regular string
    output_str = output.decode('utf-8')

    # Count only non-blank lines, so that an empty listing gives 0 rather than 1
    # (''.split('\n') would yield one empty element).
    num_gpus = sum(1 for line in output_str.splitlines() if line.strip())
else:
    num_gpus = 0


# Print the number of GPUs found
print(f'Found {num_gpus} GPUs on {hostname}')

# Create output/<hostname>/figures together with any missing parent directory.
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
os.makedirs(os.path.join("output", hostname, "figures"), exist_ok=True)

# Matplotlib defaults applied to every figure created after this cell.
rc.update({
    # legend
    "legend.markerscale": 2.0,
    "legend.framealpha": 0,
    "legend.labelspacing": 0.1,
    # figure size and axis limits
    "figure.figsize": (20,10),
    "axes.autolimit_mode": 'data',
    "axes.xmargin": 0,
    "axes.ymargin": 0.10,
    # font sizes
    "axes.titlesize": 30,
    "axes.labelsize": 24,
    "xtick.labelsize": 20,
    "ytick.labelsize": 20,
    # grid
    "axes.grid": True,
    "grid.linestyle": '-',
    "grid.alpha": 0.2,
    # remaining legend / layout / export settings
    "legend.fontsize": 20,
    "legend.loc": 'upper left',
    "figure.autolayout": True,
    "savefig.dpi": 300,
})

# Base palette as "#rrggbb" strings. The colour dictionary CD is built once,
# further down in this cell, after the palette has been brightened; the CD
# that used to be defined here was overwritten there before any use.
colors = ["#1f77b4", "#d62728", "#9467bd", "#8c564b", "#e377c2"]

def adjust_brightness(color, amount):
    """Return `color` with its HLS lightness shifted by `amount`.

    `color` is a "#rrggbb" hex string; `amount` is added to the lightness
    (expected range -1 to 1) and the sum is clamped to [0, 1]. The result is
    again a lowercase "#rrggbb" string.
    """
    # Parse the three two-digit hex channels that follow the leading '#'.
    red, green, blue = [int(color[start:start + 2], 16) for start in (1, 3, 5)]
    hue, lightness, saturation = colorsys.rgb_to_hls(red/255, green/255, blue/255)
    # Shift the lightness, keeping it inside the valid range.
    clamped = max(0, min(1, lightness + amount))
    # Back to 8-bit RGB, formatted as two hex digits per channel.
    channels = colorsys.hls_to_rgb(hue, clamped, saturation)
    return "#" + "".join(f"{round(channel * 255):02x}" for channel in channels)
# Modify the brightness of the colors
# Lighten every palette entry by 0.2 (HLS lightness).
colors = list(map(lambda base_color: adjust_brightness(base_color, 0.2), colors))

# Colour per plotted series.
CD = {
    "Baseline": 'r',
    "GPU_V0": colors[2],
    "GPU_V1": colors[3],
    "2 GPU_V0": colors[4],
    "2 GPU_V1": colors[0],
    "Dual": '#7570b3',
    "Generate": '#d95f02',
    "Projection": '#e7298a',
    "Tutte": '#66a61e',
    "Opt": '#8931EF',
}

# Legend names of the two kernels.
KName0, KName1 = r"SYCL Kernel 0", r"SYCL Kernel 1"

def source_and_get_environment(script_path, base_environment=None):
    """Source a shell script in bash and return the resulting environment.

    Parameters
    ----------
    script_path : str or path-like
        Script passed to bash's ``source``.
    base_environment : mapping, optional
        Environment the bash process starts from. Defaults to a copy of
        ``os.environ``.

    Returns
    -------
    dict
        Variable name -> value, as printed by ``env`` after the script was
        sourced. If sourcing fails (non-zero exit status) a ``RuntimeWarning``
        is issued and a copy of ``base_environment`` is returned instead of an
        empty dict.
    """
    if base_environment is None:
        base_environment = os.environ.copy()
    # Quote the path so that spaces or shell metacharacters cannot split or alter the command.
    quoted_script = shlex.quote(str(script_path))
    # The script's own stdout is discarded so its messages are not parsed as variables.
    # `env -0` separates entries with NUL, which keeps multi-line values intact.
    command = ['/bin/bash', '-c', f'source {quoted_script} >/dev/null && env -0']
    completed = subprocess.run(command, stdout=subprocess.PIPE, env=base_environment)
    if completed.returncode != 0:
        warnings.warn(
            f'sourcing {script_path} failed with exit status {completed.returncode}; '
            'falling back to the unmodified base environment',
            RuntimeWarning,
        )
        return dict(base_environment)
    entries = completed.stdout.decode().split('\0')
    return dict(entry.split("=", 1) for entry in entries if "=" in entry)

# Environment obtained by sourcing the oneAPI setvars.sh script; it is passed
# as `env=` to the subprocess calls in the cells below.
env = source_and_get_environment('/opt/intel/oneapi/setvars.sh')


### Create the files that store the results; if they already exist, empty them.

In [None]:
def reset_file(filename):
    """Make `filename` an empty file.

    An existing file is truncated, a missing one is created. Missing parent
    directories are created first, so the call does not depend on the output
    directory having been set up beforehand.
    """
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # Mode 'w' both creates and truncates, so no existence check or explicit
    # truncate() call is needed.
    with open(filename, 'w', newline=''):
        pass

# Start from empty result files for the kernel benchmarks.
for results_file in (
    fname_base,
    fname_one_gpu_v0,
    fname_one_gpu_v1,
    fname_multi_gpu_v0,
    fname_multi_gpu_v1,
    fname_multi_gpu_weak,
):
    reset_file(results_file)

### Validation

In [None]:
#Validate SYCL kernels against baseline dualisation

# Popen(...).wait() returns the exit status, so keep it under a name that says so
# and report a failure instead of discarding it silently.
validation_status = subprocess.Popen(['/bin/bash', '-c', f'{buildpath}validation/sycl/sycl_validation gpu'], env=env).wait()
process = validation_status  # previous name of the same value, kept for compatibility
if validation_status != 0:
    print(f'WARNING: sycl_validation exited with status {validation_status}')

### Run the batch size experiment

In [None]:
# Start from an empty result file, then time batch sizes 2**0 ... 2**19.
reset_file(fname_single_gpu_bs)

for exponent in range(20):
    if not (num_gpus>0):
        continue  # nothing to run without a GPU
    batch_cmd = f'{buildpath}benchmarks/sycl/dualisation gpu 200 {2**(exponent)} {Ngpu_runs} {Ngpu_warmup} 1 1 {fname_single_gpu_bs}'
    proc = subprocess.Popen(['/bin/bash', '-c', batch_cmd], env=env)
    proc.wait()

### Plot the batch size experiment

In [None]:
# The batch-size cell empties this CSV and only fills it when a GPU is present.
# pd.read_csv raises EmptyDataError on a zero-byte file, so fall back to an
# empty frame with the columns the plot below reads.
if os.path.getsize(fname_single_gpu_bs) > 0:
    df_single_gpu_bs = pd.read_csv(fname_single_gpu_bs)
else:
    df_single_gpu_bs = pd.DataFrame(columns=["BS", "T", "TSD"])

import matplotlib.ticker as ticker
from matplotlib.ticker import MaxNLocator
from mpl_toolkits.axes_grid1.inset_locator import (inset_axes, mark_inset)
fig,ax = plt.subplots(figsize=(15,10))
def add_line(ax, BS, T, SD, label, color, marker, linestyle):
    """Plot T against BS on `ax` and shade the band T - SD .. T + SD in light black."""
    lower, upper = T - SD, T + SD
    ax.plot(BS, T, label=label, color=color, marker=marker, linestyle=linestyle)
    ax.fill_between(BS, lower, upper, color='k', alpha=0.1)
 
# Log-log axes: time per graph against batch size.
for set_axis_scale in (ax.set_yscale, ax.set_xscale):
    set_axis_scale('log')
ax.set_ylabel("Time / Graph [ns]")
ax.set_xlabel("Batch Size [1]")
if(num_gpus>0):
    batch_sizes, batch_times, batch_sds = (df_single_gpu_bs[column].to_numpy() for column in ("BS", "T", "TSD"))
    add_line(ax, batch_sizes, batch_times, batch_sds, "Lockstep Parallel Dualization", CD["GPU_V1"], 'o', ':')
ax.legend(loc='best')
plt.savefig(f'{path}figures/batch_size_benchmark.pdf', bbox_inches='tight')

### Run the benchmarks

In [None]:
# Empty the result files before the benchmark binaries write to them.
for results_file in (
    fname_base,
    fname_one_gpu_v0,
    fname_one_gpu_v1,
    fname_multi_gpu_v0,
    fname_multi_gpu_v1,
    fname_multi_gpu_weak,
):
    reset_file(results_file)

import subprocess
import os

def source_and_get_environment(script_path, base_environment=None):
    """Source a shell script in bash and return the resulting environment.

    NOTE(review): this repeats the function of the same name in the Setup
    cell and replaces it once this cell runs; keep the two copies identical
    or delete this one.

    Parameters
    ----------
    script_path : str or path-like
        Script passed to bash's ``source``.
    base_environment : mapping, optional
        Environment the bash process starts from. Defaults to a copy of
        ``os.environ``.

    Returns
    -------
    dict
        Variable name -> value, as printed by ``env`` after the script was
        sourced. If sourcing fails (non-zero exit status) a ``RuntimeWarning``
        is issued and a copy of ``base_environment`` is returned instead of an
        empty dict.
    """
    if base_environment is None:
        base_environment = os.environ.copy()
    # Quote the path so that spaces or shell metacharacters cannot split or alter the command.
    quoted_script = shlex.quote(str(script_path))
    # The script's own stdout is discarded so its messages are not parsed as variables.
    # `env -0` separates entries with NUL, which keeps multi-line values intact.
    command = ['/bin/bash', '-c', f'source {quoted_script} >/dev/null && env -0']
    completed = subprocess.run(command, stdout=subprocess.PIPE, env=base_environment)
    if completed.returncode != 0:
        warnings.warn(
            f'sourcing {script_path} failed with exit status {completed.returncode}; '
            'falling back to the unmodified base environment',
            RuntimeWarning,
        )
        return dict(base_environment)
    entries = completed.stdout.decode().split('\0')
    return dict(entry.split("=", 1) for entry in entries if "=" in entry)

# Same call as in the Setup cell: rebuilds `env` from the oneAPI setvars.sh
# script before the benchmark subprocesses below are launched with it.
env = source_and_get_environment('/opt/intel/oneapi/setvars.sh')

# For every graph size 20, 22, ..., 200: run the CPU baseline, then (if a GPU
# is present) the five GPU configurations, each appending to its own CSV.
dualisation_bin = f'{buildpath}benchmarks/sycl/dualisation'
for n_nodes in range(20,201,2):
    os.system(f'{buildpath}benchmarks/baseline {n_nodes} {2**(6+OFFSET_BS)} {Ncpu_runs} {Ncpu_warmup} 0 {fname_base}')
    if not (num_gpus>0):
        continue
    # (batch size, 0/1 flag separating the v0 and v1 result files, device count, output file),
    # listed in the order the runs are executed.
    gpu_jobs = (
        (2**(20+OFFSET_BS), 0, 1, fname_one_gpu_v0),
        (2**(20+OFFSET_BS), 1, 1, fname_one_gpu_v1),
        (2**(20+OFFSET_BS), 0, num_gpus, fname_multi_gpu_v0),
        (2**(20+OFFSET_BS), 1, num_gpus, fname_multi_gpu_v1),
        (num_gpus*2**(20+OFFSET_BS), 1, num_gpus, fname_multi_gpu_weak),
    )
    for batch, variant, devices, out_csv in gpu_jobs:
        gpu_cmd = f'{dualisation_bin} gpu {n_nodes} {batch} {Ngpu_runs} {Ngpu_warmup} {variant} {devices} {out_csv}'
        proc = subprocess.Popen(['/bin/bash', '-c', gpu_cmd], env=env)
        proc.wait()



### Baseline Benchmark Plot

In [None]:
df_base = pd.read_csv(fname_base)
# Columns read once: graph size, mean time and its standard deviation.
base_n, base_t, base_sd = (df_base[column].to_numpy() for column in ("N", "T", "TSD"))

fig, ax = plt.subplots(figsize=(15,15), nrows=2, sharex=True, dpi=200)
per_graph_ax, per_node_ax = ax[0], ax[1]

# Upper panel: time per graph, divided by 1e3, with a +/- 1 TSD band.
per_graph_ax.plot(base_n, base_t/1e3, 'o:', color=CD["Baseline"], label="Baseline Sequential Dualisation")
per_graph_ax.fill_between(base_n, (base_t - base_sd)/1e3, (base_t + base_sd)/1e3, alpha=0.1, color='k')
per_graph_ax.set_ylabel(r"Time / Graph [$\mu$s]")
per_graph_ax.legend()

# Lower panel: time divided by graph size, with a +/- 2 TSD band.
per_node_ax.plot(base_n, base_t / base_n, 'o:', color=CD["Baseline"], label="Baseline Sequential Dualisation")
per_node_ax.fill_between(base_n, (base_t - base_sd*2) / base_n, (base_t+base_sd*2) / base_n, color='k', alpha=0.1, label=r"2$\sigma$")
per_node_ax.set_ylabel(r"Time / Node [ns]")
per_node_ax.set_xlabel(r"Cubic Graph Size [# Nodes]")
per_node_ax.legend()
plt.savefig(f"{path}figures/baseline.pdf", bbox_inches='tight')


### Plot Dual GPU Benchmark of V0 and V1 Kernels (CUDA)

In [None]:
if(num_gpus>0):
    df0 = pd.read_csv(fname_multi_gpu_v0)
    df1 = pd.read_csv(fname_multi_gpu_v1)
    # Both kernels are drawn against the graph sizes of the v0 file.
    nodes = df0["N"].to_numpy()
    t0, sd0 = df0["T"].to_numpy(), df0["TSD"].to_numpy()
    t1, sd1 = df1["T"].to_numpy(), df1["TSD"].to_numpy()

    fig, ax = plt.subplots(figsize=(15,15), nrows=2, sharex=True, dpi=200)

    # Upper panel shows the values as stored; lower panel shows them times 1e3 per node.
    panels = (
        (ax[0], lambda values: values, r"Saturation Kernel 0", r"Saturation Kernel 1"),
        (ax[1], lambda values: values*1e3 / nodes, r"Kernel 0 Saturation", r"Kernel 1 Saturation"),
    )
    for panel, scale, sat_label0, sat_label1 in panels:
        panel.plot(nodes, scale(t1), 'o:', color=CD["2 GPU_V1"], label=KName1)
        panel.plot(nodes, scale(t0), 'o:', color=CD["2 GPU_V0"], label=KName0)
        # +/- 2 TSD bands; only the first one carries a legend entry.
        panel.fill_between(nodes, scale(t0 - sd0*2), scale(t0+sd0*2), color='k', alpha=0.1, label=r"2$\sigma$")
        panel.fill_between(nodes, scale(t1 - sd1*2), scale(t1+sd1*2), color='k', alpha=0.1)
        # Vertical markers at N = 96 and N = 188 spanning the current y-range.
        ylow, yhigh = panel.get_ylim()
        panel.vlines(96, ylow, yhigh, color=CD["2 GPU_V0"], linestyle='--', label=sat_label0)
        panel.vlines(188, ylow, yhigh, color=CD["2 GPU_V1"], linestyle='--', label=sat_label1)

    ax[0].set_ylabel(r"Time / Graph [ns]")
    ax[0].set_ymargin(0.0)
    ax[0].legend(loc="upper left")

    ax[1].legend(bbox_to_anchor=(0.5, 0.9))
    ax[1].set_ymargin(0.0)
    ax[1].set_xlabel(r"Cubic Graph Size [# Nodes]")
    ax[1].set_ylabel(r"Time / Node [ps]")
    plt.savefig(f"{path}figures/cuda_kernel_benchmark.pdf", bbox_inches='tight')

### Plot Strong and Weak Scaling of the V1 Kernel (CUDA) from 1 to 2 GPUs 

In [None]:
if(num_gpus>1):
    # df1: one GPU, batch 2**(20+OFFSET_BS)
    # df3: num_gpus GPUs, same batch 2**(20+OFFSET_BS)      (fname_multi_gpu_v1)
    # df2: num_gpus GPUs, batch num_gpus*2**(20+OFFSET_BS)  (fname_multi_gpu_weak)
    # These are the batch sizes the "Run the benchmarks" cell passes for each file.
    df1 = pd.read_csv(fname_one_gpu_v1)
    df3 = pd.read_csv(fname_multi_gpu_v1)
    df2 = pd.read_csv(fname_multi_gpu_weak)
    def std_div(a,b, a_std, b_std):
        """Standard deviation of a/b: the relative deviations of a and b added in quadrature, times a/b."""
        return a/b * np.sqrt((a_std/a)**2 + (b_std/b)**2)

    # Legend texts follow the batch size each file was produced with; the two
    # multi-GPU labels used to be attached to the wrong data sets.
    base_exp = 20+OFFSET_BS
    label_one_gpu = f"1 GPU, $B_s = 2^{{{base_exp}}}$"
    label_same_batch = f"{num_gpus} GPUs $B_s = 2^{{{base_exp}}}$"
    label_scaled_batch = rf"{num_gpus} GPUs $B_s = {num_gpus} \cdot 2^{{{base_exp}}}$"

    fig, ax     = plt.subplots(figsize=(15, 15), nrows=2, sharex=True)
    ax[0].plot(df1["N"].to_numpy(), df1["T"].to_numpy(), 'o:',  color=CD["GPU_V1"], label=label_one_gpu)
    ax[0].plot(df3["N"].to_numpy(), df3["T"].to_numpy(), 'o:',  color=CD["2 GPU_V1"], label=label_same_batch)
    ax[0].plot(df2["N"].to_numpy(), df2["T"].to_numpy(), 'x--',  color=CD["2 GPU_V1"], label=label_scaled_batch)
    ax[0].fill_between(df1["N"].to_numpy(), df1["T"].to_numpy() - df1["TSD"].to_numpy()*1, df1["T"].to_numpy() + df1["TSD"].to_numpy()*1, alpha=0.1, color='k', label=r"1$\sigma$")
    ax[0].fill_between(df3["N"].to_numpy(), df3["T"].to_numpy() - df3["TSD"].to_numpy()*1, df3["T"].to_numpy() + df3["TSD"].to_numpy()*1, alpha=0.1, color='k')
    ax[0].fill_between(df2["N"].to_numpy(), df2["T"].to_numpy() - df2["TSD"].to_numpy()*1, df2["T"].to_numpy() + df2["TSD"].to_numpy()*1, alpha=0.1, color='k')
    ax[0].set_ylabel("Time / Graph [ns]")
    ax[0].legend(loc='upper left')


    #Plot speedup relative to the single-GPU run
    ax[1].plot(df1["N"].to_numpy(), df1["T"].to_numpy()/df3["T"].to_numpy(), 'o:',  color=CD["2 GPU_V1"], label=label_same_batch)
    ax[1].plot(df1["N"].to_numpy(), df1["T"].to_numpy()/df2["T"].to_numpy(), 'x--',  color=CD["2 GPU_V1"], label=label_scaled_batch)
    std_1 = std_div(df1["T"].to_numpy(), df3["T"].to_numpy(), df1["TSD"].to_numpy(), df3["TSD"].to_numpy())
    std_2 = std_div(df1["T"].to_numpy(), df2["T"].to_numpy(), df1["TSD"].to_numpy(), df2["TSD"].to_numpy())
    ax[1].fill_between(df1["N"].to_numpy(), df1["T"].to_numpy()/df3["T"].to_numpy() - std_1, df1["T"].to_numpy()/df3["T"].to_numpy() + std_1, alpha=0.1, color='k', label=r"1$\sigma$")
    ax[1].fill_between(df1["N"].to_numpy(), df1["T"].to_numpy()/df2["T"].to_numpy() - std_2, df1["T"].to_numpy()/df2["T"].to_numpy() + std_2, alpha=0.1, color='k')
    # Perfect scaling equals the number of GPUs, matching the y-limit set below.
    ax[1].hlines(num_gpus, 20, 200, linestyles='dashed', color='k', label=r"Perfect Scaling")
    ax[1].set_ylabel("Speedup [1]")
    # Plain '#': usetex is not enabled, so '\#' would print the backslash.
    ax[1].set_xlabel(r"Number of Nodes [#]")
    ax[1].set_ylim(0.95,num_gpus*1.05)
    ax[1].legend(loc='lower right', ncol=2)
    plt.savefig(path + "figures/cuda_scaling.pdf", bbox_inches='tight')

    

In [None]:

# Run both pipeline benchmark binaries for every isomer space C_72, C_74, ..., C_200.
# Each binary appends to its own CSV, which is emptied first.
reset_file(fname_base_pipeline)
reset_file(fname_full_pipeline)
Nbuckygen = 1000000
pipeline_jobs = (
    ('baseline_pipeline', fname_base_pipeline),
    ('pipeline', fname_full_pipeline),
)
for n_atoms in range(72,201,2):
    for binary_name, out_csv in pipeline_jobs:
        pipeline_cmd = f'{buildpath}benchmarks/sycl/{binary_name} gpu {n_atoms} {Nbuckygen} 10000 5 {Ngpu_warmup} 1 1 {out_csv}'
        # .wait() returns the exit status, so `proc` ends up holding that status.
        proc = subprocess.Popen(['/bin/bash', '-c', pipeline_cmd], env=env).wait()



In [None]:
import matplotlib.ticker as ticker

df_base_pipeline = pd.read_csv(fname_base_pipeline)

fig, ax = plt.subplots(figsize=(20,10), nrows=1, sharex=True, dpi=150)

# Mean time (T_*) and standard deviation (TSD_*) per pipeline stage.
opt, opt_sd = (df_base_pipeline[column].to_numpy() for column in ("T_opt", "TSD_opt"))
tutte, tutte_sd = (df_base_pipeline[column].to_numpy() for column in ("T_tutte", "TSD_tutte"))
project, project_sd = (df_base_pipeline[column].to_numpy() for column in ("T_project", "TSD_project"))
overhead, overhead_sd = (df_base_pipeline[column].to_numpy() for column in ("T_overhead", "TSD_overhead"))
gen, gen_sd = (df_base_pipeline[column].to_numpy() for column in ("T_gen", "TSD_gen"))
dual, dual_sd = (df_base_pipeline[column].to_numpy() for column in ("T_dual", "TSD_dual"))
natoms = df_base_pipeline["N"].to_numpy()

# Optimisation, Tutte and projection are reported as one series; their
# standard deviations are combined in quadrature.
parallel = opt + tutte + project
parallel_sd = np.sqrt(opt_sd**2 + tutte_sd**2 + project_sd**2)
total = parallel + overhead + gen + dual

def plot_line_normalized(ax, x, y, y_sd, label, color, marker, linestyle, mfc_bool=True):
    """Plot `y` as a percentage of the module-level `total`, with a +/- `y_sd` band.

    Markers are filled with `color` when `mfc_bool` is true and left hollow
    otherwise. `total` is read from the enclosing (global) scope.
    """
    marker_face = color if mfc_bool else "None"
    # Normalized to total time, shown as percentage
    ax.plot(x, 1e2* y/total, mfc=marker_face, linestyle=linestyle, label=label, color=color, marker=marker)
    band_low, band_high = 1e2*(y - y_sd)/total, 1e2*(y + y_sd)/total
    ax.fill_between(x, band_low, band_high, color='k', alpha=0.1)


# Share of the total time spent in each stage of the baseline pipeline.
plot_line_normalized(ax, natoms, gen, gen_sd, "Isomer-space graph generation", CD["Generate"], 'o', ':', False)
plot_line_normalized(ax, natoms, dual, dual_sd, "Baseline Sequential Dualization", CD["Dual"], 'o', ':', False)
plot_line_normalized(ax, natoms, parallel, parallel_sd, "Lockstep-parallel geometry optimization", "k", '*', ':')
plot_line_normalized(ax, natoms, overhead, overhead_sd, "Overhead", "blue", 'o', ':')

# The plotted values are percentages of the total time (0-100), not microseconds,
# so the axis label says so.
ax.set_ylabel(r"Share of Total Time / Graph [%]")
ax.legend()
ax.set_xlabel(r"Isomerspace $C_N$ [1]")
ax.set_ylim(0,100)
#percentage formatting
ax.yaxis.set_major_formatter(ticker.PercentFormatter())



In [None]:
import matplotlib.ticker as ticker

df_full_pipeline = pd.read_csv(fname_full_pipeline)

fig, ax = plt.subplots(figsize=(20,10), nrows=1, sharex=True, dpi=150)

# Mean time (T_*) and standard deviation (TSD_*) per pipeline stage.
opt, opt_sd = (df_full_pipeline[column].to_numpy() for column in ("T_opt", "TSD_opt"))
tutte, tutte_sd = (df_full_pipeline[column].to_numpy() for column in ("T_tutte", "TSD_tutte"))
project, project_sd = (df_full_pipeline[column].to_numpy() for column in ("T_project", "TSD_project"))
overhead, overhead_sd = (df_full_pipeline[column].to_numpy() for column in ("T_overhead", "TSD_overhead"))
gen, gen_sd = (df_full_pipeline[column].to_numpy() for column in ("T_gen", "TSD_gen"))
dual, dual_sd = (df_full_pipeline[column].to_numpy() for column in ("T_dual", "TSD_dual"))
natoms = df_full_pipeline["N"].to_numpy()

# Optimisation, Tutte and projection are reported as one series; their
# standard deviations are combined in quadrature.
parallel = opt + tutte + project
parallel_sd = np.sqrt(opt_sd**2 + tutte_sd**2 + project_sd**2)
total = parallel + overhead + gen + dual

def plot_normalized_line(ax, x, y, y_sd, label, color, marker, linestyle, mfc_bool=True):
    """Plot `y` as a percentage of the module-level `total`, with a +/- `y_sd` band.

    Markers are filled with `color` when `mfc_bool` is true and left hollow
    otherwise. `total` is read from the enclosing (global) scope.
    """
    marker_face = color if mfc_bool else "None"
    # Normalized to total time, shown as percentage
    ax.plot(x, 1e2* y/total, mfc=marker_face, linestyle=linestyle, label=label, color=color, marker=marker)
    band_low, band_high = 1e2*(y - y_sd)/total, 1e2*(y + y_sd)/total
    ax.fill_between(x, band_low, band_high, color='k', alpha=0.1)


# Share of the total time spent in each stage of the full pipeline.
plot_normalized_line(ax, natoms, gen, gen_sd, "Isomer-space graph generation", CD["Generate"], 'o', ':', False)
plot_normalized_line(ax, natoms, parallel, parallel_sd, "Lockstep-parallel geometry optimization", "k", '*', ':')
plot_normalized_line(ax, natoms, overhead, overhead_sd, "Overhead", "blue", 'o', ':')
# T_dual here comes from the full-pipeline CSV, so it carries the same label
# as in the absolute-time plot of that CSV rather than the baseline label.
plot_normalized_line(ax, natoms, dual, dual_sd, "Lockstep-parallel dualization", CD["Dual"], '*', ':')

# The plotted values are percentages of the total time (0-100), not microseconds,
# so the axis label says so.
ax.set_ylabel(r"Share of Total Time / Graph [%]")
ax.legend()
ax.set_xlabel(r"Isomerspace $C_N$ [1]")
ax.set_ylim(0,100)
#percentage formatting
ax.yaxis.set_major_formatter(ticker.PercentFormatter())


In [None]:
import matplotlib.ticker as ticker

df_full_pipeline = pd.read_csv(fname_full_pipeline)

fig, ax = plt.subplots(figsize=(20,10), nrows=1, sharex=True, dpi=150)

# Mean time (T_*) and standard deviation (TSD_*) per pipeline stage.
opt, opt_sd = (df_full_pipeline[column].to_numpy() for column in ("T_opt", "TSD_opt"))
tutte, tutte_sd = (df_full_pipeline[column].to_numpy() for column in ("T_tutte", "TSD_tutte"))
project, project_sd = (df_full_pipeline[column].to_numpy() for column in ("T_project", "TSD_project"))
overhead, overhead_sd = (df_full_pipeline[column].to_numpy() for column in ("T_overhead", "TSD_overhead"))
gen, gen_sd = (df_full_pipeline[column].to_numpy() for column in ("T_gen", "TSD_gen"))
dual, dual_sd = (df_full_pipeline[column].to_numpy() for column in ("T_dual", "TSD_dual"))
natoms = df_full_pipeline["N"].to_numpy()

# Optimisation, Tutte and projection are reported as one series; their
# standard deviations are combined in quadrature.
parallel = opt + tutte + project
parallel_sd = np.sqrt(opt_sd**2 + tutte_sd**2 + project_sd**2)
total = parallel + overhead + gen + dual

def plot_absolute_line(ax, x, y, y_sd, label, color, marker, linestyle, mfc_bool=True):
    """Plot `y` divided by 1e3 against `x`, with a +/- `y_sd` band on the same scale.

    Markers are filled with `color` when `mfc_bool` is true and left hollow
    otherwise.
    """
    marker_face = color if mfc_bool else "None"
    # Absolute values, only rescaled by 1e3 (no normalisation to the total).
    ax.plot(x,  y/1e3, mfc=marker_face, linestyle=linestyle, label=label, color=color, marker=marker)
    band_low, band_high = (y - y_sd)/1e3, (y + y_sd)/1e3
    ax.fill_between(x, band_low, band_high, color='k', alpha=0.1)


# Absolute time per stage of the full pipeline, on a logarithmic y-axis.
# Each entry: (values, standard deviation, legend label, colour, marker, line style[, filled markers]).
absolute_series = (
    (parallel, parallel_sd, "Lockstep-parallel geometry optimization", "k", '*', ':'),
    (gen, gen_sd, "Isomer-space graph generation", CD["Generate"], 'o', ':', False),
    (overhead, overhead_sd, "Overhead", "blue", 'o', ':'),
    (dual, dual_sd, "Lockstep-parallel dualization", CD["Dual"], '*', ':'),
)
for series_args in absolute_series:
    plot_absolute_line(ax, natoms, *series_args)

ax.set_ylabel(r"Time / Graph [$\mu$s]")
ax.legend()
ax.set_xlabel(r"Isomerspace $C_N$ [1]")
ax.set_yscale('log')

### Speedup of the V1 Kernel (SYCL) on 1 GPU compared to sequential CPU dualization

In [None]:
# Speedup of the dualisation step: T_dual of the baseline pipeline divided by
# T_dual of the full pipeline, per isomer space.
fig, ax = plt.subplots(figsize=(20,10), nrows=1, sharex=True, dpi=150)

sequential, sequential_sd = (df_base_pipeline[column].to_numpy() for column in ("T_dual", "TSD_dual"))
parallel, parallel_sd, natoms = (df_full_pipeline[column].to_numpy() for column in ("T_dual", "TSD_dual", "N"))

speedup = sequential / parallel
# Standard deviation of the ratio: relative deviations added in quadrature, times the ratio.
speedup_sd = np.sqrt((sequential_sd/sequential)**2 + (parallel_sd/parallel)**2) * speedup

ax.plot(natoms, speedup, 'o:', color="r", label="Speedup")
ax.fill_between(natoms, speedup - speedup_sd, speedup + speedup_sd, color='k', alpha=0.1)
ax.set_ylabel(r"Speedup [1]")
ax.legend()
ax.set_xlabel(r"Isomerspace $C_N$ [1]")
