In [6]:
import pandas as pd
import numpy as np
import scipy.stats as scist
import pickle
# Let pandas display up to 500 rows before truncating a frame's output.
pd.set_option('display.max_rows', 500)

In [7]:
import plotly.graph_objects as go
import plotly.io as pio
# Use Plotly's "iframe" renderer as the default for every figure shown below.
pio.renderers.default = "iframe"

In [8]:
# Root directory holding one sub-directory of benchmark results per backend.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
BENCHMARK_DIR = "/home/shein/Documents/cs518_project/benchmarking"
# Backend identifiers; each is used both as a sub-directory name and inside the CSV filename.
BACKEND_DIRS = ["cuda", "hsa", "ze"]
# Common filename suffix shared by every per-backend timing CSV.
TIMING_DATA_BASE_FILENAME = "vmm_timing_data.csv"

In [9]:
# Column names applied to the headerless timing CSVs: the allocation size followed
# by one timing column per measured step (presumably nanoseconds, judging by the
# "Time (ns)" axis label used when plotting - TODO confirm).
col_names = ["alloc_size", "phys_create", "va_create", "map", "access", "unmap", "va_free", "phys_free"]

In [10]:
# Load one headerless timing CSV per backend, keyed by backend name.
# Each file lives at <BENCHMARK_DIR>/<backend>/new_<backend>_<TIMING_DATA_BASE_FILENAME>.
data = {
    backend: pd.read_csv(
        f"{BENCHMARK_DIR}/{backend}/new_{backend}_{TIMING_DATA_BASE_FILENAME}",
        header=None,
        names=col_names,
        # Malformed rows are dropped without an error, so row counts may differ per backend.
        on_bad_lines="skip",
    )
    for backend in BACKEND_DIRS
}

In [11]:
# Convenience aliases for the three per-backend timing frames.
df_cuda, df_hsa, df_ze = data["cuda"], data["hsa"], data["ze"]

In [51]:
# (rows, columns) of the CUDA timing frame.
df_cuda.shape

(81920, 8)

In [52]:
# (rows, columns) of the HSA timing frame.
df_hsa.shape

(81920, 8)

In [53]:
# (rows, columns) of the Level Zero timing frame; it has fewer rows than the other two.
df_ze.shape

(81110, 8)

In [12]:
## The Level Zero run covers fewer allocation sizes than the other backends
## (Arc A770 has 16GB so it failed the last few allocs), so its sizes are used
## as the common set of allocation sizes for every backend.
alloc_sizes = df_ze["alloc_size"].unique()
## Timing columns to analyse: every entry of col_names except "alloc_size".
function_names = ["phys_create", "va_create", "map", "access", "unmap", "va_free", "phys_free"]

In [56]:
# Peek at the first rows of the CUDA timings.
# NOTE(review): the saved output below lists only 10 rows although 20 are requested,
# so it looks stale - re-run to refresh.
df_cuda.head(20)

Unnamed: 0,alloc_size,phys_create,va_create,map,access,unmap,va_free,phys_free
0,2097152,27136,1280,1792,43008,27904,1024,19200
1,2097152,28672,1792,1792,40192,31488,768,19456
2,2097152,29696,4352,1792,40192,27392,1024,19200
3,2097152,28672,1536,2048,40192,32512,768,19456
4,2097152,27904,1792,2304,39936,27904,1024,18688
5,2097152,165888,1792,1792,41472,28160,1024,19712
6,2097152,27904,1536,1792,43520,28160,1024,19456
7,2097152,27136,2048,2048,40704,27648,768,19200
8,2097152,28416,1536,1792,45312,28416,1280,18944
9,2097152,28416,1536,1792,45824,27904,1024,20224


In [13]:
# Statistics computed for every timing column within each allocation-size group
# ("sem" is the standard error of the mean).
agg_stats = ["count", "mean", "sem", "std", "median", "min", "max"]

In [14]:
# Per-allocation-size summary of the CUDA timings; columns become (function, statistic) pairs.
cuda_grouped = df_cuda.groupby("alloc_size").agg(agg_stats)

In [15]:
# Persist the CUDA summary table (path is relative to the working directory).
cuda_grouped.to_pickle("stats_cuda.pkl")

In [16]:
# Spot-check: mean and standard error of phys_create for the 2 MiB (2097152-byte) allocation size.
cuda_grouped.loc[2097152, ("phys_create", ["mean", "sem"])]

phys_create  mean    27648.000000
             sem       589.973709
Name: 2097152, dtype: float64

In [17]:
# Per-allocation-size summary of the HSA timings; columns become (function, statistic) pairs.
hsa_grouped = df_hsa.groupby("alloc_size").agg(agg_stats)

In [18]:
# Persist the HSA summary table (path is relative to the working directory).
hsa_grouped.to_pickle("stats_hsa.pkl")

In [19]:
# Per-allocation-size summary of the Level Zero timings; columns become (function, statistic) pairs.
ze_grouped = df_ze.groupby("alloc_size").agg(agg_stats)

In [20]:
# Persist the Level Zero summary table (path is relative to the working directory).
ze_grouped.to_pickle("stats_ze.pkl")

In [71]:
# Display the Level Zero summary table (one row per allocation size).
ze_grouped

Unnamed: 0_level_0,phys_create,phys_create,phys_create,phys_create,phys_create,phys_create,phys_create,va_create,va_create,va_create,...,va_free,va_free,va_free,phys_free,phys_free,phys_free,phys_free,phys_free,phys_free,phys_free
Unnamed: 0_level_1,count,mean,sem,std,median,min,max,count,mean,sem,...,median,min,max,count,mean,sem,std,median,min,max
alloc_size,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2097152,10,7500.8,269.982584,853.759893,7040.0,6656,8704,10,870.4,167.218500,...,768.0,512,768,10,5324.8,91.906813,290.634861,5120.0,5120,5888
4194304,10,8115.2,298.178649,942.923680,8704.0,6912,8960,10,588.8,39.104646,...,768.0,512,1024,10,5145.6,207.098216,654.902062,5120.0,4352,6656
6291456,10,7372.8,243.760757,770.839196,7168.0,6656,8960,10,614.4,56.603730,...,768.0,512,768,10,4992.0,95.405567,301.698893,4992.0,4352,5376
8388608,10,7168.0,170.666667,539.695387,7168.0,6656,8448,10,537.6,25.600000,...,640.0,512,768,10,5120.0,152.648907,482.718229,5120.0,4608,6144
10485760,10,7577.6,270.386850,855.038297,7296.0,6400,8704,10,716.8,151.691851,...,768.0,512,768,10,5196.8,175.089361,553.681176,4992.0,4608,6400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17001611264,10,6963.2,231.503530,732.078441,6656.0,6400,8704,10,563.2,34.133333,...,768.0,512,1024,10,4428.8,126.857120,401.157437,4480.0,3840,5120
17003708416,10,7014.4,213.162598,674.079323,6656.0,6400,8192,10,640.0,57.243340,...,768.0,512,1024,10,4582.4,172.575986,545.733187,4352.0,3840,5376
17005805568,10,6656.0,66.098916,209.023125,6656.0,6400,6912,10,588.8,39.104646,...,768.0,512,768,10,4608.0,166.345290,526.029995,4608.0,3840,5120
17007902720,10,7168.0,343.460041,1086.116016,6656.0,6144,8960,10,512.0,76.324454,...,768.0,512,1024,10,4582.4,196.266667,620.649695,4480.0,3840,5632


In [21]:
# Summary tables keyed by backend name, so later cells can loop over backends.
all_stats = {"cuda": cuda_grouped, "hsa": hsa_grouped, "ze": ze_grouped}

In [22]:
### Collect the mean, standard error and sample count for each alloc size of each
### backend. These are the inputs for drawing confidence intervals later; no
### interval is computed in this cell.

## Mapping from backend -> alloc size -> function -> {mean: float, sem: float, count: int}
key_data = {}
for backend in ["cuda", "hsa", "ze"]:
    backend_stats = all_stats[backend]
    key_data[backend] = {
        size: {
            fn: {
                stat: backend_stats.loc[size, (fn, stat)]
                for stat in ("mean", "sem", "count")
            }
            for fn in function_names
        }
        # alloc_sizes is the common set of sizes shared by every backend.
        for size in alloc_sizes
    }



In [23]:
# Persist the nested summary dict so it can be reloaded without recomputing it.
with open("key_data.pkl", "wb") as out_file:
    pickle.dump(key_data, out_file)

### Making Plots

In [24]:
def get_stats_data(all_data, backend, function_name, alloc_sizes):
    """Gather one function's statistics for a backend as parallel lists.

    all_data is the nested mapping backend -> alloc size -> function ->
    {"mean", "sem", "count"}. The returned dict has the keys "mean", "sem"
    and "count"; each value is a list ordered like alloc_sizes.
    """
    per_size = all_data[backend]
    entries = [per_size[size][function_name] for size in alloc_sizes]
    return {
        stat: [entry[stat] for entry in entries]
        for stat in ("mean", "sem", "count")
    }

In [25]:
def get_sem_coeff(ci_pct, two_tailed=True):
    """Return the standard-normal multiplier for a confidence level.

    ci_pct is the confidence level as a fraction (e.g. 0.95, not 95). For a
    two-tailed interval the excluded probability (1 - ci_pct) is split evenly
    between both tails, so the quantile is taken at ci_pct + (1 - ci_pct) / 2;
    for a one-tailed bound it is taken at ci_pct itself.
    """
    cumulative_prob = ci_pct + (1 - ci_pct) / 2 if two_tailed else ci_pct
    return scist.norm.ppf(cumulative_prob)

In [26]:
def create_plot(all_data, backends, function_name, alloc_sizes, ci=0):
    """Plot mean time against allocation size, one line per backend.

    Parameters
    ----------
    all_data : dict
        Nested mapping backend -> alloc size -> function ->
        {"mean", "sem", "count"}, i.e. the structure get_stats_data() reads.
    backends : dict
        Maps a backend name (a key of all_data) to [backend_label, backend_color].
    function_name : str
        Timed function to plot (an entry of function_names); its upper-cased
        form is used as the figure title.
    alloc_sizes : sequence
        Allocation sizes to plot; used as the x values of every trace.
    ci : float, optional
        Confidence level as a fraction in [0, 1), e.g. 0.95 for a 95% interval.
        With the default of 0 no confidence band is drawn.

    Returns
    -------
    plotly.graph_objects.Figure

    Raises
    ------
    ValueError
        If ci is 1 or larger. The level is a fraction, not a percentage;
        passing e.g. 95 would otherwise produce NaN bounds.
    """
    if ci >= 1:
        raise ValueError(
            "ci must be a fraction in [0, 1), e.g. 0.95 for a 95% confidence interval"
        )

    # The multiplier depends only on ci, so compute it once for all backends.
    sem_coeff = get_sem_coeff(ci) if ci > 0 else None

    fig = go.Figure()

    for backend_name, meta in backends.items():
        backend_data = get_stats_data(all_data, backend_name, function_name, alloc_sizes)
        backend_label, backend_color = meta[0], meta[1]
        mean, sem = backend_data["mean"], backend_data["sem"]
        fig.add_trace(go.Scatter(x = alloc_sizes,
                                 y = mean,
                                 name = backend_label,
                                 line=dict(color=backend_color, width=3)
                                 )
                     )

        if ci > 0:
            ### compute confidence interval: mean -/+ coefficient * standard error
            ci_low = [m - sem_coeff * s for m, s in zip(mean, sem)]
            ci_high = [m + sem_coeff * s for m, s in zip(mean, sem)]
            ## from: https://stackoverflow.com/questions/70076213/how-to-add-95-confidence-interval-for-a-line-chart-in-plotly
            ## The lower bound is an invisible line; the upper bound is also
            ## invisible but fills down to the previous trace ('tonexty'),
            ## which shades the band between the two bounds.
            fig.add_traces([go.Scatter(x = alloc_sizes,
                                       y = ci_low,
                                       mode = 'lines',
                                       line_color = 'rgba(0,0,0,0)',
                                       showlegend = False
                                      ),
                            go.Scatter(x = alloc_sizes,
                                       y = ci_high,
                                       mode = 'lines',
                                       line_color = 'rgba(0,0,0,0)',
                                       name = str(ci * 100) + '% confidence interval',
                                       fill='tonexty',
                                       fillcolor = backend_color
                                      )])

    fig.update_layout(
        title = dict(text=function_name.upper(), font=dict(size=36)),
        xaxis = dict(title ="Allocation Size (bytes)"),
        yaxis = dict(title ="Time (ns)")
    )
    return fig
    

In [33]:
# Display label and line colour per backend: {backend_name: [label, color]}.
backends = {"cuda": ["Cuda", "green"], "hsa": ["ROCm", "red"], "ze": ["Level Zero", "blue"]}
# Timed function to plot; one of the entries of function_names.
function_name = "access"
# ci is left at its default of 0, so only the mean lines are drawn.
plot = create_plot(key_data, backends, function_name, alloc_sizes)

In [34]:
# Render the figure.
plot.show()

In [161]:
# Inspect the figure's layout settings.
# NOTE(review): the saved output below shows the title 'UNMAP' and a title 'x' offset,
# neither of which matches the cells above (function_name is "access" and create_plot
# sets no 'x'), so it appears to come from an earlier run - re-run to refresh.
plot.layout

Layout({
    'template': '...',
    'title': {'font': {'size': 36}, 'text': 'UNMAP', 'x': 0.5},
    'xaxis': {'title': {'text': 'Allocation Size (bytes)'}},
    'yaxis': {'title': {'text': 'Time (ns)'}}
})