In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
# Remove every pandas display limit so frames are rendered without truncation.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.width = None
pd.options.display.max_colwidth = None

# Result set under analysis. Other result files previously used with this
# notebook (swap the path below to analyse one of them):
#   ../experiments/sw-data/amd.csv
#   ../experiments/sw-data/gpu_sigmax.csv
#   ../experiments/sw-data/debug.csv
#   ../experiments/sw-data/power_battery.csv
df = (
    pd.read_csv('../experiments/data-sw/sigmax.csv')
      # Total time of a run is the sum of its three timed phases.
      .assign(time=lambda runs: runs['time_alloc'] + runs['time_parse'] + runs['time_walk'])
      # The output byte count is not used anywhere below.
      .drop(columns='bytes_out')
      # Throughput is input bytes per unit of total time.
      .assign(throughput=lambda runs: runs['bytes_in'] / runs['time'])
      # Slowest first within each max_values group.
      .sort_values(['max_values', 'throughput'])
)

display(df)

Unnamed: 0,framework,api,allocated,num_jsons,bytes_in,time_alloc,time_parse,time_walk,max_values,time,throughput
3,RapidJSON,DOM,False,29086022,536870925,1.515e-06,4.43035,1.011e-06,1,4.430353,121180200.0
4,RapidJSON,DOM (in situ),False,29086022,536870925,1.059e-06,3.85299,1.005e-06,1,3.852992,139338700.0
9,cuDF,read_json,False,29086022,536870925,1.324e-06,3.39442,4.75e-07,1,3.394422,158162700.0
0,simdjson,DOM,False,29086022,536870925,8.06e-07,1e-06,2.2903,1,2.290302,234410500.0
1,simdjson,DOM,True,29086022,536870925,0.00199077,1e-06,2.08247,1,2.084462,257558500.0
5,RapidJSON,SAX,False,29086022,536870925,3.045e-06,2.04053,1.242e-06,1,2.040534,263103100.0
6,RapidJSON,SAX,True,29086022,536870925,0.00117669,1.80429,1.107e-06,1,1.805468,297358300.0
2,simdjson,DOM (no keys),True,29086022,536870925,0.00154706,1e-06,1.76052,1,1.762068,304682200.0
7,Custom,whitespaces,False,29086022,536870925,0.0011408,1.33566,1.179e-06,1,1.336802,401608400.0
8,Boost Spirit.X3,whitespace,False,29086022,536870925,0.00114207,1.09546,1.039e-06,1,1.096603,489576300.0


In [4]:
# Best (maximum) throughput per framework for each (max_values, bytes_in)
# combination, scaled by 1e-9 and reported under a 'Throughput (GB/s)' column.
dg = (
    df.groupby(['framework', 'max_values', 'bytes_in'])
      .agg({'throughput': 'max'})
      .sort_values(by=['framework', 'max_values'])
      .mul(1e-9)
      .rename(columns={'throughput': 'Throughput (GB/s)'})
)

In [None]:
# Pivot the frameworks into columns: one row per (max_values, bytes_in),
# one column per framework, values are the throughput figures from dg.
dh = (
    dg.unstack('framework')
      .rename_axis(index=['Max. values', 'Bytes in'])
      # Drop the outer column level (the single value-column name) so that
      # the columns are just the framework names.
      .droplevel(0, axis=1)
)

# Order the framework columns from highest to lowest mean throughput.
column_order = dh.mean().sort_values(ascending=False).index
dh = dh.reindex(column_order, axis=1)
display(dh)

In [None]:
import matplotlib.pyplot as plt

# Grouped horizontal bar chart of throughput per framework, one group of
# bars per row of `dh`, saved to 'software.pdf'.

# Render all text through LaTeX with a Palatino serif face.
plt.rcParams.update({
    "text.usetex": True,
    "font.family": "serif",
    "font.serif": ["Palatino"],
    "font.size": 12
})

fig, ax = plt.subplots(figsize=(6, 6))
colors = ['#dc7ec0', '#4878d0', '#6acc64', '#d65f5f', '#d5bb67']

# The first index level of `dh` provides the tick label of each bar group.
y_labels = [x[0] for x in dh.index.to_list()]
y_ticks = np.arange(0, len(y_labels))

impls = dh.columns.to_list()
bars = []
# Each row gets one unit of vertical space, shared by len(impls) bars plus
# one bar-height of padding between groups.
height = 1/(len(impls)+1)

for i, impl in enumerate(impls):
    x = dh[impl].to_numpy()
    y = np.arange(0, len(x))
    if impl == "cuDF":
        # cuDF is drawn differently from the others: grey, semi-transparent,
        # with a visible (dashed) outline.
        c = '#AAAAAA'
        a = 0.4
        lw = 1.5
        ec = '#AAAAAA'
    else:
        # Cycle through the palette so more frameworks than colours cannot
        # raise an IndexError.
        c = colors[i % len(colors)]
        a = 1
        lw = 0
        # The edge colour must be defined for every bar; previously it was
        # only set in the cuDF branch, which raised a NameError on a fresh
        # kernel whenever cuDF was not the first column. With lw == 0 the
        # edge is not drawn, so matching the face colour is purely nominal.
        ec = c

    b = ax.barh(y=y+i*height,
                width=x,
                height=height,
                color=c,
                alpha=a,
                linewidth=lw,
                edgecolor=ec,
                ls='--')
    bars.append(b)

ax.set_ylim(-height, len(y_labels)-height)
# NOTE(review): ticks sit at row + 0.5, whereas each bar group is centred at
# row + (len(impls) - 1) * height / 2 -- confirm the offset is intended.
ax.set_yticks(y_ticks + 0.5)
ax.set_yticklabels(y_labels)
ax.set_ylabel("Max. no. voltage values")
# Put the first row of `dh` at the top of the chart.
ax.invert_yaxis()

ax.set_xlabel("Throughput (GB/s)")

ax.grid(which='both')

ax.legend(bars, impls, ncol=3, bbox_to_anchor=(0.018, 1.0), frameon=False)

fig.savefig('software.pdf')