In [15]:
import pandas as pd
import humanfriendly as hmf
import plotly.express as px
import seaborn as sns

In [16]:
# See bench.sh and bench-to-csv.pl for exporting `make bench` results to CSV.
# Load the exported results and order the rows by instance type in one step.
df = pd.read_csv('benchmark.csv', skipinitialspace=True).sort_values(by=['instance'])

print(df)

      instance   size     method       time
0    c6a.large    16K    openssl   0m0.288s
19   c6a.large    64M    go-simd   0m4.153s
18   c6a.large    64M  go-native  0m14.832s
17   c6a.large    64M       node   0m4.229s
16   c6a.large    64M    openssl   0m5.092s
..         ...    ...        ...        ...
110  t4g.large    16M    go-simd   0m1.263s
111  t4g.large    64M    openssl   0m5.687s
112  t4g.large    64M       node   0m4.345s
104  t4g.large  1024K       node   0m0.129s
102  t4g.large   256K    go-simd   0m0.019s

[140 rows x 4 columns]


In [17]:
# https://aws.amazon.com/ec2/pricing/on-demand/
def get_cost_per_instance_min(row):
    """Return the per-minute price for the instance type named in ``row['instance']``.

    Each table entry is divided by 60 at lookup time. The figures are presumably
    hourly on-demand USD rates taken from
    https://aws.amazon.com/ec2/pricing/on-demand/ -- TODO confirm region and date.

    Matching is by substring and is tried in table order, so ``row['instance']``
    may carry extra text around the instance type.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with an ``'instance'`` key.

    Returns:
        The table price divided by 60, or ``None`` when no known instance type
        occurs in ``row['instance']``.
    """
    # Ordered pairs (not a set) so the first matching entry wins, as in an if/elif chain.
    price_table = (
        ('c6a.large', 0.0765),
        ('c6g.large', 0.068),
        ('c6i.large', 0.085),
        ('c7g.large', 0.0723),
        ('t4g.large', 0.0672),
        ('t2.large', 0.0928),
        ('t3.large', 0.0832),
    )
    instance_name = row['instance']
    for instance_type, price in price_table:
        if instance_type in instance_name:
            return price / 60
    # Unknown instance type: callers receive None (NaN-like in a DataFrame column).
    return None

In [18]:
# Look up the per-minute price row by row (axis=1 hands each row to the function).
df['instance_cost_per_min'] = df.apply(get_cost_per_instance_min, axis=1)

In [19]:
# Show the frame again now that the `instance_cost_per_min` column has been added.
print(df)

      instance   size     method       time  instance_cost_per_min
0    c6a.large    16K    openssl   0m0.288s               0.001275
19   c6a.large    64M    go-simd   0m4.153s               0.001275
18   c6a.large    64M  go-native  0m14.832s               0.001275
17   c6a.large    64M       node   0m4.229s               0.001275
16   c6a.large    64M    openssl   0m5.092s               0.001275
..         ...    ...        ...        ...                    ...
110  t4g.large    16M    go-simd   0m1.263s               0.001120
111  t4g.large    64M    openssl   0m5.687s               0.001120
112  t4g.large    64M       node   0m4.345s               0.001120
104  t4g.large  1024K       node   0m0.129s               0.001120
102  t4g.large   256K    go-simd   0m0.019s               0.001120

[140 rows x 5 columns]


In [20]:
def convert_to_number(df, cols):
    """Replace human-readable size strings with numbers, column by column.

    Every value in each listed column is passed through ``hmf.parse_size``
    (e.g. the notebook output shows ``'16K'`` becoming ``16000``).

    Args:
        df: DataFrame to convert. It is modified in place.
        cols: Iterable of column names whose values are size strings.

    Returns:
        The same ``df`` object that was passed in, after conversion.
    """
    for column_name in cols:
        parsed_sizes = df[column_name].apply(lambda raw_size: hmf.parse_size(raw_size))
        df[column_name] = parsed_sizes
    return df

def parse_time_syntax(t):
    """Convert a ``<minutes>m<seconds>s`` string such as ``'0m4.153s'`` to a number.

    The text is split on ``'m'``. The first piece must be an integer; when it is
    greater than zero it is parsed as minutes via ``hmf.parse_timespan`` and
    added to the parsed second piece. Otherwise only the second piece counts.

    Args:
        t: Time string containing an ``'m'`` separator.

    Returns:
        The sum of the parsed pieces -- presumably seconds, as the notebook
        output shows ``'0m0.288s'`` becoming ``0.288``.

    Raises:
        ValueError: If the text before the first ``'m'`` is not an integer.
        IndexError: If ``t`` contains no ``'m'``.
    """
    pieces = t.split('m')

    # A zero (or negative) minute count contributes nothing to the total.
    has_minutes = int(pieces[0]) > 0
    total = hmf.parse_timespan("{}m".format(pieces[0])) if has_minutes else 0

    # The seconds piece is added in either case.
    total += hmf.parse_timespan(pieces[1])
    return total
    
def convert_from_time(df, cols):
    """Replace ``<minutes>m<seconds>s`` strings with numbers, column by column.

    Every value in each listed column is passed through ``parse_time_syntax``.

    Args:
        df: DataFrame to convert. It is modified in place.
        cols: Iterable of column names whose values are time strings.

    Returns:
        The same ``df`` object that was passed in, after conversion.
    """
    for column_name in cols:
        parsed_times = df[column_name].apply(lambda raw_time: parse_time_syntax(raw_time))
        df[column_name] = parsed_times
    return df

In [21]:
# Both helpers write the parsed values back into `df` and return that same object,
# so `df_machinereadable` ends up as another name for `df`, not a separate copy.
# NOTE: this cell is not re-runnable; a second pass would feed already-parsed
# numbers back into the string parsers.
convert_from_time(df, ['time'])

number_cols = ['size']
df_machinereadable = convert_to_number(df, number_cols)


In [22]:
# `df` was converted in place by the previous cell, so `size` and `time` now print as numbers.
print(df)

      instance      size     method    time  instance_cost_per_min
0    c6a.large     16000    openssl   0.288               0.001275
19   c6a.large  64000000    go-simd   4.153               0.001275
18   c6a.large  64000000  go-native  14.832               0.001275
17   c6a.large  64000000       node   4.229               0.001275
16   c6a.large  64000000    openssl   5.092               0.001275
..         ...       ...        ...     ...                    ...
110  t4g.large  16000000    go-simd   1.263               0.001120
111  t4g.large  64000000    openssl   5.687               0.001120
112  t4g.large  64000000       node   4.345               0.001120
104  t4g.large   1024000       node   0.129               0.001120
102  t4g.large    256000    go-simd   0.019               0.001120

[140 rows x 5 columns]


In [23]:
# Order the columns alphabetically, then summarise every (instance, size, method) group.
# To restrict the stats to one method, filter first, e.g. df.query("method == 'go-simd'").
df_machinereadable = df_machinereadable.reindex(columns=sorted(df.columns))

benchmark_groups = df_machinereadable.groupby(['instance', 'size', 'method'])
# describe() yields per-group statistics; mean() then averages each statistic across groups.
df_stats = benchmark_groups.describe().mean()

# Print the average of the per-group means for each column of interest.
for c in ['time']:
    print(df_stats[c]['mean'])


2.792007142857143


In [24]:
# One box plot per hashing method: distribution of `time` for each instance type.
# Each figure is shown inline and also exported as HTML and PNG.
benchmark_methods = df_machinereadable['method'].unique()
for m in benchmark_methods:
    rows_for_method = df_machinereadable[df_machinereadable['method'] == m]
    fig = px.box(
        rows_for_method,
        x="instance",
        y="time",
        title=f"Instance speed for SHA {m} operations over range of block sizes",
    )
    fig.show()
    fig.write_html(f'datavis/exp1_speed_data_{m}.html')
    fig.write_image(f'assets/exp1_speed_data_{m}.png')

In [25]:
# Cost of each benchmark run = run time (seconds) / 60 * instance price per minute.
# Plotting `instance_cost_per_min` directly gives one constant value per instance,
# so every box collapses to a flat line and the figure is identical for every method;
# the per-run cost is what actually varies with method and block size.
# The derived column lives on a separate frame so `df_machinereadable` is left untouched.
df_run_cost = df_machinereadable.assign(
    run_cost_usd=df_machinereadable['time'] / 60 * df_machinereadable['instance_cost_per_min']
)

# One box plot per hashing method, shown inline and exported as HTML and PNG.
for m in df_run_cost['method'].unique():
    fig = px.box(
        df_run_cost[df_run_cost['method'] == m],
        x="instance",
        y="run_cost_usd",
        labels={"run_cost_usd": "Cost of benchmark run (USD)"},
        title=f"Cost/performance ratio for SHA {m} operations over a range of block sizes (us-east-1)",
    )
    fig.show()
    fig.write_html(f'datavis/exp1_price_data_{m}.html')
    fig.write_image(f'assets/exp1_price_data_{m}.png')