In [9]:
import os
import pandas as pd

# Load the summary CSV from the `results` directory into `df`.
df = pd.read_csv(
    os.path.join('results', 'results-summary.csv'),
)

In [10]:
# Alphabetical list of the distinct values in the `name` column.
sorted(df['name'].unique())

{'bson',
 'cbor',
 'json',
 'msgpack',
 'orjson',
 'parquet',
 'pickle',
 'protobuf',
 'rapidjson',
 'ujson'}

In [11]:
# parquet and protobuf require tuples: keep their 'tuples' rows, and drop the
# 'tuples' rows of every other name.
df = df[df['dtype'].ne('tuples') | df['name'].isin(['parquet', 'protobuf'])]

In [12]:
# Results for runs with a single item.
single_df = df.loc[df['items'].eq(1)].copy()

# Results for runs with 1M items, plus per-object columns obtained by
# dividing the totals by the item count.
million_df = df.loc[df['items'].eq(1_000_000)].assign(
    avg_object=lambda frame: frame['avg'] / 1_000_000,
    avg_serde_object=lambda frame: frame['avg_serde'] / 1_000_000,
    avg_serialized_size=lambda frame: frame['serialized_size'] / 1_000_000,
)

# Only the 'dump' rows of the 1M-item results.
ser_mil_df = million_df.loc[million_df['fn'].eq('dump')]

In [13]:
# Preview the first five rows of the single-item results.
single_df.head(n=5)

Unnamed: 0,name,dtype,fn,items,avg,avg_serde,baseline-ratio,baseline-speedup,serialized_size
0,bson,dicts,dump,1,3.5e-05,2.3e-05,152.1739,0.6571,498
5,bson,dicts,load,1,1.1e-05,2.3e-05,61.1111,1.6364,498
10,cbor,dicts,dump,1,5e-06,5e-06,21.7391,4.6,435
15,cbor,dicts,load,1,6e-06,5e-06,33.3333,3.0,435
30,json,dicts,dump,1,2.3e-05,2.1e-05,100.0,1.0,934


In [14]:
# Preview the first five rows of the 1M-item results, including the derived
# per-object columns.
million_df.head(n=5)

Unnamed: 0,name,dtype,fn,items,avg,avg_serde,baseline-ratio,baseline-speedup,serialized_size,avg_object,avg_serde_object,avg_serialized_size
4,bson,dicts,dump,1000000,25.494268,18.243811,253.6047,0.3943,446883139,2.5e-05,1.8e-05,446.883139
9,bson,dicts,load,1000000,10.993354,18.243811,77.6202,1.2883,446883139,1.1e-05,1.8e-05,446.883139
14,cbor,dicts,dump,1000000,3.369372,6.608316,33.5169,2.9836,372329891,3e-06,7e-06,372.329891
19,cbor,dicts,load,1000000,9.847261,6.608316,69.528,1.4383,372329891,1e-05,7e-06,372.329891
34,json,dicts,dump,1000000,10.052758,12.107881,100.0,1.0,781815927,1e-05,1.2e-05,781.815927


In [15]:
# Preview the first five 'dump' rows of the 1M-item results.
ser_mil_df.head(n=5)

Unnamed: 0,name,dtype,fn,items,avg,avg_serde,baseline-ratio,baseline-speedup,serialized_size,avg_object,avg_serde_object,avg_serialized_size
4,bson,dicts,dump,1000000,25.494268,18.243811,253.6047,0.3943,446883139,2.5e-05,1.8e-05,446.883139
14,cbor,dicts,dump,1000000,3.369372,6.608316,33.5169,2.9836,372329891,3e-06,7e-06,372.329891
34,json,dicts,dump,1000000,10.052758,12.107881,100.0,1.0,781815927,1e-05,1.2e-05,781.815927
54,msgpack,dicts,dump,1000000,2.375187,5.7842,23.6272,4.2324,372092463,2e-06,6e-06,372.092463
74,orjson,dicts,dump,1000000,1.552512,5.36606,15.4436,6.4752,432798085,2e-06,5e-06,432.798085


In [16]:
import altair as alt


def make_bars_chart(
    df,
    title,
    scale_title=None,
    avg_col='avg',
    sqrt_scale=True,
    width=50,
    height=300,
    multi_bar=True,
    legend=True,
    labels=True,
):
    """Build a labelled Altair bar chart of the values in ``df[avg_col]``.

    The values are treated as seconds and rescaled to a display unit chosen
    from their median: seconds when the median is above 1, milliseconds when
    it is above 0.001, microseconds otherwise. The rescaled values are
    rounded to two decimals and stored in a new column named after the unit
    ('secs', 'ms' or 'µs'); that column drives both the bar heights and the
    text labels drawn on the bars.

    Note: this rescaling is applied regardless of ``scale_title``, so a
    caller plotting a non-time quantity still gets the median-based factor.

    Parameters
    ----------
    df : DataFrame
        Must contain ``avg_col`` and a ``name`` column, plus an ``fn`` column
        when ``multi_bar`` is true. It is not modified.
    title : str
        Title of the returned chart.
    scale_title : str, optional
        Y-axis title. When falsy, a title describing the chosen time unit is
        used.
    avg_col : str
        Column holding the values to plot.
    sqrt_scale : bool
        Use a square-root y scale instead of the default scale.
    width, height : int
        Passed to ``alt.Chart``.
    multi_bar : bool
        True: one bar per ``fn`` value, faceted into one column per ``name``.
        False: one bar per ``name`` and no facet field.
    legend : bool
        Show the colour legend.
    labels : bool
        Show the x-axis tick labels.

    Returns
    -------
    The faceted, configured Altair chart.
    """
    # Choose the display unit and its multiplier from the median value.
    median_value = df[avg_col].median()
    if median_value > 1:
        unit, factor = 'secs', 1
    elif median_value > 0.001:
        unit, factor = 'ms', 1e3
    else:
        unit, factor = 'µs', 1e6

    # Work on a new frame so the caller's DataFrame is left untouched.
    plot_data = df.assign(**{unit: (df[avg_col] * factor).round(2)})

    default_titles = {
        'secs': 'seconds',
        'ms': 'milliseconds (1e−3 secs)',
        'µs': 'microseconds (1e−6 secs)',
    }
    y_title = scale_title or default_titles[unit]

    y_scale = alt.Scale(type='sqrt') if sqrt_scale else alt.Scale()

    if multi_bar:
        x_field, facet_args = 'fn:N', {'column': 'name:N'}
    else:
        x_field, facet_args = 'name:N', {}

    legend_spec = alt.Legend() if legend else None
    value_field = f'{unit}:Q'

    x_encoding = alt.X(
        x_field,
        scale=alt.Scale(),
        axis=alt.Axis(title='', labels=labels),
    )
    y_encoding = alt.Y(
        value_field,
        scale=y_scale,
        axis=alt.Axis(title=y_title, grid=False),
    )
    # The same two-colour range is used whichever field is on the x axis.
    color_encoding = alt.Color(
        x_field,
        scale=alt.Scale(range=["#FF7B06", "#094AFB"]),
        legend=legend_spec,
    )

    bars = alt.Chart(width=width, height=height).mark_bar(
        stroke='transparent',
        size=20,
    ).encode(x_encoding, y_encoding, color=color_encoding)

    # Text marks reuse the bar encodings and are nudged 2px upwards.
    value_labels = bars.mark_text(color='black', dx=0, dy=-2).encode(
        text=value_field,
    )

    layered = alt.layer(bars, value_labels, data=plot_data)
    faceted = layered.facet(**facet_args)
    styled = faceted.configure_axis(domainWidth=0.8).configure_view(
        stroke='transparent',
    )
    return styled.properties(title=title)

In [17]:
# Single-item timings: 'fn' bars in one column per name, x labels hidden.
make_bars_chart(
    df=single_df,
    title='Serialization Time of a Single Object',
    labels=False,
    sqrt_scale=True,
)

In [18]:
# Per-object timings from the 1M-item runs, same layout as the single-item chart.
make_bars_chart(
    df=million_df,
    title='Average Object Serialization Time (1M objects)',
    avg_col='avg_object',
    labels=False,
    sqrt_scale=True,
)

In [19]:
# Per-object `avg_serde` values: one bar per name, no legend.
make_bars_chart(
    df=ser_mil_df,
    title='Average Object Serialize & Deserialize Time (1M objects)',
    avg_col='avg_serde_object',
    multi_bar=False,
    legend=False,
    sqrt_scale=True,
    width=500,
    height=250,
)

In [20]:
# Per-object serialized size: one bar per name, linear y axis titled 'bytes'.
make_bars_chart(
    df=ser_mil_df,
    title='Average Object Serialized Size (1M objects)',
    avg_col='avg_serialized_size',
    scale_title='bytes',
    multi_bar=False,
    legend=False,
    sqrt_scale=False,
    width=500,
    height=250,
)