In [None]:
%load_ext autoreload
%autoreload 2
import dt4dds_benchmark
import plotly.express as px
import pandas as pd
import numpy as np

# Result files are read from ./data/<error type>/<codec configuration>.hdf5.
codec_configurations = (
    'aeon_high',
    'aeon_low',
    'aeon_medium',
    'fountain_high',
    'fountain_low',
    'fountain_medium',
    'goldman_default',
    'rs_high',
    'rs_low',
    'rs_medium',
    'hedges_low',
    'hedges_medium',
    'yinyang_default',
)
error_types = (
    'insertion',
    'deletion',
    'substitution',
)

# Codec configuration is the outer loop and error type the inner one, so the
# files are loaded (and handed to `combine`) configuration by configuration.
loaded_results = [
    dt4dds_benchmark.pipelines.HDF5Manager(f'./data/{error_type}/{configuration}.hdf5').get_data()
    for configuration in codec_configurations
    for error_type in error_types
]

# Merge every loaded result set into the single dataset used by all cells below.
data = dt4dds_benchmark.analysis.Dataset.combine(*loaded_results)

### Check fit of simulation results

In [None]:
# Parameters that identify one experiment series; the same columns are used to
# split the dataset and to build each figure title.
fit_group_columns = ('codec.type', 'codec.name', 'clustering.name', 'clustering.type', 'metadata.name')

# Show one fit figure per parameter combination so each fit can be inspected by eye.
for subset in data.separate_by_parameters(list(fit_group_columns)):
    fitted = subset.fit('workflow.overall_rate')
    fitted.plot(title_columns=list(fit_group_columns)).show()

### Get the threshold values by codec, clustering, and scenario

In [None]:
# Fit 'workflow.overall_rate' once per parameter group and carry along the
# mean code rate of each group.
df = data.get_fits_by_group(
    ['codec.type', 'codec.name', 'clustering.name', 'clustering.type', 'metadata.name'],
    on='workflow.overall_rate',
    additional_agg={'code_rate': 'mean'},
)

# Store the code rate as a two-decimal string; later cells use it as a label.
two_decimals = '{:.2f}'.format
df['code_rate'] = df['code_rate'].map(two_decimals)

df

### Plot thresholds for substitution, deletion, and insertion errors

In [None]:
# Keep only the three single-error-type scenarios, and leave out every row
# whose clustering type is 'BasicSet'.
is_error_scenario = df['metadata.name'].isin(['substitution', 'deletion', 'insertion'])
not_basic_set = df['clustering.type'] != 'BasicSet'
plotdf = df.loc[is_error_scenario & not_basic_set].copy()

# The id is assembled from the full codec type, i.e. before it is abbreviated below.
id_parts = (plotdf['codec.type'], plotdf['codec.name'], plotdf['clustering.type'])
plotdf['id'] = id_parts[0] + '_' + id_parts[1] + '_' + id_parts[2]

# Shorten two codec labels. Note the two steps differ: `.str.replace` rewrites
# the substring 'YinYang' wherever it occurs, while `Series.replace` only
# rewrites values that are exactly 'Goldman'.
abbreviated = plotdf['codec.type'].str.replace('YinYang', 'YY')
plotdf['codec.type'] = abbreviated.replace('Goldman', 'GM')

plotdf

In [None]:
# Row order handed to the bar plot: codec, then code rate, then error type.
ordered_thresholds = plotdf.sort_values(['codec.type', 'code_rate', 'metadata.name'])
error_type_colors = {
    'substitution': '#e6550d',
    'deletion': '#3182bd',
    'insertion': '#2ca25f',
}

fig = dt4dds_benchmark.analysis.plotting.tiered_bar(
    ordered_thresholds,
    "codec.type",
    "code_rate",
    "threshold",
    color_by="metadata.name",
    # Error bars are currently switched off:
    # error_upper = "threshold_50%",
    # error_lower = "threshold_99%",
    color_discrete_map=error_type_colors,
)

# Linear percentage axis from 0 % to 15 % with a tick every 5 %.
y_axis_style = {
    'title_text': 'Error rate per nt',
    'tickformat': ",.0%",
    'range': [0, 0.15],
    # 'type': "log",
    'dtick': 0.05,
}
fig.update_yaxes(**y_axis_style)

# Small fixed canvas without legend.
fig.update_layout(
    width=320,
    height=120,
    margin={'l': 0, 'r': 2, 't': 2, 'b': 30},
    showlegend=False,
)

# Horizontal reference line at the mean threshold of all rows labelled 'YY'.
is_yy = plotdf['codec.type'] == 'YY'
yy_mean_threshold = plotdf.loc[is_yy, 'threshold'].mean()
fig.add_hline(
    y=yy_mean_threshold,
    line_dash='solid',
    line_color='#252525',
    line_width=1,
)

# Apply the project-wide styling first, then override the x tick labels.
fig = dt4dds_benchmark.analysis.plotting.standardize_plot(fig)
fig.update_xaxes(
    tickfont_size=28/3,
    tickangle=0,
)
fig.show()

# Export as vector graphic and as PNG at twice the nominal resolution.
fig.write_image('./figures/individual_plot.svg')
fig.write_image('./figures/individual_plot.png', scale=2)

### Check duration and memory constraints of the decoding step

In [None]:
def _resource_scatter(frame, y_column, y_title, limit, y_axis_max, y_minor_dtick):
    """Build one faceted scatter figure of a resource metric against the error rate.

    The figure has one facet column per value of ``name`` and one facet row per
    value of ``clustering.type``. Points are coloured by ``metadata.name`` and
    drawn filled or open depending on ``decoding_success``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Rows to plot. Must provide the columns 'workflow.overall_rate',
        'metadata.name', 'name', 'clustering.type', 'decoding_success' and
        ``y_column``.
    y_column : str
        Column shown on the y axis.
    y_title : str
        Title of the y axis (only set on the first facet column).
    limit : float
        Position of the dotted horizontal line marking the resource limit.
    y_axis_max : float
        Upper end of the y axis range (chosen slightly above ``limit`` so the
        limit line stays visible).
    y_minor_dtick : float
        Spacing of the minor y ticks.

    Returns
    -------
    The figure as returned by ``dt4dds_benchmark.analysis.plotting.standardize_plot``.
    """
    fig = px.scatter(
        frame,
        x='workflow.overall_rate',
        y=y_column,
        color='metadata.name',
        facet_col='name',
        facet_row='clustering.type',
        facet_row_spacing=0.2,
        facet_col_spacing=0.075,
        symbol='decoding_success',
        color_discrete_map={'overall': '#636363', 'substitution': '#de2d26', 'insertion': '#31a354', 'deletion': '#3182bd'},
        # Failed decodings stay in the figure but are drawn as open circles.
        symbol_map={True: 'circle', False: 'circle-open'},
    )
    fig.add_hline(y=limit, line_dash="dot", line_color="black", line_width=2)
    # Unlink the facets' x axes, then give every facet the same explicit ranges.
    fig.update_xaxes(matches=None)
    fig.for_each_xaxis(lambda xaxis: xaxis.update(range=[0, 0.15], minor_dtick=0.025, dtick=0.05))
    fig.for_each_yaxis(lambda yaxis: yaxis.update(range=[0, y_axis_max], minor_dtick=y_minor_dtick))
    fig.update_xaxes(title='Error rate per nt', row=1)
    fig.update_yaxes(title=y_title, col=1)
    # Facet labels arrive as "column=value"; keep only the value.
    fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
    fig.update_layout(
        width=315,
        height=200,
        margin=dict(l=0, r=10, t=20, b=10),
        showlegend=False,
    )
    return dt4dds_benchmark.analysis.plotting.standardize_plot(fig)


# Join the performance measurements with the experiment results (pandas merges
# on the columns both frames have in common).
performancedf = pd.merge(data.combined_performances, data.results)

# Keep only measurements of the decoding step and exclude the 'dropout' scenario.
# Runs with decoding_success == False are kept on purpose; they are plotted
# with open markers.
not_dropout = performancedf['metadata.name'] != 'dropout'
is_decoding_step = performancedf['identifier'] == b'decoding'
performancedf = performancedf.loc[not_dropout & is_decoding_step].copy()

# Code rates become two-decimal label strings; 1.51 and 1.01 are folded into
# the 1.50 and 1.00 labels so they share a facet.
performancedf['code_rate'] = performancedf['code_rate'].map('{:.2f}'.format)
performancedf.loc[performancedf['code_rate'] == '1.51', 'code_rate'] = '1.50'
performancedf.loc[performancedf['code_rate'] == '1.01', 'code_rate'] = '1.00'

# Collapse the clustering types into two display groups: 'BasicSet' is shown as
# 'Naive', every other type as 'Clustering'.
performancedf.loc[performancedf['clustering.type'] == 'BasicSet', 'clustering.type'] = 'Naive'
performancedf.loc[performancedf['clustering.type'] != 'Naive', 'clustering.type'] = 'Clustering'

# One entry per figure type:
# (y column, y-axis title, limit line, y-axis maximum, minor tick step, file prefix)
resource_panels = (
    ('duration', 'Runtime / min', 60, 61, 10, 'duration'),
    ('memory_value', 'Memory / GB', 8, 8.1, 1, 'memory'),
)

for codec in performancedf['codec.type'].unique():
    codec_df = performancedf.loc[(performancedf['codec.type'] == codec)].copy()
    # Sort descending on the rank below and on the clustering label; the row
    # order determines the order in which plotly lays out the facets.
    codec_df['codec.order'] = codec_df['codec.name'].map({'high': 0, 'medium': 1, 'low': 2, 'default': 3})
    codec_df = codec_df.sort_values(['codec.order', 'clustering.type'], ascending=False)
    # NOTE(review): assumes 'duration' is recorded in seconds, so this yields minutes — confirm.
    codec_df['duration'] = codec_df['duration'] / 60
    codec_df['name'] = codec_df['code_rate'] + " bit/nt"

    for y_column, y_title, limit, y_axis_max, y_minor_dtick, file_prefix in resource_panels:
        fig = _resource_scatter(codec_df, y_column, y_title, limit, y_axis_max, y_minor_dtick)
        fig.show()
        fig.write_image(f'./figures/{file_prefix}_{codec}.svg')
        fig.write_image(f'./figures/{file_prefix}_{codec}.png', scale=2)