In [None]:
%load_ext autoreload
%autoreload 2
import dt4dds_benchmark
import plotly.express as px
import pandas as pd

# Result files live at ./data/<workflow>/<run>.hdf5, one file per
# (codec run, workflow) combination.
codec_runs = (
    'aeon_high',
    'aeon_low',
    'aeon_medium',
    'fountain_high',
    'fountain_low',
    'fountain_medium',
    'goldman_default',
    'rs_high',
    'rs_low',
    'rs_medium',
    'hedges_low',
    'hedges_medium',
    'yinyang_default',
)
workflow_dirs = (
    'serialdilution',
    'serialpcr',
    'downsampling',
)

# Enumerate the files with the codec run as the outer loop and the workflow as
# the inner loop, then load each one and merge everything into one dataset.
hdf5_paths = [f'./data/{w}/{s}.hdf5' for s in codec_runs for w in workflow_dirs]
data = dt4dds_benchmark.analysis.Dataset.combine(
    *(dt4dds_benchmark.pipelines.HDF5Manager(path).get_data() for path in hdf5_paths)
)

### group by codec type/settings and workflow

In [None]:
# Aggregate decoding success, carrying the mean code rate along.
df = data.get_aggregated_results('decoding_success', additional_agg={'code_rate': 'mean'})

# Use the code rate as a two-decimal string label and fold the two
# near-duplicate labels onto their round neighbours.
rate_labels = df['code_rate'].map('{:.2f}'.format)
df['code_rate'] = rate_labels.replace({"1.51": "1.50", "1.01": "1.00"})

# One common "iterations" axis across workflows: the row-wise maximum of the
# three workflow-specific columns.
iteration_columns = ['workflow.n_pcrs', 'workflow.n_dilutions', 'workflow.coverage']
df['workflow.iterations'] = df[iteration_columns].max(axis=1)

# Keep the best decoding success per workflow / iteration / codec / code rate.
group_keys = ['workflow.type', 'workflow.iterations', 'codec.type', 'codec.name', 'code_rate']
df = df.groupby(group_keys)['decoding_success'].max().reset_index()

df

In [None]:
# Overview: decoding success over workflow iterations, faceted by workflow type
# (rows) and codec type (columns), coloured by codec name.
scatter_encoding = dict(
    x="workflow.iterations",
    y="decoding_success",
    color="codec.name",
    facet_row="workflow.type",
    facet_col="codec.type",
)
px.scatter(df, **scatter_encoding)

### Plot the thresholds with reference values for comparison

In [None]:
# Serial-dilution figure: for each codec, the highest dilution iteration among
# the rows with decoding_success == 1.
plot_df = df.loc[(df['workflow.type'] == 'SerialDilution') & (df['decoding_success'] == 1)].copy()
plot_df = plot_df.groupby(['workflow.type', 'codec.type', 'codec.name', 'code_rate'])['workflow.iterations'].max().reset_index()
plot_df['workflow.iterations'] += 1 # DNA-Fountain paper counts the master pool as first dilution

# Append a placeholder row (workflow.iterations of 0) for the Goldman codec so
# that it still shows up in the plot when it has no successful run.
# The codec family is stored in 'codec.type' ('codec.name' holds the setting,
# e.g. 'default', as the placeholder row itself shows), so the presence check
# must look at 'codec.type'. Checking 'codec.name' was always true and added
# the placeholder even when real Goldman results were present.
# NOTE(review): the code rate '0.34' is hard-coded; confirm it matches the
# label produced for the Goldman codec in `df`.
if 'Goldman' not in plot_df['codec.type'].values:
    plot_df = pd.concat([plot_df, pd.DataFrame({'workflow.type': 'SerialDilution', 'codec.type': 'Goldman', 'codec.name': 'default', 'code_rate': '0.34', 'workflow.iterations': 0}, index=[0])])
# Shorten the long codec labels for the x axis. The first call is a substring
# replacement (.str.replace), the second a whole-value replacement (Series.replace).
plot_df['codec.type'] = plot_df['codec.type'].str.replace('YinYang', 'YY').replace('Goldman', 'GM')

fig = dt4dds_benchmark.analysis.plotting.tiered_bar(
    plot_df.sort_values(['codec.type', 'code_rate', 'workflow.type']),
    "codec.type",
    "code_rate",
    "workflow.iterations",
    color_by = "workflow.type",
    color_discrete_map = {
        'SerialDilution': '#3182bd',
    },
)
fig.update_layout(
    width=320,
    height=100,
    margin=dict(l=50, r=2, t=10, b=30),
    showlegend=False,
)
fig.update_yaxes(title_text='Dilution iterations', range=[1, 7.1], dtick=2, minor_dtick=1, tick0=1)
# Horizontal reference lines at fixed iteration counts.
# NOTE(review): presumably thresholds reported in the literature - confirm the
# source of 4 / 5 / 6 and what each dash style stands for.
fig.add_hline(y=4, line_dash='dot', line_color='black', line_width=2)
fig.add_hline(y=5, line_dash='dash', line_color='black', line_width=2)
fig.add_hline(y=6, line_dash='solid', line_color='black', line_width=2)

fig = dt4dds_benchmark.analysis.plotting.standardize_plot(fig)
# Applied after standardize_plot, so these x-axis settings take effect last.
fig.update_xaxes(
    tickfont_size=28/3, 
    tickangle=0,
)
fig.show()
# The ./figures directory must already exist.
fig.write_image('./figures/workflows_dilution.svg')
fig.write_image('./figures/workflows_dilution.png', scale=2)

In [None]:
# Serial-PCR figure: for each codec, the highest PCR iteration among the rows
# with decoding_success == 1.
plot_df = df.loc[(df['workflow.type'] == 'SerialPCR') & (df['decoding_success'] == 1)].copy()
plot_df = plot_df.groupby(['workflow.type', 'codec.type', 'codec.name', 'code_rate'])['workflow.iterations'].max().reset_index()

# Append a placeholder row (workflow.iterations of 0) for the Goldman codec so
# that it still shows up in the plot when it has no successful run.
# The codec family is stored in 'codec.type' ('codec.name' holds the setting,
# e.g. 'default', as the placeholder row itself shows), so the presence check
# must look at 'codec.type'. Checking 'codec.name' was always true and added
# the placeholder even when real Goldman results were present.
# NOTE(review): the code rate '0.34' is hard-coded; confirm it matches the
# label produced for the Goldman codec in `df`.
if 'Goldman' not in plot_df['codec.type'].values:
    plot_df = pd.concat([plot_df, pd.DataFrame({'workflow.type': 'SerialPCR', 'codec.type': 'Goldman', 'codec.name': 'default', 'code_rate': '0.34', 'workflow.iterations': 0}, index=[0])])
# Shorten the long codec labels for the x axis. The first call is a substring
# replacement (.str.replace), the second a whole-value replacement (Series.replace).
plot_df['codec.type'] = plot_df['codec.type'].str.replace('YinYang', 'YY').replace('Goldman', 'GM')

fig = dt4dds_benchmark.analysis.plotting.tiered_bar(
    plot_df.sort_values(['codec.type', 'code_rate', 'workflow.type']),
    "codec.type",
    "code_rate",
    "workflow.iterations",
    color_by = "workflow.type",
    color_discrete_map = {
        'SerialPCR': '#e6550d',
    },
)
fig.update_layout(
    width=320,
    height=100,
    margin=dict(l=50, r=2, t=10, b=30),
    showlegend=False,
)
fig.update_yaxes(title_text='PCR iterations', range=[1, 9.1], dtick=4, minor_dtick=1, tick0=1)
# Horizontal reference line at a fixed iteration count.
# NOTE(review): presumably a threshold reported in the literature - confirm
# the source of the value 9.
fig.add_hline(y=9, line_dash='dot', line_color='black', line_width=2)


fig = dt4dds_benchmark.analysis.plotting.standardize_plot(fig)
# Applied after standardize_plot, so these x-axis settings take effect last.
fig.update_xaxes(
    tickfont_size=28/3, 
    tickangle=0,
)
fig.show()
# The ./figures directory must already exist.
fig.write_image('./figures/workflows_pcr.svg')
fig.write_image('./figures/workflows_pcr.png', scale=2)

In [None]:
# Downsampling figure: for each codec, the lowest coverage among the rows with
# decoding_success == 1 (hence min() instead of max()).
plot_df = df.loc[(df['workflow.type'] == 'Downsampling') & (df['decoding_success'] == 1)].copy()
plot_df = plot_df.groupby(['workflow.type', 'codec.type', 'codec.name', 'code_rate'])['workflow.iterations'].min().reset_index()

# Append a placeholder row (workflow.iterations of 0) for the Yin-Yang codec so
# that it still shows up in the plot when it has no successful run.
# The codec family is stored in 'codec.type' ('codec.name' holds the setting,
# e.g. 'default', as the placeholder row itself shows), so the presence check
# must look at 'codec.type'. Checking 'codec.name' was always true and added
# the placeholder even when real Yin-Yang results were present.
# NOTE(review): the code rate '1.85' is hard-coded; confirm it matches the
# label produced for the Yin-Yang codec in `df`.
if 'YinYang' not in plot_df['codec.type'].values:
    plot_df = pd.concat([plot_df, pd.DataFrame({'workflow.type': 'Downsampling', 'codec.type': 'YinYang', 'codec.name': 'default', 'code_rate': '1.85', 'workflow.iterations': 0}, index=[0])])
# Shorten the long codec labels for the x axis. The first call is a substring
# replacement (.str.replace), the second a whole-value replacement (Series.replace).
plot_df['codec.type'] = plot_df['codec.type'].str.replace('YinYang', 'YY').replace('Goldman', 'GM')

fig = dt4dds_benchmark.analysis.plotting.tiered_bar(
    plot_df.sort_values(['codec.type', 'code_rate', 'workflow.type']),
    "codec.type",
    "code_rate",
    "workflow.iterations",
    color_by = "workflow.type",
    color_discrete_map = {
        'Downsampling': '#31a354',
    },
)
fig.update_layout(
    width=320,
    height=100,
    margin=dict(l=50, r=2, t=10, b=30),
    showlegend=False,
)
fig.update_yaxes(title_text='Sequencing depth', range=[0, 10], dtick=5, minor_dtick=1, tick0=0)
# Horizontal reference line at a fixed sequencing depth.
# NOTE(review): presumably a threshold reported in the literature - confirm
# the source of the value 4.
fig.add_hline(y=4, line_dash='solid', line_color='black', line_width=2)


fig = dt4dds_benchmark.analysis.plotting.standardize_plot(fig)
# Applied after standardize_plot, so these x-axis settings take effect last.
fig.update_xaxes(
    tickfont_size=28/3, 
    tickangle=0,
)
fig.show()
# The ./figures directory must already exist.
fig.write_image('./figures/workflows_downsampling.svg')
fig.write_image('./figures/workflows_downsampling.png', scale=2)