In [1]:
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.api as sm
import plotly.express as px
import plotly.graph_objects as pg
import numpy as np
import sklearn.metrics
import scipy.stats

In [2]:
%load_ext autoreload
%autoreload 2
import dt4dds.analysis as analysis

data = analysis.GroupAnalysis([
    ('M_exp', analysis.ErrorAnalysis("../../data/DoT/Bunny_M")),
    ('P_exp', analysis.ErrorAnalysis("../../data/DoT/Bunny_P")),
    ('F1_exp', analysis.ErrorAnalysis("../../data/DoT/Bunny_F1")),
    ('F2_exp', analysis.ErrorAnalysis("../../data/DoT/Bunny_F2")),
    ('F3_exp', analysis.ErrorAnalysis("../../data/DoT/Bunny_F3")),
    ('F4_exp', analysis.ErrorAnalysis("../../data/DoT/Bunny_F4")),
    ('F5_exp', analysis.ErrorAnalysis("../../data/DoT/Bunny_F5")),
    ('M_sim', analysis.ErrorAnalysis("../../data/bunny_generations/Bunny_M")),
    ('P_sim', analysis.ErrorAnalysis("../../data/bunny_generations/Bunny_P")),
    ('F1_sim', analysis.ErrorAnalysis("../../data/bunny_generations/Bunny_F1")),
    ('F2_sim', analysis.ErrorAnalysis("../../data/bunny_generations/Bunny_F2")),
    ('F3_sim', analysis.ErrorAnalysis("../../data/bunny_generations/Bunny_F3")),
    ('F4_sim', analysis.ErrorAnalysis("../../data/bunny_generations/Bunny_F4")),
    ('F5_sim', analysis.ErrorAnalysis("../../data/bunny_generations/Bunny_F5")),
])

# error rates

In [3]:
# Work on a copy of the overall error rates so the analysis object stays untouched.
plot_data = data.data['overall_error_rates'].copy()

# Each group label is "<condition>_<source>"; break it into its two parts.
label_parts = plot_data['group'].str.split('_', expand=True)
plot_data[['Condition', 'Source']] = label_parts

# Put the rates of both sources side by side: one row per (type, read, Condition),
# one column per Source value.
plot_data = plot_data.pivot(
    index=['type', 'read', 'Condition'],
    columns='Source',
    values='rate',
).reset_index()

In [4]:
# Parity plot of the error rates: "sim" column on x, "exp" column on y, both
# scaled by 1e3 to match the 10^-3 nt^-1 axis units. Markers are coloured by
# the `type` column.
sim_rate = plot_data["sim"]*1e3
exp_rate = plot_data["exp"]*1e3

fig = px.scatter(
    plot_data,
    x=sim_rate,
    y=exp_rate,
    color="type",
    hover_data=['Condition'],
    color_discrete_sequence=["#3182bd", "#756bb1", "#31a354", "#e6550d"]
)

fig.update_layout(
    template='simple_white',
    height=330,
    width=330,
    showlegend=False,
    margin=dict(l=0, r=10, t=10, b=0),
    font_family="Inter",
    legend_font_size=28/3,
)

# Both axes share range, tick spacing and fonts; only the titles differ.
rate_axis = dict(
    range=[0, 17.5],
    dtick=5,
    minor_ticks="outside",
    minor_dtick=2.5,
    title_font_family="Inter",
    title_font_size=28/3,
    tickfont_size=28/3,
)
fig.update_xaxes(
    title='Model error rate / 10<sup>-3</sup> nt<sup>-1</sup>',
    **rate_axis
)
fig.update_yaxes(
    title='Experimental error rate / 10<sup>-3</sup> nt<sup>-1</sup>',
    **rate_axis
)

# Guide lines y = 1.2x and y = 0.8x. The upper one carries fill='tonexty', so
# once the trace order is reversed below it fills down to the lower one.
guide_x = np.linspace(0, 100)
upper_guide = pg.Scatter(
    x=guide_x,
    y=1.2*guide_x,
    line_color="#dddddd",
    line_width=1,
    fill='tonexty',
    fillcolor="#dddddd",
)
lower_guide = pg.Scatter(
    x=guide_x,
    y=0.8*guide_x,
    line_color="#dddddd",
    line_width=1,
)
fig.add_trace(upper_guide)
fig.add_trace(lower_guide)

# Reverse the trace order so the grey band is drawn first, underneath the markers.
fig.data = fig.data[::-1]

# Identity line y = x, added after the reversal so it is drawn last.
fig.add_trace(
    pg.Scatter(
        x=guide_x,
        y=guide_x,
        line_color="#000000",
        line_width=1,
    )
)

# Agreement metrics; MAE is computed on the scaled rates so it is in axis units.
r2 = sklearn.metrics.r2_score(plot_data['exp'], plot_data['sim'])
mae = sklearn.metrics.mean_absolute_error(exp_rate, sim_rate)
cor = scipy.stats.pearsonr(plot_data['exp'], plot_data['sim'])[0]

fig.add_annotation(
    x=3, y=15,
    text=f"R2: {r2:0.2f}<br>MAE: {mae:0.2f}<br>COR: {cor:0.2f}",
    showarrow=False,
    align="left",
    font_family="Inter",
    font_size=28/3,
)

fig.show()
fig.write_image("comparison_errors.svg")

# coverage bias

In [5]:
%load_ext autoreload
%autoreload 2
import dt4dds.analysis as analysis

data = analysis.DistributionAnalysis({
    'M_exp': '../../data/DoT/Bunny_M/scafstats.txt',
    'P_exp': '../../data/DoT/Bunny_P/scafstats.txt',
    'F1_exp': '../../data/DoT/Bunny_F1/scafstats.txt',
    'F2_exp': '../../data/DoT/Bunny_F2/scafstats.txt',
    'F3_exp': '../../data/DoT/Bunny_F3/scafstats.txt',
    'F4_exp': '../../data/DoT/Bunny_F4/scafstats.txt',
    'F5_exp': '../../data/DoT/Bunny_F5/scafstats.txt',
    'M_sim': '../../data/bunny_generations/Bunny_M/scafstats.txt',
    'P_sim': '../../data/bunny_generations/Bunny_P/scafstats.txt',
    'F1_sim': '../../data/bunny_generations/Bunny_F1/scafstats.txt',
    'F2_sim': '../../data/bunny_generations/Bunny_F2/scafstats.txt',
    'F3_sim': '../../data/bunny_generations/Bunny_F3/scafstats.txt',
    'F4_sim': '../../data/bunny_generations/Bunny_F4/scafstats.txt',
    'F5_sim': '../../data/bunny_generations/Bunny_F5/scafstats.txt',
})

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Missing sequences: 70, 0.58%
Missing sequences: 90, 0.75%
Missing sequences: 185, 1.54%
Missing sequences: 136, 1.13%
Missing sequences: 191, 1.59%
Missing sequences: 336, 2.80%
Missing sequences: 240, 2.00%
Missing sequences: 36, 0.30%
Missing sequences: 59, 0.49%
Missing sequences: 105, 0.88%
Missing sequences: 159, 1.32%
Missing sequences: 240, 2.00%
Missing sequences: 365, 3.04%
Missing sequences: 439, 3.66%


Unnamed: 0_level_0,mean,total
exp,Unnamed: 1_level_1,Unnamed: 2_level_1
F1_exp,139.7155,1676586.0
F1_sim,332.255333,3987064.0
F2_exp,147.329667,1767956.0
F2_sim,332.1615,3985938.0
F3_exp,163.638333,1963660.0
F3_sim,332.151833,3985822.0
F4_exp,150.436667,1805240.0
F4_sim,331.990667,3983888.0
F5_exp,645.079667,7740956.0
F5_sim,331.971667,3983660.0


In [6]:
# Keep only rows with at least one assigned read (log(x) is taken below).
# A boolean mask removes exactly the zero-read rows; dropping by index label
# would also remove every other row sharing that label if the index of
# data.data is not unique. The trailing .copy() gives an independent frame so
# the column assignment below does not write into a view of data.data.
plot_data = data.data
plot_data = plot_data.loc[plot_data.assignedReads != 0].copy()

# Experiment labels are "<condition>_<source>"; split them for faceting.
plot_data[['Condition', 'Source']] = plot_data.exp.str.split('_', expand=True)

# One histogram of x per (Source, Condition) facet.
fig = px.histogram(
    plot_data, 
    x="x",  
    barmode='overlay',
    facet_row="Source",
    facet_row_spacing=0.05,
    facet_col="Condition",
    color_discrete_sequence=["#525252", "#cccccc"],
    range_x=[0, 2], 
    range_y=[0, 3], 
    opacity=0.65,
    histnorm='probability density'
)
fig.update_traces(xbins={'start': 0.0, 'end': 4, 'size': 0.1}, selector=dict(type='histogram'))

# Annotate every facet with the standard deviation of ln(x).
# ddof=0 matches the population standard deviation (np.std) used for the same
# quantity in the bias comparison cell.
sources = plot_data.Source.unique()
conditions = plot_data.Condition.unique()
n_rows = len(sources)
for i, source in enumerate(sources):
    for j, cond in enumerate(conditions):
        window = plot_data.loc[(plot_data.Source == source) & (plot_data.Condition == cond)]
        fig.add_annotation(x=1.1, y=1.65,
            text=f"σ = {np.log(window.x).std(ddof=0):0.2f}",
            showarrow=False,
            yshift=0, 
            col=1+j,
            # Facet rows are addressed counting down from the number of
            # sources (first source -> highest row index); this equals the
            # previous hard-coded 2-i when there are two sources.
            row=n_rows-i,
            font_family="Inter",
            font_size=28/3,
        )

fig.for_each_xaxis(lambda xaxis: xaxis.update(dtick=1))
fig.for_each_yaxis(lambda yaxis: yaxis.update(dtick=1))
fig.update_traces(marker=dict(line_width=0), selector=dict(type='histogram')) 

# Clear all axis titles first, then label only row 1 (x) and column 1 (y).
fig.update_xaxes(
    title_text='',
    title_font_family="Inter",
    title_font_size=28/3, 
    tickfont_size=28/3,
)
fig.update_xaxes(
    title_text='Norm. cov.', 
    row=1
)
fig.update_yaxes(
    title_text='', 
    title_font_family="Inter",
    title_font_size=28/3, 
    tickfont_size=28/3,
)
fig.update_yaxes(
    title_text='Probability density', 
    col=1
)
fig.update_layout(
    template="simple_white", 
    height=300, 
    width=600,
    showlegend=False, 
    margin=dict(l=0, r=0, t=20, b=0),
    font_family="Inter",
    legend_font_size=28/3,
)
fig.show()
fig.write_image("comparison_coverage.svg")

In [7]:
# Keep only rows with at least one assigned read before taking the logarithm.
# A boolean mask removes exactly the zero-read rows; dropping by index label
# would also remove every other row sharing that label if the index of
# data.data is not unique. The trailing .copy() gives an independent frame so
# adding the lnx column does not write into a view of data.data.
plot_data = data.data
plot_data = plot_data.loc[plot_data.assignedReads != 0].copy()
plot_data['lnx'] = np.log(plot_data.x)

# Standard deviation of ln(x) per experiment. ddof=0 reproduces the population
# standard deviation that np.std computed in the former per-group helper, and
# the built-in aggregation avoids applying a function over the grouping column.
plot_data = (
    plot_data.groupby("exp")["lnx"]
    .std(ddof=0)
    .rename("std")
    .reset_index()
)

# Experiment labels are "<condition>_<source>"; pivot so that each condition
# has its "exp" and "sim" value side by side.
plot_data[['Condition', 'Source']] = plot_data.exp.str.split('_', expand=True)
plot_data = plot_data.pivot(index=['Condition'], columns='Source', values='std')
plot_data = plot_data.reset_index()

# Parity plot: "sim" column on x, "exp" column on y, one marker per condition.
fig = px.scatter(
    plot_data,
    x="sim",
    y="exp",
    hover_data=['Condition'],
)


fig.update_layout(
    template='simple_white',
    height=330,
    width=330,
    showlegend=False,
    margin=dict(l=0, r=10, t=10, b=0),
    font_family="Inter",
    legend_font_size=28/3,
)


fig.update_xaxes(
    title='Model bias',
    range=[0, 2.2],
    dtick=0.5,
    minor_ticks="outside", 
    minor_dtick=0.25,
    title_font_family="Inter",
    title_font_size=28/3, 
    tickfont_size=28/3
)
fig.update_yaxes(
    title='Experimental bias',
    range=[0, 2.2],
    dtick=0.5,
    minor_ticks="outside", 
    minor_dtick=0.25,
    title_font_family="Inter",
    title_font_size=28/3, 
    tickfont_size=28/3
)


# Guide lines y = 1.2x and y = 0.8x. The upper one carries fill='tonexty', so
# once the trace order is reversed below it fills down to the lower one.
fig.add_trace(
    pg.Scatter(
        x=np.linspace(0, 10), 
        y=1.2*np.linspace(0, 10),
        line_color="#dddddd",
        line_width=1,
        fill='tonexty', 
        fillcolor="#dddddd",
    )
)
fig.add_trace(
    pg.Scatter(
        x=np.linspace(0, 10), 
        y=0.8*np.linspace(0, 10),
        line_color="#dddddd",
        line_width=1,
    )
)

# Reverse the trace order so the grey band is drawn first, underneath the markers.
fig.data = fig.data[::-1]

# Identity line y = x, added after the reversal so it is drawn last.
fig.add_trace(
    pg.Scatter(
        x=np.linspace(0, 10), 
        y=np.linspace(0, 10),
        line_color="#000000",
        line_width=1,
    )
)

# Agreement metrics between the experimental and model values.
fig.add_annotation(
    x=0.35, y=2,
    text=f"R2: {sklearn.metrics.r2_score(plot_data['exp'], plot_data['sim']):0.2f}<br>MAE: {sklearn.metrics.mean_absolute_error(plot_data['exp'], plot_data['sim']):0.2f}<br>COR: {scipy.stats.pearsonr(plot_data['exp'], plot_data['sim'])[0]:0.2f}",
    showarrow=False,
    align="left",
    font_family="Inter",
    font_size=28/3, 
)


fig.show()
fig.write_image("comparison_bias.svg")