In [None]:
import numpy as np
import pandas as pd
import scipy.stats
import plotly.express as px
import plotly.graph_objects as pg
from scipy.optimize import curve_fit

import dt4dds.analysis.dataaggregation as analysis

import sys
sys.path.append('..')
import plotting

In [None]:
# Analysis directory of each experiment, keyed by the experiment name that is
# passed to GroupAnalysis.
# NOTE(review): File3 sits under an extra "raw/" directory, unlike File1 and
# File2 — confirm that this path is intended.
analysis_dirs = {
    'Lietard_Normal': "../data_experimental/Photolithographic_Lietard/normal/analysis",
    'Lietard_Capped': "../data_experimental/Photolithographic_Lietard/capped/analysis",
    'Lietard_Spaced': "../data_experimental/Photolithographic_Lietard/spaced/analysis",
    'Lietard_highdensity': "../data_experimental/Photolithographic_Lietard/high_density/analysis",
    'Antkowiak_File1': "../data_experimental/Photolithographic_Antkowiak/File1/analysis",
    'Antkowiak_File2': "../data_experimental/Photolithographic_Antkowiak/File2/analysis",
    'Antkowiak_File3': "../data_experimental/Photolithographic_Antkowiak/raw/File3/analysis",
}

# Every experiment is loaded with the same ErrorAnalysis options; the dict
# keeps insertion order, so experiments are loaded in the order listed above.
data = analysis.GroupAnalysis([
    (name, analysis.ErrorAnalysis(path, local=True, paired=False))
    for name, path in analysis_dirs.items()
])

# Experiment names in loading order.
# NOTE(review): `order` is reassigned by the `limits` cell further down before
# any visible use of this value.
order = list(analysis_dirs)

# Positional error rates

In [None]:
# (lower, upper) reference-position window per group. Later cells flag every
# position below `lower` or above `upper` with skip=True.
limits = {
    **dict.fromkeys(('Normal', 'Capped', 'Spaced'), (8, 51)),
    'File1': (7, 40),
}

# Groups that get an overlay trace, in the same order as the keys of `limits`.
order = list(limits)

## Deletions

In [None]:
# Overview: for each error type, one line plot of `rate` against `position`
# with one coloured trace per `exp`. No `limits` masking is applied here.
layout_options = dict(
    height=200,
    width=300,
    showlegend=False,
    margin=dict(l=0, r=10, t=10, b=0),
)

for errortype in ('substitutions', 'insertions', 'deletions'):
    plot_data = data.data[f'{errortype}_by_refposition'].copy()

    fig = px.line(plot_data, x='position', y='rate', color="exp")
    fig.update_layout(**layout_options)

    # Project helper from the local `plotting` module; its return value
    # replaces the figure before it is shown.
    fig = plotting.standardize_plot(fig)
    fig.show()

In [None]:
# Ratio of each deletion type per reference position, one facet row per `exp`.
# Dashed lines cover every position; the solid overlays added below cover only
# the positions that are not flagged as `skip`.
plot_data = data.data['deletions_by_refposition_by_type'].copy()

# Flag positions outside the per-group (lower, upper) window.
plot_data['skip'] = False
for group, (lower, upper) in limits.items():
    outside = (plot_data['position'] < lower) | (plot_data['position'] > upper)
    plot_data.loc[(plot_data['group'] == group) & outside, 'skip'] = True


fig = px.line(
    plot_data,
    x='position',
    y='ratio',
    color="type",
    facet_row="exp",
)
fig.update_traces(
    line={"dash": "dash"}
)

# The loop variable is deliberately not called `type`: at notebook top level
# that name would replace the builtin `type` for every later cell.
error_types = plot_data['type'].unique()

# NOTE(review): `row=len(order)-i` assumes the facet grid has exactly
# len(order) rows, arranged in `order` sequence. facet_row="exp" creates one
# row per distinct `exp` value, which may differ from len(order) — confirm that
# each overlay lands on the intended facet.
for i, group in enumerate(order):
    for error_type in error_types:
        # Build the row selection once and reuse it for both axes.
        selected = (
            (plot_data['group'] == group)
            & ~plot_data['skip']
            & (plot_data['type'] == error_type)
        )
        fig.add_trace(
            pg.Scatter(
                x=plot_data.loc[selected, 'position'],
                y=plot_data.loc[selected, 'ratio'],
                mode="lines",
            ),
            row=len(order)-i,
            col=1
        )


fig.update_layout(
    template='simple_white',
    height=300,
    width=400,
    showlegend=False,
    margin=dict(l=0, r=0, t=0, b=0),
    font_family="Inter",
    legend_font_size=28/3,
)

fig.show()

In [None]:
# Mean and standard deviation of `ratio` per (group, type), using only the
# positions that were not flagged as `skip` in the previous cell.
kept = plot_data.loc[~plot_data['skip']]
overview_df = (
    kept
    .groupby(['group', 'type'])
    .agg(mean=('ratio', 'mean'), std=('ratio', 'std'))
    .reset_index()
)

# Grouped bars: one bar per group within each type, error bars from `std`.
fig = px.bar(
    overview_df,
    x="type",
    y="mean",
    error_y="std",
    color='group',
    barmode='group',
)
fig.show()

# Average of the per-group means for each type.
type_means = overview_df.groupby(['type']).agg(mean=('mean', 'mean'))
display(type_means)

## Substitutions

In [None]:
# Substitution rate per reference position. Dashed lines cover every position;
# the solid overlays cover only the positions that are not flagged as `skip`.
plot_data = data.data['substitutions_by_refposition'].copy()

# Flag positions outside the per-group (lower, upper) window.
plot_data['skip'] = False
for group, (lower, upper) in limits.items():
    in_group = plot_data['group'].eq(group)
    out_of_window = plot_data['position'].lt(lower) | plot_data['position'].gt(upper)
    plot_data.loc[in_group & out_of_window, 'skip'] = True

fig = px.line(plot_data, x='position', y='rate', color="exp")
fig.update_traces(line={"dash": "dash"})

# Rows that survived the window filter, reused for the overlays and the table.
inside = plot_data.loc[~plot_data['skip']]

for group in order:
    group_rows = inside.loc[inside['group'] == group]
    fig.add_trace(
        pg.Scatter(
            x=group_rows['position'],
            y=group_rows['rate'],
            mode="lines",
        ),
    )

fig.update_layout(
    template='simple_white',
    height=300,
    width=400,
    showlegend=False,
    margin=dict(l=0, r=0, t=0, b=0),
    font_family="Inter",
    legend_font_size=28/3,
)

fig.show()

# Mean and standard deviation of the rate per group, inside the window only.
display(inside.groupby(['group']).agg(mean=('rate', 'mean'), std=('rate', 'std')))

In [None]:
# Ratio of each substitution type per reference position, one facet row per
# `exp`. Dashed lines cover every position; the solid overlays added below
# cover only the positions that are not flagged as `skip`.
plot_data = data.data['substitutions_by_refposition_by_type'].copy()

# Flag positions outside the per-group (lower, upper) window.
plot_data['skip'] = False
for group, (lower, upper) in limits.items():
    outside = (plot_data['position'] < lower) | (plot_data['position'] > upper)
    plot_data.loc[(plot_data['group'] == group) & outside, 'skip'] = True


fig = px.line(
    plot_data,
    x='position',
    y='ratio',
    color="type",
    facet_row="exp",
)
fig.update_traces(
    line={"dash": "dash"}
)

# The loop variable is deliberately not called `type`: at notebook top level
# that name would replace the builtin `type` for every later cell.
error_types = plot_data['type'].unique()

# NOTE(review): `row=len(order)-i` assumes the facet grid has exactly
# len(order) rows, arranged in `order` sequence. facet_row="exp" creates one
# row per distinct `exp` value, which may differ from len(order) — confirm that
# each overlay lands on the intended facet.
for i, group in enumerate(order):
    for error_type in error_types:
        # Build the row selection once and reuse it for both axes.
        selected = (
            (plot_data['group'] == group)
            & ~plot_data['skip']
            & (plot_data['type'] == error_type)
        )
        fig.add_trace(
            pg.Scatter(
                x=plot_data.loc[selected, 'position'],
                y=plot_data.loc[selected, 'ratio'],
                mode="lines",
            ),
            row=len(order)-i,
            col=1
        )


fig.update_layout(
    template='simple_white',
    height=300,
    width=400,
    showlegend=False,
    margin=dict(l=0, r=0, t=0, b=0),
    font_family="Inter",
    legend_font_size=28/3,
)

fig.show()

In [None]:
# Mean and standard deviation of `ratio` per (group, type), using only the
# positions that were not flagged as `skip` in the previous cell.
kept = plot_data.loc[~plot_data['skip']]
overview_df = (
    kept
    .groupby(['group', 'type'])
    .agg(mean=('ratio', 'mean'), std=('ratio', 'std'))
    .reset_index()
)

# Grouped bars: one bar per group within each type, error bars from `std`.
fig = px.bar(
    overview_df,
    x="type",
    y="mean",
    error_y="std",
    color='group',
    barmode='group',
)
fig.show()

# Mark the 'Capped' group, then average the per-group means for each type
# separately for capped and non-capped groups.
overview_df['capped'] = overview_df['group'] == 'Capped'
capped_means = overview_df.groupby(['capped', 'type']).agg(mean=('mean', 'mean'))
display(capped_means)

## Insertions

In [None]:
# Insertion rate per reference position. Dashed lines cover every position;
# the solid overlays cover only the positions that are not flagged as `skip`.
plot_data = data.data['insertions_by_refposition'].copy()

# Flag positions outside the per-group (lower, upper) window.
plot_data['skip'] = False
for group, (lower, upper) in limits.items():
    in_group = plot_data['group'].eq(group)
    out_of_window = plot_data['position'].lt(lower) | plot_data['position'].gt(upper)
    plot_data.loc[in_group & out_of_window, 'skip'] = True

fig = px.line(plot_data, x='position', y='rate', color="exp")
fig.update_traces(line={"dash": "dash"})

# Rows that survived the window filter, reused for the overlays and the table.
inside = plot_data.loc[~plot_data['skip']]

for group in order:
    group_rows = inside.loc[inside['group'] == group]
    fig.add_trace(
        pg.Scatter(
            x=group_rows['position'],
            y=group_rows['rate'],
            mode="lines",
        ),
    )

fig.update_layout(
    template='simple_white',
    height=300,
    width=400,
    showlegend=False,
    margin=dict(l=0, r=0, t=0, b=0),
    font_family="Inter",
    legend_font_size=28/3,
)

fig.show()

# Mean and standard deviation of the rate per group, inside the window only.
display(inside.groupby(['group']).agg(mean=('rate', 'mean'), std=('rate', 'std')))

In [None]:
# Ratio of each insertion type per reference position, one facet row per `exp`.
# Dashed lines cover every position; the solid overlays added below cover only
# the positions that are not flagged as `skip`.
plot_data = data.data['insertions_by_refposition_by_type'].copy()

# Flag positions outside the per-group (lower, upper) window.
plot_data['skip'] = False
for group, (lower, upper) in limits.items():
    outside = (plot_data['position'] < lower) | (plot_data['position'] > upper)
    plot_data.loc[(plot_data['group'] == group) & outside, 'skip'] = True


fig = px.line(
    plot_data,
    x='position',
    y='ratio',
    color="type",
    facet_row="exp",
)
fig.update_traces(
    line={"dash": "dash"}
)

# The loop variable is deliberately not called `type`: at notebook top level
# that name would replace the builtin `type` for every later cell.
error_types = plot_data['type'].unique()

# NOTE(review): `row=len(order)-i` assumes the facet grid has exactly
# len(order) rows, arranged in `order` sequence. facet_row="exp" creates one
# row per distinct `exp` value, which may differ from len(order) — confirm that
# each overlay lands on the intended facet.
for i, group in enumerate(order):
    for error_type in error_types:
        # Build the row selection once and reuse it for both axes.
        selected = (
            (plot_data['group'] == group)
            & ~plot_data['skip']
            & (plot_data['type'] == error_type)
        )
        fig.add_trace(
            pg.Scatter(
                x=plot_data.loc[selected, 'position'],
                y=plot_data.loc[selected, 'ratio'],
                mode="lines",
            ),
            row=len(order)-i,
            col=1
        )


fig.update_layout(
    template='simple_white',
    height=300,
    width=400,
    showlegend=False,
    margin=dict(l=0, r=0, t=0, b=0),
    font_family="Inter",
    legend_font_size=28/3,
)

fig.show()

In [None]:
# Mean and standard deviation of `ratio` per (group, type).
# Positions flagged skip=True by the previous cell are excluded, matching the
# deletion and substitution overviews; without this filter the positions
# outside the `limits` window would enter the statistics.
overview_df = (
    plot_data.loc[plot_data.skip != True]
    .groupby(['group', 'type'])
    .agg(mean=('ratio', 'mean'), std=('ratio', 'std'))
    .reset_index()
)

# Grouped bars: one bar per group within each type, error bars from `std`.
fig = px.bar(overview_df,
    y="mean",
    x="type",
    error_y="std",
    color='group',
    barmode='group',
)


fig.show()

# Average of the per-group means for each type.
display(overview_df.groupby(['type']).agg(mean=('mean', 'mean')))