In [1]:
import plotly.graph_objects as pg
import pandas as pd

In [2]:
%load_ext autoreload
%autoreload 2
import dt4dds.analysis as analysis

# Datasets to analyse, written as (experiment folder, group label, condition prefixes).
# Every condition expands to "../../data/<experiment>/<condition>_Genscript_<group>",
# and the group label is also the name paired with that dataset for GroupAnalysis.
dataset_spec = [
    ("PCR", "GCall", ["15c", "20c", "25c"]),
    ("PCR", "GCfix", ["10c", "15c", "20c", "25c"]),
    ("Aging", "GCall", ["0a", "0b", "2d", "4d", "7d"]),
    ("Aging", "GCfix", ["0a", "0b", "2d", "4d", "7d"]),
]

# The comprehension keeps the listing order above, so datasets are constructed
# and handed over in the same sequence as they are written.
data = analysis.GroupAnalysis([
    (group_label, analysis.ErrorAnalysis(f"../../data/{experiment}/{condition}_Genscript_{group_label}"))
    for experiment, group_label, conditions in dataset_spec
    for condition in conditions
])

In [3]:
def hex_to_rgb(hex_color: str) -> tuple:
    """Convert a hex colour string into an ``(r, g, b)`` tuple of ints.

    Args:
        hex_color: Colour such as ``"#08519c"``. Leading ``#`` characters are
            optional. The three-digit shorthand (``"#abc"``, equivalent to
            ``"#aabbcc"``) is accepted as well.

    Returns:
        Tuple ``(red, green, blue)`` with each channel parsed as a two-digit
        hexadecimal number.

    Raises:
        ValueError: If a channel cannot be parsed as hexadecimal.
    """
    digits = hex_color.lstrip("#")
    if len(digits) == 3:
        # Expand the shorthand by doubling every digit: "abc" -> "aabbcc".
        digits = "".join(digit * 2 for digit in digits)
    return tuple(int(digits[start:start + 2], 16) for start in (0, 2, 4))



# Work on a copy so the table held by `data` is left untouched.
plot_data = data.data["deletions_by_refposition"].copy()

# Keep the position as originally reported before it is re-expressed below.
plot_data['position_read'] = plot_data['position']

# Within each experiment, mirror the positions of the rows whose `read` is "1":
# each position becomes (largest such position in that experiment) - position.
# NOTE(review): presumably this puts those rows on the same "synthesis direction"
# axis that the plot's x-axis title refers to - confirm against the analysis package.
for exp in plot_data.exp.unique():
    read1_rows = (plot_data['exp'] == exp) & (plot_data['read'] == "1")
    read1_positions = plot_data.loc[read1_rows, 'position']
    plot_data.loc[read1_rows, 'position'] = read1_positions.max() - read1_positions




# Line colours, indexed by the order in which groups are plotted.
colors = ["#08519c", "#a50f15"]

# Settings common to all four axes: outside minor ticks and the tick-label size.
common_axis = dict(minor_ticks="outside", tickfont_size=28/3)

# Primary axis pair, spanning the whole plotting area.
main_xaxis = dict(
    common_axis,
    anchor='y',
    title_text='Position in synthesis direction',
    range=[0, 120],
    dtick=20,
    minor_dtick=10,
)
main_yaxis = dict(
    common_axis,
    anchor='x',
    title_text='Deletion rate / 10<sup>-3</sup> nt<sup>-1</sup>',
    range=[0, 60],
    dtick=10,
    minor_dtick=5,
)

# Secondary, untitled axis pair confined to a sub-region of the figure via `domain`.
# NOTE(review): no trace in this cell is assigned to x2/y2 - confirm it is still needed.
inset_xaxis = dict(
    common_axis,
    domain=[0.2, 0.75],
    anchor='y2',
    title_text="",
    range=[0, 110],
    dtick=40,
    minor_dtick=20,
)
inset_yaxis = dict(
    common_axis,
    domain=[0.6, 0.75],
    anchor='x2',
    title_text="",
    range=[0.25, 1.25],
    dtick=0.5,
    minor_dtick=0.25,
)

fig = pg.Figure()
fig.update_layout(
    xaxis=main_xaxis,
    yaxis=main_yaxis,
    xaxis2=inset_xaxis,
    yaxis2=inset_yaxis,
)




def _rate_by_position(frame, how):
    """Aggregate `rate` per `position` using `how`; returns columns position and rate."""
    return frame.groupby('position').agg({'rate': how}).reset_index()


def _smoothed_rate(frame):
    """Centred rolling mean of `rate` over 3 rows (fewer at the edges), scaled by 1000."""
    return 1000*frame.rolling(3, min_periods=1, on="position", center=True)['rate'].mean()


# Per group: a shaded band between the smoothed per-position max and min, then the
# per-position median drawn on top as a solid line. Rates are multiplied by 1000 to
# match the 10^-3 unit in the y-axis title.
for i, group in enumerate(plot_data.group.unique()):
    group_rows = plot_data.loc[(plot_data['group'] == group)]

    # Upper edge of the band; invisible on its own (zero line width).
    upper_edge = _rate_by_position(group_rows, 'max')
    fig.add_trace(
        pg.Scatter(
            x=upper_edge['position'],
            y=_smoothed_rate(upper_edge),
            line_width=0,
        )
    )

    # Lower edge; 'tonexty' fills up to the trace added immediately before it,
    # so it has to directly follow the upper edge.
    lower_edge = _rate_by_position(group_rows, 'min')
    red, green, blue = hex_to_rgb(colors[i])
    fig.add_trace(
        pg.Scatter(
            x=lower_edge['position'],
            y=_smoothed_rate(lower_edge),
            fill='tonexty',
            line_width=0,
            fillcolor=f"rgba({red}, {green}, {blue}, 0.25)",
        )
    )

    # Unsmoothed median across the group's rows at each position.
    median_line = _rate_by_position(group_rows, 'median')
    fig.add_trace(
        pg.Scatter(
            x=median_line['position'],
            y=1000*median_line['rate'],
            line_color=colors[i],
            line_width=1.5,
        )
    )




# Settings shared by every text label on the figure.
label_style = dict(showarrow=False, font_family="Inter", font_size=28/3)

# Grey rectangle drawn beneath the traces, with its caption placed to the right of it.
fig.add_shape(
    type="rect",
    x0=7, x1=18,
    y0=8, y1=35,
    fillcolor="#999999",
    layer='below',
)
fig.add_annotation(
    x=27,
    y=25,
    text="index<br>region",
    align='left',
    font_color='#999999',
    **label_style,
)

# One label per group giving the mean of the per-position mean rates (x1000).
# The vertical offset nudges each label off the value it reports, and the label
# colour is the one at the matching index of `colors`.
for group_name, y_offset, label_color in (("GCall", -7, colors[0]), ("GCfix", 1, colors[1])):
    group_rows = plot_data.loc[plot_data.group == group_name]
    mean = group_rows.groupby('position')['rate'].mean().mean()*1000
    fig.add_annotation(
        x=70,
        y=mean + y_offset,
        text=f"mean: {mean:.1f} · 10<sup>-3</sup> nt<sup>-1</sup>",
        font_color=label_color,
        **label_style,
    )

# Marker update restricted to traces whose mode is 'markers', with an empty marker dict.
# NOTE(review): none of the traces added in this cell set `mode`, so this looks
# like a no-op - confirm before removing.
fig.update_traces(
    selector=dict(mode='markers'),
    marker=dict(),
)

# Overall look: fixed 330 x 330 canvas, no legend, Inter font, minimal margins.
figure_style = dict(
    template="simple_white",
    width=330,
    height=330,
    showlegend=False,
    margin=dict(l=0, r=10, t=0, b=0),
    font_family="Inter",
    legend_font_size=28/3,
)
fig.update_layout(**figure_style)

# Same title font on every x and y axis.
axis_title_font = dict(title_font_family="Inter", title_font_size=28/3)
fig.update_xaxes(**axis_title_font)
fig.update_yaxes(**axis_title_font)

# Render inline, then export an SVG next to the notebook.
fig.show()
fig.write_image("error_rates.svg")

In [4]:
# For each group: print the mean of the per-position mean rates, then write the
# per-position rates divided by that mean to "del_rate_Genscript_<group>.csv".
for group in plot_data.group.unique():
    group_rows = plot_data.loc[plot_data.group == group]

    rate_by_position = group_rows.groupby('position')['rate'].mean()
    overall_mean = rate_by_position.mean()
    print(f"{group}: {overall_mean}")

    # index=False drops the position index, so the file holds a single `rate`
    # column whose rows follow ascending position.
    relative_rate = rate_by_position/overall_mean
    relative_rate.to_frame().to_csv(f"del_rate_Genscript_{group}.csv", index=False)

GCall: 0.011810834555930547
GCfix: 0.016279815974109397


In [5]:
# Same per-group mean as the previous cell, but restricted to positions <= 102.
# NOTE(review): the recorded output leaves GCall unchanged and lowers GCfix, so
# presumably only GCfix has positions beyond 102 - confirm where the cutoff comes from.
within_cutoff = plot_data.position <= 102
for group in plot_data.group.unique():
    in_group = plot_data.group == group
    rate_by_position = plot_data.loc[in_group & within_cutoff].groupby('position')['rate'].mean()
    print(f"{group}: {rate_by_position.mean()}")

GCall: 0.011810834555930547
GCfix: 0.01355371009343366
