In [1]:
import numpy as np
import plotly.express as px
import plotly.graph_objects as pg
import scipy.stats
import pandas as pd
import dt4dds.analysis as analysis
# Random generator created without a seed, so any draws taken from it differ
# between runs. NOTE(review): not referenced in the cells visible here.
rng = np.random.default_rng()

In [2]:
def preprocess(data, delta_cycles):
    """Pivot per-sequence coverage into wide form and derive the per-cycle ratio.

    Parameters
    ----------
    data : object exposing a ``data`` attribute
        ``data.data`` must be a long-format DataFrame with the columns
        ``assignedReads``, ``#name``, ``x`` and ``exp``; ``exp`` must contain
        the values ``'prePCR'`` and ``'postPCR'``. The frame is not modified.
    delta_cycles : int or float
        Exponent divisor: the ratio is raised to the power ``1.0/delta_cycles``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``#name`` with one column per ``exp`` value, plus
        ``'x/x'`` (``postPCR / prePCR``) and ``'eff_ratio'``
        (``'x/x' ** (1.0/delta_cycles)``). Entries that were filtered out or
        are absent in one experiment show up as NaN.
    """
    df = data.data
    # Filter with a boolean mask instead of dropping by index label: a
    # label-based drop removes *every* row sharing that label, which discards
    # unrelated rows whenever the index is not unique (e.g. several tables
    # concatenated without resetting the index).
    # `~(... <= 10)` keeps rows whose read count is NaN, as the label-based
    # drop did.
    df = df.loc[~(df.assignedReads <= 10)]
    df_wide = df.pivot(index="#name", values="x", columns="exp")
    df_wide['x/x'] = df_wide['postPCR']/df_wide['prePCR']
    df_wide['eff_ratio'] = np.power(df_wide['x/x'], 1.0/delta_cycles)

    return df_wide


# One entry per oligo pool: (group label, 'prePCR' file, 'postPCR' file).
library_specs = [
    (
        "Twist_GCall",
        '../../data/PCR/15c_Twist_GCall/scafstats.txt',
        '../../data/PCR/90c_Twist_GCall/scafstats.txt',
    ),
    (
        "Twist_GCfix",
        '../../data/PCR/15c_Twist_GCfix/scafstats.txt',
        '../../data/PCR/90c_Twist_GCfix/scafstats.txt',
    ),
]

# Load, preprocess and label each pool in turn (same order as the spec list).
per_group_frames = []
for group_label, pre_path, post_path in library_specs:
    data = analysis.DistributionAnalysis({
        'prePCR': pre_path,
        'postPCR': post_path,
    })
    per_group_frames.append(preprocess(data, 90-15).assign(group=group_label))

df_TWall, df_TWfix = per_group_frames

# Stack both pools into the single frame used by the plotting cells.
df = pd.concat([df_TWall, df_TWfix])


Missing sequences: 1, 0.01%
Missing sequences: 49, 0.41%


Unnamed: 0_level_0,mean,total
exp,Unnamed: 1_level_1,Unnamed: 2_level_1
postPCR,123.78,1485360.0
prePCR,164.276333,1971316.0


Missing sequences: 3, 0.03%
Missing sequences: 51, 0.43%


Unnamed: 0_level_0,mean,total
exp,Unnamed: 1_level_1,Unnamed: 2_level_1
postPCR,123.230667,1478768.0
prePCR,133.779167,1605350.0


In [3]:
# Overlaid histograms of the 'prePCR' and 'postPCR' columns, one facet row per group.
fig = px.histogram(
    df,
    x=["prePCR", "postPCR"],
    histnorm='probability density',
    barmode="overlay",
    range_x=[0, 3],
    range_y=[0, 2],
    nbins=250,
    opacity=0.65,
    facet_row="group",
    facet_row_spacing=0.05,
    color_discrete_sequence=['#d94701', '#fdbe85']
)
# Explicit bin settings applied to every histogram trace so all share the same bins.
fig.update_traces(xbins={'start': 0.0, 'end': 4, 'size': 0.15}, selector=dict(type='histogram'))
# Blank the text of every annotation present at this point (the facet labels).
for anno in fig['layout']['annotations']:
    anno['text']=''

# Line colours of the fitted curves, indexed like ["prePCR", "postPCR"].
colors = ['#973100', '#c33f00']
groups = df.group.unique()
n_groups = len(groups)
# Evaluation grid for the fitted densities; identical for every curve.
x = np.linspace(0.05, 3, 5000)
for j, group in enumerate(groups):
    for i, iset in enumerate(["prePCR", "postPCR"]):
        coverage = df.loc[df['group'] == group, iset].dropna()
        # loc=0 / scale=1 are starting guesses for the optimiser, not fixed values.
        shape, loc, scale = scipy.stats.lognorm.fit(coverage, loc=0, scale=1)
        print(shape, loc, scale)
        fig.add_trace(
            pg.Scatter(
                x=x,
                y=scipy.stats.lognorm.pdf(x, scale=scale, s=shape, loc=loc),
                mode="lines",
                showlegend=False,
                line=dict(color=colors[i], width=2)
            ),
            # Facet rows are numbered from the bottom while the first group is
            # drawn on top, so group j lives in row n_groups - j (this was
            # hard-coded as 2 - j, valid only for exactly two groups).
            row=n_groups - j,
            col=1,
        )

fig.update_layout(
    template="simple_white",
    height=350,
    width=160*2,
    showlegend=False,
    margin=dict(l=0, r=10, t=0, b=0),
    font_family="Inter",
    legend_font_size=28/3,
)
# Shared tick layout for all x axes, with no title ...
fig.update_xaxes(
    title_text='',
    minor_ticks="outside",
    minor_dtick=0.5,
    dtick=1,
)
# ... then title and font settings on row 1 only.
fig.update_xaxes(
    title_text='Normalized coverage',
    title_font_family="Inter",
    title_font_size=28/3,
    tickfont_size=28/3,
    row=1
)
fig.update_yaxes(
    title_text='Probability density',
    dtick=1,
    title_font_family="Inter",
    title_font_size=28/3,
    tickfont_size=28/3,
    minor_ticks="outside",
    minor_dtick=0.5,
)
# Remove the bar outlines.
fig.update_traces(marker=dict(line_width=0), selector=dict(type='histogram'))
fig.show()
fig.write_image("cov_dist.svg")

0.1667182764681776 -0.6903857561162059 1.6671210355045742
0.11191692651155988 -2.2164585115278417 3.2049443884206115
0.13317642445497585 -0.913536559820758 1.896862795601995
0.0853537324951884 -2.9116647067656345 3.906292364812393


In [4]:
# Histogram of the 'eff_ratio' column, one facet row per group.
fig = px.histogram(
    df,
    x="eff_ratio",
    histnorm='probability density',
    nbins=50,
    range_y=[0, 100],
    facet_row="group",
    facet_row_spacing=0.05,
    range_x=[0.96, 1.04],
    color_discrete_sequence=['#fd8d3c'],
)
# Explicit bin settings applied to every histogram trace so all share the same bins.
fig.update_traces(xbins={'start': 0.9, 'end': 1.1, 'size': 0.003}, selector=dict(type='histogram'))
# Blank the text of every annotation present at this point (the facet labels).
for anno in fig['layout']['annotations']:
    anno['text']=''

# Colour of the fitted normal curves. Defined here so this cell does not rely
# on a colour list and a leftover loop index from an earlier cell; the value
# is the one that lookup resolved to.
fit_line_color = '#c33f00'
groups = df.group.unique()
n_groups = len(groups)
# Evaluation grid for the fitted densities; identical for every group.
x = np.linspace(0.95, 1.05, 5000)
for j, group in enumerate(groups):
    eff_ratio = df.loc[df['group'] == group, 'eff_ratio']
    mean, std = eff_ratio.mean(), eff_ratio.std()
    print(std)
    # Facet rows are numbered from the bottom while the first group is drawn
    # on top, so group j lives in row n_groups - j (this was hard-coded as
    # 2 - j, valid only for exactly two groups).
    facet_row_index = n_groups - j
    fig.add_trace(
        pg.Scatter(
            x=x,
            y=scipy.stats.norm.pdf(x, mean, std),
            mode="lines",
            showlegend=False,
            line=dict(color=fit_line_color, width=2)
        ),
        row=facet_row_index,
        col=1,
    )
    fig.add_annotation(x=0.97, y=60,
        text=f"σ = {std:.4f}",
        showarrow=False,
        yshift=0,
        xanchor="left",
        font=dict(color="#222222", family="Inter", size=28/3),
        row=facet_row_index,
        col=1,
    )

fig.update_yaxes(
    side='right',
    title_font_family="Inter",
    title_font_size=28/3,
    tickfont_size=28/3,
    minor_ticks="outside",
    minor_dtick=25,
)
# Shared tick layout for all x axes, with no title ...
fig.update_xaxes(
    title_text='',
    dtick=0.04,
    title_font_family="Inter",
    title_font_size=28/3,
    tickfont_size=28/3,
    minor_ticks="outside",
    minor_dtick=0.02,
)
# ... then the title on row 1 only.
fig.update_xaxes(
    title_text='Normalized efficiency',
    title_font_family="Inter",
    title_font_size=28/3,
    tickfont_size=28/3,
    dtick=0.04,
    row=1
)
# Remove the bar outlines.
fig.update_traces(marker_line_width=0, selector=dict(type='histogram'))
fig.update_layout(
    template='simple_white',
    height=350,
    width=160*2,
    showlegend=False,
    margin=dict(l=10, r=0, t=0, b=0),
    font_family="Inter",
    legend_font_size=28/3,
)
fig.show()
fig.write_image("eff_dist.svg")

0.005147535109033896
0.004756447336133924
