In [2]:
import numpy as np
from scipy.optimize import curve_fit
import plotly.graph_objects as pg
import plotly.express as px
import pandas as pd
import statsmodels.formula.api as smf
import statsmodels.stats.api as sms
import statsmodels.api as sm

# Calibration curves

In [3]:
# Calibration inputs per pool: [CSV file, undiluted concentration].
# The second entry is divided by each row's 'Dilution' value below to obtain
# the concentration 'c' of that calibration point.
data = {
    'Twist (GCall)': ['cal_Twist_GCall.csv', 45.0/100/100],
    'Twist (GCfix)': ['cal_Twist_GCfix.csv', 48.0/100/100],
    'Genscript (GCall)': ['cal_Genscript_GCall.csv', 0.0464],
    'Genscript (GCfix)': ['cal_Genscript_GCfix.csv', 21.6],
}

# Read every dilution series and attach its concentration and pool label.
dfs = []
for pool, (csv_path, stock_conc) in data.items():
    series = pd.read_csv(csv_path)
    dfs.append(series.assign(c=stock_conc / series['Dilution'], name=pool))

# One long table for plotting, ordered by pool name.
df = pd.concat(dfs).sort_values(by="name")

# Ct against concentration on a log10 x-axis, with one colour and one OLS
# trendline (fitted against log10 of x) per pool.
fig = px.scatter(
    df,
    x="c",
    y="Ct",
    color="name",
    log_x=True,
    trendline='ols',
    trendline_options={'log_x': True},
)

# Typography shared by both axes and the legend.
label_pt = 28/3
axis_style = dict(
    title_font_family="Inter",
    title_font_size=label_pt,
    tickfont_size=label_pt,
    minor_ticks="outside",
)

fig.update_layout(
    template="simple_white",
    width=330,
    height=330,
    margin=dict(l=0, r=10, t=10, b=0),
    font_family="Inter",
    legend_font_size=label_pt,
)

# The x-axis is logarithmic, so its range is given in log10 units
# (here 1e-8 to 1).
fig.update_xaxes(
    title_text='Concentration / ng uL<sup>-1</sup>',
    range=[-8, 0],
    dtick=2,
    minor_dtick=1,
    showexponent='all',
    exponentformat='e',
    **axis_style,
)
fig.update_yaxes(
    title_text='Cycle threshold',
    range=[2.5, 30],
    dtick=5,
    minor_dtick=2.5,
    **axis_style,
)

# Legend titled "Pool", anchored by its top-right corner at (1.0, 1.0).
fig.update_layout(
    legend=dict(
        title="Pool",
        x=1.0,
        xanchor="right",
        y=1.0,
        yanchor="top",
    )
)

fig.show()
fig.write_image("cal_curve.svg")

In [8]:
# Collect the per-pool OLS calibration fits that plotly express attached to
# `fig`. NOTE: `fig` is reassigned by the decay plot further down, so this
# cell must run while `fig` still holds the calibration figure.
names = []
slopes = []
intercepts = []
R2s = []

# Read the result columns by label. Integer keys on a label-indexed Series
# (`row[0]`, `row[1]`) rely on a deprecated positional fallback in pandas.
for _, fit_row in px.get_trendline_results(fig).iterrows():
    ols_fit = fit_row['px_fit_results']
    names.append(fit_row['name'])
    # params[0] is taken as the intercept and params[1] as the slope
    # (plotly's OLS trendline puts the constant term first by default).
    slopes.append(ols_fit.params[1])
    intercepts.append(ols_fit.params[0])
    R2s.append(ols_fit.rsquared)

cal = pd.DataFrame.from_dict({'name': names, 'slope': slopes, 'intercept': intercepts, 'R2': R2s}).set_index('name')
# Amplification efficiency derived from the calibration slope.
cal['eff'] = 10**(-1/cal.slope)-1
cal

Unnamed: 0_level_0,slope,intercept,R2,eff
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Genscript (GCall),-3.493166,0.80764,0.999292,0.933184
Genscript (GCfix),-3.465238,1.898962,0.999884,0.943482
Twist (GCall),-3.659714,1.438254,0.999462,0.876054
Twist (GCfix),-3.713143,1.408371,0.999672,0.859147


# Initial abundance

In [4]:
# Per pool: [CSV with Ct measurements, multiplier applied to the concentration
# read off the calibration curve]. The multiplier is presumably the dilution
# factor of the measured sample -- TODO confirm.
data = {
    'Twist (GCall)': ['amp_Twist_GCall.csv', 2*5*1000],
    'Twist (GCfix)': ['amp_Twist_GCfix.csv', 2*5*1000],
    'Genscript (GCall)': ['amp_Genscript_GCall.csv', 2*5*50],
    'Genscript (GCfix)': ['amp_Genscript_GCfix.csv', 2*5*50],
}

names = []
means = []
stds = []
cs = []

for pool, (csv_path, scale) in data.items():
    ct = pd.read_csv(csv_path)['Ct']
    ct_mean = ct.mean()
    names.append(pool)
    means.append(ct_mean)
    stds.append(ct.std())
    # Invert the pool's calibration line (Ct = intercept + slope*log10(c))
    # at the mean Ct, then scale the result.
    cal_slope, cal_intercept = cal.loc[pool, 'slope'], cal.loc[pool, 'intercept']
    cs.append(scale*10**((ct_mean-cal_intercept)/cal_slope))

init_abundance = pd.DataFrame.from_dict(
    {'name': names, 'Ct_mean': means, 'Ct_std': stds, 'c0': cs}
).set_index("name")
init_abundance

Unnamed: 0_level_0,Ct_mean,Ct_std,c0
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Twist (GCall),16.069375,0.030429,1.004879
Twist (GCfix),15.497083,0.025211,1.605897
Genscript (GCall),15.971562,0.14328,0.022801
Genscript (GCfix),16.518333,0.075515,0.030207


# Decay

In [10]:
# Decay time-series CSV for each pool.
data = {
    'Twist (GCall)': 'decay_Twist_GCall.csv',
    'Twist (GCfix)': 'decay_Twist_GCfix.csv',
    'Genscript (GCall)': 'decay_Genscript_GCall.csv',
    'Genscript (GCfix)': 'decay_Genscript_GCfix.csv',
}

dfs = []
for pool, csv_path in data.items():
    series = pd.read_csv(csv_path)
    cal_slope, cal_intercept = cal.loc[pool, 'slope'], cal.loc[pool, 'intercept']

    # Ct -> concentration by inverting the pool's calibration line.
    conc = 10**((series['Ct']-cal_intercept)/cal_slope)
    # Reference level: mean concentration of the rows measured at Time == 0.
    conc_t0 = conc[series.Time == 0.0].mean()

    dfs.append(
        series.assign(
            c=conc,
            t=series['Time']/24,  # the x-axis labels t in days, so Time is presumably in hours
            rel_c=conc/conc_t0,
            name=pool,
        )
    )


df = pd.concat(dfs).sort_values(by="name")

def wavg(group):
    """Summarise the ``rel_c`` values of one group.

    Despite the name, no weights are involved: the result holds the plain
    arithmetic mean and the (pandas default) standard deviation.

    Parameters
    ----------
    group : object exposing a ``rel_c`` attribute with ``mean()`` and ``std()``
        Typically the sub-frame handed over by ``DataFrame.groupby(...).apply``.

    Returns
    -------
    pandas.Series
        Two entries, labelled ``'c_mean'`` and ``'c_std'``.
    """
    rel = group.rel_c
    return pd.Series([rel.mean(), rel.std()], index=['c_mean', 'c_std'])

# Mean and standard deviation of the relative concentration for every
# (pool, time point) pair. Built-in named aggregation is used instead of a
# Python callback through `groupby.apply`: it yields the same
# name / t / c_mean / c_std columns and avoids the deprecated path where
# `apply` operates on the grouping columns.
df_aggregate = df.groupby(['name', 't'], as_index=False).agg(
    c_mean=('rel_c', 'mean'),
    c_std=('rel_c', 'std'),
)

# Relative concentration over time on a log10 y-axis with c_std error bars.
# The per-pool OLS trendline is fitted against log10(y) without a constant
# term, i.e. it is forced through log10(y) = 0 at t = 0.
fig = px.scatter(
    df_aggregate,
    x="t",
    y='c_mean',
    error_y='c_std',
    color="name",
    log_y=True,
    trendline='ols',
    trendline_options={'log_y': True, "add_constant": False},
)

# Typography shared by both axes and the legend.
label_pt = 28/3
axis_style = dict(
    title_font_family="Inter",
    title_font_size=label_pt,
    tickfont_size=label_pt,
    minor_ticks="outside",
)

fig.update_layout(
    template="simple_white",
    width=330,
    height=330,
    margin=dict(l=0, r=10, t=10, b=0),
    font_family="Inter",
    legend_font_size=label_pt,
)

fig.update_xaxes(
    title_text='Time / d',
    range=[-0.1, 7.5],
    dtick=2,
    minor_dtick=1,
    showexponent='all',
    exponentformat='e',
    **axis_style,
)
# The y-axis is logarithmic, so range and tick spacing are in log10 units.
fig.update_yaxes(
    title_text='Relative concentration',
    range=[-3, 0.5],
    dtick=1,
    minor_dtick=0.5,
    **axis_style,
)

# Legend titled "Pool", anchored by its top-right corner at (1.0, 1.0).
fig.update_layout(
    legend=dict(
        title="Pool",
        x=1.0,
        xanchor="right",
        y=1.0,
        yanchor="top",
    )
)

fig.show()
fig.write_image("decay.svg")

In [11]:
# Collect the per-pool decay fits (log10 of relative concentration vs. time,
# fitted without a constant term) that plotly express attached to `fig`.
names = []
slopes = []
slope_std = []
R2s = []

# Read the result columns by label. Integer keys on a label-indexed Series
# (`row[0]`, `row[1]`) rely on a deprecated positional fallback in pandas.
for _, fit_row in px.get_trendline_results(fig).iterrows():
    ols_fit = fit_row['px_fit_results']
    names.append(fit_row['name'])
    # The fit has no constant term, so the slope is the only parameter.
    slopes.append(ols_fit.params[0])
    slope_std.append(ols_fit.bse[0])
    R2s.append(ols_fit.rsquared)

decay = pd.DataFrame.from_dict({'name': names, 'slope': slopes, 'slope_std': slope_std, 'R2': R2s}).set_index('name')
# Convert the log10 slope to a natural-log rate constant: k = slope * ln(10).
decay['k'] = decay.slope*np.log(10)
decay['k_std'] = decay.slope_std*np.log(10)
# Half-life: tau = -ln(2) / k (positive when k is negative).
decay['tau'] = -np.log(2)/decay['k']
# First-order error propagation: tau_std / tau = slope_std / |slope|.
# The leading minus sign keeps the result positive for negative slopes.
decay['tau_std'] = -decay['tau']*decay.slope_std*np.log(10)/decay['k']
decay

Unnamed: 0_level_0,slope,slope_std,R2,k,k_std,tau,tau_std
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Genscript (GCall),-0.322806,0.032176,0.971057,-0.743289,0.074088,0.93254,0.092951
Genscript (GCfix),-0.312835,0.028564,0.9756,-0.720329,0.065771,0.962264,0.087861
Twist (GCall),-0.302107,0.0497,0.924905,-0.695627,0.114438,0.996436,0.163925
Twist (GCfix),-0.365316,0.051263,0.944222,-0.841171,0.118037,0.824026,0.115631


In [7]:
# Express every time point as a multiple of its pool's fitted half-life.
# Pools without a fit raise KeyError (as a per-pool `.loc` lookup would)
# instead of silently producing NaN.
unfitted = set(df_aggregate['name']) - set(decay.index)
if unfitted:
    raise KeyError(f"no decay fit available for pool(s): {sorted(unfitted)}")

# Vectorized lookup: map each row's pool name to that pool's tau, then divide.
df_aggregate['n_taus'] = df_aggregate['t'] / df_aggregate['name'].map(decay['tau'])

df_aggregate

Unnamed: 0,name,t,c_mean,c_std,n_taus
0,Genscript (GCall),0.0,1.0,0.187495,0.0
1,Genscript (GCall),1.954833,0.113007,0.012505,2.096246
2,Genscript (GCall),3.954833,0.105133,0.101023,4.240926
3,Genscript (GCall),6.607625,0.006052,0.002785,7.08562
4,Genscript (GCfix),0.0,1.0,0.043283,0.0
5,Genscript (GCfix),2.131917,0.083276,0.017034,2.215521
6,Genscript (GCfix),4.972208,0.0272,0.00553,5.167197
7,Genscript (GCfix),6.90625,0.009419,0.000868,7.177083
8,Twist (GCall),0.0,1.0,0.283875,0.0
9,Twist (GCall),2.010375,0.073631,0.007801,2.017566
