In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
from scipy import stats

In [2]:
df = pd.read_csv('Lplasmid compiled stds.csv')

In [3]:
df = df[df['Conc (fg/ul)'] != 0]

In [5]:
df[(df['CT']=='Undetermined') | (df['CT']==0)]

Unnamed: 0,Run,Amp,Conc (fg/ul),CT


In [6]:
df['CT'] = df['CT'].astype(float)

In [7]:
df = df[(df['CT']!='Undetermined') & (df['CT']!=0)]

In [8]:
df[(df['CT']=='Undetermined') | (df['CT']==0)]

Unnamed: 0,Run,Amp,Conc (fg/ul),CT


In [9]:
df['Conc (pg/ul)'] = df['Conc (fg/ul)']/1000

In [10]:
df['Quantity'] = np.log10(df['Conc (pg/ul)'])

In [11]:
df.head(10)

Unnamed: 0,Run,Amp,Conc (fg/ul),CT,Conc (pg/ul),Quantity
0,1,2,1000000.0,8.77,1000.0,3.0
1,1,2,1000000.0,8.83,1000.0,3.0
2,1,2,100000.0,12.457,100.0,2.0
3,1,2,100000.0,12.572,100.0,2.0
4,1,2,10000.0,15.717,10.0,1.0
5,1,2,10000.0,15.664,10.0,1.0
6,1,2,1000.0,18.95,1.0,0.0
7,1,2,1000.0,18.975,1.0,0.0
8,1,2,100.0,22.889,0.1,-1.0
9,1,2,100.0,22.831,0.1,-1.0


In [24]:
df = df.sort_values(by=['Amp','Run','Quantity'],ascending=True)

In [25]:
df_grp = df.groupby(by=['Amp','Run','Quantity']).agg({'CT':['mean','std','count']},)

In [26]:
df_grp.round(decimals=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,CT,CT,CT
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count
Amp,Run,Quantity,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
1,2,-1.0,31.675,0.386,2
1,2,0.0,28.205,0.030,2
1,2,1.0,24.684,0.161,2
1,2,2.0,21.321,0.009,2
1,2,3.0,17.842,0.022,2
...,...,...,...,...,...
10,11,-3.0,28.908,0.219,2
10,11,-2.0,26.423,0.082,2
10,11,-1.0,23.353,0.034,2
10,11,1.0,18.215,0.054,2


In [27]:
amps = df_grp.index.get_level_values(0).unique()
dict_amp = {}
for amp in amps:
    print('Amplicon ' + str(amp))
    dict_amp['Amp'+str(amp)] = df_grp.loc[df_grp.index.get_level_values(0) == amp]
    print(dict_amp['Amp'+str(amp)])

Amplicon 1
                         CT                
                       mean       std count
Amp Run Quantity                           
1   2   -1.0      31.675000  0.386080     2
         0.0      28.205000  0.029698     2
         1.0      24.684500  0.160513     2
         2.0      21.320500  0.009192     2
         3.0      17.842500  0.021920     2
    4   -4.0      38.422500  0.012021     2
        -3.0      36.570000       NaN     1
         0.0      27.863667  0.101943     3
         2.0      20.861000  0.107764     3
    5   -3.0      38.394000       NaN     1
        -2.0      35.767000       NaN     1
        -1.0      31.177000       NaN     1
         0.0      28.044000       NaN     1
         1.0      24.715000       NaN     1
         2.0      20.981000       NaN     1
         3.0      17.267000       NaN     1
    6   -2.0      36.821000  0.272345     3
        -1.0      33.216333  0.409346     3
         0.0      30.650333  0.153871     3
         1.0      26.

In [48]:
dict_amp

{'Amp1':                          CT                
                        mean       std count
 Amp Run Quantity                           
 1   2   -1.0      31.675000  0.386080     2
          0.0      28.205000  0.029698     2
          1.0      24.684500  0.160513     2
          2.0      21.320500  0.009192     2
          3.0      17.842500  0.021920     2
     4   -4.0      38.422500  0.012021     2
         -3.0      36.570000       NaN     1
          0.0      27.863667  0.101943     3
          2.0      20.861000  0.107764     3
     5   -3.0      38.394000       NaN     1
         -2.0      35.767000       NaN     1
         -1.0      31.177000       NaN     1
          0.0      28.044000       NaN     1
          1.0      24.715000       NaN     1
          2.0      20.981000       NaN     1
          3.0      17.267000       NaN     1
     6   -2.0      36.821000  0.272345     3
         -1.0      33.216333  0.409346     3
          0.0      30.650333  0.153871     3
  

In [107]:
# testing syntax to get the quantity
dict_amp['Amp1'].index.get_level_values(2)

Float64Index([-1.0,  0.0,  1.0,  2.0,  3.0, -4.0, -3.0,  0.0,  2.0, -3.0, -2.0,
              -1.0,  0.0,  1.0,  2.0,  3.0, -2.0, -1.0,  0.0,  1.0,  2.0, -2.0,
              -1.0,  0.0,  1.0,  2.0],
             dtype='float64', name='Quantity')

In [138]:
# testing syntax to get the mean CT value for each amp per run (here it's amp1, run2)
dict_amp['Amp1'].loc[1,2]['CT']['mean']

Quantity
-1.0    31.6750
 0.0    28.2050
 1.0    24.6845
 2.0    21.3205
 3.0    17.8425
Name: mean, dtype: float64

In [134]:
# testing syntax to get the quantity for each amp per run (here it's amp1, run2)
dict_amp['Amp1'].loc[1,2].index

Float64Index([-1.0, 0.0, 1.0, 2.0, 3.0], dtype='float64', name='Quantity')

In [143]:
# testing syntax because there is some disagreement in the manually calcualted slope and the one in python
# turns out that the x and y values for the stats.linregress are swapped
y = [31.6750,28.2050,24.6845,21.3205,17.8425]
x = [-1,0,1,2,3]

In [202]:
# testing syntax to extract the number in Amp1, Amp2,..., Amp10
int(re.findall(r"\d+", "Amp1")[0])

1

In [205]:
# testing syntax to return the linear regression statistics using the syntax to extract the numbers
run_num = 2
slope, intercept, r_value, p_value, std_err = stats.linregress(
    x = dict_amp['Amp1'].loc[int(re.findall(r"\d+", "Amp1")[0]),run_num].index,#['CT']['mean'],
    y = dict_amp['Amp1'].loc[int(re.findall(r"\d+", "Amp1")[0]),run_num]['CT']['mean']
)

In [206]:
# correct value
slope,intercept

(-3.4549499999999997, 28.20045)

In [172]:
dict_amp['Amp1'].loc[1,4].index

pandas.core.indexes.numeric.Float64Index

In [70]:
dict_amp['Amp1']['CT']['mean']

Amp  Run  Quantity
1    2    -1.0        31.675000
           0.0        28.205000
           1.0        24.684500
           2.0        21.320500
           3.0        17.842500
     4    -4.0        38.422500
          -3.0        36.570000
           0.0        27.863667
           2.0        20.861000
     5    -3.0        38.394000
          -2.0        35.767000
          -1.0        31.177000
           0.0        28.044000
           1.0        24.715000
           2.0        20.981000
           3.0        17.267000
     6    -2.0        36.821000
          -1.0        33.216333
           0.0        30.650333
           1.0        26.521667
           2.0        23.605667
     8    -2.0        34.889000
          -1.0        31.278500
           0.0        28.958500
           1.0        24.410000
           2.0        21.134500
Name: mean, dtype: float64

In [126]:
for n in dict_amp['Amp1'].index.get_level_values(1).unique():
    print(n)


2
4
5
6
8


In [93]:
import re

In [186]:
int(re.findall(r"\d+", "6M1D14M")[0])

'6'

In [281]:
# here we are getting the linear regression statistics per amp per run and throwing it all into a list

reg_stat=[]
for amp in dict_amp:
    #print(amp)
    for run_num in dict_amp[amp].index.get_level_values(1).unique():
        #print('run number= '+str(run_num))
        #print(int(re.findall(r"\d+", amp)[0]))
        print(amp)
        print(run_num)
        #print(dict_amp[amp].loc[int(re.findall(r"\d+", amp)[0]),run_num]['CT']['mean'])
        #print(dict_amp[amp].loc[int(re.findall(r"\d+", amp)[0]),run_num].index)
        slope, intercept, r_value, p_value, std_err = stats.linregress(
            #x=dict_amp[amp].loc[1, 2].index,
            #y=dict_amp[amp].loc[1, 2]['CT']['mean']
            x = dict_amp[amp].loc[int(re.findall(r"\d+", amp)[0]),run_num].index,
            y = dict_amp[amp].loc[int(re.findall(r"\d+", amp)[0]),run_num]['CT']['mean']
        )
        print(slope, intercept, r_value, p_value, std_err)
        #run_number
        reg_stat.append((amp,run_num,slope, intercept, r_value, p_value, std_err))

Amp1
2
-3.4549499999999997 28.20045 -0.9999787616251801 1.1749364996123588e-07 0.013000608960092053
Amp1
4
-2.9464413919413914 27.246239926739932 -0.996901101605613 0.0030988983943870396 0.1644041135989676
Amp1
5
-3.550535714285715 28.049285714285713 -0.9989842675301619 6.311982998020809e-08 0.07162169428208023
Amp1
6
-3.312533333333333 30.163 -0.9980986427791863 9.949597997457391e-05 0.11810451115666704
Amp1
8
-3.4377499999999994 28.1341 -0.9963074808266159 0.0002692013936240604 0.17103908666344872
Amp2
1
-3.4568 19.2223 -0.9993692172485391 1.901570691604717e-05 0.07092087139904178
Amp2
4
-3.2920784313725493 18.60650980392157 -0.9996843647838384 6.731184169463387e-06 0.047766134593394245
Amp2
5
-3.4383 18.756922222222222 -0.9989442123324903 1.25779314177506e-10 0.05976434188415773
Amp2
6
-3.2631666666666663 20.947266666666668 -0.9977120426967426 0.00013132778749456498 0.12766263846270526
Amp2
8
-3.465350000000001 18.8929 -0.9930618699398286 0.0006930188025331941 0.23691457497024535
Am

In [282]:
reg_stat

[('Amp1',
  2,
  -3.4549499999999997,
  28.20045,
  -0.9999787616251801,
  1.1749364996123588e-07,
  0.013000608960092053),
 ('Amp1',
  4,
  -2.9464413919413914,
  27.246239926739932,
  -0.996901101605613,
  0.0030988983943870396,
  0.1644041135989676),
 ('Amp1',
  5,
  -3.550535714285715,
  28.049285714285713,
  -0.9989842675301619,
  6.311982998020809e-08,
  0.07162169428208023),
 ('Amp1',
  6,
  -3.312533333333333,
  30.163,
  -0.9980986427791863,
  9.949597997457391e-05,
  0.11810451115666704),
 ('Amp1',
  8,
  -3.4377499999999994,
  28.1341,
  -0.9963074808266159,
  0.0002692013936240604,
  0.17103908666344872),
 ('Amp2',
  1,
  -3.4568,
  19.2223,
  -0.9993692172485391,
  1.901570691604717e-05,
  0.07092087139904178),
 ('Amp2',
  4,
  -3.2920784313725493,
  18.60650980392157,
  -0.9996843647838384,
  6.731184169463387e-06,
  0.047766134593394245),
 ('Amp2',
  5,
  -3.4383,
  18.756922222222222,
  -0.9989442123324903,
  1.25779314177506e-10,
  0.05976434188415773),
 ('Amp2',
  6,


In [285]:
# putting them all into a dataframe
reg_stats = pd.DataFrame(reg_stat,
                        columns=['amp','run_num','slope', 'intercept', 'r_value', 'p_value', 'std_err'])

In [301]:
# creating another column in the dataframe with the PCR efficiency results
reg_stats['PCR Efficiency'] = np.round(((10**(-1/reg_stats['slope'])-1)*100),decimals=2)

In [306]:
# then aggregating the values to find the mean PCR efficiency accross all the runs for each amp.
reg_stats.groupby(by='amp').agg({'PCR Efficiency':'mean'})

Unnamed: 0_level_0,PCR Efficiency
amp,Unnamed: 1_level_1
Amp1,100.048
Amp10,99.014286
Amp2,95.89375
Amp3,101.12
Amp4,95.98375
Amp5,95.971429
Amp7,97.7375
Amp8,94.18
Amp9,95.48


In [316]:
reg_stat = []
reg_stats = []

In [322]:
reg_stat = {}
for amp in dict_amp:
    print(amp)
    for qty in dict_amp[amp]:
        #print(dict_amp[amp].index.get_level_values(1))
        slope, intercept, r_value, p_value, std_err = stats.linregress(
            x = dict_amp[amp].index.get_level_values(1),
            y = dict_amp[amp]['CT']['mean']
        )
        reg_stat[str(amp)] = (slope, intercept, r_value, p_value, std_err)

Amp1
Amp2
Amp3
Amp4
Amp5
Amp7
Amp8
Amp9
Amp10


  slope = ssxym / ssxm
  slope_stderr = np.sqrt((1 - r**2) * ssym / ssxm / df)


In [323]:
reg_stat

{'Amp1': (0.5016799455888066,
  25.75211899209742,
  0.15705523281596104,
  0.44354791596860355,
  0.6439398673099401),
 'Amp2': (0.15986581386726006,
  19.821717741935483,
  0.07119674816581939,
  0.6460520467085669,
  0.3455948690864698),
 'Amp3': (nan, nan, 0.0, 1.0, inf),
 'Amp4': (0.032741042614811776,
  20.507707902358295,
  0.01413747918275906,
  0.9231930703594569,
  0.33777553652334763),
 'Amp5': (0.40714354122868474,
  17.042727166549874,
  0.19527919647624484,
  0.2335154427492217,
  0.3361615233008019),
 'Amp7': (0.32397532793748246,
  18.550892687691878,
  0.13685441127309994,
  0.37570707548510396,
  0.36184530797350983),
 'Amp8': (nan, nan, 0.0, 1.0, inf),
 'Amp9': (nan, nan, 0.0, 1.0, inf),
 'Amp10': (0.37360947847682124,
  17.484226821192053,
  0.1624635869646492,
  0.3230743456305103,
  0.3730375188785354)}

In [319]:
reg_stats = pd.DataFrame.from_dict(reg_stat, orient='index',columns=('slope', 'intercept', 'r_value', 'p_value', 'std_err'))

In [320]:
reg_stats

Unnamed: 0,slope,intercept,r_value,p_value,std_err
Amp1,0.50168,25.752119,0.157055,0.443548,0.64394
Amp2,0.159866,19.821718,0.071197,0.646052,0.345595
Amp3,,,0.0,1.0,inf
Amp4,0.032741,20.507708,0.014137,0.923193,0.337776
Amp5,0.407144,17.042727,0.195279,0.233515,0.336162
Amp7,0.323975,18.550893,0.136854,0.375707,0.361845
Amp8,,,0.0,1.0,inf
Amp9,,,0.0,1.0,inf
Amp10,0.373609,17.484227,0.162464,0.323074,0.373038


In [321]:
dict_amp['Amp3']

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,CT,CT,CT
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,count
Amp,Run,Quantity,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
3,1,-1.0,23.157,0.033941,2
3,1,0.0,17.7395,2.288905,2
3,1,1.0,16.1415,0.012021,2
3,1,2.0,12.625,0.009899,2
3,1,3.0,9.2375,0.03182,2
