In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
# PARSE QPCR DATA, which exist in multiple sheets

# NOTE: change me -- path to the Excel workbook holding the qPCR results
ecoli_cperf_fp = ''

# sheet_name=None loads every sheet of the workbook and returns them
# as a {sheet name: DataFrame} dict
sheets = pd.read_excel(ecoli_cperf_fp, sheet_name=None)

# Stack all sheets row-wise with a single concat call. Growing a frame with
# pd.concat inside a loop re-copies the accumulated rows on every iteration,
# and seeding it with an empty DataFrame triggers a FutureWarning on recent
# pandas versions.
df = pd.concat(list(sheets.values()), axis=0)

# sample names are matched and split as text in later cells
df['Sample Name'] = df['Sample Name'].astype(str)


In [3]:
# keep only the columns used by the rest of the analysis
df = df[['Target Name', 'Sample Name', 'CT', 'Ct Mean', 'Quantity', 'Quantity Mean', 'Quantity SD']]

# include only standards
# .copy() makes the filtered frame independent of its parent, so the column
# assignment below does not raise SettingWithCopyWarning
df = df[df['Sample Name'].str.contains('Gblock')].copy()

# split concentration from sample name
# n=1 splits at the first 'CTL' only, so a name containing 'CTL' more than
# once still yields exactly two pieces for the two target columns
df[['sample-name', 'concentration']] = df['Sample Name'].str.split('CTL', n=1, expand=True)

In [4]:
# extract standard concentrations from cells

# 3.041 or 3.041E1, 3.041E2, etc.
conc_pattern = re.compile(r'3\.041(E[0-9])?')

def parse_conc(row):
    """Return a copy of ``row`` with a 'Concentration' entry added.

    The concentration text is searched for in row['Sample Name'] first and
    in row['Target Name'] second; the first match of ``conc_pattern`` is
    stored as a string (e.g. '3.041E2').

    Parameters
    ----------
    row : mapping with 'Sample Name' and 'Target Name' keys and a .copy()
        method (a pandas Series when used with DataFrame.apply(axis=1)).

    Returns
    -------
    A copy of ``row`` with the extra 'Concentration' entry. The input row
    is left untouched, because mutating the object handed over by
    DataFrame.apply is not supported by pandas.

    Raises
    ------
    ValueError
        If neither column contains a concentration.
    """
    for column in ('Sample Name', 'Target Name'):
        cell = row[column]
        # non-text cells (e.g. NaN for an empty cell) cannot hold a match;
        # skipping them avoids a TypeError from the regex engine
        if not isinstance(cell, str):
            continue
        match = conc_pattern.search(cell)
        if match:
            break
    else:
        # loop finished without finding a match in either column
        raise ValueError('concentration not found')

    parsed = row.copy()
    parsed['Concentration'] = match.group()

    return parsed

# run parse_conc once per row; axis='columns' is the label form of axis=1
df = df.apply(parse_conc, axis='columns')

In [5]:
# calculate proportion amplified per standard concentration
conc_grouped = df.groupby('Concentration')

# Collect the results in a plain dict and build the Series once at the end.
# An empty pd.Series() has no explicit dtype (its default changed from
# float64 to object between pandas versions), and enlarging a Series one
# label at a time re-allocates it on every assignment.
amplified_by_conc = {}
for conc, g_df in conc_grouped:
    counts = g_df['CT'].value_counts(dropna=False)
    # with dropna=False a missing CT would show up as a NaN index label
    assert not counts.index.isna().any(), f'missing CT value(s) for concentration {conc}'

    # wells that never amplified are reported as the string 'Undetermined'
    undetermined = counts['Undetermined'] if 'Undetermined' in counts else 0

    amplified_by_conc[conc] = 1 - ( undetermined / counts.sum() )

proportions = pd.Series(amplified_by_conc, dtype=float)

print(proportions)


# IMPORTANT: The limit of detection (LOD) is between 30.41 and 3.041 copies.
#            We can detect 30.41 copies with 100% confidence, and
#            3.041 copies with ~55% confidence. The number of copies which
#            we can detect with 95% confidence (LOD definition) thus lies
#            between these two values.
#
#            Separately, the limit of quantification (LOQ) was determined
#            by Nate Stone and Megan Ruby to be 304.1 copies.

3.041      0.558824
3.041E1    1.000000
3.041E2    1.000000
3.041E3    1.000000
3.041E4    1.000000
3.041E5    1.000000
3.041E6    1.000000
3.041E7    0.985294
dtype: float64
