In [None]:
import pandas as pd
import statsmodels.formula.api as sm
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [None]:
# Load the tab-delimited sample key, indexed by its `lcm_sampleID` column so
# that other tables can be joined against it by sample ID.
samples = pd.read_table(
    'raw/lcm_samplekey.txt',
    index_col=['lcm_sampleID'],
)
samples.head()

In [None]:
def _read_run_table(path):
    """Read one run's tab-delimited table and index it by file and compound.

    Rows whose `compound` value is missing are dropped before the
    (`Data_Filename`, `compound`) index is set.
    """
    table = pd.read_table(path)
    table = table.dropna(subset=['compound'])
    return table.set_index(['Data_Filename', 'compound'])


# Both runs go through the same loader and are then stacked row-wise.
data1 = _read_run_table('raw/lcm.run1_data.txt')
data2 = _read_run_table('raw/lcm.run2_data.txt')
data = pd.concat([data1, data2])
data.head()

In [None]:
def calibrate(data, fit_intercept=False):
    """Fit a weighted least-squares line of `concentration_mM` against `Area`.

    Parameters
    ----------
    data : DataFrame
        Rows to fit; must provide the `concentration_mM` and `Area` columns
        referenced by the formula and the weights.
    fit_intercept : bool, default False
        When truthy the formula includes an intercept term; otherwise the
        formula carries `+ 0` and the returned intercept is the constant 0.

    Returns
    -------
    tuple
        `(slope, intercept, rsquared)` where `slope` is the fitted `Area`
        coefficient and `rsquared` is taken from the fitted results object.
    """
    formula = 'concentration_mM ~ Area' if fit_intercept else 'concentration_mM ~ Area + 0'
    # Each row is weighted by the inverse square of its concentration.
    # NOTE(review): a concentration of exactly 0 would yield an infinite
    # weight - confirm such rows never reach this function.
    inverse_square_weights = data.concentration_mM ** (-2)
    results = sm.wls(formula, data=data, weights=inverse_square_weights).fit()
    slope = results.params['Area']
    intercept = results.params['Intercept'] if fit_intercept else 0
    return slope, intercept, results.rsquared

def calc_concentration(data):
    """Convert `Area` values to concentrations for one group of injections.

    A calibration is fitted (via `calibrate`, without intercept) on the rows
    that have a non-null `concentration_mM`, and the resulting factor is then
    applied to every row of `data`.

    Parameters
    ----------
    data : DataFrame
        Must provide the columns `Area`, `concentration_mM`, `dilution`,
        `Data_Filename` and `Sample_ID`. The input frame is not modified.

    Returns
    -------
    DataFrame
        Indexed like `data`, with columns `calc_conc`, `Data_Filename`,
        `sample_id` and `rsquared` (the fit's R-squared repeated on every row).
    """
    # Assign the filled column back instead of calling
    # `data.Area.fillna(0, inplace=True)`: that chained in-place form acts on a
    # temporary Series and leaves `data` unchanged under pandas Copy-on-Write,
    # which would let NaN areas leak into the fit and the results.
    # Missing areas are filled before the standards are selected, so a standard
    # with no recorded Area enters the fit with Area 0.
    data = data.assign(Area=data['Area'].fillna(0))

    # Rows with a known concentration are the ones used for the calibration.
    standards_data = data[data.concentration_mM.notnull()]
    calibration_factor, intercept, rsquared = calibrate(standards_data)

    # Apply the calibration to every injection and divide by `dilution`.
    # NOTE(review): dividing assumes `dilution` is expressed so that division
    # recovers the undiluted concentration - confirm against the sample key.
    out = pd.DataFrame({'calc_conc': (intercept + calibration_factor * data.Area) / data.dilution,
                        'Data_Filename': data.Data_Filename,
                        'sample_id': data.Sample_ID})
    out['rsquared'] = rsquared
    return out

In [None]:
# Join the sample key onto every injection, flatten the index into columns,
# then run `calc_concentration` separately for each (compound, run) group.
concentrations = (
    data.join(samples, on='Sample_ID')
        .reset_index()
        .groupby(['compound', 'run'])
        .apply(calc_concentration)
        .reset_index()
)

# Keep only the reported columns and discard rows without a computed value.
report_columns = ['compound', 'sample_id', 'calc_conc', 'rsquared']
result = concentrations[report_columns].dropna(subset=['calc_conc'])
result.head()

In [None]:
def get_calibration(data):
    """Summarise the calibration fits for one group of injections.

    The rows with a non-null `concentration_mM` are fitted twice with
    `calibrate`: once without and once with an intercept term.

    Parameters
    ----------
    data : DataFrame
        Must provide the `Area` and `concentration_mM` columns. The input
        frame is not modified.

    Returns
    -------
    Series
        Entries `slope_no_intercept`, `rsquared_no_intercept`,
        `slope_with_intercept`, `intercept` and `rsquared_with_intercept`.
    """
    # Assign the filled column back instead of calling
    # `data.Area.fillna(0, inplace=True)`: that chained in-place form acts on a
    # temporary Series and leaves `data` unchanged under pandas Copy-on-Write,
    # which would pass NaN areas on to the fits.
    data = data.assign(Area=data['Area'].fillna(0))

    # Rows with a known concentration are the ones used for the calibration.
    standards_data = data[data.concentration_mM.notnull()]

    # Fit both model variants on the same standards so they can be compared.
    slope_no_intercept, _, rsquared_no_intercept = calibrate(standards_data, fit_intercept=False)
    slope_with_intercept, intercept, rsquared_with_intercept = calibrate(standards_data, fit_intercept=True)

    out = pd.Series({'slope_no_intercept': slope_no_intercept,
                     'rsquared_no_intercept': rsquared_no_intercept,
                     'slope_with_intercept': slope_with_intercept,
                     'intercept': intercept,
                     'rsquared_with_intercept': rsquared_with_intercept})
    return out

In [None]:
# Join the sample key onto every injection and flatten the index into columns.
annotated = data.join(samples, on='Sample_ID').reset_index()

# Summarise the calibration fits separately for each (compound, run) group and
# turn the group keys back into ordinary columns.
per_compound_run = annotated.groupby(['compound', 'run'])
calibrations = per_compound_run.apply(get_calibration).reset_index()
calibrations

In [None]:
# Write both tables as tab-separated files, leaving out the row index.
for table, destination in ((calibrations, 'outputs/calibrations.tsv'),
                           (result, 'outputs/concentration.tsv')):
    table.to_csv(destination, sep='\t', index=False)