# Chapter 2. Calibration parameters calculation for MS 1

In [89]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score

In [90]:
# Read the semicolon-separated MS 1 calibration file and discard the
# columns that the calibration fit below does not use.
df = pd.read_csv('MS_1_calibration_data.csv', sep=';').drop(
    columns=['measure_id', 'Source_id', 'UoM']
)

# Distinct values found in the Compound column.
compounds = set(df['Compound'])

In [91]:
# Calibration results, collected as parallel lists with one entry per
# (compound, sensor column) pair. They are re-created on every run of this
# cell, so re-running it never appends to stale results.
voc = []        # compound the fit belongs to
sensorid = []   # positional sensor label: 'S1', 'S2', ...
slope = []      # fitted coefficient of the single-feature linear model
intercept = []  # fitted intercept
r_sqv = []      # LinearRegression.score on the data the model was fitted to
rmsec = []      # root-mean-squared error on the data the model was fitted to
rmsep = []      # mean of the per-fold cross-validated root-mean-squared errors

# Iterate in sorted order. `compounds` is a set of strings, and the iteration
# order of such a set can differ between kernel sessions (string hash
# randomization), which would shuffle the rows of the result table from run
# to run.
for compound in sorted(compounds):
    data = df[df.Compound == compound].drop('Compound', axis=1)
    y = data.Conc_ppm
    X = data.drop('Conc_ppm', axis=1)
    # NOTE(review): only the first six columns of X are fitted, and the labels
    # 'S1'..'S6' come from column position, not from column names -- confirm
    # that X holds exactly the six sensor columns in that order.
    for i in range(6):
        # Column i as an (n_samples, 1) array, built once and reused for the
        # fit, the score, the prediction and the cross-validation.
        x_i = X.iloc[:, i].values.reshape(-1, 1)
        lr = LinearRegression().fit(x_i, y)
        y_fit = lr.predict(x_i)

        voc.append(compound)
        sensorid.append('S{}'.format(i + 1))
        slope.append(lr.coef_[0])
        intercept.append(lr.intercept_)
        r_sqv.append(lr.score(x_i, y))
        rmsec.append(np.sqrt(mean_squared_error(y, y_fit)))
        # The scorer yields negated RMSE values (higher is better), so the
        # mean is negated to store a positive error.
        # NOTE(review): an integer `cv` means unshuffled K-fold for a
        # regressor; if the rows are ordered by concentration, each fold tests
        # extrapolation rather than interpolation -- confirm this is intended.
        rmsep.append(-np.mean(cross_val_score(lr, x_i, y, cv=5,
                                              scoring='neg_root_mean_squared_error')))

In [92]:
# Bind ms_cal to an empty DataFrame (no rows, no columns).
ms_cal = pd.DataFrame()

In [93]:
# Assemble the calibration table in one step from the result lists.
# Building a fresh frame, rather than adding columns to whatever `ms_cal`
# currently holds, keeps this cell re-runnable: assigning a list to a frame
# that already has rows of a different length raises ValueError, and columns
# left over from an earlier run would otherwise survive.
ms_cal = pd.DataFrame({
    'voc': voc,              # compound name
    'sensorid': sensorid,    # sensor label ('S1', 'S2', ...)
    'slope': slope,          # fitted linear coefficient
    'intercept': intercept,  # fitted intercept
    'r_sqv': r_sqv,          # score of the fit on its own training data
    'rmsec': rmsec,          # RMSE on the training data
    'rmsep': rmsep,          # mean cross-validated RMSE
})

In [94]:
# Show the calibration table as this cell's output.
ms_cal

Unnamed: 0,voc,sensorid,slope,intercept,r_sqv,rmsec,rmsep
0,Propan-1-ol,S1,82.399949,0.943647,0.998073,7.205665,7.349378
1,Propan-1-ol,S2,43.424508,-2.049665,0.995137,11.447343,12.508717
2,Propan-1-ol,S3,0.620129,-7.820325,0.995149,11.432558,11.405664
3,Propan-1-ol,S4,2.240316,1.869948,0.993155,13.581006,14.32928
4,Propan-1-ol,S5,5.228168,-4.235382,0.995988,10.397903,11.193051
5,Propan-1-ol,S6,1.942873,-0.868038,0.993299,13.437347,14.290656
6,n-Heptane,S1,99.296039,-4.930436,0.995466,12.483658,13.279213
7,n-Heptane,S2,24.070668,-3.129801,0.997029,10.105311,10.615164
8,n-Heptane,S3,0.064155,-12.098157,0.996059,11.639227,12.468379
9,n-Heptane,S4,0.048467,-9.946322,0.99119,17.402998,17.072945


In [88]:
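# Export is deliberately disabled. Uncomment the next line to write ms_cal
# as CSV to the file 'ms_cal_parameters' (the name has no .csv extension).
#ms_cal.to_csv('ms_cal_parameters')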