## ML Modeling for Fluid x Virus

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
import seaborn as sns
sns.set_context("talk")

import os 
import glob
import statsmodels.api as sm
from pathlib import Path
from scipy import stats
import sklearn

from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from math import sqrt

import keras
from keras.models import Sequential
from keras.layers import Dense

from tensorflow.keras.utils import to_categorical

In [None]:
# Load the raw Fig3M sheet and discard the columns that are not used below.
DID = pd.read_excel('Fig3M.xlsx')
DID = DID.drop(columns=['Unnamed: 0','ID',"Device","Date","ProbeSide","Yoked","BottleSide","Weight","Intake"])

# Combine the Fluid and Virus columns with `+` into a single label column,
# then drop the two source columns.
DID["FluidxVirus"] = DID['Fluid'] + DID['Virus']
DID = DID.drop(columns = ['Fluid', "Virus"])

# dropna() can return an object that pandas still tracks as derived from DID,
# so assigning new columns to it later raises SettingWithCopyWarning.
# An explicit copy makes df an independent frame that is safe to modify.
df = DID.dropna().copy()

# Preview of the frame *before* rows containing NaN are removed.
DID.head()

In [None]:
# Replace the two label columns with integer category codes.
# Each source column is converted to a pandas categorical, its codes are
# stored in a new "<name>Codes" column, and the source columns are dropped.
label_columns = ['StimState', 'FluidxVirus']

for label in label_columns:
    df[label] = df[label].astype('category')

for label in label_columns:
    df[label + 'Codes'] = df[label].cat.codes

df = df.drop(columns=label_columns)
df.head()

In [None]:
# Column holding the integer class label to predict.
target_column = ['FluidxVirusCodes']

# Every other column is a predictor. Keep the DataFrame's own column order:
# a set difference yields an arbitrary order that can change between
# interpreter sessions (string hash randomisation), which would reorder the
# feature columns of X from run to run.
predictors = [col for col in df.columns if col not in target_column]
#df[predictors] = df[predictors]/df[predictors].max()
df.describe()

In [None]:
# Feature matrix: one row per sample, one column per predictor.
X = df[predictors].values

# One-hot encode the integer class codes for the softmax output layer.
y = to_categorical(df[target_column].values)

# The width of the one-hot matrix is the number of classes.
count_classes = y.shape[-1]
print(count_classes)

# Stratified 6-fold splitter, shuffled with a fixed seed.
kf = StratifiedKFold(n_splits=6, shuffle=True, random_state=42)

In [None]:
# Display the dimensions of the feature matrix as (rows, predictor columns).
X.shape

In [None]:
def _build_model(n_features, n_classes):
    """Return a new, compiled dense classifier.

    Three ReLU hidden layers (4096, 1024, 512 units) feed a softmax output
    with one unit per class. Compiled with Adam, categorical cross-entropy
    and accuracy as the reported metric.
    """
    net = Sequential()
    net.add(Dense(4096, activation='relu', input_dim=n_features))
    net.add(Dense(1024, activation='relu'))
    net.add(Dense(512, activation='relu'))
    net.add(Dense(n_classes, activation='softmax'))
    net.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return net


# Out-of-sample (held-out fold) targets, predictions, and the positional row
# numbers they belong to, collected fold by fold.
oos_y = []
oos_pred = []
oos_rows = []

for fold, (train, test) in enumerate(kf.split(X, df[target_column]), start=1):
    print(f"Fold #{fold}")

    # Drop the graph/state left over from the previous fold's model so
    # memory does not grow with every fold.
    keras.backend.clear_session()

    X_train, y_train = X[train], y[train]
    X_test, y_test = X[test], y[test]

    # A fresh model per fold: no weights carry over between folds.
    model = _build_model(X.shape[1], y.shape[1])

    # The held-out fold is passed as validation data only to record per-epoch
    # validation metrics in `history`; it is not used to update the weights.
    history = model.fit(X_train, y_train, epochs=50, validation_data=(X_test, y_test), verbose = 0)

    # Class probabilities -> predicted class code.
    pred = np.argmax(model.predict(X_test), axis=1)

    oos_y.append(y_test)
    oos_pred.append(pred)
    oos_rows.append(test)

    y_compare = np.argmax(y_test, axis = 1)
    score = metrics.accuracy_score(y_compare, pred)
    print(f"Fold score (accuracy): {score}")

oos_y = np.concatenate(oos_y)
oos_pred = np.concatenate(oos_pred)
oos_rows = np.concatenate(oos_rows)
oos_y_compare = np.argmax(oos_y, axis=1)

# Accuracy over all held-out predictions pooled across folds.
score = metrics.accuracy_score(oos_y_compare, oos_pred)

print("-------------------------------")
print(f"Final score (accuracy): {score}")

# The pooled arrays are in fold order, not in df's row order, and df's index
# may have gaps after dropna(). pd.concat(axis=1) aligns on index labels, so
# each out-of-sample row must carry the label of the df row it came from;
# otherwise predictions are attached to the wrong samples.
oos_index = df.index[oos_rows]
oos_y = pd.DataFrame(oos_y, index=oos_index)
oos_pred = pd.DataFrame(oos_pred, index=oos_index)
oosDF = pd.concat([df, oos_y, oos_pred], axis =1)

In [None]:
# Shapes of the train/test arrays left over from the last cross-validation
# fold, printed in the order: X_train, y_train, X_test, y_test.
for fold_array in (X_train, y_train, X_test, y_test):
    print(fold_array.shape)

In [None]:
# Per-epoch accuracy curves taken from `history` (the most recent fit):
# first the training accuracy, then the validation accuracy.
f, ax = plt.subplots(figsize=(6,4))
sns.despine()

for curve in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[curve])

ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')

ax.legend(['train', 'test'], loc='lower right', frameon=False)


#plt.savefig('Accuracy.pdf', transparent = True, dpi = 1200)
plt.show()
plt.clf()

In [None]:
# Per-epoch loss curves taken from `history` (the most recent fit):
# first the training loss, then the validation loss.
f, ax = plt.subplots(figsize=(6,4))
sns.despine()

for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve])

ax.set_ylabel('Loss')
ax.set_xlabel('Epoch')


#plt.legend(['train', 'test'], loc='upper right', frameon=False)

#plt.savefig('Loss.pdf', transparent = True, dpi = 1200)
plt.show()
plt.clf()

In [None]:
# Evaluate the model from the most recent fit on its own train/test split.
# evaluate() returns [loss, accuracy] because the model was compiled with
# metrics=['accuracy']; the accuracy is a fraction in [0, 1], so it is
# rendered with the percent format instead of appending a bare '%' sign to
# the fraction.
pred_train= model.predict(X_train)
scores = model.evaluate(X_train, y_train, verbose=0)
print('Accuracy on training data: {:.2%} \n Error on training data: {}'.format(scores[1], 1 - scores[1]))

pred_test= model.predict(X_test)
scores2 = model.evaluate(X_test, y_test, verbose=0)
print('Accuracy on test data: {:.2%} \n Error on test data: {}'.format(scores2[1], 1 - scores2[1]))