In [None]:
import pandas as pd
# Location of the extended Einstein Data4u (Sao Paulo) CSV.
# NOTE(review): this is an absolute path on the author's machine; point it at
# your own copy of the data before running.
# Raw strings keep the Windows backslashes literal -- sequences such as "\O",
# "\R", "\T" and "\E" are invalid escapes in a normal string literal.
dir_path = r"F:\Oxford_research_projects\Research Collaborations\Team_MAPP"
file_path = r"%s\EXTENDED_Einstein_Data4u_Sao_Paulo.csv" % (dir_path)
data = pd.read_csv(file_path)

# Preview only the first rows instead of rendering the whole frame.
data.head()

In [None]:
# Keep only the rows whose regular-ward flag column equals 1.
ward_flag = data['Patient addmited to regular ward (1=yes, 0=no)']
is_regular = ward_flag.eq(1)
data_regular = data.loc[is_regular]
data_regular

In [None]:
# Binary target: 'positive' -> 1, 'negative' -> 0.
label = data_regular.loc[:, 'SARS-Cov-2 exam result']
label = label.replace(to_replace=['positive', 'negative'], value=[1, 0])

# Columns 6..19 (14 columns) are used as features, with the target appended
# as a final 'label' column. `.assign` returns a new frame, so nothing is
# written into a slice of `data_regular` (avoids SettingWithCopyWarning).
subset = data_regular.iloc[:, 6:20].assign(label=label)
subset

In [None]:
# Discard every row that has a missing value in any column.
subset = subset.dropna(axis=0, how='any')
subset

In [None]:
# The first 14 columns are the features; column 14 is the target.
x, y = subset.iloc[:, :14], subset.iloc[:, 14]

# Plain numpy arrays for scikit-learn / Keras.
xN, yN = x.to_numpy(), y.to_numpy()

n_samples, n_features = xN.shape
class_names = ['Covid -ve', 'Covid +ve']

In [None]:
import numpy as np
from scipy import interp
import matplotlib.pyplot as plt

from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import backend

import itertools
from sklearn.metrics import confusion_matrix
from imblearn.metrics import sensitivity_specificity_support

# Feed-forward binary classifier: 16 -> 8 -> 6 ReLU units and one sigmoid output.
classifier = Sequential([
    Dense(units = 16, activation = 'relu', input_dim = n_features),
    Dense(units = 8, activation = 'relu'),
    Dense(units = 6, activation = 'relu'),
    Dense(units = 1, activation = 'sigmoid'),
])
classifier.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=["accuracy"])

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it on the current matplotlib figure.

    Parameters
    ----------
    cm : numpy.ndarray
        Square confusion matrix (rows: true labels, columns: predictions).
    classes : sequence of str
        Tick labels, one per class, in the same order as the rows of `cm`.
    normalize : bool, default False
        If True, each row is divided by its sum before printing and plotting.
        Rows whose sum is zero are shown as all zeros.
    title : str
        Title of the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.
    """
    # Normalise BEFORE drawing so that the heat map, the colorbar, the cell
    # annotations and the text-colour threshold all refer to the same values.
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype=float),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # White text on dark cells, black text on light cells.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, "{:.3f}".format(cm[i, j]),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [None]:
import os
import random

from imblearn.over_sampling import SMOTE

# --- Reproducibility -------------------------------------------------------
# Seed every source of randomness in use: Python, numpy, TensorFlow (weight
# initialisation / batch shuffling) and SMOTE.
seed = 50
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

cv = StratifiedKFold(n_splits=10)
oversample = SMOTE(random_state=seed)

# Output directory for the figures; exist_ok lets the cell be re-run.
out_dir = 'Regular-seed-%d' % (seed)
os.makedirs(out_dir, exist_ok=True)

np.set_printoptions(precision=2)


def build_classifier(input_dim):
    """Return a freshly initialised, compiled 16-8-6-1 binary classifier."""
    model = Sequential()
    model.add(Dense(units = 16, activation = 'relu', input_dim = input_dim))
    model.add(Dense(units = 8, activation = 'relu'))
    model.add(Dense(units = 6, activation = 'relu'))
    model.add(Dense(units = 1, activation = 'sigmoid'))
    model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics=["accuracy"])
    return model


# --- 10-fold cross-validation ----------------------------------------------
tprs = []
aucs = []
mean_fpr = np.linspace(0, 1, 100)
fig, ax = plt.subplots()
result_acc = []
result_sens = []
result_spec = []
for i, (train, test) in enumerate(cv.split(xN, yN)):
    # A new model per fold so that no weights leak between folds.
    classifier = build_classifier(n_features)

    # Oversample the training part only, and actually train on the
    # resampled data (the test fold is left untouched).
    xTrain, yTrain = oversample.fit_resample(xN[train], yN[train])
    classifier.fit(xTrain, yTrain, batch_size = 1, epochs = 50)

    # Predict once and reuse the scores for both the ROC curve and the
    # thresholded class predictions.
    y_score = classifier.predict(xN[test]).ravel()
    fpr, tpr, _ = roc_curve(yN[test], y_score)
    roc_auc = auc(fpr, tpr)
    test_loss, test_acc = classifier.evaluate(xN[test], yN[test])
    ax.plot(fpr, tpr, label = 'ROC fold %d (AUC = %0.2f)' % (i, roc_auc))

    # Resample the curve onto a common FPR grid so folds can be averaged.
    interp_tpr = np.interp(mean_fpr, fpr, tpr)
    interp_tpr[0] = 0.0
    tprs.append(interp_tpr)
    aucs.append(roc_auc)
    result_acc.append(test_acc)

    y_pred = (y_score >= 0.5).astype(int)
    cnf_matrix = confusion_matrix(yN[test], y_pred)

    plt.figure()
    plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True, title='Normalized confusion matrix')
    plt.savefig(os.path.join(out_dir, 'smote-nh-cf-it%d.pdf' % (i)), bbox_inches='tight')

    # average='binary' reports the metrics for the positive class (label 1),
    # i.e. sensitivity = recall on Covid +ve, specificity = recall on Covid -ve.
    sens, spec, support = sensitivity_specificity_support(
        yN[test], y_pred, pos_label=1, average='binary')
    result_sens.append(sens)
    result_spec.append(spec)

# Keep the per-fold metrics as numpy arrays for downstream cells.
result_acc = np.array(result_acc)
result_sens = np.array(result_sens)
result_spec = np.array(result_spec)

# --- Mean ROC curve with a +/- 1 std. dev. band ------------------------------
ax.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r', label='Chance', alpha=.8)
mean_tpr = np.mean(tprs, axis=0)
mean_tpr[-1] = 1.0
mean_auc = auc(mean_fpr, mean_tpr)
std_auc = np.std(aucs)
ax.plot(mean_fpr, mean_tpr, color='b', label=r'Mean ROC (AUC = %0.2f $\pm$ %0.2f)' % (mean_auc, std_auc), lw=2, alpha=.8)

std_tpr = np.std(tprs, axis=0)
tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
ax.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=.2, label=r'$\pm$ 1 std. dev.')

ax.set(xlim=[-0.05, 1.05], ylim=[-0.05, 1.05], title="Receiver operating characteristic")
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
ax.figure.savefig(os.path.join(out_dir, 'smote-nh-roc.pdf'), bbox_inches='tight')


In [None]:
# Average of the per-fold values collected in result_acc.
np.asarray(result_acc).mean()

In [None]:
# Average of the per-fold values collected in result_sens.
np.asarray(result_sens).mean()

In [None]:
# Average of the per-fold values collected in result_spec.
np.asarray(result_spec).mean()