<a href="https://colab.research.google.com/github/alicewoo0925/ECG-Apnoea-Detection/blob/main/Model_comparison.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler,RobustScaler,MinMaxScaler
from imblearn.over_sampling import RandomOverSampler, SMOTE

In [None]:
feature_cols = ["RRset_mean","log(RRseg_std)","log(SDSD)","RMSSD","RRseg_triI"] #name of the features
result_cols = ["A","N"] #classification results of the training set #used one-hot coding
cols = ["RRset_mean","log(RRseg_std)","log(SDSD)","RMSSD","RRseg_triI","A"] #name of the features

X_train_df = pd.read_csv("X_train.csv", names=feature_cols)
T_train_df = pd.read_csv("T_train.csv", names=result_cols)

X_train_df.head()
T_train_df.head()

df = pd.concat([X_train_df,T_train_df], axis=1)
del df['N']
df.head()

Unnamed: 0,RRset_mean,log(RRseg_std),log(SDSD),RMSSD,RRseg_triI,A
0,1.0307,-3.2674,-3.4358,0.031926,5.5556,0
1,0.93625,-1.5161,-1.4311,0.23714,3.8462,0
2,0.99066,-2.1771,-2.304,0.099071,5.5556,0
3,1.0314,-2.7058,-2.882,0.055551,4.5455,0
4,0.95794,-1.7164,-1.8649,0.15366,7.1429,0


In [None]:
# get rid of all NaN
df = df.dropna()

In [None]:
from sklearn.model_selection import train_test_split

X = df[df.columns[:-1]].values
y = df[df.columns[-1]].values
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
smote = SMOTE(random_state=42)
X, y = smote.fit_resample(X, y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=4)


# 1. Run without scaler and oversampling

##KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

knn_model = KNeighborsClassifier(n_neighbors=5)
knn_model.fit(X_train, y_train)

y_pred = knn_model.predict(X_test)

In [None]:
# confusion matrix
def print_result (y_test, y_pred):

  from sklearn.metrics import confusion_matrix, accuracy_score, cohen_kappa_score, roc_auc_score, f1_score
  tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

  sensitivity = tp/(tp+fn)
  specificity = tn/(tn+fp)
  accuracy = accuracy_score(y_test, y_pred)
  PPV = tp/(tp+fp)
  NPV = tn/(tn+fn)
  kappa = cohen_kappa_score(y_test, y_pred)
  AUROC = roc_auc_score(y_test, y_pred)
  f1 = f1_score(y_test, y_pred)

  print("Sensitivity : %5.3f, Specificity: %5.3f, Accuracy: %5.3f, PPV: %5.3f, NPV: %5.3f, f1: %5.3f, Cohen's Kappa: %5.3f, AUROC: %5.3f"
        % (sensitivity, specificity, accuracy, PPV, NPV, f1, kappa, AUROC))

  return sensitivity, specificity, accuracy, PPV, NPV, f1, kappa, AUROC


In [None]:
sensitivity, specificity, accuracy, PPV, NPV, f1, kappa, AUROC = print_result(y_test, y_pred)

Sensitivity : 0.873, Specificity: 0.665, Accuracy: 0.768, PPV: 0.720, NPV: 0.842, f1: 0.789, Cohen's Kappa: 0.537, AUROC: 0.769


Result:
1. without scaler or oversampling

Sensitivity : 0.329, Specificity: 0.864, Accuracy: 0.726, PPV: 0.458, NPV: 0.787, f1: 0.383, Cohen's Kappa: 0.213, AUROC: 0.597

2.



## Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB

nb_model = GaussianNB()
nb_model = nb_model.fit(X_train, y_train)

y_pred = nb_model.predict(X_test)

sensitivity, specificity, accuracy, PPV, NPV, f1, kappa, AUROC = print_result(y_test, y_pred)

Sensitivity : 0.706, Specificity: 0.426, Accuracy: 0.565, PPV: 0.548, NPV: 0.595, f1: 0.617, Cohen's Kappa: 0.132, AUROC: 0.566


Sensitivity : 0.074, Specificity: 0.975, Accuracy: 0.742, PPV: 0.506, NPV: 0.751, f1: 0.129, Cohen's Kappa: 0.068, AUROC: 0.524

## Log regression

In [None]:
from sklearn.linear_model import LogisticRegression
lg_model = LogisticRegression()
lg_model = lg_model.fit(X_train, y_train)
y_pred = lg_model.predict(X_test)

sensitivity, specificity, accuracy, PPV, NPV, f1, kappa, AUROC = print_result(y_test, y_pred)

Sensitivity : 0.612, Specificity: 0.677, Accuracy: 0.645, PPV: 0.652, NPV: 0.639, f1: 0.631, Cohen's Kappa: 0.290, AUROC: 0.645


Sensitivity : 0.145, Specificity: 0.956, Accuracy: 0.746, PPV: 0.534, NPV: 0.762, f1: 0.228, Cohen's Kappa: 0.132, AUROC: 0.550

## SVM

In [None]:
from sklearn.svm import SVC

svm_model = SVC()
svm_model = svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)

sensitivity, specificity, accuracy, PPV, NPV, f1, kappa, AUROC = print_result(y_test, y_pred)

Sensitivity : 0.716, Specificity: 0.614, Accuracy: 0.665, PPV: 0.647, NPV: 0.687, f1: 0.680, Cohen's Kappa: 0.330, AUROC: 0.665


Sensitivity : 0.000, Specificity: 1.000, Accuracy: 0.741, PPV:   nan, NPV: 0.741, f1: 0.000, Cohen's Kappa: 0.000, AUROC: 0.500

!! RuntimeWarning: invalid value encountered in long_scalars



## Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier()
rf_model = rf_model.fit(X_train, y_train)

y_pred = rf_model.predict(X_test)

sensitivity, specificity, accuracy, PPV, NPV, f1, kappa, AUROC = print_result(y_test, y_pred)

Sensitivity : 0.837, Specificity: 0.785, Accuracy: 0.811, PPV: 0.793, NPV: 0.830, f1: 0.814, Cohen's Kappa: 0.621, AUROC: 0.811


Sensitivity : 0.305, Specificity: 0.903, Accuracy: 0.748, PPV: 0.522, NPV: 0.788, f1: 0.385, Cohen's Kappa: 0.240, AUROC: 0.604

## Decision Tree



In [None]:
from sklearn.tree import DecisionTreeClassifier

tree_model = DecisionTreeClassifier()
tree_model = tree_model.fit(X_train, y_train)

y_pred = tree_model.predict(X_test)

sensitivity, specificity, accuracy, PPV, NPV, f1, kappa, AUROC = print_result(y_test, y_pred)

Sensitivity : 0.772, Specificity: 0.734, Accuracy: 0.753, PPV: 0.741, NPV: 0.765, f1: 0.756, Cohen's Kappa: 0.506, AUROC: 0.753


Sensitivity : 0.413, Specificity: 0.783, Accuracy: 0.687, PPV: 0.399, NPV: 0.793, f1: 0.406, Cohen's Kappa: 0.194, AUROC: 0.598

## Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

ln_model = LinearRegression()
ln_model.fit(X_train, y_train)

y_pred = ln_model.predict(X_test)

#sensitivity, specificity, accuracy, PPV, NPV, f1, kappa, AUROC = print_result(y_test, y_pred)
#CM_train=y_pred*y_pred;

y_pred = np.round_(y_pred)
sensitivity, specificity, accuracy, PPV, NPV, f1, kappa, AUROC = print_result(y_test, y_pred)


Sensitivity : 0.604, Specificity: 0.682, Accuracy: 0.643, PPV: 0.652, NPV: 0.636, f1: 0.627, Cohen's Kappa: 0.286, AUROC: 0.643


Sensitivity : 0.117, Specificity: 0.965, Accuracy: 0.746, PPV: 0.539, NPV: 0.758, f1: 0.192, Cohen's Kappa: 0.110, AUROC: 0.541
