In [None]:
import os
import mne
import numpy as np
import pandas as pd
import glob

# Classification of seizures or non seizures by features from EEG data

### Load npy

In [6]:
labels = np.load("seizures_labeled.npy")


### Separate features and labels and normalize features

In [16]:
from sklearn import preprocessing

x, y = labels[:,:276], labels[:,276]

x = preprocessing.normalize(x)
print(x.shape)

unique, counts = np.unique(y, return_counts=True)
print(dict(zip(unique, counts)))

(6216, 276)
{0.0: 6116, 1.0: 100}


### Separate in train and test 

In [57]:
from sklearn.model_selection import train_test_split

x_train_unb, x_test, y_train_unb, y_test =  train_test_split(x, y, test_size = 0.5, random_state = 0)
print(x_train_unb.shape)
print(y_train_unb.shape)
print(x_test.shape)
print(y_test.shape)

unique, counts = np.unique(y_train_unb, return_counts=True)
print(dict(zip(unique, counts)))

unique, counts = np.unique(y_test, return_counts=True)
print(dict(zip(unique, counts)))


(3108, 276)
(3108,)
(3108, 276)
(3108,)
{0.0: 3051, 1.0: 57}
{0.0: 3065, 1.0: 43}


### Balancing training set

In [58]:
from sklearn.utils import resample

labels_train = np.hstack((x_train_unb, np.expand_dims(y_train_unb, axis=1)))
print(labels_train.shape)

non_seizure_idx = np.nonzero(labels_train[:,276] == 0)
labels_majority = labels_train[non_seizure_idx]

seizure_idx = np.nonzero(labels_train[:,276] == 1)
labels_minority = labels_train[seizure_idx]

print("Before balancing:")
print(np.shape(labels_majority))
print(np.shape(labels_minority))

labels_minority = resample(labels_minority,
                            replace=True,
                            n_samples=int(0.5*labels_majority.shape[0]),
                            random_state=123)

print("After balancing:")
print(np.shape(labels_majority))
print(np.shape(labels_minority))

labels_balanced = np.concatenate((labels_majority, labels_minority))

np.random.shuffle(labels_balanced)
x_train, y_train = labels_balanced[:,:276], labels_balanced[:,276]

print("Full data after balancing:")
print(x_train.shape)
print(y_train.shape)

unique, counts = np.unique(y_train, return_counts=True)
print(dict(zip(unique, counts)))
# x_train, x_test, y_train, y_test =  train_test_split(x, y, test_size = 0.3)
# print(x_train.shape)
# print(x_test.shape)


(3108, 277)
Before balancing:
(3051, 277)
(57, 277)
After balancing:
(3051, 277)
(1525, 277)
Full data after balancing:
(4576, 276)
(4576,)
{0.0: 3051, 1.0: 1525}


### Filter features by variance and correlation 

In [42]:
from sklearn.feature_selection import VarianceThreshold

# check zero variance features
thresholder = VarianceThreshold(threshold=0)
print("Variables Kept after removing features with 0 variance: ", thresholder.fit_transform(x).shape[1])

# highly correlated features
corr = abs(pd.DataFrame(x).corr())
upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(np.bool))
cols = [column for column in upper.columns if any(upper[column] < 0.9)]
print("Variables Kept after removing features with corr > 0.9: ", len(cols)) 

Variables Kept after removing features with 0 variance:  276
Variables Kept after removing features with corr > 0.9:  275


Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  upper = corr.where(np.triu(np.ones(corr.shape), k=1).astype(np.bool))


### SVM Classifier

In [60]:
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, KFold
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.svm import SVC

svm = SVC(kernel="rbf", class_weight='balanced', random_state = 0)

# cross validation
kf = KFold(n_splits=5)
accuracy, tpr, fpr = [], [], []
for train, test in kf.split(x_train):
    svm.fit(x_train[train, :], y_train[train])
    pred = svm.predict(x_train[test])
    tn, fp, fn, tp = confusion_matrix(y_train[test], pred).ravel()
    accuracy.append((tp + tn)/(tn + fp + fn + tp))
    #print("true positive: %.4f\ntrue negative: %.4f\nfalse positive: %.4f\nfalse negative: %.4f\n" % (tp, tn, fp, fn))
    tpr.append(tp / (tp + fn))
    fpr.append(fp / (fp + tn))

print("KFold results")
print(accuracy)
print(tpr)
print(fpr)



KFold results
[0.9203056768558951, 0.9224043715846995, 0.9169398907103825, 0.9387978142076503, 0.9158469945355191]
[0.9372822299651568, 0.9663299663299664, 0.9554140127388535, 0.9404388714733543, 0.9577922077922078]
[0.08744038155802862, 0.09870550161812297, 0.10316139767054909, 0.06208053691275168, 0.10543657331136738]


In [62]:
# Testing 
svm.fit(x_train, y_train)
pred = svm.predict(x_test)
tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
accuracy = (tp + tn)/(tn + fp + fn + tp)
tpr = tp / (tp + fn)
fpr = fp / (fp + tn)

print("Validation")
print("Accuracy: %.2f" % (accuracy))
print("True Positive Rate: %.2f" % (tpr))
print("False Positive Rate: %.2f" % (fpr))

Validation
Accuracy: 0.93
True Positive Rate: 0.07
False Positive Rate: 0.06


In [64]:
tp

3

In [38]:
from sklearn.neural_network import MLPClassifier
mlp = MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(10, 10), learning_rate='constant',
       learning_rate_init=0.001, max_iter=500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

# cross validation
kf = KFold(n_splits=5)
accuracy, tpr, fpr = [], [], []
for train, test in kf.split(x_train):
    mlp.fit(x_train[train, :], y_train[train])
    pred = mlp.predict(x_train[test])
    tn, fp, fn, tp = confusion_matrix(y_train[test], pred).ravel()
    accuracy.append((tp + tn)/(tn + fp + fn + tp))
    tpr.append(tp / (tp + fn))
    fpr.append(fp / (fp + tn))

print(accuracy)
print(tpr)
print(fpr)




[0.9832869080779945, 0.9832869080779945, 0.9767873723305478, 0.9693593314763231, 0.9795539033457249]
[0.9583333333333334, 0.9789473684210527, 0.905982905982906, 0.8876404494382022, 0.9347826086956522]
[0.014271151885830785, 0.016293279022403257, 0.014583333333333334, 0.02327935222672065, 0.016260162601626018]




In [52]:
# Testing 
mlp.fit(x_train, y_train)
pred = mlp.predict(x_test)
tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
accuracy = (tp + tn)/(tn + fp + fn + tp)
tpr = tp / (tp + fn)
fpr = fp / (fp + tn)

print("Validation")
print("Accuracy: %.2f" % (accuracy))
print("True Positive Rate: %.2f" % (tpr))
print("False Positive Rate: %.2f" % (fpr))

Validation
Accuracy: 0.97
True Positive Rate: 0.06
False Positive Rate: 0.02




In [36]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(3)

# cross validation
kf = KFold(n_splits=5)
accuracy, tpr, fpr = [], [], []
for train, test in kf.split(x_train):
    knn.fit(x_train[train, :], y_train[train])
    pred = knn.predict(x_train[test])
    tn, fp, fn, tp = confusion_matrix(y_train[test], pred).ravel()
    accuracy.append((tp + tn)/(tn + fp + fn + tp))
    tpr.append(tp / (tp + fn))
    fpr.append(fp / (fp + tn))

print(accuracy)
print(tpr)
print(fpr)


[0.9740018570102136, 0.9702878365831012, 0.9740018570102136, 0.9795728876508821, 0.974907063197026]
[0.9791666666666666, 0.9789473684210527, 0.9658119658119658, 0.9775280898876404, 0.9782608695652174]
[0.026503567787971458, 0.03054989816700611, 0.025, 0.020242914979757085, 0.02540650406504065]


In [37]:
# Testing 
knn.fit(x_train, y_train)
pred = knn.predict(x_test)
tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()
accuracy = (tp + tn)/(tn + fp + fn + tp)
tpr = tp / (tp + fn)
fpr = fp / (fp + tn)

print("Validation")
print("Accuracy: %.2f" % (accuracy))
print("True Positive Rate: %.2f" % (tpr))
print("False Positive Rate: %.2f" % (fpr))

Validation
Accuracy: 0.96
True Positive Rate: 0.00
False Positive Rate: 0.02
