In [None]:
#######################################
# Yao Li
# yaoli90@illinois.edu
# 2019.10.6
#######################################

import numpy as np
import os
import pyeeg
from sklearn.model_selection import train_test_split

In [None]:
## Load data
# Dataset from http://epileptologie-bonn.de/cms/front_content.php?idcat=193&lang=3
# The dataset is catagrized into 3 classes, i.e. healthy, inter_ictal, and ictal
# and stored in 3 different folders. In each folder, there are 100 recorded eeg
# data stored in .txt files. Each file corresponding to one training/testing
# data.
#
# healthy:     label: 1, Healthy dataset
# inter_ictal: label: 0, Inter-ictal (transition between healthy to seizure)
# ictal:       label:-1, Ictal or seizures
def load_data():
    file_dirs = []
    labels = []
    categories = {'healthy': 1, 'inter_ictal': 0, 'ictal': -1}
    for cat in categories: # loop through all categories
        for file in os.listdir('data/' + cat): # loop through all data files
            # append all file directories in a list
            file_dirs.append('data/'+cat+'/'+file)
            labels.append(categories[cat]) # append the corresponding label

    data = np.zeros((len(file_dirs),4097))
    # Using the file directories list we just created to read data
    for i in range(len(file_dirs)):
        data[i][:] = np.loadtxt(file_dirs[i]) # load data from file
    labels = np.array(labels) # warp as np array
    return data, labels

In [None]:
## Calculate features from raw data
#
# Features:
# DFA: Detrended Fluctuation Analysis
# HFD: Higuchi Fractal Dimension
# SVD_Entropy: SVD Entropy
# Fisher_Information: Fisher Information
# PFD: Petrosian Fractal Dimension
# Spectral_Entropy: Spectral Entropy (Shannon's entropy of RIRs
# detailed explanation: https://www.hindawi.com/journals/cin/2011/406391/
def create_features(data):
    Kmax = 5
    Tau = 4
    DE = 10
    Band = np.arange(1,86)
    Fs = 173
    F = np.zeros((data.shape[0],6))
    for i in range(data.shape[0]): # loop through the rows of raw data
        mat = data[i][:]
        DFA                = pyeeg.dfa(mat)
        HFD                = pyeeg.hfd(mat,Kmax)
        SVD_Entropy        = pyeeg.svd_entropy(mat,Tau,DE)
        Fisher_Information = pyeeg.fisher_info(mat,Tau,DE)
        PFD                = pyeeg.pfd(mat)
        Spectral_Entropy   = pyeeg.spectral_entropy(mat, Band, Fs, Power_Ratio=None)
        F[i][:] = [DFA, HFD, SVD_Entropy, Fisher_Information, PFD, Spectral_Entropy]
    return F

In [2]:
# Load data and labels from files
data, labels = load_data()
# Calculate features from raw data
features = create_features(data)
# Split the dataset into training set and test set. The training set contains 4/5
# of the entire dataset. The test set contains the rest of the 1/5.
X_train, X_test, y_train, y_test = train_test_split(features, labels)
# Print the size of training and test dataset
print('-------------------------------------------------')
print('Dataset:')
print('X_train:', X_train.shape, 'X_test', X_test.shape)
print('-------------------------------------------------')

# Import some models from sklearn package
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import warnings

# Classifiers we try
names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
         "Naive Bayes", "QDA"]
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0), warm_start=True),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    MLPClassifier(alpha=1),
    AdaBoostClassifier(),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]

clf_score=[]
with warnings.catch_warnings():
    warnings.simplefilter("ignore") # ignore a warning message of sklearn
    for name, clf in zip(names, classifiers): # loop through classifiers
        clf.fit(X_train, y_train) # train a classifier
        score = clf.score(X_test, y_test) # evaluate the accuracy
        clf_score.append([score,name])

for score in clf_score:
    print(score)

-------------------------------------------------
Dataset:
X_train: (225, 6) X_test (75, 6)
-------------------------------------------------
[0.8533333333333334, 'Nearest Neighbors']
[0.25333333333333335, 'Linear SVM']
[0.84, 'RBF SVM']
[0.8933333333333333, 'Gaussian Process']
[0.88, 'Decision Tree']
[0.8666666666666667, 'Random Forest']
[0.6533333333333333, 'Neural Net']
[0.84, 'AdaBoost']
[0.7333333333333333, 'Naive Bayes']
[0.8, 'QDA']
