In [33]:
%matplotlib notebook
import glob
import pandas as pd
import mne
import matplotlib.pyplot as plt
from io import StringIO
import mne
from mne.io import read_raw_eeglab, read_epochs_eeglab
import numpy as np
from scipy import signal
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from tqdm import tqdm_notebook
from sklearn.svm import SVR
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVR
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
from sklearn.tree import DecisionTreeClassifier
from keras_tqdm import TQDMNotebookCallback

import autosklearn.regression
import sklearn.model_selection
import sklearn.datasets
import sklearn.metrics
import multiprocessing
from oct2py import octave

from joblib import Parallel, delayed
import multiprocessing
from joblib import wrap_non_picklable_objects
import json
import pickle
import os.path
from mpl_toolkits.mplot3d import axes3d
from math import e

import tensorflow as tf
from tensorflow.keras import layers

In [2]:
features_filename = '53-features-v1.xlsx'

In [3]:
df = pd.read_excel(features_filename, index_col=0)

In [4]:
p1 = np.percentile(df['mep_category_cmap'], 50)
cat = np.ones(len(df['mep_category_cmap'])) * (df['mep_category_cmap'] > p1)
df['mep_category_cmap_across_subjects'] = cat

# Linear regression

In [5]:
x = df.drop(['mep_category_cmap_across_subjects', 
             'mep_area', 
             'mep_category_absolute',
             'mep_category_absolute_binary',
             'mep_category_binary',
             'mep_category_cmap',
             'mep_category_cmap_category',
             'mep_category_cmap_category_binary',
             'mep_category_percentile',
             'mep_size',
             'run',
             'sub',
             'exp',
             'epoch',
             'cmap_min',
             'cmap_max'], axis=1)
y = df['mep_category_cmap_across_subjects']

In [6]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=12)

In [7]:
normalizer = preprocessing.MinMaxScaler()
reg = LinearRegression()
reg.fit(normalizer.fit_transform(x_train), y_train)
y_pred = reg.predict(x_test)

y_pred = y_pred > 0.5
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.5142857142857142
[[171   4]
 [166   9]]
             precision    recall  f1-score   support

        0.0       0.51      0.98      0.67       175
        1.0       0.69      0.05      0.10       175

avg / total       0.60      0.51      0.38       350



# Random forest

In [27]:
reg = RandomForestClassifier(n_estimators = 4000, random_state = 42)
reg.fit(normalizer.fit_transform(x_train), y_train)
y_pred = reg.predict(normalizer.transform(x_test)) > 0.5
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.66
[[109  66]
 [ 53 122]]
             precision    recall  f1-score   support

        0.0       0.67      0.62      0.65       175
        1.0       0.65      0.70      0.67       175

avg / total       0.66      0.66      0.66       350



In [32]:
from sklearn.tree import export_graphviz
export_graphviz(reg.estimators_[5], out_file='tree.dot', 
                feature_names = x_train.columns,
                class_names = ['small', 'large'],
                rounded = True, proportion = False, 
                precision = 2, filled = True)

# Convert to png using system command (requires Graphviz)
from subprocess import call
call(['dot', '-Tpng', 'tree.dot', '-o', 'tree.png', '-Gdpi=600'])

# Display in jupyter notebook
# from IPython.display import Image
# Image(filename = 'tree.png')

0

# Gradient boosting

In [15]:
reg = GradientBoostingClassifier(n_estimators = 2000, random_state = 42)
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test) > 0.5
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.6485714285714286
[[112  63]
 [ 60 115]]
             precision    recall  f1-score   support

        0.0       0.65      0.64      0.65       175
        1.0       0.65      0.66      0.65       175

avg / total       0.65      0.65      0.65       350



# SVM

In [14]:
reg = SVC(kernel='rbf')
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test) > 0.5
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.5142857142857142
[[  9 166]
 [  4 171]]
             precision    recall  f1-score   support

        0.0       0.69      0.05      0.10       175
        1.0       0.51      0.98      0.67       175

avg / total       0.60      0.51      0.38       350



# Decision tree

In [8]:
reg = DecisionTreeClassifier()
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test) > 0.5
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.5828571428571429
[[108  67]
 [ 79  96]]
             precision    recall  f1-score   support

        0.0       0.58      0.62      0.60       175
        1.0       0.59      0.55      0.57       175

avg / total       0.58      0.58      0.58       350



# Keras

In [9]:
print(tf.version.VERSION)
print(tf.keras.__version__)

2.0.0-beta1
2.2.4-tf


In [47]:
cb = TQDMNotebookCallback(show_inner=False)
cb.on_train_batch_begin = cb.on_batch_begin
cb.on_train_batch_end = cb.on_batch_end

In [48]:
classifier = tf.keras.Sequential()
classifier.add(tf.keras.layers.Dense(64, activation='relu', input_dim=x_train.shape[1]))
classifier.add(tf.keras.layers.Dense(64, activation='relu'))
classifier.add(tf.keras.layers.Dense(1, activation='sigmoid'))
classifier.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
classifier.fit(normalizer.fit_transform(x_train), y_train, batch_size=32, epochs=100, callbacks=[cb], verbose=0)

HBox(children=(IntProgress(value=0, description='Training', style=ProgressStyle(description_width='initial')),…

<tensorflow.python.keras.callbacks.History at 0x7f7e945e86a0>

In [49]:
y_pred = classifier.predict(normalizer.transform(x_test))
y_pred = (y_pred > 0.5)
print(accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

0.5114285714285715
[[90 85]
 [86 89]]
             precision    recall  f1-score   support

        0.0       0.51      0.51      0.51       175
        1.0       0.51      0.51      0.51       175

avg / total       0.51      0.51      0.51       350

