In [0]:
import pandas as pd
import numpy as np
import scipy.io as sio
import matplotlib.pyplot
%matplotlib inline
import seaborn as sns
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix

In [0]:
from google.colab import files

In [0]:
#files.upload()

In [0]:
data = sio.loadmat('EEG05_Data.mat')

**(1) First, preprocessing**

In [0]:
type(data)

dict

In [0]:
print(data.keys())

dict_keys(['__header__', '__version__', '__globals__', 'ElectrodeLabels', 'Fs', 'StimulusCode_Collection1', 'eeg_data_Collection1', 'StimulusCode_Collection2', 'eeg_data_Collection2', 'StimulusCode_Collection3', 'eeg_data_Collection3'])


In [0]:
#get the dat into variables to make dataframes
stim_1 = data['StimulusCode_Collection1'] #this is the training data for stimulus

In [0]:
stim_1.shape

(19696, 1)

In [0]:
###convert stim_1 from uint8 to int32
stim_1 = np.array(stim_1, dtype = np.int32)

In [0]:
eeg_1 = data['eeg_data_Collection1'] #this is the training data for the eeg models

In [0]:
#now the rest of the testing data
stim_2 = data['StimulusCode_Collection2']
eeg_2 = data['eeg_data_Collection2']

#Save this for later if necessary for testing
#stim_3 = data['StimulusCode_Collection3']
#eeg_3 = data['eeg_data_Collection3']

In [0]:
type(eeg_1)
eeg_1.shape

(64, 19696)

In [0]:
#now flip the matrices to columns = electrodes
eeg_1t = eeg_1.transpose()
eeg_1t.shape

(19696, 64)

In [0]:
#now the rest of the transposes
eeg_2t = eeg_2.transpose()
#eeg_3t = eeg_3.transpose()

In [0]:
#now convert to dataframes
data_train = pd.DataFrame(eeg_1t)
data_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,-960,-768,-752,-1200,-624,-608,80,-480,-656,-720,...,0,-496,-976,-688,-768,-336,-496,-672,-944,128
1,128,-48,-192,-864,-368,-368,416,-112,-304,-432,...,-1264,-1040,-1408,-1040,-1280,-1168,-768,-976,-1312,-528
2,-528,-272,-224,-832,-592,-592,-80,-496,-464,-544,...,-960,-1008,-1296,-656,-848,-800,-624,-768,-976,-480
3,-128,160,240,-368,-80,-48,144,256,32,-144,...,-384,-448,-864,-480,-640,-544,-368,-560,-688,-80
4,656,208,112,-528,-64,-48,352,208,-240,-448,...,-752,-1264,-1264,-528,-896,-752,-512,-672,-896,-416


In [0]:
#then add the stim data as column 64
idx = 64
col_name = 'stim_1'
data_train.insert(idx, column = col_name, value=stim_1)
data_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,55,56,57,58,59,60,61,62,63,stim_1
0,-960,-768,-752,-1200,-624,-608,80,-480,-656,-720,...,-496,-976,-688,-768,-336,-496,-672,-944,128,0
1,128,-48,-192,-864,-368,-368,416,-112,-304,-432,...,-1040,-1408,-1040,-1280,-1168,-768,-976,-1312,-528,0
2,-528,-272,-224,-832,-592,-592,-80,-496,-464,-544,...,-1008,-1296,-656,-848,-800,-624,-768,-976,-480,0
3,-128,160,240,-368,-80,-48,144,256,32,-144,...,-448,-864,-480,-640,-544,-368,-560,-688,-80,0
4,656,208,112,-528,-64,-48,352,208,-240,-448,...,-1264,-1264,-528,-896,-752,-512,-672,-896,-416,0


In [0]:
#then set up the first test set (set = eeg_2t)
data_test2 = pd.DataFrame(eeg_2t)
data_test2.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,800,64,256,64,-16,16,-1120,1472,832,592,...,1376,1072,1152,848,1264,1088,1376,1088,1232,1408
1,832,208,400,224,368,432,-576,1360,1008,784,...,2416,736,992,992,1568,1552,1408,1152,1344,1392
2,1040,640,720,544,800,864,-256,1760,1360,1168,...,2672,2256,2208,1808,2352,2736,2448,2208,2128,2320
3,2112,1280,1440,1232,1392,1440,448,2400,1952,1824,...,2528,1424,1648,1584,2272,2512,1968,1728,1920,1920
4,1312,720,1056,1040,1120,1152,-112,1824,1472,1488,...,1776,1136,1488,1280,1728,1200,1808,1536,1664,2000


In [0]:
#This is being ignored so we can process the direct way rather than the mon-dataframe way
#once again add the stim data
#col_name2 = 'stim_2' #index is still 64
#data_test2.insert(idx, column = col_name2, value=stim_2)
#data_test2.head()

**(1b) Scaler **

In [0]:
#scale the data
from sklearn.preprocessing import StandardScaler

In [0]:
scaler = StandardScaler()

In [0]:
scaler.fit(data_train.drop('stim_1', axis = 1))

StandardScaler(copy=True, with_mean=True, with_std=True)

In [0]:
type(scaler)

sklearn.preprocessing.data.StandardScaler

In [0]:
scaled_feat = scaler.transform(data_train.drop('stim_1', axis = 1))

In [0]:
#now make the dataframe using scaled values
eeg_feat = pd.DataFrame(scaled_feat, columns = data_train.columns[:-1].astype(str))#feature_columns requires str type column names
eeg_feat.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,-0.675027,-0.543574,-0.522744,-0.849665,-0.398053,-0.415461,0.056488,-0.386308,-0.496252,-0.549041,...,-0.060696,-0.421182,-0.674631,-0.617957,-0.608228,-0.307298,-0.507508,-0.505991,-0.73088,0.027824
1,0.075615,-0.063078,-0.140521,-0.619793,-0.239758,-0.267249,0.307521,-0.118157,-0.246897,-0.348919,...,-0.996003,-0.820855,-0.983922,-0.876909,-0.968413,-0.907614,-0.715145,-0.735998,-1.000946,-0.494787
2,-0.376978,-0.212566,-0.162363,-0.5979,-0.378266,-0.40558,-0.063051,-0.397966,-0.36024,-0.426744,...,-0.771056,-0.797345,-0.903736,-0.594416,-0.664507,-0.64209,-0.60522,-0.578625,-0.754364,-0.456547
3,-0.101007,0.075732,0.154336,-0.280458,-0.061676,-0.069633,0.104304,0.149993,-0.008876,-0.148797,...,-0.34484,-0.385916,-0.594444,-0.46494,-0.518181,-0.457377,-0.409797,-0.421251,-0.543008,-0.137882
4,0.439897,0.107765,0.066971,-0.389921,-0.051782,-0.069633,0.259705,0.115017,-0.201559,-0.360037,...,-0.617145,-0.985426,-0.880825,-0.500252,-0.698274,-0.607456,-0.519722,-0.505991,-0.695654,-0.405561


In [0]:
#Now we need to scale the testing data (data_test2)
#gonna just work on it directly instead of doing the add remove stim_2 approach to save time
scaler2 = StandardScaler()

In [0]:
scaler2.fit(data_test2)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [0]:
scaled_test2 = scaler.transform(data_test2)

In [0]:
eeg_test2 = pd.DataFrame(scaled_test2, columns = data_test2.columns.astype(str))
eeg_test2.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,54,55,56,57,58,59,60,61,62,63
0,0.539246,0.011665,0.165257,0.015092,-0.022102,-0.03011,-0.840058,1.036055,0.55784,0.362626,...,0.957487,0.730817,0.848917,0.512015,0.821259,0.720164,0.921523,0.825631,0.86603,1.047552
1,0.561324,0.107765,0.263543,0.124555,0.215341,0.226791,-0.433624,0.954444,0.682517,0.49604,...,1.727044,0.483961,0.734365,0.61795,1.035119,1.054955,0.945951,0.874054,0.948224,1.034806
2,0.704829,0.396062,0.481956,0.34348,0.482464,0.493572,-0.194545,1.245911,0.931872,0.76287,...,1.916473,1.600694,1.604963,1.218248,1.586654,1.90925,1.739858,1.673027,1.523581,1.774109
3,1.444432,0.82317,0.973385,0.814171,0.848521,0.849281,0.331429,1.71226,1.351242,1.218703,...,1.809919,0.98943,1.20403,1.05346,1.530375,1.747626,1.373439,1.309858,1.370935,1.455444
4,0.892489,0.449451,0.71129,0.682815,0.680332,0.671427,-0.086959,1.292546,1.011213,0.985228,...,1.25347,0.777838,1.089477,0.82982,1.147677,0.800976,1.2513,1.16459,1.183063,1.519177


**(2) Build the DNN calssifier**

In [0]:
X = eeg_feat
y = data_train['stim_1'] #y has been converted to type int32 as required by the classifier

feat_cols = []
for col in eeg_feat.columns:
  feat_cols.append(tf.feature_column.numeric_column(col))

In [0]:
type(y)

pandas.core.series.Series

In [0]:
classifier = tf.estimator.DNNClassifier(hidden_units = [100], n_classes = 3, feature_columns = feat_cols)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpym59k0bm', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fdc8fef4978>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


**(3) Train the classifier**

In [0]:
input_fun = tf.estimator.inputs.pandas_input_fn(x = X, y = y, batch_size = 50, shuffle = True)

In [0]:
classifier.train(input_fn = input_fun, steps=1000) #actually trains the classifier

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpym59k0bm/model.ckpt-544
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 544 into /tmp/tmpym59k0bm/model.ckpt.
INFO:tensorflow:loss = 78.10813, step = 545
INFO:tensorflow:global_step/sec: 84.8928
INFO:tensorflow:loss = 29.67332, step = 645 (1.184 sec)
INFO:tensorflow:global_step/sec: 90.3185
INFO:tensorflow:loss = 38.165367, step = 745 (1.114 sec)
INFO:tensorflow:global_step/sec: 91.0997
INFO:tensorflow:loss = 43.754623, step = 845 (1.088 sec)
INFO:tensorflow:Saving checkpoints for 938 into /tmp/tmpym59k0bm/model.ckpt.
INFO:tensorflow:Loss for final step: 40.500793.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7fdc8fef4128>

In [0]:
#predict results using the model on the test set
pred_fn = tf.estimator.inputs.pandas_input_fn(x=eeg_test2, batch_size=len(eeg_test2), shuffle=False)

In [0]:
note_predictions = list(classifier.predict(input_fn=pred_fn))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpym59k0bm/model.ckpt-938
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


In [0]:
final_preds = []
for pred in note_predictions:
  final_preds.append(pred['class_ids'][0])

**(4) Report results**

In [0]:
print(confusion_matrix(stim_2, final_preds))

[[6048 2080 1728]
 [2988 1299  961]
 [1616 1863 1113]]


In [0]:
print(classification_report(stim_2, final_preds))

             precision    recall  f1-score   support

          0       0.57      0.61      0.59      9856
          1       0.25      0.25      0.25      5248
          2       0.29      0.24      0.27      4592

avg / total       0.42      0.43      0.42     19696



In [0]:
#this was all collected at 200Hz
#so as live data is put in, the classifier would use a running average, if say over the last 100 samples (0.5 seconds) the average was strongly near 1,
#then that would be classified as intent to go right, and if the average prediction was near 2 then that would be left; if it was near 0 then that would
#be classified as no control command, etc.