In [58]:
import numpy as np
import math
import h5py
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

from sklearn import metrics

In [59]:
# Open the A01T recording read-only and keep a handle on its "image" dataset.
# NOTE(review): the file is never closed in the visible cells — confirm whether
# later cells still need `A01T` / `image` before wrapping this in a `with` block.
A01T = h5py.File("project_datasets/A01T_slice.mat", "r")
image = A01T['image']

# In-memory copy of the "image" dataset.
X = np.copy(image)

# Labels come from row 0 of "type", truncated to one entry per item along the
# first axis of `image`, and are stored as 32-bit integers.
y = np.asarray(np.copy(A01T["type"])[0, :image.shape[0]], dtype=np.int32)


In [60]:
# Show the dimensions of the data array and of the label vector, one per line.
print(X.shape, y.shape, sep="\n")

# The string below is the cell's last expression, so the notebook echoes it as
# the cell's output value.
"""
 288 trials; each trial has corresponding EEG data from 25 
 electrodes over 313 time bins
"""

(288, 25, 313)
(288,)


'\n 288 trials; each trial has corresponding EEG data from 25 \n electrodes over 313 time bins\n'

In [61]:
# Hold out 20% of the trials as a test set; the fixed random_state makes the
# partition repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [62]:
# Sanity check on the split: data shapes first, then the matching label shapes.
print(
    X_train.shape,
    X_test.shape,
    y_train.shape,
    y_test.shape,
    sep="\n",
)

(230, 25, 313)
(58, 25, 313)
(230,)
(58,)


In [63]:
#remove NaNs
def get_indices_to_remove(X):
    """Return the indices of the trials in ``X`` that contain at least one NaN.

    Parameters
    ----------
    X : array-like
        Numeric data whose first axis indexes trials. The notebook passes
        arrays of shape ``(trials, electrodes, timestamps)``, but any number
        of trailing axes is accepted.

    Returns
    -------
    set of int
        Positions along the first axis that hold one or more NaN values;
        an empty set when no NaN is present.
    """
    data = np.asarray(X)
    # Reduce over every axis except the first so each trial gets a single flag.
    # This replaces a per-element Python loop with one vectorised pass.
    trailing_axes = tuple(range(1, data.ndim))
    trial_has_nan = np.isnan(data).any(axis=trailing_axes)
    # .tolist() converts to built-in ints so the set does not hold NumPy scalars.
    return set(np.flatnonzero(trial_has_nan).tolist())

In [64]:
# Drop every trial that contains a NaN, together with its label, from both
# splits. Each split is scanned exactly once; the indices are sorted so the
# printed lists are deterministic.

#handle train sets
indices_to_remove_train = sorted(get_indices_to_remove(X_train))
print(set(indices_to_remove_train))
print("train indices", indices_to_remove_train)
X_train = np.delete(X_train, indices_to_remove_train, axis=0)
y_train = np.delete(y_train, indices_to_remove_train, axis=0)

#handle test sets
indices_to_remove_test = sorted(get_indices_to_remove(X_test))
print("test indices", indices_to_remove_test)
X_test = np.delete(X_test, indices_to_remove_test, axis=0)
y_test = np.delete(y_test, indices_to_remove_test, axis=0)

# Data and labels must stay aligned after the deletions.
assert X_train.shape[0] == y_train.shape[0], "train data/labels out of sync"
assert X_test.shape[0] == y_test.shape[0], "test data/labels out of sync"

print(X_train.shape)
print(y_train.shape)

print(X_test.shape)
print(y_test.shape)

set()
train indices []
test indices [14]
(230, 25, 313)
(230,)
(57, 25, 313)
(57,)


In [66]:
# Collapse the two trailing axes of every trial into one flat feature vector,
# giving 2-D (samples, features) arrays for the classifier below.
nsamples, nx, ny = X_train.shape
X_train_d2 = np.reshape(X_train, (nsamples, nx * ny))

# Same flattening for the held-out trials (nsamples/nx/ny are rebound here).
nsamples, nx, ny = X_test.shape
X_test_d2 = np.reshape(X_test, (nsamples, nx * ny))



# Random Classifier Baseline

In [75]:
# Chance-level reference: predict a uniformly random class for every trial and
# score it with macro-averaged F1.
# NOTE(review): the label codes below are assumed to be exactly the classes
# present in y — confirm against np.unique(y).
class_labels = [769, 770, 771, 772]

# Dedicated seeded generator so the baseline scores are reproducible across
# kernel restarts instead of depending on the global NumPy RNG state.
baseline_rng = np.random.RandomState(0)
random_class_assignments_train = baseline_rng.choice(class_labels, size=len(y_train))
random_class_assignments_test = baseline_rng.choice(class_labels, size=len(y_test))

f1_train = metrics.f1_score(y_train, random_class_assignments_train, average='macro')
f1_test = metrics.f1_score(y_test, random_class_assignments_test, average='macro')

print("Random classifier train F1: ", f1_train)
print("Random classifier test F1: ", f1_test)

Random classifier train F1:  0.24240313464
Random classifier test F1:  0.20953886693


# Basic MLP Baseline

In [76]:
# Hidden-layer layout handed to the network below.
hidden_config = (100,)

# Every hyper-parameter is spelled out explicitly, one per line, so the
# configuration can be read and diffed at a glance.
# NOTE(review): per the scikit-learn docs, momentum / nesterovs_momentum /
# power_t are presumably ignored when solver='adam' — confirm.
clf = MLPClassifier(
    hidden_layer_sizes=hidden_config,
    activation='relu',
    solver='adam',
    alpha=1e-05,
    batch_size='auto',
    learning_rate='constant',
    learning_rate_init=0.001,
    power_t=0.5,
    max_iter=300,
    shuffle=True,
    random_state=1,
    tol=0.0001,
    verbose=False,
    warm_start=False,
    momentum=0.9,
    nesterovs_momentum=True,
    early_stopping=False,
    validation_fraction=0.1,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
)

# Train on the flattened training trials; as the last expression of the cell,
# the value returned by fit() is what the notebook displays.
clf.fit(X_train_d2, y_train)

MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=300, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

In [78]:
# Score the fitted MLP on the flattened held-out trials with macro-averaged F1,
# the same metric used for the random baseline.
y_pred = clf.predict(X_test_d2)
f1 = metrics.f1_score(
    y_true=y_test,
    y_pred=y_pred,
    average='macro',
)

print("Dummy MLP F1-score: ", f1)

Dummy MLP F1-score:  0.412240537241
