In [7]:
import numpy as np

from sklearn import svm
from sklearn import tree
from sklearn import linear_model
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import VotingClassifier, AdaBoostClassifier, BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.cross_validation import cross_val_score
from sklearn.decomposition import PCA 
from sklearn.utils import shuffle

import tensorflow as tf

from dataio import readdata, readlabels, writedata

In [8]:
# Load the curated splits from disk (features via readdata, labels via readlabels).
# -- training split
train_dataset, train_labels = (
    readdata("./curated/train_dataset"),
    readlabels("./curated/train_labels"),
)
# -- validation split (disabled)
#valid_dataset = readdata("./curated/valid_dataset")
#valid_labels = readlabels("./curated/valid_labels")
# -- test split
test_dataset, test_labels = (
    readdata("./curated/test_dataset"),
    readlabels("./curated/test_labels"),
)

# Report the feature/label array shapes of every loaded split.
for split_name, split_x, split_y in (
        ('Training:', train_dataset, train_labels),
        #('Validation:', valid_dataset, valid_labels),
        ('Testing:', test_dataset, test_labels)):
    print(split_name, split_x.shape, split_y.shape)

Training: (1106, 470, 8) (1106,)
Testing: (193, 470, 8) (193,)


In [9]:
# Input geometry. Each loaded example has shape (470, 8) -- see the shapes printed
# by the data-loading cell -- and is later flattened into one feature vector.
n_samples = 470   # sample points per channel
n_channels = 8    # EEG channels
n_input = n_channels * n_samples  # EEG data input (8 channels * 470 sample points = 3760)
n_classes = 5 # EEG total classes ("nothing", "up", "down", "left", "right")

In [10]:
def onehot(data, n_classes):
    """One-hot encode integer class labels.

    Parameters
    ----------
    data : array-like of int
        Class ids in ``[0, n_classes)``. Any shape is accepted and flattened
        first, so a column vector of shape ``(n, 1)`` is encoded row by row
        instead of being broadcast against the row index. Integral floats and
        plain Python lists are accepted as well.
    n_classes : int
        Number of columns in the encoded matrix.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(number of labels, n_classes)`` with a single
        1 per row, in the column given by that row's label.

    Raises
    ------
    ValueError
        If a label is not integer-valued.
    IndexError
        If a label lies outside ``[0, n_classes)``. Negative labels are
        rejected rather than silently wrapping around to the last columns.
    """
    labels = np.asarray(data).ravel()
    if labels.dtype.kind not in "iu":
        # Fancy indexing needs integers; only accept values that survive the cast.
        as_int = labels.astype(np.intp)
        if not np.array_equal(as_int, labels):
            raise ValueError("onehot: labels must be integer-valued")
        labels = as_int
    if labels.size and (labels.min() < 0 or labels.max() >= n_classes):
        raise IndexError(
            "onehot: labels must lie in [0, %d)" % n_classes)

    encoded = np.zeros((labels.size, n_classes))
    encoded[np.arange(labels.size), labels] = 1
    return encoded

# Flatten every example into a single feature vector of length n_input.
# reshape() is used rather than assigning to ``.shape``: in-place shape
# assignment is discouraged by NumPy and raises on arrays that cannot be
# reshaped without a copy, whereas reshape() copies only when it has to.
train_dataset = train_dataset.reshape(train_dataset.shape[0], n_input)
#valid_dataset = valid_dataset.reshape(valid_dataset.shape[0], n_input)
test_dataset = test_dataset.reshape(test_dataset.shape[0], n_input)
# Labels are kept as integer class ids; one-hot encoding is left disabled.
#train_labels = onehot(train_labels, n_classes)
#valid_labels = onehot(valid_labels, n_classes)
#test_labels = onehot(test_labels, n_classes)

print(train_dataset.shape)
print(test_dataset.shape)

(1106, 3760)
(193, 3760)


In [11]:
######## MODELS #########
# Fixed seed so the randomized estimators below give the same scores on every run.
SEED = 0


def fit_and_score(model, X_train, y_train, X_test, y_test):
    """Fit ``model`` on the training split and evaluate it on the test split.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
        Fitted in place.
    X_train, y_train : training features and labels.
    X_test, y_test : test features and labels.

    Returns
    -------
    tuple
        ``(predictions, accuracy)`` where ``predictions`` is the output of
        ``model.predict(X_test)`` and ``accuracy`` is
        ``accuracy_score(y_test, predictions)``.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    return predictions, accuracy_score(y_test, predictions)


#KNN
knn = KNeighborsClassifier(
    algorithm="auto",
    weights="uniform",
    n_neighbors=15)
knn_pred, knn_acc = fit_and_score(
    knn, train_dataset, train_labels, test_dataset, test_labels)

#LDA
lda = LinearDiscriminantAnalysis(
    solver="svd",
    store_covariance=False,
    tol = 0.0001)
lda_pred, lda_acc = fit_and_score(
    lda, train_dataset, train_labels, test_dataset, test_labels)

#Extra-Trees (extremely randomized trees; not a RandomForestClassifier)
forest = ExtraTreesClassifier(n_estimators = 1000, random_state=SEED)
forest_pred, forest_acc = fit_and_score(
    forest, train_dataset, train_labels, test_dataset, test_labels)

#SVM
svm_mod = svm.LinearSVC(
    C=1.0,
    penalty="l2",
    loss="squared_hinge",
    tol=0.0001,
    random_state=SEED)
svm_pred, svm_acc = fit_and_score(
    svm_mod, train_dataset, train_labels, test_dataset, test_labels)

#Ada
ada = AdaBoostClassifier(n_estimators = 50, random_state=SEED)
ada_pred, ada_acc = fit_and_score(
    ada, train_dataset, train_labels, test_dataset, test_labels)

#Logistic
lgr = linear_model.LogisticRegression(random_state=SEED)
lgr_pred, lgr_acc = fit_and_score(
    lgr, train_dataset, train_labels, test_dataset, test_labels)

# Test-set accuracy of every model.
print("Knn Acc: ", knn_acc)
print("Lda Acc: ", lda_acc)
print("For Acc: ", forest_acc)
print("Svm Acc: ", svm_acc)
print("Ada Acc: ", ada_acc)
print("Lgr Acc: ", lgr_acc)

Knn Acc:  0.689119170984
Lda Acc:  0.274611398964
For Acc:  0.720207253886
Svm Acc:  0.502590673575
Ada Acc:  0.430051813472
Lgr Acc:  0.512953367876




In [12]:
#Voting
# Hard-voting ensemble over the base models. Each entry is
# (label, estimator, vote weight); a weight of 0 means that model's vote
# does not count towards the majority.
weighted_members = [
    ("KNN", knn, 3),
    ("LDA", lda, 0),
    ("FOR", forest, 3),
    ("SVM", svm_mod, 1),
    ("ADA", ada, 0),
    ("LGR", lgr, 1),
]
vot = VotingClassifier(
    estimators=[(label, model) for label, model, _ in weighted_members],
    voting="hard",
    weights=[weight for _, _, weight in weighted_members])
vot.fit(train_dataset, train_labels)
vot_pred = vot.predict(test_dataset)

# Ensemble accuracy, followed by predicted and true test labels for comparison.
print(accuracy_score(test_labels, vot_pred))
print(vot_pred)
print(test_labels)

0.709844559585
[1 3 0 3 3 1 1 1 0 3 2 0 3 1 1 1 2 3 0 0 4 4 0 1 1 2 0 2 1 3 2 1 2 0 1 2 1
 0 0 0 2 1 3 4 2 0 1 0 3 1 3 4 2 3 3 0 4 4 4 1 3 3 0 1 0 0 3 1 4 1 4 1 4 4
 0 2 0 0 3 2 3 0 1 1 0 4 0 1 3 3 4 1 2 1 3 2 1 1 2 1 4 0 1 2 2 0 2 1 2 2 2
 2 3 2 0 2 1 0 1 0 3 2 0 2 3 2 2 4 1 4 0 3 1 2 4 1 3 0 0 3 0 3 3 0 1 1 2 0
 0 2 0 0 4 1 0 2 3 1 4 1 1 0 3 4 1 4 4 2 2 3 1 1 1 3 0 2 2 2 3 2 0 0 4 2 4
 3 4 1 1 1 3 0 4]
[2 3 0 3 3 3 1 3 4 4 4 0 3 3 1 1 2 3 0 2 3 4 0 2 1 2 0 2 1 3 2 1 2 0 3 2 2
 4 4 3 2 1 3 4 2 0 1 0 3 2 4 0 2 3 3 0 4 4 2 2 3 3 0 1 0 0 3 3 4 1 4 1 0 4
 0 2 4 3 3 2 3 4 1 1 4 4 0 1 3 3 3 1 2 1 3 2 3 1 2 1 4 0 1 2 3 0 2 2 1 4 2
 2 1 3 0 1 2 3 2 0 3 2 0 2 4 3 2 4 1 3 0 4 3 4 4 1 4 0 0 3 0 3 4 0 1 1 2 0
 0 2 0 0 4 1 0 2 4 1 4 1 1 0 3 4 1 4 4 4 2 3 2 1 1 4 0 4 4 2 4 2 4 3 4 2 4
 3 4 1 1 1 3 0 0]




In [None]:
#PCA
# Learn the 60-component projection from the training data only, then apply
# that same fitted projection to the test data. Calling fit_transform on the
# test set would refit PCA there, putting train and test features in
# different component spaces and leaking test statistics into preprocessing.
# NOTE: this overwrites train_dataset/test_dataset with the reduced features,
# so the cells below train on the projected data; reload and re-flatten the
# data before running this cell a second time.
pca = PCA(n_components=60)
train_dataset = pca.fit_transform(train_dataset)
test_dataset = pca.transform(test_dataset)
print(train_dataset.shape)
print(test_dataset.shape)

In [None]:
#SGD
# Linear classifier with hinge loss and L2 penalty, trained by stochastic
# gradient descent. random_state is pinned so the reported accuracy is
# reproducible from run to run.
clf = SGDClassifier(loss="hinge", penalty="l2", random_state=0)
clf.fit(train_dataset, train_labels)
clf_pred = clf.predict(test_dataset)
# Last expression of the cell: displays the test accuracy.
accuracy_score(test_labels, clf_pred)

In [None]:
#Perceptron
# (This cell was labelled "Logistic" but the model trained here is a Perceptron.)
# random_state is pinned so the reported accuracy is reproducible.
# NOTE(review): n_iter is the iteration-count argument of older scikit-learn
# releases; newer releases call it max_iter -- confirm against the installed version.
clf = linear_model.Perceptron(n_iter=100, random_state=0)
clf.fit(train_dataset, train_labels)
clf_pred = clf.predict(test_dataset)
clf_acc = accuracy_score(test_labels, clf_pred)
print(clf_acc)

In [None]:
#Bags
# Bagging ensemble of 15-nearest-neighbour classifiers; each member is built
# with max_samples=0.5 and max_features=0.5. random_state is pinned so the
# random sub-sampling, and hence the accuracy, is reproducible.
# NOTE: this rebinds knn / knn_pred / knn_acc, replacing the plain KNN model
# and results from the models cell; re-run that cell before re-running the
# voting cell if the plain KNN is wanted there.
knn = BaggingClassifier(
    KNeighborsClassifier(
        algorithm="auto",
        weights="uniform",
        n_neighbors=15),
    max_samples=0.5,
    max_features=0.5,
    random_state=0)
knn.fit(train_dataset, train_labels)
knn_pred = knn.predict(test_dataset)
knn_acc = accuracy_score(test_labels, knn_pred)
print(knn_acc)