In [81]:
import numpy as np
import pandas as pd
import sklearn

from sklearn.decomposition import PCA
from sklearn.utils import shuffle

In [52]:
# Load data with specific id number
def load_data(id_num, not_id=False):
    data = np.load("eeg_data.npz")
    X = data['x']
    y = data['y']
    
    if not_id:
        index = [i for i in range(len(y)) if y[i] != id_num]
    else:
        index = [i for i in range(len(y)) if y[i] == id_num]
    
    output_data = []
    output_label = []
    
    for i in index:
        output_data.append(X[i])
        output_label.append(y[i])
        
    return output_data, output_label

In [56]:
# Compare seeing one number with rest 
def binary_all_channel(data, label, assigned_y):
    if len(data) != len(label):
        print("Something is wrong here")
        return
    
    output_data = []
    output_label = []
    
    for i in range(len(label)):
        output_label.append([assigned_y])
  
        feature = np.concatenate(data[i])
        feature = np.nan_to_num(feature)
        output_data.append(feature)
        
    return output_data, output_label    

In [57]:
def multiclass_all_channel(data, label, id_num):
    if len(data) != len(label):
        print("Something is wrong here")
        return
    
    output_data = []
    output_label = []
    
    for i in range(len(label)):
        if label[i] != id_num and label[i] != -1:
            print("Something is wrong here")
            break
        output_label.append([label[i]])
        feature = np.concatenate(data[i])
        feature = np.nan_to_num(feature)
        output_data.append(feature)
        
    return output_data, output_label    

In [5]:
def least_squares(X, y):
    w = np.linalg.inv((X.transpose() @ X)) @ (X.transpose() @ y)
    return w

In [6]:
def reg_least_squares(X, y, k):
    u, s, vh = np.linalg.svd(X, full_matrices=True)
    if k == -2:
        s_ridged = [i/(i**2+0) for i in s]
    else:
        s_ridged = [i/(i**2+2**k) for i in s]
    
    s_matrix_ridged = np.zeros((vh.shape[0], u.shape[0]), float)
    np.fill_diagonal(s_matrix_ridged, s_ridged)
    
    w = vh.transpose() @ s_matrix_ridged @ u.transpose() @ y
#     w = np.linalg.inv((X.transpose() @ X)) @ (X.transpose() @ y)
    return w

In [7]:
def predict(X, w, mode):
    raw_val = X.transpose() @ w
    
    if mode == "binary":
        if raw_val >= 0:
            return 1
        if raw_val < 0:
            return -1
    if mode == "multiclass":
        return round(raw_val[0])

In [8]:
def cross_val(X, y, batch_size, classifier):
    error_arr = []
    subset_num = int(len(X)/batch_size)-1
    for i in range(subset_num):
        error = 0
        X_test = X[i*batch_size: (i+1)*batch_size]
        y_test = y[i*batch_size: (i+1)*batch_size]
        X_train = np.concatenate((X[0: i*batch_size], X[(i+1)*batch_size: len(X)]))
        y_train = np.concatenate((y[0: i*batch_size], y[(i+1)*batch_size: len(y)]))

        w = least_squares(X_train, y_train)
        for i in range(len(X_test)):
            result = predict(X_test[i], w, classifier)
            if result != y_test[i]:
                error = error + 1
        error_rate = error/batch_size
        error_arr.append(error_rate)
    
    print ("Error rate of each iteration: " + str(error_arr))
    print ("Average error rate:" + str(np.average(error_arr)))

In [13]:
def compute_pca(data):
    pca = PCA()
    pca_data = pca.fit_transform(data)
    return pca_data

In [83]:
# Classify rest and seeing any number
class_1 = -1
class_2 = -1

data_rest, label_rest = load_data(class_1)
data_rest, label_rest = binary_all_channel(data_rest, label_rest, 1)

data_4, label_4 = load_data(class_2, not_id=True)
data_4, label_4 = binary_all_channel(data_4, label_4, -1)

X = np.concatenate((data_rest, data_4))
y = np.concatenate((label_rest, label_4))

X_normalized = sklearn.preprocessing.normalize(X, norm='l2')
X_pca = compute_pca(X_normalized)

all_data, all_label = shuffle(X_pca, y)

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


In [84]:
cross_val(all_data, all_label, int(len(all_data)/10), "binary")

Error rate of each iteration: [0.012903225806451613, 0.012903225806451613, 0.012096774193548387, 0.010483870967741936, 0.00967741935483871, 0.012903225806451613, 0.016129032258064516, 0.01129032258064516, 0.012096774193548387]
Average error rate:0.012275985663082438


In [82]:
# Classify rest and a number n (0-9)
for i in range(10):
    data_rest, label_rest = load_data(-1)
    data_rest, label_rest = binary_all_channel(data_rest, label_rest, 1)

    data_num, label_num = load_data(i)
    data_num, label_num = binary_all_channel(data_num, label_num, -1)


    X = np.concatenate((data_rest, data_num))
    y = np.concatenate((label_rest, label_num))

    X_normalized = sklearn.preprocessing.normalize(X, norm='l2')
    X_pca = compute_pca(X_normalized)

    all_data, all_label = shuffle(X_pca, y)
    
    print("Digit" + str(i))
    cross_val(all_data, all_label, int(len(all_data)/10), "binary")

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit0
Error rate of each iteration: [0.050359712230215826, 0.02158273381294964, 0.02877697841726619, 0.02877697841726619, 0.03597122302158273, 0.03597122302158273, 0.03597122302158273, 0.02158273381294964, 0.014388489208633094]
Average error rate:0.030375699440447643


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit1
Error rate of each iteration: [0.014285714285714285, 0.02142857142857143, 0.014285714285714285, 0.02857142857142857, 0.05, 0.014285714285714285, 0.06428571428571428, 0.02857142857142857, 0.02857142857142857]
Average error rate:0.029365079365079365


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit2
Error rate of each iteration: [0.03676470588235294, 0.03676470588235294, 0.014705882352941176, 0.022058823529411766, 0.014705882352941176, 0.007352941176470588, 0.022058823529411766, 0.04411764705882353, 0.029411764705882353]
Average error rate:0.025326797385620915


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit3
Error rate of each iteration: [0.022058823529411766, 0.04411764705882353, 0.04411764705882353, 0.014705882352941176, 0.04411764705882353, 0.022058823529411766, 0.022058823529411766, 0.03676470588235294, 0.03676470588235294]
Average error rate:0.031862745098039214


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit4
Error rate of each iteration: [0.037037037037037035, 0.02962962962962963, 0.022222222222222223, 0.022222222222222223, 0.02962962962962963, 0.014814814814814815, 0.02962962962962963, 0.022222222222222223, 0.022222222222222223]
Average error rate:0.02551440329218107


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit5
Error rate of each iteration: [0.04964539007092199, 0.0070921985815602835, 0.02127659574468085, 0.02127659574468085, 0.028368794326241134, 0.0, 0.03546099290780142, 0.05673758865248227, 0.028368794326241134]
Average error rate:0.027580772261623324


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit6
Error rate of each iteration: [0.07042253521126761, 0.04225352112676056, 0.028169014084507043, 0.06338028169014084, 0.04929577464788732, 0.07042253521126761, 0.09154929577464789, 0.035211267605633804, 0.04225352112676056]
Average error rate:0.054773082942097026


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit7
Error rate of each iteration: [0.007518796992481203, 0.03007518796992481, 0.022556390977443608, 0.03007518796992481, 0.03007518796992481, 0.05263157894736842, 0.045112781954887216, 0.022556390977443608, 0.022556390977443608]
Average error rate:0.029239766081871343


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit8
Error rate of each iteration: [0.02158273381294964, 0.03597122302158273, 0.007194244604316547, 0.04316546762589928, 0.02877697841726619, 0.03597122302158273, 0.014388489208633094, 0.03597122302158273, 0.02877697841726619]
Average error rate:0.027977617905675458
Digit9
Error rate of each iteration: [0.014388489208633094, 0.014388489208633094, 0.04316546762589928, 0.014388489208633094, 0.02158273381294964, 0.02877697841726619, 0.04316546762589928, 0.014388489208633094, 0.014388489208633094]
Average error rate:0.023181454836131096


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


In [145]:
# Classify multiple class (0-9 and rest)

X, y = load_data(-1)
X, y = multiclass_all_channel(X, y, -1)

for i in range(0, 9):
    data_num, label_num = load_data(i)
    data_num, label_num = multiclass_all_channel(data_num, label_num, i)
    
    X = np.concatenate((X, data_num))
    y = np.concatenate((y, label_num))
    
X_normalized = sklearn.preprocessing.normalize(X, norm='l2')
X_pca = compute_pca(X_normalized)

all_data, all_label = shuffle(X_pca, y)
cross_val(all_data, all_label, 1000, "multiclass")

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Error rate of each iteration: [0.889, 0.907, 0.892, 0.872, 0.908, 0.897, 0.885, 0.906, 0.88, 0.892]
Average error rate:0.8928


In [143]:
len(X)

11167

In [107]:
len([i for i in y if i[0] == -1])

159