In [51]:
import numpy as np
import pandas as pd
import sklearn

from sklearn.decomposition import PCA
from sklearn.utils import shuffle

In [52]:
# Load data with specific id number
def load_data(id_num, not_id=False):
    data = np.load("eeg_data.npz")
    X = data['x']
    y = data['y']
    
    if not_id:
        index = [i for i in range(len(y)) if y[i] != id_num]
    else:
        index = [i for i in range(len(y)) if y[i] == id_num]
    
    output_data = []
    output_label = []
    
    for i in index:
        output_data.append(X[i])
        output_label.append(y[i])
        
    return output_data, output_label

In [56]:
# Compare seeing one number with rest 
def binary_all_channel(data, label, assigned_y):
    if len(data) != len(label):
        print("Something is wrong here")
        return
    
    output_data = []
    output_label = []
    
    for i in range(len(label)):
        output_label.append([assigned_y])
  
        feature = np.concatenate(data[i])
        feature = np.nan_to_num(feature)
        output_data.append(feature)
        
    return output_data, output_label    

In [57]:
def multiclass_all_channel(data, label, id_num):
    if len(data) != len(label):
        print("Something is wrong here")
        return
    
    output_data = []
    output_label = []
    
    for i in range(len(label)):
        if label[i] != id_num and label[i] != -1:
            print("Something is wrong here")
            break
        output_label.append([label[i]])
        feature = np.concatenate(data[i])
        feature = np.nan_to_num(feature)
        output_data.append(feature)
        
    return output_data, output_label    

In [5]:
def least_squares(X, y):
    w = np.linalg.inv((X.transpose() @ X)) @ (X.transpose() @ y)
    return w

In [6]:
def reg_least_squares(X, y, k):
    u, s, vh = np.linalg.svd(X, full_matrices=True)
    if k == -2:
        s_ridged = [i/(i**2+0) for i in s]
    else:
        s_ridged = [i/(i**2+2**k) for i in s]
    
    s_matrix_ridged = np.zeros((vh.shape[0], u.shape[0]), float)
    np.fill_diagonal(s_matrix_ridged, s_ridged)
    
    w = vh.transpose() @ s_matrix_ridged @ u.transpose() @ y
#     w = np.linalg.inv((X.transpose() @ X)) @ (X.transpose() @ y)
    return w

In [7]:
def predict(X, w, mode):
    raw_val = X.transpose() @ w
    
    if mode == "binary":
        if raw_val >= 0:
            return 1
        if raw_val < 0:
            return -1
    if mode == "multiclass":
        return round(raw_val[0])

In [8]:
def cross_val(X, y, batch_size, classifier):
    error_arr = []
    subset_num = int(len(X)/batch_size)-1
    for i in range(subset_num):
        error = 0
        X_test = X[i*batch_size: (i+1)*batch_size]
        y_test = y[i*batch_size: (i+1)*batch_size]
        X_train = np.concatenate((X[0: i*batch_size], X[(i+1)*batch_size: len(X)]))
        y_train = np.concatenate((y[0: i*batch_size], y[(i+1)*batch_size: len(y)]))

        w = least_squares(X_train, y_train)
        for i in range(len(X_test)):
            result = predict(X_test[i], w, classifier)
            if result != y_test[i]:
                error = error + 1
        error_rate = error/batch_size
        error_arr.append(error_rate)
    
    print ("Error rate of each iteration: " + str(error_arr))
    print ("Average error rate:" + str(np.average(error_arr)))

In [13]:
def compute_pca(data):
    pca = PCA()
    pca_data = pca.fit_transform(data)
    return pca_data

In [78]:
class_1 = 1
class_2 = -1

data_rest, label_rest = load_data(class_1)
data_rest, label_rest = binary_all_channel(data_rest, label_rest, 1)

data_4, label_4 = load_data(class_2)
data_4, label_4 = binary_all_channel(data_4, label_4, -1)

X = np.concatenate((data_rest, data_4))
y = np.concatenate((label_rest, label_4))

X_normalized = sklearn.preprocessing.normalize(X, norm='l2')
X_pca = compute_pca(X_normalized)

all_data, all_label = shuffle(X_pca, y)

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


In [79]:
cross_val(all_data, all_label, 100, "binary")

Error rate of each iteration: [0.04, 0.03, 0.05, 0.07, 0.09, 0.05, 0.01, 0.05, 0.08, 0.05, 0.06, 0.09, 0.02]
Average error rate:0.053076923076923084


In [80]:
# Classify rest and a number n (0-9)
for i in range(10):
    data_rest, label_rest = load_data(-1)
    data_rest, label_rest = binary_all_channel(data_rest, label_rest, 1)

    data_num, label_num = load_data(i)
    data_num, label_num = binary_all_channel(data_num, label_num, -1)


    X = np.concatenate((data_rest, data_num))
    y = np.concatenate((label_rest, label_num))

    X_normalized = sklearn.preprocessing.normalize(X, norm='l2')
    X_pca = compute_pca(X_normalized)

    all_data, all_label = shuffle(X_pca, y)
    
    print("Digit" + str(i))
    cross_val(all_data, all_label, 100, "binary")

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit0
Error rate of each iteration: [0.0, 0.0, 0.02, 0.01, 0.01, 0.04, 0.02, 0.05, 0.06, 0.06, 0.03, 0.03]
Average error rate:0.027500000000000007


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit1
Error rate of each iteration: [0.03, 0.0, 0.01, 0.03, 0.03, 0.01, 0.04, 0.05, 0.04, 0.0, 0.02, 0.02, 0.03]
Average error rate:0.02384615384615385


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit2
Error rate of each iteration: [0.01, 0.0, 0.02, 0.03, 0.02, 0.07, 0.02, 0.05, 0.04, 0.04, 0.03, 0.03]
Average error rate:0.03


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit3
Error rate of each iteration: [0.03, 0.06, 0.04, 0.03, 0.05, 0.03, 0.05, 0.02, 0.06, 0.02, 0.01, 0.02]
Average error rate:0.03500000000000001


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit4
Error rate of each iteration: [0.03, 0.02, 0.03, 0.04, 0.01, 0.02, 0.01, 0.02, 0.0, 0.05, 0.05, 0.02]
Average error rate:0.024999999999999998


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit5
Error rate of each iteration: [0.06, 0.01, 0.05, 0.03, 0.03, 0.03, 0.01, 0.02, 0.0, 0.0, 0.03, 0.03, 0.06]
Average error rate:0.027692307692307697


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit6
Error rate of each iteration: [0.03, 0.05, 0.06, 0.02, 0.05, 0.03, 0.04, 0.07, 0.1, 0.01, 0.05, 0.08, 0.1]
Average error rate:0.05307692307692307


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit7
Error rate of each iteration: [0.03, 0.04, 0.05, 0.06, 0.01, 0.01, 0.04, 0.05, 0.0, 0.01, 0.08, 0.01]
Average error rate:0.0325


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit8
Error rate of each iteration: [0.02, 0.02, 0.04, 0.01, 0.02, 0.03, 0.06, 0.03, 0.01, 0.03, 0.03, 0.01]
Average error rate:0.025833333333333337


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Digit9
Error rate of each iteration: [0.03, 0.05, 0.03, 0.01, 0.02, 0.03, 0.02, 0.03, 0.03, 0.02, 0.02, 0.02]
Average error rate:0.025833333333333337


In [145]:
# Classify multiple class (0-9 and rest)

X, y = load_data(-1)
X, y = multiclass_all_channel(X, y, -1)

for i in range(0, 9):
    data_num, label_num = load_data(i)
    data_num, label_num = multiclass_all_channel(data_num, label_num, i)
    
    X = np.concatenate((X, data_num))
    y = np.concatenate((y, label_num))
    
X_normalized = sklearn.preprocessing.normalize(X, norm='l2')
X_pca = compute_pca(X_normalized)

all_data, all_label = shuffle(X_pca, y)
cross_val(all_data, all_label, 1000, "multiclass")

  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


Error rate of each iteration: [0.889, 0.907, 0.892, 0.872, 0.908, 0.897, 0.885, 0.906, 0.88, 0.892]
Average error rate:0.8928


In [143]:
len(X)

11167

In [107]:
len([i for i in y if i[0] == -1])

159