In [212]:
import sklearn
import warnings 
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.utils import shuffle

In [213]:
warnings.filterwarnings("ignore", category=RuntimeWarning)

In [214]:
# Load data with specific id number
def load_data(id_num, not_id=False):
    data = np.load("eeg_data.npz")
    X = data['x']
    y = data['y']
    
    if not_id:
        index = [i for i in range(len(y)) if y[i] != id_num]
    else:
        index = [i for i in range(len(y)) if y[i] == id_num]
    
    output_data = []
    output_label = []
    
    for i in index:
        output_data.append(X[i])
        output_label.append(y[i])
        
    return output_data, output_label

In [215]:
# Compare seeing one number with rest 
def binary_all_channel(data, label, assigned_y):
    if len(data) != len(label):
        print("Something is wrong here")
        return
    
    output_data = []
    output_label = []
    
    for i in range(len(label)):
        output_label.append([assigned_y])
  
        feature = np.concatenate(data[i])
        feature = np.nan_to_num(feature)
        output_data.append(feature)
        
    return output_data, output_label    

In [216]:
def binary_selected_channel(data, label, assigned_y, brain_region): 
    output_data = []
    output_label = []
    channel_list = ['AF3', 'F7', 'F3', 'FC5', 'T7', 'P7', 'O1', 'O2', 'P8', 'T8', 'FC6', 'F4', 'F8', 'AF4']
    
    if len(data) != len(label):
        print("Something is wrong here")
        return
    if brain_region == "frontal":
        index = [i for i in range(len(channel_list)) if 'F' in channel_list[i]]
    if brain_region == "temporal":
        index = [i for i in range(len(channel_list)) if 'T' in channel_list[i]]
    if brain_region == "parietal":
        index = [i for i in range(len(channel_list)) if 'P' in channel_list[i]]
    if brain_region == "occipital":
        index = [i for i in range(len(channel_list)) if 'O' in channel_list[i]]
    
    for i in range(len(label)):
        output_label.append([assigned_y])
        feature = data[i][index[0]]
        for k in index[1:]:
            feature = np.append(feature, data[i][k])
        feature = np.nan_to_num(feature)
        output_data.append(feature)
        
    return output_data, output_label  

In [217]:
def multiclass_all_channel(data, label, id_num):
    if len(data) != len(label):
        print("Something is wrong here")
        return
    
    output_data = []
    output_label = []
    
    for i in range(len(label)):
        if label[i] != id_num and label[i] != -1:
            print("Something is wrong here")
            break
        output_label.append([label[i]])
        feature = np.concatenate(data[i])
        feature = np.nan_to_num(feature)
        output_data.append(feature)
        
    return output_data, output_label    

In [218]:
def least_squares(X, y):
    w = np.linalg.inv((X.transpose() @ X)) @ (X.transpose() @ y)
    return w

In [219]:
def reg_least_squares(X, y, k):
    u, s, vh = np.linalg.svd(X, full_matrices=True)
    if k == -2:
        s_ridged = [i/(i**2+0) for i in s]
    else:
        s_ridged = [i/(i**2+2**k) for i in s]
    
    s_matrix_ridged = np.zeros((vh.shape[0], u.shape[0]), float)
    np.fill_diagonal(s_matrix_ridged, s_ridged)
    
    w = vh.transpose() @ s_matrix_ridged @ u.transpose() @ y
    return w

In [220]:
def predict(X, w, mode):
    raw_val = X.transpose() @ w
    
    if mode == "binary":
        if raw_val >= 0:
            return 1
        if raw_val < 0:
            return -1
    if mode == "multiclass":
        return round(raw_val[0])

In [231]:
def cross_val(X, y, batch_size, classifier):
    error_arr = []
    subset_num = int(len(X)/batch_size)-1
    for i in range(subset_num):
        error = 0
        X_test = X[i*batch_size: (i+1)*batch_size]
        y_test = y[i*batch_size: (i+1)*batch_size]
        X_train = np.concatenate((X[0: i*batch_size], X[(i+1)*batch_size: len(X)]))
        y_train = np.concatenate((y[0: i*batch_size], y[(i+1)*batch_size: len(y)]))

        w = least_squares(X_train, y_train)
        for i in range(len(X_test)):
            result = predict(X_test[i], w, classifier)
            if result != y_test[i]:
                error = error + 1
        error_rate = error/batch_size
        error_arr.append(error_rate)
    
#     print ("Error rate of each iteration: " + str(error_arr))
    print ("Average error rate:" + str(np.average(error_arr)))

In [232]:
def compute_pca(data):
    pca = PCA()
    pca_data = pca.fit_transform(data)
    return pca_data

In [305]:
# Classify rest and seeing any number
# channel = ["all", "frontal", "temporal", "parietal", "occipital"]
def rest_verses_all_number(channel):
    class_1 = -1
    class_2 = -1

    data_1, label_1= load_data(class_1)
    if channel == "all":
        data_1, label_1 = binary_all_channel(data_1, label_1, 1)
    else: 
        data_1, label_1 = binary_selected_channel(data_1, label_1, 1, channel) 
            
    data_2, label_2 = load_data(class_2, not_id=True)
    if channel == "all":
        data_2, label_2 = binary_all_channel(data_2, label_2, -1)
    else:
        data_2, label_2 = binary_selected_channel(data_2, label_2, -1, channel)

    X = np.concatenate((data_1, data_2))
    y = np.concatenate((label_1, label_2))

    X_normalized = sklearn.preprocessing.normalize(X, norm='l2')
    X_pca = compute_pca(X_normalized)

    all_data, all_label = shuffle(X_pca, y)
    
    print("Digit 0 ~ 9 verses rest")
    cross_val(all_data, all_label, int(len(all_data)/10), "binary")

In [306]:
rest_verses_all_number("all")

Digit 0 ~ 9 verses rest
Average error rate:0.011827956989247311


In [307]:
# Classify rest and a number n (0-9)
def rest_verses_single_number(channel):
    for i in range(10):
        data_rest, label_rest = load_data(-1)
        if channel == "all":
            data_rest, label_rest = binary_all_channel(data_rest, label_rest, 1)
        else:
            data_rest, label_rest = binary_selected_channel(data_rest, label_rest, 1, channel) 

        data_num, label_num = load_data(i)
        
        if channel == "all":
            data_num, label_num = binary_all_channel(data_num, label_num, -1)
        else:
             data_num, label_num = binary_selected_channel(data_num, label_num, -1, channel) 

        X = np.concatenate((data_rest, data_num))
        y = np.concatenate((label_rest, label_num))

        X_normalized = sklearn.preprocessing.normalize(X, norm='l2')
        X_pca = compute_pca(X_normalized)

        all_data, all_label = shuffle(X_pca, y)
        print(len(all_data))

        print("Digit " + str(i) + " verses rest")
        cross_val(all_data, all_label, int(len(all_data)/10), "binary")

In [308]:
# Break down by brain regin binary
for i in ["frontal", "temporal", "parietal", "occipital"]:
    print(i)
    rest_verses_all_number(i)
    rest_verses_single_number(i)

frontal
Digit 0 ~ 9 verses rest
Average error rate:0.012724014336917562
1393
Digit 0 verses rest
Average error rate:0.019984012789768187
1405
Digit 1 verses rest
Average error rate:0.0253968253968254
1360
Digit 2 verses rest
Average error rate:0.026143790849673203
1367
Digit 3 verses rest
Average error rate:0.03104575163398693
1350
Digit 4 verses rest
Average error rate:0.025514403292181072
1412
Digit 5 verses rest
Average error rate:0.027580772261623324
1427
Digit 6 verses rest
Average error rate:0.03208137715179968
1332
Digit 7 verses rest
Average error rate:0.03341687552213868
1393
Digit 8 verses rest
Average error rate:0.027178257394084734
1393
Digit 9 verses rest
Average error rate:0.023181454836131096
temporal
Digit 0 ~ 9 verses rest
Average error rate:0.013351254480286737
1393
Digit 0 verses rest
Average error rate:0.11191047162270183
1405
Digit 1 verses rest
Average error rate:0.10555555555555556
1360
Digit 2 verses rest
Average error rate:0.1315359477124183
1367
Digit 3 verses

In [226]:
# Classify multiple class (0-9 and rest)
X, y = load_data(-1)
X, y = multiclass_all_channel(X, y, -1)

for i in range(0, 9):
    data_num, label_num = load_data(i)
    data_num, label_num = multiclass_all_channel(data_num, label_num, i)
    
    X = np.concatenate((X, data_num))
    y = np.concatenate((y, label_num))
    
X_normalized = sklearn.preprocessing.normalize(X, norm='l2')
X_pca = compute_pca(X_normalized)

all_data, all_label = shuffle(X_pca, y)
cross_val(all_data, all_label, 1000, "multiclass")

Error rate of each iteration: [0.863, 0.883, 0.894, 0.889, 0.898, 0.901, 0.885, 0.908, 0.904, 0.906]
Average error rate:0.8931000000000001


In [227]:
# Breakdown by brain regions
