<a href="https://colab.research.google.com/github/YintongMa/EEGDataMining/blob/feat%2Fkernel-svm/%E2%80%9Csvm_ipynb%E2%80%9D%E7%9A%84%E5%89%AF%E6%9C%AC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from functools import lru_cache
from sklearn.svm import SVC


class svm():

    def __init__(self,
                 kernel="rbf", lmd=1e-1, gamma=0.1, bias=1.0, max_iter=100):
        if kernel not in self.__kernel_dict:
            print(kernel + " kernel does not exist!\nUse rbf kernel.")
            kernel = "rbf"
        if kernel == "rbf":
            def kernel_func(x, y):
                return self.__kernel_dict[kernel](x, y, gamma=gamma)
        else:
            kernel_func = self.__kernel_dict[kernel]
        self.kernel = kernel_func
        self.lmd = lmd
        self.bias = bias
        self.max_iter = max_iter

    def __linear_kernel(x, y):
        return np.dot(x, y)

    def __gaussian_kernel(x, y, gamma):
        diff = x - y
        return np.exp(-gamma * np.dot(diff, diff))

    __kernel_dict = {"linear": __linear_kernel, "rbf": __gaussian_kernel}

    def fit(self, X, y):
        def update_alpha(alpha, t):
            data_size, feature_size = np.shape(self.X_with_bias)
            new_alpha = np.copy(alpha)
            it = np.random.randint(low=0, high=data_size)
            x_it = self.X_with_bias[it]
            y_it = self.y[it]

            # alpha[k] = alpha[k] + eta[k] * (1 - myData.loc[k, 2] * sum(alpha * myData.loc[:, 2] * K[:, k]))
            if (y_it * (1. / (self.lmd * t)) * sum([alpha_j * y_it * self.kernel(x_it, x_j) for x_j, alpha_j in zip(self.X_with_bias, alpha)])) < 1.:
                new_alpha[it] += 1
            return new_alpha

        self.X_with_bias = np.c_[X, np.ones((np.shape(X)[0])) * self.bias]
        self.y = y
        alpha = np.zeros((np.shape(self.X_with_bias)[0], 1))

        for t in range(1, self.max_iter + 1):
            alpha = update_alpha(alpha, t)
        self.alpha = alpha
        return alpha


    def predict(self,X):
        X_with_bias = np.c_[X, np.ones((np.shape(X)[0])) * self.bias]

        y_score = []

        for x in X_with_bias:
            i = 0
            for (x_j, y_j, alpha_j) in zip(self.X_with_bias, self.y, self.alpha):
                i += alpha_j * y_j * self.kernel(x_j, x)
            y_score.append((1. / (self.lmd * self.max_iter)) * i)

        y_predict = []
        for s in y_score:
            if s >= 0.:
                y_predict.append(1)
            else:
                y_predict.append(-1)
        return y_predict






In [2]:
import numpy as np
import pandas as pd
import sklearn

from sklearn.decomposition import PCA
from sklearn.utils import shuffle

In [3]:
# Load data with specific id number
def load_data(id_num):
    data = np.load("/content/eeg_data.npz")
    X = data['x']
    y = data['y']
    
    index = [i for i in range(len(y)) if y[i] == id_num]
    
    output_data = []
    output_label = []
    
    for i in index:
        output_data.append(X[i])
        output_label.append(y[i])
        
    return output_data, output_label

In [4]:
# Compare seeing one number with rest 
def binary_all_channel(data, label, id_num):
    if len(data) != len(label):
        print("Something is wrong here")
        return
    
    output_data = []
    output_label = []
    
    for i in range(len(label)):
        if label[i] != id_num and label[i] != -1:
            print("Something is wrong here")
            break
        if label[i] != -1:
            output_label.append([1])
        else:
            output_label.append([-1])
        
        feature = np.concatenate(data[i])
        feature = np.nan_to_num(feature)
        output_data.append(feature)
        
    return output_data, output_label    

In [5]:
def predict(X, w, mode):
    raw_val = X.transpose() @ w
    
    if mode == "binary":
        if raw_val >= 0:
            return 1
        if raw_val < 0:
            return -1
    if mode == "multiclass":
        return round(raw_val[0])

In [36]:
def cross_val(X, y, batch_size):
    error_arr = []
    subset_num = int(len(X)/batch_size)-1
    for i in range(subset_num):
        print("batch: " + str(i))
        error = 0
        X_test = X[i*batch_size: (i+1)*batch_size]
        y_test = y[i*batch_size: (i+1)*batch_size]
        X_train = np.concatenate((X[0: i*batch_size], X[(i+1)*batch_size: len(X)]))
        y_train = np.concatenate((y[0: i*batch_size], y[(i+1)*batch_size: len(y)]))

        print('running svm')
        cf = svm(kernel="linear")
        cf.fit(X_train, y_train)

        print('predicting')
        result = cf.predict((X_test))
        for j in range(len(result)):
            if result[j] != y_test[j]:
                error = error + 1
        error_rate = error/batch_size
        error_arr.append(error_rate)

        print("error rate is " + str(error_rate))
        print()
    
    print ("Error rate of each iteration: " + str(error_arr))
    print ("Average error rate:" + str(np.average(error_arr)))



In [19]:
def compute_pca(data):
    pca = PCA()
    pca_data = pca.fit_transform(data)
    return pca_data

In [32]:
data = np.load("/content/eeg_data.npz")
data_x = data['x']
data_y = data['y']

X = []
y = []

for i in range(len(data_y)):
    if data_y[i] != -1:
        y.append(1)
    else:
        y.append(-1)

for i in data_x:
  X.append(np.nan_to_num(i.flatten()))



X_normalized = sklearn.preprocessing.normalize(X, norm='l2')

X_pca = compute_pca(X_normalized)
#X_pca = X_normalized

all_data, all_label = shuffle(X_pca, y)


  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)


(12401, 350)
(12401,)


In [38]:
cross_val(all_data, all_label, 1240)


batch: 0
running svm
predicting
error rate is 0.016935483870967744

batch: 1
running svm
predicting
error rate is 0.016129032258064516

batch: 2
running svm
predicting
error rate is 0.018548387096774192

batch: 3
running svm
predicting
error rate is 0.013709677419354839

batch: 4
running svm
predicting
error rate is 0.007258064516129033

batch: 5
running svm
predicting
error rate is 0.00967741935483871

batch: 6
running svm
predicting
error rate is 0.010483870967741936

batch: 7
running svm
predicting
error rate is 0.013709677419354839

batch: 8
running svm
predicting
error rate is 0.008870967741935484

Error rate of each iteration: [0.016935483870967744, 0.016129032258064516, 0.018548387096774192, 0.013709677419354839, 0.007258064516129033, 0.00967741935483871, 0.010483870967741936, 0.013709677419354839, 0.008870967741935484]
Average error rate:0.012813620071684587


In [39]:
def cross_val_rbf(X, y, batch_size):
    error_arr = []
    subset_num = int(len(X)/batch_size)-1
    for i in range(subset_num):
        print("batch: " + str(i))
        error = 0
        X_test = X[i*batch_size: (i+1)*batch_size]
        y_test = y[i*batch_size: (i+1)*batch_size]
        X_train = np.concatenate((X[0: i*batch_size], X[(i+1)*batch_size: len(X)]))
        y_train = np.concatenate((y[0: i*batch_size], y[(i+1)*batch_size: len(y)]))

        print('running svm')
        cf = svm(kernel="rbf")
        cf.fit(X_train, y_train)

        print('predicting')
        result = cf.predict((X_test))
        for j in range(len(result)):
            if result[j] != y_test[j]:
                error = error + 1
        error_rate = error/batch_size
        error_arr.append(error_rate)

        print("error rate is " + str(error_rate))
        print()
    
    print ("Error rate of each iteration: " + str(error_arr))
    print ("Average error rate:" + str(np.average(error_arr)))


In [40]:
cross_val_rbf(all_data, all_label, 1240)

batch: 0
running svm
predicting
error rate is 0.016935483870967744

batch: 1
running svm
predicting
error rate is 0.016129032258064516

batch: 2
running svm
predicting
error rate is 0.018548387096774192

batch: 3
running svm
predicting
error rate is 0.013709677419354839

batch: 4
running svm
predicting
error rate is 0.007258064516129033

batch: 5
running svm
predicting
error rate is 0.00967741935483871

batch: 6
running svm
predicting
error rate is 0.010483870967741936

batch: 7
running svm
predicting
error rate is 0.013709677419354839

batch: 8
running svm
predicting
error rate is 0.008870967741935484

Error rate of each iteration: [0.016935483870967744, 0.016129032258064516, 0.018548387096774192, 0.013709677419354839, 0.007258064516129033, 0.00967741935483871, 0.010483870967741936, 0.013709677419354839, 0.008870967741935484]
Average error rate:0.012813620071684587


In [37]:
data = np.load("eeg_data.npz")
X = data['x']
y = data['y']
print(y.shape)
ds = []
for i in X:
    ds.append(np.nan_to_num(i.flatten()))

print(np.array(ds).shape)

(12401,)
(12401, 350)


# 新段落