# Notebook with SVM code

In [8]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from sklearn.svm import SVC
import numpy as np
from scipy import interp
import matplotlib.pyplot as plt
from itertools import cycle
from sklearn import svm, datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import StratifiedKFold
import os
import sys
import pandas as pd
import scipy.io as sio


sys.path.append('../')  # go to parent dir
from mrcode.preprocessing import experiment_data

In [11]:
# Load features X and labels y for the animate/inanimate target using the 'raw' load mode.
X, y = experiment_data.load_data_svm(
    load_mode='raw',
    target='animate',
)

X_train shape: (10350, 3520)
y_train shape: (10350,)
[0 0 0 ... 1 1 1]


### SVM RBF kernel

In [None]:
# Leave-one-subject-out evaluation of an SVC with its default (RBF) kernel.
# Assumes samples are stored in contiguous per-subject runs of 690 rows —
# TODO confirm against experiment_data.load_data_svm.
SAMPLES_PER_SUBJECT = 690
n_samples = len(y)
cv = list(range(0, n_samples, SAMPLES_PER_SUBJECT))  # first row of each held-out run
random_state = np.random.RandomState(0)
classifier = svm.SVC(random_state=random_state)
df = pd.DataFrame(columns=['scores_train', 'scores_test'])
all_idx = np.arange(n_samples)
for count, ii in enumerate(cv):
    # Clamp the end of the held-out run so a short final run cannot index past the data.
    test = list(range(ii, min(ii + SAMPLES_PER_SUBJECT, n_samples)))
    train = np.delete(all_idx, test, 0)
    clf = classifier.fit(X[train], y[train])
    scores_train = clf.score(X[train], y[train])
    scores_test = clf.score(X[test], y[test])
    # Written row by row on purpose: partial results stay in df if the run is interrupted.
    df.loc[count] = [scores_train, scores_test]
    print(str(count))

In [None]:
# Summary statistics of the per-fold train/test scores stored in df.
df.describe()

In [None]:
# Write the per-fold score table for the RBF-kernel run to disk.
results_csv = '../data/svm_training_leave_one_subject_out_rbf_kernel.csv'
df.to_csv(results_csv)

### SVM RBF - gamma parameter

In [4]:
# Load features X and labels y for the animate target using the 'std_overlap' load mode.
# Data is currently arranged in animate, animate, ..., inanimate order
# (calling load_data_generator_animate).
X, y = experiment_data.load_data_svm(
    load_mode='std_overlap',
    target='animate',
)

X_train shape: (10350, 1280)
y_train shape: (10350,)
[0 0 0 ... 1 1 1]


In [7]:
# Leave-one-subject-out evaluation of an RBF SVC with an explicit gamma of 0.0004.
# Assumes samples are stored in contiguous per-subject runs of 690 rows —
# TODO confirm against experiment_data.load_data_svm.
SAMPLES_PER_SUBJECT = 690
n_samples = len(y)
cv = list(range(0, n_samples, SAMPLES_PER_SUBJECT))  # first row of each held-out run
random_state = np.random.RandomState(0)
classifier = svm.SVC(random_state=random_state, gamma=0.0004)
df = pd.DataFrame(columns=['scores_train', 'scores_test'])
all_idx = np.arange(n_samples)
for count, ii in enumerate(cv):
    # Clamp the end of the held-out run so a short final run cannot index past the data.
    test = list(range(ii, min(ii + SAMPLES_PER_SUBJECT, n_samples)))
    train = np.delete(all_idx, test, 0)
    clf = classifier.fit(X[train], y[train])
    scores_train = clf.score(X[train], y[train])
    scores_test = clf.score(X[test], y[test])
    # Written row by row on purpose: partial results stay in df if the run is interrupted.
    df.loc[count] = [scores_train, scores_test]
    print(str(count))

0


KeyboardInterrupt: 

In [8]:
# Show whatever fold scores were written to df (the table is partial if the loop was interrupted).
print(df)

   scores_train  scores_test
0      0.625052     0.547826


### Importing ASR data 

In [4]:
# Switch the working directory to the ASR data folder and print it before and after.
# NOTE: os.chdir changes process-wide state. Relative paths such as '../data/...'
# used by other cells resolve against this new directory once this cell has run.
print(os.getcwd())

# Raw string: backslashes are literal path separators, so no escape-sequence
# processing applies (a plain literal containing '\d' is an invalid escape).
os.chdir(r'C:\Users\Greta\Documents\GitHub\Project-MindReading\data\ASR')
print(os.getcwd())


C:\Users\Greta\Documents\GitHub\Project-MindReading\mrcode
C:\Users\Greta\Documents\GitHub\Project-MindReading\data\ASR


In [5]:
# Read the MATLAB file 'ASRfile' from the current working directory.
# scipy's loadmat appends '.mat' when the name has no extension (appendmat defaults to True).
ASR = sio.loadmat('ASRfile')
A = ASR['A'] # Extract the variable stored under key 'A'


In [6]:
# Dimensions of the new (ASR) EEG data. The labels y (categories) are unchanged
# and sorted in the same way.
A.shape

(10350, 1600)

10350

### ASR SVM RBF

In [12]:
# Leave-one-subject-out evaluation of an RBF SVC on the ASR features A,
# using the labels y loaded earlier in the notebook.
# Assumes samples are stored in contiguous per-subject runs of 690 rows — TODO confirm.
SAMPLES_PER_SUBJECT = 690
n_samples = len(y)
# A and y come from different loaders; fail early if their row counts disagree.
assert A.shape[0] == n_samples, 'A and y must have the same number of samples'
cv = list(range(0, n_samples, SAMPLES_PER_SUBJECT))  # first row of each held-out run
gamma_val = (1/400)**2
random_state = np.random.RandomState(0)
classifier = svm.SVC(random_state=random_state, gamma=gamma_val)
df = pd.DataFrame(columns=['scores_train', 'scores_test'])
all_idx = np.arange(n_samples)
for count, ii in enumerate(cv):
    # Clamp the end of the held-out run so a short final run cannot index past the data.
    test = list(range(ii, min(ii + SAMPLES_PER_SUBJECT, n_samples)))
    train = np.delete(all_idx, test, 0)
    clf = classifier.fit(A[train], y[train])
    scores_train = clf.score(A[train], y[train])
    scores_test = clf.score(A[test], y[test])
    # Written row by row on purpose: partial results stay in df if the run is interrupted.
    df.loc[count] = [scores_train, scores_test]
    print(str(count))

0
1


KeyboardInterrupt: 

In [24]:
# gamma for the RBF kernel: the square of 1/400.
inv_scale = 1 / 400
gamma_val = inv_scale ** 2
print(gamma_val)

6.25e-06


In [13]:
# Show whatever fold scores were written to df (the table is partial if the loop was interrupted).
print(df)

   scores_train  scores_test
0      0.677847     0.584058
1      0.671843     0.595652


# Cross validation - RBF kernel 

In [None]:
# Leave-one-subject-out grid over the SVC regularisation parameter C (default RBF kernel).
# Assumes samples are stored in contiguous per-subject runs of 690 rows — TODO confirm.
SAMPLES_PER_SUBJECT = 690
n_samples = len(y)
cv = list(range(0, n_samples, SAMPLES_PER_SUBJECT))  # first row of each held-out run
# Per the scikit-learn SVC docs, random_state only affects the data shuffling used for
# probability estimates and is ignored when probability=False (the default used here).
random_state = np.random.RandomState(0)
df = pd.DataFrame(columns=['Subject no.', 'C value', 'scores_train', 'scores_test'])

# For a quick smoke test, shrink C_range and cv to one or two entries.
C_range = [0.001, 0.01, 0.1, 1, 10]

all_idx = np.arange(n_samples)
count = 0
for counter, ii in enumerate(cv):
    # The split depends only on the fold, so build it once rather than once per C.
    # Clamp the end of the held-out run so a short final run cannot index past the data.
    test = list(range(ii, min(ii + SAMPLES_PER_SUBJECT, n_samples)))
    train = np.delete(all_idx, test, 0)
    for C in C_range:
        classifier = svm.SVC(C=C, random_state=random_state)
        clf = classifier.fit(X[train], y[train])
        scores_train = clf.score(X[train], y[train])
        scores_test = clf.score(X[test], y[test])
        # One row per (subject, C); written as produced so partial results survive an interrupt.
        df.loc[count] = [counter+1, C, scores_train, scores_test]
        count += 1
        print(str(count))

In [None]:
# Show the per-subject, per-C score table stored in df.
print(df)

### SVM - linear kernel

In [None]:
# Leave-one-subject-out evaluation of an SVC with a linear kernel.
# Assumes samples are stored in contiguous per-subject runs of 690 rows —
# TODO confirm against experiment_data.load_data_svm.
SAMPLES_PER_SUBJECT = 690
n_samples = len(y)
cv = list(range(0, n_samples, SAMPLES_PER_SUBJECT))  # first row of each held-out run
random_state = np.random.RandomState(0)
classifier = svm.SVC(kernel='linear', random_state=random_state)
df = pd.DataFrame(columns=['scores_train', 'scores_test'])
all_idx = np.arange(n_samples)
for count, ii in enumerate(cv):
    # Clamp the end of the held-out run so a short final run cannot index past the data.
    test = list(range(ii, min(ii + SAMPLES_PER_SUBJECT, n_samples)))
    train = np.delete(all_idx, test, 0)
    clf = classifier.fit(X[train], y[train])
    scores_train = clf.score(X[train], y[train])
    scores_test = clf.score(X[test], y[test])
    # Written row by row on purpose: partial results stay in df if the run is interrupted.
    df.loc[count] = [scores_train, scores_test]
    print(str(count))

In [None]:
# Summary statistics of the per-fold train/test scores stored in df.
df.describe()

In [None]:
# Write the per-fold score table for the linear-kernel run to disk.
# NOTE(review): the path is relative to the current working directory — confirm it
# still points at the intended folder if os.chdir has been called in this session.
results_csv = '../data/svm_training_leave_one_subject_out_linear_kernel.csv'
df.to_csv(results_csv)

## Multiclass SVM

In [None]:
# The multiclass experiments need category labels, so load with target='category'.
X, y = experiment_data.load_data_svm(
    load_mode='raw_one_feature',
    target='category',
)

In [None]:
# Known issue: after loading with target='category', fitting fails with the error
# "Found array with 0 sample(s) (shape=(0, 17600)) while a minimum of 1 is required."

In [None]:
# Leave-one-subject-out grid over C for a one-vs-rest LinearSVC on the category labels.
# NOTE(review): folds are sized from len(y[0]) here, whereas the other cells in this
# notebook use len(y) — confirm the layout of y when target='category'.
SAMPLES_PER_SUBJECT = 690
n_samples = len(y[0])
cv = list(range(0, n_samples, SAMPLES_PER_SUBJECT))  # first row of each held-out run
random_state = np.random.RandomState(0)
df = pd.DataFrame(columns=['Subject no.', 'C value', 'scores_train', 'scores_test'])

# For a quick smoke test, shrink C_range and cv to a single entry.
C_range = [0.001, 0.01, 0.1, 1, 10]

all_idx = np.arange(n_samples)
row = 0  # running row label: one row per (subject, C) pair
for counter, ii in enumerate(cv):
    # The split depends only on the fold, so build it once rather than once per C.
    # Clamp the end of the held-out run so a short final run cannot index past the data.
    test = list(range(ii, min(ii + SAMPLES_PER_SUBJECT, n_samples)))
    train = np.delete(all_idx, test, 0)
    for count, C in enumerate(C_range):
        classifier = svm.LinearSVC(C=C, random_state=random_state, multi_class='ovr')
        clf = classifier.fit(X[train], y[train])
        scores_train = clf.score(X[train], y[train])
        scores_test = clf.score(X[test], y[test])
        # Use the running row label: indexing by `count` (0..len(C_range)-1) would make
        # every subject overwrite the rows written for the previous subject.
        df.loc[row] = [counter+1, C, scores_train, scores_test]
        row += 1
        print(str(count))

In [None]:
# Leave-one-subject-out evaluation of a one-vs-rest LinearSVC at its default C.
# Results are appended to the existing df, whose columns are
# 'Subject no.', 'C value', 'scores_train', 'scores_test'.
for counter, ii in enumerate(cv):
    classifier = svm.LinearSVC(random_state=0, multi_class='ovr')
    # Clamp the end of the held-out run so a short final run cannot index past the data.
    test = list(range(ii, min(ii + 690, len(y))))
    train = np.delete(np.arange(len(y)), test, 0)
    clf = classifier.fit(X[train], y[train])
    scores_train = clf.score(X[train], y[train])
    scores_test = clf.score(X[test], y[test])
    # Append under a fresh row label and record the C this estimator actually used.
    # Reusing `count` and `C` left over from an earlier cell would write every fold to
    # the same row with a C value unrelated to this classifier.
    df.loc[len(df)] = [counter+1, classifier.C, scores_train, scores_test]
    print(str(counter))

In [None]:
# Show the multiclass score table stored in df.
print(df)

## Take out categories - not working

In [2]:
# Load a ready-made train/test split with category labels ('std_overlap' load mode).
X_train, y_train, X_test, y_test = experiment_data.load_data_svm_cat(
    target='category',
    load_mode='std_overlap',
)

  categories = list(sorted_image_info[target].as_matrix()) # 0 is inanimate, 1 is animate


X_train shape: (9900, 1280)
y_train shape: (9900,)
X_test shape: (450, 1280)
y_test shape: (450,)
categories: ['airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'airplane', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bear', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bed', 'bench', 'bench', 'bench', 'bench', 'bench', 'bench', 'bench', 'bench', 'bench'

In [7]:
# Multinomial logistic regression on the pre-split category data; one row of scores is stored.
# NOTE(review): a test accuracy of exactly 0.0 would be expected if the test split only
# contains categories that never occur in the training split — confirm against load_data_svm_cat.
count = 1  # row label used for the single result row
classifier = LogisticRegression(
    random_state=0,
    solver='lbfgs',
    multi_class='multinomial',
)
df = pd.DataFrame(columns=['scores_train', 'scores_test'])

clf = classifier.fit(X_train, y_train)
scores_train, scores_test = (
    clf.score(X_train, y_train),
    clf.score(X_test, y_test),
)
df.loc[count] = [scores_train, scores_test]

# Error noted by the author for this cell: Bad input shape (9900, 23)


In [8]:
# Show the single-row score table produced by the logistic-regression cell.
print(df)

   scores_train  scores_test
1      0.133434          0.0


In [9]:
# One-vs-rest LinearSVC on the pre-split category data; one row of scores is stored.
# NOTE(review): a test accuracy of exactly 0.0 would be expected if the test split only
# contains categories that never occur in the training split — confirm against load_data_svm_cat.
count = 1  # row label used for the single result row
classifier = svm.LinearSVC(
    random_state=0,
    multi_class='ovr',
)
df = pd.DataFrame(columns=['scores_train', 'scores_test'])

clf = classifier.fit(X_train, y_train)
scores_train, scores_test = (
    clf.score(X_train, y_train),
    clf.score(X_test, y_test),
)
df.loc[count] = [scores_train, scores_test]



In [10]:
# Show the single-row score table produced by the LinearSVC cell.
print(df)

   scores_train  scores_test
1      0.112727          0.0
