In [None]:
import scipy.io
import numpy as np
import itertools
from scipy.fftpack import fft, fftfreq
import matplotlib.pyplot as plt
from scipy.signal import butter, lfilter
import math
from scipy import stats
import csv
from sklearn import svm
from sklearn.metrics import confusion_matrix

In [None]:
## import dataset
# Load the MATLAB file and take the first row of its 'subject' variable.
# The len() calls below treat it as indexed [subject][condition][sampling].
mat = scipy.io.loadmat('SSVEPDataset.mat')
data = mat['subject'][0]
number_of_subjects = len(data)
number_of_conditions = len(data[0])
number_of_samplings = len(data[0][0])
# Single-argument, pre-formatted prints produce the same text on Python 2 and 3.
print("Data includes %s subjects :" % number_of_subjects)
print("( %s conditions per subject )" % number_of_conditions)
print("( %s samplings per condition )" % number_of_samplings)

In [None]:
def get_data(data, condition_id):
    """Band-pass filter one condition for every subject and drop the leading samples.

    For each entry of ``data`` the recording at index ``condition_id - 1`` is
    flattened into a 1-D sequence, filtered with a Butterworth band-pass
    (``order = 2``, band edges 7 and 8 divided by the Nyquist frequency) using
    ``lfilter``, and the first ``number_of_filter_out_samplings`` samples are
    discarded.

    Relies on the module-level names ``number_of_samplings``,
    ``number_of_filter_out_samplings`` and ``number_of_samplings_per_sec``.

    Parameters
    ----------
    data : sequence
        Indexed as ``data[subject][condition]``; each condition is an iterable
        of iterables of samples.
    condition_id : int
        1-based condition number.

    Returns
    -------
    numpy.ndarray
        2-D float array with one row per entry of ``data`` and
        ``number_of_samplings - number_of_filter_out_samplings`` columns.
    """
    # The filter depends only on the sampling rate, so design it once instead
    # of once per subject.
    nyq = 0.5 * number_of_samplings_per_sec
    low = 7.0 / nyq
    high = 8.0 / nyq
    order = 2
    b, a = butter(order, [low, high], btype='band')

    # Size the output from the data actually passed in rather than from a global.
    data_selected = np.zeros((len(data), number_of_samplings - number_of_filter_out_samplings))
    for i, d in enumerate(data):
        # one loop is one subject: flatten the nested sample arrays to 1-D
        join_list = list(itertools.chain.from_iterable(d[condition_id - 1]))

        # bandpass filter
        f = lfilter(b, a, join_list)

        # drop the leading samples (original note: "first-nine second")
        data_selected[i] = f[number_of_filter_out_samplings:]

    # Pre-formatted prints give the same text on Python 2 and 3.
    print("Select data from condition # %s" % (condition_id,))
    print("Size of data is %s subjects with %s samplings per subject."
          % (data_selected.shape[0], data_selected.shape[1]))

    return data_selected

In [None]:
def perform_fft(data_selected):
    """Track the spectrum magnitude at a fixed frequency over sliding windows.

    For every row of ``data_selected`` the signal is cut into
    ``number_of_slide_windows`` windows of ``window_size`` seconds, each window
    starting ``number_of_samplings_per_sec`` samples after the previous one.
    Each window is transformed with ``fft`` and the magnitude at the bin
    closest to the target frequency for this ``window_size`` is recorded.
    The per-row series is then z-score normalised and plotted; the spectrum of
    the last window of every row is plotted as well.

    Relies on the module-level names ``number_of_slide_windows``,
    ``window_size`` and ``number_of_samplings_per_sec``.

    Parameters
    ----------
    data_selected : sequence of 1-D arrays
        One signal per subject.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(data_selected), number_of_slide_windows)`` with
        the z-scored magnitudes. A fresh array is allocated on every call, so
        results of separate calls never alias each other.

    Raises
    ------
    ValueError
        If no target frequency is configured for the current ``window_size``.
    """
    # Target frequency per window length, carried over from the original
    # if/elif chain. NOTE(review): the original author reported the peak for
    # the 8 s window around 7.56-7.58 rather than 7.5 -- confirm the value.
    target_freq_by_window = {5: 7.6, 6: 7.5, 8: 7.5, 10: 7.6}
    if window_size not in target_freq_by_window:
        raise ValueError("No target frequency configured for window_size = %s" % (window_size,))
    fix_freq = target_freq_by_window[window_size]

    samples_per_window = window_size * number_of_samplings_per_sec

    # Allocate the result locally; the original wrote into a global that was
    # never created here, so successive calls shared (and overwrote) one array.
    fft_out_max_list = np.zeros((len(data_selected), number_of_slide_windows))

    #FFT
    for i, d in enumerate(data_selected):
        print("==== FFT with subjects # %s ====" % i)
        for index in range(0, number_of_slide_windows):
            #one loop per window
            start = index * number_of_samplings_per_sec
            fft_out = fft(d[start : start + samples_per_window])
            magnitude = np.abs(fft_out)

            freqs = fftfreq(len(fft_out)) * number_of_samplings_per_sec

            # Pick the bin nearest the target instead of testing floats for
            # equality: an exact match can be missed through rounding, which
            # left an empty selection and made the assignment fail.
            target_bin = np.argmin(np.abs(freqs - fix_freq))
            fft_out_max_list[i][index] = magnitude[target_bin]

            if index == number_of_slide_windows - 1:
                #plot FFT of the last window
                fig, ax = plt.subplots()
                ax.plot(freqs, magnitude)
                ax.set_xlabel('Frequency in Hertz [Hz]')
                ax.set_ylabel('Frequency Domain (Spectrum) Magnitude')
                ax.set_xlim(1, 15)
                ax.set_ylim(1, 2500)
                plt.grid()
                plt.show()

        #z-score normalization
        fft_out_max_list[i] = stats.zscore(fft_out_max_list[i])

        plt.plot(fft_out_max_list[i], 'ro')
        plt.xlabel('Window no.')
        plt.ylabel('Max Spectrum Magnitude')
        plt.grid()
        plt.show()

    return fft_out_max_list

In [None]:
def fit_curve(x, y, degree):
    """Fit one polynomial of the given degree to every row of ``y``.

    Parameters
    ----------
    x : sequence
        Either a flat sequence of abscissae shared by all rows of ``y``, or a
        sequence of sequences giving a separate abscissa vector per row.
    y : sequence of sequences
        One series of ordinates per row.
    degree : int
        Polynomial degree passed to ``np.polyfit``.

    Returns
    -------
    list
        One coefficient array (highest power first, as returned by
        ``np.polyfit``) per row of ``y``.
    """
    # Decide shared-vs-per-row from the structure of x itself. Comparing
    # len(x) with len(y[0]) misfires when the number of rows happens to equal
    # the number of points per row.
    shared_x = len(x) > 0 and np.ndim(x[0]) == 0

    coefs = []
    # Iterate over the rows actually supplied instead of a global subject count.
    for i in range(len(y)):
        xi = x if shared_x else x[i]
        coefs.append(np.polyfit(xi, y[i], degree))

    print('Curve fitting done!')
    return coefs

In [None]:
def get_div(x, coef):
    """Evaluate the first derivative of a polynomial at ``x``.

    ``coef`` holds the polynomial coefficients, highest power first (the
    ``np.poly1d`` convention); ``x`` is the point, or array of points, at
    which the derivative is evaluated.
    """
    derivative = np.polyder(np.poly1d(coef))
    return derivative(x)

In [None]:
def get_features(coef):
    """Build the feature matrix from the slopes of the fitted polynomials.

    For each of the ``number_of_subjects`` subjects, the derivative of the
    polynomial ``coef[i]`` is evaluated (through ``get_div``) at
    ``x = 1 .. number_of_selected_windows``.

    Relies on the module-level names ``number_of_subjects`` and
    ``number_of_selected_windows``.

    Parameters
    ----------
    coef : sequence
        One polynomial coefficient vector per subject.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(number_of_subjects, number_of_selected_windows)``.
    """
    # Evaluate all window positions of one subject in a single call.
    window_positions = np.arange(1, number_of_selected_windows + 1)

    features = np.zeros(shape = (number_of_subjects, number_of_selected_windows))
    for i in range(number_of_subjects):
        features[i] = get_div(window_positions, coef[i])

    # shape[1] is safe even with zero subjects; same text on Python 2 and 3.
    print('get %s features per each subject in this condition.' % features.shape[1])
    return features

In [None]:
def plot_curve(coef, fft_out, cond):
    """Scatter-plot each fitted polynomial against the data it was fitted to.

    One figure is drawn per entry of ``coef``: the polynomial evaluated at
    ``x = 1 .. number_of_slide_windows`` (blue squares) together with the
    matching row of ``fft_out`` (red circles). ``cond`` is only used in the
    figure title. Relies on the module-level ``number_of_slide_windows``.
    """
    window_numbers = range(1, number_of_slide_windows + 1)

    for subject_index, subject_coef in enumerate(coef):
        fitted_poly = np.poly1d(subject_coef)
        fitted_values = [fitted_poly(window) for window in window_numbers]

        fig = plt.figure()
        axes = fig.add_subplot(111)
        axes.scatter(window_numbers, fitted_values,
                     s=10, c='b', marker="s", label='curve fitting')
        axes.scatter(window_numbers, fft_out[subject_index],
                     s=10, c='r', marker="o", label='actual data')
        plt.title('Subject#' + str(subject_index) + ', Condition#' + str(cond))
        plt.legend(loc='upper left')
        plt.show()

In [None]:
def transform_features(features):
    """Stack per-condition feature matrices into one sample matrix with labels.

    ``features[c][s]`` (condition ``c``, subject ``s``) is written to row
    ``number_of_subjects * c + s`` and that row is labelled ``c``.

    Relies on the module-level names ``number_of_subjects``,
    ``number_of_selected_conditions`` and ``number_of_selected_windows``.

    Returns
    -------
    (transformed_x, y)
        ``transformed_x`` has shape
        ``(number_of_subjects * number_of_selected_conditions, number_of_selected_windows)``;
        ``y`` has the same number of rows and one column. ``y`` is created with
        ``np.empty``, so rows that are never visited keep arbitrary values.
    """
    total_rows = number_of_subjects * number_of_selected_conditions
    transformed_x = np.zeros(shape=(total_rows, number_of_selected_windows))
    y = np.empty(shape=(total_rows, 1))

    for condition_index, condition_features in enumerate(features):
        row_offset = number_of_subjects * condition_index
        for subject_index, subject_features in enumerate(condition_features):
            row = row_offset + subject_index
            transformed_x[row] = subject_features
            y[row] = condition_index

    return transformed_x, y

In [None]:
def train_test_split(x, y, test_size = 1, r=0):
    """Leave-one-subject-out split of the stacked feature matrix.

    Rows of ``x``/``y`` are expected in the layout produced by
    ``transform_features``: row ``number_of_subjects * c + s`` belongs to
    condition ``c`` and subject ``s``. Every row of subject ``r`` (one per
    condition) goes to the test set; all other rows form the training set.

    Relies on the module-level names ``number_of_subjects`` and
    ``number_of_selected_conditions``.

    Parameters
    ----------
    x : array-like, 2-D
        Feature rows.
    y : array-like
        Labels, one per row of ``x`` (shape ``(n,)`` or ``(n, 1)``).
    test_size : int
        Number of held-out subjects. Only ``1`` is supported.
    r : int
        Index of the held-out subject, ``0 <= r < number_of_subjects``.

    Returns
    -------
    (x_tr, x_te, y_tr, y_te, r)
        Float arrays; the label arrays are flattened to 1-D.

    Raises
    ------
    ValueError
        If ``test_size`` is not 1, ``r`` is out of range, or ``x`` has fewer
        rows than ``number_of_subjects * number_of_selected_conditions``.
    """
    # The original preallocated arrays only line up for a single held-out
    # subject; other values overflowed or left zero-filled rows.
    if test_size != 1:
        raise ValueError("only test_size = 1 is supported, got %s" % (test_size,))
    if not 0 <= r < number_of_subjects:
        raise ValueError("r must be in [0, %s), got %s" % (number_of_subjects, r))

    x = np.asarray(x)
    y = np.asarray(y)
    expected_rows = number_of_subjects * number_of_selected_conditions
    if len(x) < expected_rows:
        raise ValueError("x has %s rows, expected at least %s" % (len(x), expected_rows))

    # One test row per selected condition (previously hard-coded for exactly three).
    test_rows = [r + number_of_subjects * c for c in range(number_of_selected_conditions)]

    print("Subject # %s is test set." % r)
    print('test : ' + ' '.join(str(row) for row in test_rows))

    is_test = np.zeros(len(x), dtype=bool)
    is_test[test_rows] = True

    # Boolean indexing keeps the original row order within each split.
    x_tr = np.array(x[~is_test], dtype=float)
    x_te = np.array(x[is_test], dtype=float)
    y_tr = np.array(y[~is_test], dtype=float).reshape(-1)
    y_te = np.array(y[is_test], dtype=float).reshape(-1)

    return x_tr, x_te, y_tr, y_te, r

In [None]:
## perform SVM
# Leave-one-subject-out cross-validation with svm.SVC: each round holds out
# every row of one subject and trains on the remaining subjects.
#setting parameters
number_of_cv_rounds = number_of_subjects
test_size = 1

all_correct = 0
all_incorrect = 0
x, y = transform_features(features)
for i in range(0, number_of_cv_rounds):
    X_train, X_test, y_train, y_test, subject_id = train_test_split(
        x = x, y = y, test_size=test_size, r = i)
    print('y_test %s' % (y_test,))
    clf = svm.SVC()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # predict() returns one class label per sample, so compare labels
    # directly. np.argmax of a scalar label is always 0, which scored a sample
    # as correct only when its true label was 0, whatever was predicted.
    correct = int(np.sum(np.asarray(y_test) == np.asarray(y_pred)))
    incorrect = len(y_test) - correct
    all_correct = all_correct + correct
    all_incorrect = all_incorrect + incorrect

    # Pre-formatted prints give the same text on Python 2 and 3.
    print('y_pred %s' % (y_pred,))
    print('correct: %s' % correct)
    print('incorrect: %s' % incorrect)
    print('')

print('All correct: %s' % all_correct)
print('All incorrect: %s' % all_incorrect)

In [None]:
## perform linear SVM
# Leave-one-subject-out cross-validation with svm.LinearSVC: each round holds
# out every row of one subject and trains on the remaining subjects.
#setting parameters
number_of_cv_rounds = number_of_subjects
test_size = 1

all_correct = 0
all_incorrect = 0
x, y = transform_features(features)
for i in range(0, number_of_cv_rounds):
    X_train, X_test, y_train, y_test, subject_id = train_test_split(
        x = x, y = y, test_size=test_size, r = i)
    print('y_test %s' % (y_test,))
    clf = svm.LinearSVC()
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # predict() returns one class label per sample, so compare labels
    # directly. np.argmax of a scalar label is always 0, which scored a sample
    # as correct only when its true label was 0, whatever was predicted.
    correct = int(np.sum(np.asarray(y_test) == np.asarray(y_pred)))
    incorrect = len(y_test) - correct
    all_correct = all_correct + correct
    all_incorrect = all_incorrect + incorrect

    # Pre-formatted prints give the same text on Python 2 and 3.
    print('y_pred %s' % (y_pred,))
    print('correct: %s' % correct)
    print('incorrect: %s' % incorrect)
    print('')

print('All correct: %s' % all_correct)
print('All incorrect: %s' % all_incorrect)