In [1]:
import pygad
import scipy.io as sio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from mlconfound.stats import partial_confound_test
from mlconfound.plot import plot_null_dist, plot_graph

In [21]:
# Load the MATLAB dataset used by the rest of the notebook. Exactly one of the
# two files should be active; the commented-out line selects the alternative
# "vowels" file instead of the "sen" file.
# DATA_ALL = sio.loadmat("../data/subjects_40_vowels_v6.mat")
DATA_ALL = sio.loadmat("../data/subjects_40_sen_fix_win1.0.mat")

In [22]:
# List the variable names stored in the loaded .mat file.
print(DATA_ALL.keys())

dict_keys(['__header__', '__version__', '__globals__', 'DATA', 'FEAT', 'FEAT_N', 'LABEL', 'LABEL_SEN', 'SUBJECT_ID', 'SUBJECT_SKINFOLD', 'SUBJECT_VFI'])


In [6]:
# Unpack the arrays stored in the .mat file into notebook-level names.
FEAT_N           = DATA_ALL['FEAT_N']            # Normalized features
LABEL            = DATA_ALL['LABEL']             # Labels
# The file loaded above exposes 'LABEL_SEN' (see the printed keys) and has no
# 'LABEL_VOWEL' entry, so indexing 'LABEL_VOWEL' unconditionally raises a
# KeyError on a fresh kernel. Prefer 'LABEL_VOWEL' when it exists (presumably
# in the vowels file -- TODO confirm) and fall back to 'LABEL_SEN' otherwise.
LABEL_VOWELS     = DATA_ALL['LABEL_VOWEL'] if 'LABEL_VOWEL' in DATA_ALL else DATA_ALL['LABEL_SEN']
VFI_1            = DATA_ALL['SUBJECT_VFI']
SUBJECT_ID       = DATA_ALL['SUBJECT_ID']        # Subject ID
SUBJECT_SKINFOLD = DATA_ALL['SUBJECT_SKINFOLD']

In [17]:
# Boolean mask over the first subject's samples: True where the entry in
# LABEL_VOWELS equals 1.
idx = (LABEL_VOWELS[0][0].ravel() == 1)

In [20]:
# Sanity check: apply the mask to the first subject's features and labels and
# report the resulting sizes.
X = FEAT_N[0, 0][idx, :]
print(X.shape)
Y = LABEL[0, 0].ravel()[idx]
print(Y.shape)
print(Y.size)

(55, 48)
(55,)
55


In [None]:
# ===== Leave-one-subject-out evaluation of an RBF-kernel SVM =====
# For each subject in turn: hold that subject out as the test set, pool the
# samples of every other subject, split the pool 90/10 into training and
# validation sets, fit the classifier, and record the three accuracies.

leftout = 1                  # Fraction of the held-out subject's samples used for testing
SEED = 42                    # Seeds the left-out sampling and the train/validation split
RUN_CONFOUND_TEST = False    # Set True to run partial_confound_test on every fold

# One generator for the whole cell so that re-running the cell reproduces the
# same left-out indices for every subject.
rng = np.random.default_rng(SEED)

# Derive the subject count from the data instead of hard-coding it.
n_subjects = FEAT_N.shape[0]

testing_acc  = np.zeros(n_subjects)
valid_acc    = np.zeros(n_subjects)
training_acc = np.zeros(n_subjects)
# NaN (not 0) marks "test not run": a p-value of 0 would otherwise be averaged
# and exported as if it were a real, highly significant result.
p_value      = np.full(n_subjects, np.nan)


def _subject_skinfold_mean(skinfold, subject):
    """Return the scalar mean of every skinfold value stored for one subject.

    Handles both a plain numeric row and a row of object-dtype entries that
    each wrap a numeric array (how scipy.io.loadmat represents MATLAB cells).
    """
    parts = [np.ravel(entry).astype(float) for entry in np.ravel(skinfold[subject])]
    return float(np.mean(np.concatenate(parts)))


def _pool_training_subjects(feat, label, skinfold, exclude):
    """Stack features, labels and the skinfold confound of all subjects but one.

    Parameters
    ----------
    feat, label : object arrays indexed as ``[subject, 0]``.
    skinfold    : per-subject skinfold measurements.
    exclude     : index of the subject to leave out of the pool.

    Returns
    -------
    (X, y, c) : float arrays with one row / entry per pooled sample. ``c``
    repeats the subject-level skinfold mean for each of that subject's
    samples so it stays aligned with ``y``.
    """
    xs, ys, cs = [], [], []
    for sub in range(feat.shape[0]):
        if sub == exclude:
            continue
        y_s = label[sub, 0].flatten()
        xs.append(feat[sub, 0])
        ys.append(y_s)
        # The confound is a subject-level quantity; broadcast it to one value
        # per sample, otherwise it cannot be split together with the labels.
        cs.append(np.full(y_s.shape, _subject_skinfold_mean(skinfold, sub)))
    # Concatenate once instead of growing the arrays inside the loop.
    return (np.concatenate(xs, axis=0).astype(float),
            np.concatenate(ys).astype(float),
            np.concatenate(cs))


for sub_test in range(n_subjects):
    print('\n===Exp No. %d===\n'%(sub_test+1))

    # .item() extracts the scalar explicitly; calling int() directly on a
    # size-1 ndarray is deprecated in recent NumPy releases.
    sub_txt = "R%03d"%(int(np.asarray(SUBJECT_ID[sub_test][0][0]).item()))
    print('Test Subject %s:'%(sub_txt))
    print('VFI-1:', (VFI_1[sub_test][0][0]))
    # Grouping threshold on the VFI-1 score (currently informational only).
    if int(np.asarray(VFI_1[sub_test][0][0]).item()) > 10:
        sub_group = 'Fatigued'
    else:
        sub_group = 'Healthy'

    # ===== Load Testing Signals =====
    X_Temp = FEAT_N[sub_test,0]
    Y_Temp = LABEL[sub_test,0].flatten()
    num_signal = np.shape(X_Temp)[0]

    num_leftout = round(leftout*num_signal)
    index_leftout = rng.choice(num_signal, size=num_leftout, replace=False)
    print("Left-out Test samples: ", index_leftout.size)

    X_Test = X_Temp[index_leftout,:]
    Y_Test = Y_Temp[index_leftout]

    # Samples of the test subject that were NOT left out (empty when
    # leftout == 1). Kept for inspection; not used for training below.
    index_include = np.delete(np.arange(num_signal), index_leftout)
    print("Included Training samples: ", index_include.size)
    X_include = X_Temp[index_include,:]
    Y_include = Y_Temp[index_include]

    # ===== Load Training Signals =====
    X_TV, Y_TV, C_TV = _pool_training_subjects(FEAT_N, LABEL, SUBJECT_SKINFOLD, sub_test)

    print('# of Healthy Samples: %d'%(np.sum(Y_TV == -1)))
    print('# of Fatigued Samples: %d'%(np.sum(Y_TV == 1)))

    # ===== Data loading and preprocessing =====
    # Training and Validation. All three arrays are split with the same
    # shuffled indices, so features, labels and confound stay aligned.
    # NOTE(review): the split is over pooled samples, so validation samples
    # come from the same subjects as training samples; only the test accuracy
    # is measured on an unseen subject.
    X_Train, X_Valid, Y_Train, Y_Valid, C_Train, C_Valid = train_test_split(
        X_TV, Y_TV, C_TV, test_size=0.1, random_state=SEED)

    clf = SVC(C=1.0, gamma='scale', kernel='rbf', class_weight='balanced', max_iter=1000, tol=0.001)
    clf.fit(X_Train, Y_Train)

    label_predict = clf.predict(X_Train)

    if RUN_CONFOUND_TEST:
        ret = partial_confound_test(Y_Train, label_predict, C_Train, progress=True)
        print('P value: ', ret.p)
        p_value[sub_test] = ret.p

    # accuracy_score takes (y_true, y_pred); each score is computed once and
    # both printed and stored.
    acc = accuracy_score(Y_Train, label_predict)
    print('Training Acc: ', acc)
    training_acc[sub_test] = acc

    label_predict = clf.predict(X_Valid)
    acc = accuracy_score(Y_Valid, label_predict)
    print('Validation Acc: ', acc)
    valid_acc[sub_test] = acc

    label_predict = clf.predict(X_Test)
    acc = accuracy_score(Y_Test, label_predict)
    print('Testing Acc: ', acc)
    testing_acc[sub_test] = acc

In [None]:
# Average training accuracy across all held-out-subject folds.
training_acc.mean()

In [None]:
# Average validation accuracy across all held-out-subject folds.
valid_acc.mean()

In [None]:
# Average accuracy on the held-out subjects across all folds.
testing_acc.mean()

In [None]:
# Average of the per-fold p-values. p_value is only written by the
# partial_confound_test step of the training loop; when that step is not
# executed this is just the mean of the placeholder values.
p_value.mean()

In [None]:
# Gather the per-fold metrics into one table: one row per held-out subject,
# one named column per metric. Building from a dict ties each array to its
# column name directly instead of relying on positional order after a
# transpose. pandas is imported in the notebook's top import cell.
df = pd.DataFrame({
    'Train': training_acc,
    'Valid': valid_acc,
    'Test': testing_acc,
    'P-Value': p_value,
})
# A bare last expression uses the notebook's rich table display.
df

In [None]:
# Column-wise mean of every metric over all folds.
df.mean()

In [None]:
# Persist the per-fold results table as a CSV file; the path is relative to
# the notebook's current working directory.
df.to_csv('RBF_SVM_LOO.csv')