In [1]:
import pygad
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt

from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from mlconfound.stats import partial_confound_test
from mlconfound.plot import plot_null_dist, plot_graph

In [2]:
DATA_ALL = sio.loadmat("data/subjects_40_v6.mat")

In [3]:
FEAT_N           = DATA_ALL['FEAT_N']            # Normalized features
LABEL            = DATA_ALL['LABEL']             # Labels
VFI_1            = DATA_ALL['SUBJECT_VFI']
SUBJECT_ID       = DATA_ALL['SUBJECT_ID']        # Sujbect ID
SUBJECT_SKINFOLD = DATA_ALL['SUBJECT_SKINFOLD']

In [4]:
leftout = 1
testing_acc  = np.zeros(40)
valid_acc    = np.zeros(40)
training_acc = np.zeros(40)
p_value      = np.zeros(40)

sub_index = [1]
for sub_test in range(40):
    print('\n===Exp No. %d===\n'%(sub_test+1))
    
    sub_txt = "R%03d"%(int(SUBJECT_ID[sub_test][0][0]))
    print('Test Subject %s:'%(sub_txt))
    print('VFI-1:', (VFI_1[sub_test][0][0]))
    if int(VFI_1[sub_test][0][0]) > 10:
        sub_group = 'Fatigued'
    else:
        sub_group = 'Healthy'

    # ===== Load Testing Signals =====
    num_signal = np.shape(FEAT_N[sub_test,0])[0]    
    X_Temp = FEAT_N[sub_test,0]
    Y_Temp = LABEL[sub_test,0].flatten()

    num_leftout = round(leftout*num_signal)
    index_leftout = np.random.choice(range(num_signal), size=num_leftout, replace=False)
    print("Left-out Test samples: ", index_leftout.size)

    X_Test = X_Temp[index_leftout,:]
    Y_Test = Y_Temp[index_leftout]

    index_include = np.arange(num_signal)
    index_include = np.delete(index_include, index_leftout)
    print("Included Training samples: ", index_include.size)
    X_include = X_Temp[index_include,:]
    Y_include = Y_Temp[index_include]


    # ===== Load Traing Signals =====
    X_TV = np.zeros((0,48))
    Y_TV = np.zeros(0)    
    C_TV = np.zeros(0)
    for sub_train in range(40):
        if sub_train != sub_test:
            x_s = FEAT_N[sub_train,0]
            y_s = LABEL[sub_train,0].flatten()
            c_s = np.mean(np.mean(SUBJECT_SKINFOLD[sub_train,:]), axis=1)
            # ===== CAN BE CONVERTED INTO A FUNCTION =====
            X_TV = np.concatenate((X_TV, x_s), axis=0)
            Y_TV = np.concatenate((Y_TV, y_s), axis=0)
            C_TV = np.concatenate((C_TV, c_s), axis=0)       

    print('# of Healthy Samples: %d'%(np.sum(Y_TV == -1)))
    print('# of Fatigued Samples: %d'%(np.sum(Y_TV == 1)))    

    # ===== Data loading and preprocessing =====
    # Training and Validation
    X_Train, X_Valid, YC_Train, YC_Valid = train_test_split(X_TV, 
                                                            np.transpose([Y_TV, C_TV]), 
                                                            test_size=0.1, 
                                                            random_state=42)
    Y_Train, C_Train = YC_Train[:,0], YC_Train[:,1]
    Y_Valid, C_Valid = YC_Valid[:,0], YC_Valid[:,1]    
    
    clf = SVC(C=1.0, gamma='scale', kernel='rbf', class_weight='balanced', max_iter=1000, tol=0.001)
    clf.fit(X_Train, Y_Train)
    
    label_predict = clf.predict(X_Train)
    
#     ret = partial_confound_test(Y_Train, label_predict, C_Train, progress=True)  
#     print('P value: ', ret.p)
#     p_value[sub_test] = ret.p
    
    print('Training Acc: ', accuracy_score(label_predict, Y_Train))
    training_acc[sub_test] = accuracy_score(label_predict, Y_Train)

    label_predict = clf.predict(X_Valid)
    print('Validation Acc: ', accuracy_score(label_predict, Y_Valid))
    valid_acc[sub_test] = accuracy_score(label_predict, Y_Valid)

    label_predict = clf.predict(X_Test)
    print('Testing Acc: ', accuracy_score(label_predict, Y_Test))
    testing_acc[sub_test] = accuracy_score(label_predict, Y_Test)


===Exp No. 1===

Test Subject R044:
VFI-1: [0]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270




Training Acc:  0.9695207892882312
Validation Acc:  0.9635499207606973
Testing Acc:  0.20606060606060606

===Exp No. 2===

Test Subject R041:
VFI-1: [2]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3038
# of Fatigued Samples: 3270




Training Acc:  0.9531442663378545
Validation Acc:  0.9429477020602218
Testing Acc:  0.9573170731707317

===Exp No. 3===

Test Subject R081:
VFI-1: [0]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270




Training Acc:  0.9589499647639183
Validation Acc:  0.9540412044374009
Testing Acc:  0.7090909090909091

===Exp No. 4===

Test Subject R085:
VFI-1: [1]
Left-out Test samples:  162
Included Training samples:  0
# of Healthy Samples: 3040
# of Fatigued Samples: 3270




Training Acc:  0.9535129424194401
Validation Acc:  0.9492868462757528
Testing Acc:  0.5185185185185185

===Exp No. 5===

Test Subject R008:
VFI-1: [3]
Left-out Test samples:  162
Included Training samples:  0
# of Healthy Samples: 3040
# of Fatigued Samples: 3270




Training Acc:  0.9505194576509949
Validation Acc:  0.9445324881141046
Testing Acc:  0.8765432098765432

===Exp No. 6===

Test Subject R024:
VFI-1: [4]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270




Training Acc:  0.9564834390415786
Validation Acc:  0.9461172741679873
Testing Acc:  0.9939393939393939

===Exp No. 7===

Test Subject R034:
VFI-1: [0]
Left-out Test samples:  163
Included Training samples:  0
# of Healthy Samples: 3039
# of Fatigued Samples: 3270




Training Acc:  0.9533286368439592
Validation Acc:  0.9508716323296355
Testing Acc:  0.27607361963190186

===Exp No. 8===

Test Subject R029:
VFI-1: [1]
Left-out Test samples:  150
Included Training samples:  0
# of Healthy Samples: 3052
# of Fatigued Samples: 3270




Training Acc:  0.9592195464932326
Validation Acc:  0.9605055292259084
Testing Acc:  0.94

===Exp No. 9===

Test Subject R052:
VFI-1: [1]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3038
# of Fatigued Samples: 3270




Training Acc:  0.9621278844460102
Validation Acc:  0.9540412044374009
Testing Acc:  0.5060975609756098

===Exp No. 10===

Test Subject R039:
VFI-1: [4]
Left-out Test samples:  163
Included Training samples:  0
# of Healthy Samples: 3039
# of Fatigued Samples: 3270




Training Acc:  0.9598450158506516
Validation Acc:  0.9524564183835182
Testing Acc:  0.4171779141104294

===Exp No. 11===

Test Subject R088:
VFI-1: [4]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3038
# of Fatigued Samples: 3270




Training Acc:  0.9570195525805884
Validation Acc:  0.9334389857369255
Testing Acc:  1.0

===Exp No. 12===

Test Subject R092:
VFI-1: [4]
Left-out Test samples:  158
Included Training samples:  0
# of Healthy Samples: 3044
# of Fatigued Samples: 3270




Training Acc:  0.9577613516367476
Validation Acc:  0.9319620253164557
Testing Acc:  0.2721518987341772

===Exp No. 13===

Test Subject R016:
VFI-1: [4]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270




Training Acc:  0.9556025369978859
Validation Acc:  0.9477020602218701
Testing Acc:  1.0

===Exp No. 14===

Test Subject R002:
VFI-1: [0]
Left-out Test samples:  160
Included Training samples:  0
# of Healthy Samples: 3042
# of Fatigued Samples: 3270




Training Acc:  0.9568661971830986
Validation Acc:  0.935126582278481
Testing Acc:  0.83125

===Exp No. 15===

Test Subject R084:
VFI-1: [1]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270




Training Acc:  0.9591261451726568
Validation Acc:  0.9381933438985737
Testing Acc:  0.7212121212121212

===Exp No. 16===

Test Subject R040:
VFI-1: [4]
Left-out Test samples:  163
Included Training samples:  0
# of Healthy Samples: 3039
# of Fatigued Samples: 3270




Training Acc:  0.9528002817893625
Validation Acc:  0.9381933438985737
Testing Acc:  1.0

===Exp No. 17===

Test Subject R037:
VFI-1: [0]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270




Training Acc:  0.9492600422832981
Validation Acc:  0.93026941362916
Testing Acc:  1.0

===Exp No. 18===

Test Subject R090:
VFI-1: [3]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270




Training Acc:  0.9538407329105003
Validation Acc:  0.9445324881141046
Testing Acc:  0.9757575757575757

===Exp No. 19===

Test Subject R061:
VFI-1: [2]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270




Training Acc:  0.9575405214940098
Validation Acc:  0.9477020602218701
Testing Acc:  0.896969696969697

===Exp No. 20===

Test Subject R010:
VFI-1: [4]
Left-out Test samples:  109
Included Training samples:  0
# of Healthy Samples: 3093
# of Fatigued Samples: 3270




Training Acc:  0.9537198742577716
Validation Acc:  0.9340659340659341
Testing Acc:  1.0

===Exp No. 21===

Test Subject R057:
VFI-1: [17]
Left-out Test samples:  152
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3118




Training Acc:  0.9511251758087201
Validation Acc:  0.9319620253164557
Testing Acc:  0.881578947368421

===Exp No. 22===

Test Subject R058:
VFI-1: [11]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105




Training Acc:  0.9538407329105003
Validation Acc:  0.9413629160063391
Testing Acc:  0.6242424242424243

===Exp No. 23===

Test Subject R011:
VFI-1: [23]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105




Training Acc:  0.9545454545454546
Validation Acc:  0.9445324881141046
Testing Acc:  0.21818181818181817

===Exp No. 24===

Test Subject R019:
VFI-1: [22]
Left-out Test samples:  160
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3110




Training Acc:  0.956338028169014
Validation Acc:  0.9224683544303798
Testing Acc:  0.48125

===Exp No. 25===

Test Subject R021:
VFI-1: [18]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3106




Training Acc:  0.9575480007045974
Validation Acc:  0.9445324881141046
Testing Acc:  0.6524390243902439

===Exp No. 26===

Test Subject R030:
VFI-1: [11]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3106




Training Acc:  0.9645939756913863
Validation Acc:  0.9540412044374009
Testing Acc:  0.2073170731707317

===Exp No. 27===

Test Subject R032:
VFI-1: [11]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105




Training Acc:  0.9545454545454546
Validation Acc:  0.936608557844691
Testing Acc:  1.0

===Exp No. 28===

Test Subject R043:
VFI-1: [19]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105




Training Acc:  0.9511980267794221
Validation Acc:  0.9350237717908082
Testing Acc:  0.9515151515151515

===Exp No. 29===

Test Subject R045:
VFI-1: [20]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105




Training Acc:  0.9552501761804087
Validation Acc:  0.9318541996830428
Testing Acc:  1.0

===Exp No. 30===

Test Subject R047:
VFI-1: [24]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105




Training Acc:  0.9612403100775194
Validation Acc:  0.9461172741679873
Testing Acc:  0.6424242424242425

===Exp No. 31===

Test Subject R083:
VFI-1: [15]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105




Training Acc:  0.9589499647639183
Validation Acc:  0.9413629160063391
Testing Acc:  0.23636363636363636

===Exp No. 32===

Test Subject R071:
VFI-1: [13]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105




Training Acc:  0.954369274136716
Validation Acc:  0.9381933438985737
Testing Acc:  1.0

===Exp No. 33===

Test Subject R055:
VFI-1: [14]
Left-out Test samples:  163
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3107




Training Acc:  0.9513913349771046
Validation Acc:  0.9397781299524565
Testing Acc:  1.0

===Exp No. 34===

Test Subject R050:
VFI-1: [17]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105




Training Acc:  0.9529598308668076
Validation Acc:  0.9318541996830428
Testing Acc:  0.9030303030303031

===Exp No. 35===

Test Subject R063:
VFI-1: [23]
Left-out Test samples:  162
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3108




Training Acc:  0.9515759816869167
Validation Acc:  0.9429477020602218
Testing Acc:  0.7222222222222222

===Exp No. 36===

Test Subject R056:
VFI-1: [28]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105




Training Acc:  0.9533121916842847
Validation Acc:  0.9318541996830428
Testing Acc:  1.0

===Exp No. 37===

Test Subject R059:
VFI-1: [20]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105




Training Acc:  0.9540169133192389
Validation Acc:  0.9381933438985737
Testing Acc:  1.0

===Exp No. 38===

Test Subject R069:
VFI-1: [11]
Left-out Test samples:  162
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3108




Training Acc:  0.9554499031519633
Validation Acc:  0.9318541996830428
Testing Acc:  0.7160493827160493

===Exp No. 39===

Test Subject R046:
VFI-1: [18]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3106




Training Acc:  0.9605425400739828
Validation Acc:  0.9477020602218701
Testing Acc:  0.7378048780487805

===Exp No. 40===

Test Subject R049:
VFI-1: [28]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3106




Training Acc:  0.9568434032059187
Validation Acc:  0.9461172741679873
Testing Acc:  0.8048780487804879


In [5]:
np.mean(training_acc)

0.9559956463180279

In [6]:
np.mean(valid_acc)

0.9419474276683761

In [7]:
np.mean(testing_acc)

0.7469364312625681

In [8]:
np.mean(p_value)

0.0

In [9]:
import pandas as pd

data_array = np.array([training_acc, valid_acc, testing_acc, p_value]).T
df = pd.DataFrame(data_array, columns = ['Train', 'Valid', 'Test', 'P-Value'])
print(df)

       Train     Valid      Test  P-Value
0   0.969521  0.963550  0.206061      0.0
1   0.953144  0.942948  0.957317      0.0
2   0.958950  0.954041  0.709091      0.0
3   0.953513  0.949287  0.518519      0.0
4   0.950519  0.944532  0.876543      0.0
5   0.956483  0.946117  0.993939      0.0
6   0.953329  0.950872  0.276074      0.0
7   0.959220  0.960506  0.940000      0.0
8   0.962128  0.954041  0.506098      0.0
9   0.959845  0.952456  0.417178      0.0
10  0.957020  0.933439  1.000000      0.0
11  0.957761  0.931962  0.272152      0.0
12  0.955603  0.947702  1.000000      0.0
13  0.956866  0.935127  0.831250      0.0
14  0.959126  0.938193  0.721212      0.0
15  0.952800  0.938193  1.000000      0.0
16  0.949260  0.930269  1.000000      0.0
17  0.953841  0.944532  0.975758      0.0
18  0.957541  0.947702  0.896970      0.0
19  0.953720  0.934066  1.000000      0.0
20  0.951125  0.931962  0.881579      0.0
21  0.953841  0.941363  0.624242      0.0
22  0.954545  0.944532  0.218182  

In [10]:
df.mean(axis=0)

Train      0.955996
Valid      0.941947
Test       0.746936
P-Value    0.000000
dtype: float64

In [11]:
df.to_csv('RBF_SVM_LOO.csv')