In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pygad
import scipy.io as sio

from mlconfound.plot import plot_null_dist, plot_graph
from mlconfound.stats import partial_confound_test
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC, SVC

In [2]:
# Read the MATLAB file into a dict keyed by variable name, then show the
# available keys so the arrays picked out in the next cell can be checked.
DATA_ALL = sio.loadmat(file_name="subjects_40_v6.mat")
DATA_ALL.keys()

dict_keys(['__header__', '__version__', '__globals__', 'DATA', 'FEAT', 'FEAT_N', 'LABEL', 'LABEL_VOWEL', 'SUBJECT_ID', 'SUBJECT_SKINFOLD', 'SUBJECT_VFI', 'VOWEL_REP'])

In [3]:
# Pull the arrays used by the experiment out of the loaded .mat dict.
#   FEAT_N           - normalized features
#   LABEL            - labels
#   VFI_1            - stored under 'SUBJECT_VFI'
#   SUBJECT_ID       - subject ID
#   SUBJECT_SKINFOLD - stored under 'SUBJECT_SKINFOLD'
FEAT_N, LABEL, VFI_1, SUBJECT_ID, SUBJECT_SKINFOLD = (
    DATA_ALL[mat_key]
    for mat_key in ('FEAT_N', 'LABEL', 'SUBJECT_VFI', 'SUBJECT_ID', 'SUBJECT_SKINFOLD')
)

In [14]:
# Leave-one-subject-out evaluation of a linear SVM.
#
# For every subject: hold that subject out as the test set, pool the samples
# of all remaining subjects, split the pool 90/10 into train/validation, fit a
# LinearSVC, run mlconfound's partial confound test on the training
# predictions (confounder: the per-sample value derived from SUBJECT_SKINFOLD),
# and record train / validation / test accuracy plus the test's p-value.
#
# Results are written to `training_acc`, `valid_acc`, `testing_acc` and
# `p_value`, one entry per held-out subject.

SEED = 42                    # one seed for every stochastic step so a fresh-kernel re-run reproduces the numbers
VFI_FATIGUE_THRESHOLD = 10   # VFI-1 values above this are tagged 'Fatigued', otherwise 'Healthy'

leftout = 1                  # fraction of the held-out subject's samples used for testing
n_subjects = FEAT_N.shape[0] # subjects are indexed along the first axis of FEAT_N

testing_acc  = np.zeros(n_subjects)
valid_acc    = np.zeros(n_subjects)
training_acc = np.zeros(n_subjects)
p_value      = np.zeros(n_subjects)

rng = np.random.default_rng(SEED)

for sub_test in range(n_subjects):
    print('\n===Exp No. %d===\n'%(sub_test+1))

    # .item() extracts the single stored value explicitly; calling int() on a
    # 1-element array directly is deprecated in recent NumPy releases.
    sub_txt = "R%03d"%(int(np.asarray(SUBJECT_ID[sub_test][0][0]).item()))
    print('Test Subject %s:'%(sub_txt))
    print('VFI-1:', (VFI_1[sub_test][0][0]))
    # NOTE(review): sub_group is computed but not used anywhere in this cell.
    if int(np.asarray(VFI_1[sub_test][0][0]).item()) > VFI_FATIGUE_THRESHOLD:
        sub_group = 'Fatigued'
    else:
        sub_group = 'Healthy'

    # ===== Load Testing Signals =====
    X_Temp = FEAT_N[sub_test,0]
    Y_Temp = LABEL[sub_test,0].flatten()
    num_signal = X_Temp.shape[0]

    # Randomly choose which of the held-out subject's samples are tested.
    # With leftout = 1 this selects every sample (in shuffled order).
    num_leftout = round(leftout*num_signal)
    index_leftout = rng.choice(num_signal, size=num_leftout, replace=False)
    print("Left-out Test samples: ", index_leftout.size)

    X_Test = X_Temp[index_leftout,:]
    Y_Test = Y_Temp[index_leftout]

    # Samples of the held-out subject that were NOT selected for testing.
    # NOTE(review): X_include / Y_include are never added to the training
    # pool below, so choosing leftout < 1 simply discards them - confirm
    # whether they were meant to be used for training.
    index_include = np.delete(np.arange(num_signal), index_leftout)
    print("Included Training samples: ", index_include.size)
    X_include = X_Temp[index_include,:]
    Y_include = Y_Temp[index_include]

    # ===== Load Training Signals =====
    # Collect the per-subject arrays in lists and concatenate once, instead of
    # re-copying the growing arrays on every iteration.
    x_parts, y_parts, c_parts = [], [], []
    for sub_train in range(n_subjects):
        if sub_train == sub_test:
            continue
        x_s = FEAT_N[sub_train,0]
        y_s = LABEL[sub_train,0].flatten()
        # Expression kept exactly as in the original.
        # NOTE(review): this only works if the inner mean returns a 2-D array
        # (e.g. SUBJECT_SKINFOLD is an object array of per-sample matrices),
        # so that c_s ends up with one value per sample - confirm the layout.
        c_s = np.mean(np.mean(SUBJECT_SKINFOLD[sub_train,:]), axis=1)
        x_parts.append(x_s)
        y_parts.append(y_s)
        c_parts.append(c_s)

    # Cast to float to match the float accumulators the original started from.
    X_TV = np.concatenate(x_parts, axis=0).astype(float)
    Y_TV = np.concatenate(y_parts, axis=0).astype(float)
    C_TV = np.concatenate(c_parts, axis=0).astype(float)

    print('# of Healthy Samples: %d'%(np.sum(Y_TV == -1)))
    print('# of Fatigued Samples: %d'%(np.sum(Y_TV == 1)))

    # ===== Data loading and preprocessing =====
    # Training and Validation
    # Labels and confounder are stacked column-wise so both are split with
    # the same row permutation.
    # NOTE(review): the split is over pooled samples, not over subjects, so
    # samples of one subject can land in both train and validation; the
    # validation accuracy is therefore not a subject-independent estimate.
    X_Train, X_Valid, YC_Train, YC_Valid = train_test_split(X_TV,
                                                            np.transpose([Y_TV, C_TV]),
                                                            test_size=0.1,
                                                            random_state=SEED)
    Y_Train, C_Train = YC_Train[:,0], YC_Train[:,1]
    Y_Valid, C_Valid = YC_Valid[:,0], YC_Valid[:,1]

    clf = LinearSVC(class_weight='balanced', max_iter=1000, tol=0.001, random_state=SEED)
    clf.fit(X_Train, Y_Train)

    # Partial confound test on the training predictions.
    # NOTE(review): labels and predictions take the values -1 / 1; check
    # whether the test's cat_y / cat_yhat options should be enabled.
    train_pred = clf.predict(X_Train)
    ret = partial_confound_test(Y_Train, train_pred, C_Train,
                                random_state=SEED, progress=True)
    print('P value: ', ret.p)
    p_value[sub_test] = ret.p

    # accuracy_score takes (y_true, y_pred); each score is computed once.
    training_acc[sub_test] = accuracy_score(Y_Train, train_pred)
    print('Training Acc: ', training_acc[sub_test])

    valid_pred = clf.predict(X_Valid)
    valid_acc[sub_test] = accuracy_score(Y_Valid, valid_pred)
    print('Validation Acc: ', valid_acc[sub_test])

    test_pred = clf.predict(X_Test)
    testing_acc[sub_test] = accuracy_score(Y_Test, test_pred)
    print('Testing Acc: ', testing_acc[sub_test])


===Exp No. 1===

Test Subject R044:
VFI-1: [0]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:08<00:00, 113.55it/s]


P value:  0.106
Training Acc:  0.8893587033121917
Validation Acc:  0.9049128367670365
Testing Acc:  0.3696969696969697

===Exp No. 2===

Test Subject R041:
VFI-1: [2]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3038
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:07<00:00, 134.52it/s]


P value:  0.003
Training Acc:  0.8897304914567553
Validation Acc:  0.8795562599049128
Testing Acc:  0.22560975609756098

===Exp No. 3===

Test Subject R081:
VFI-1: [0]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:07<00:00, 132.30it/s]


P value:  0.125
Training Acc:  0.8766737138830162
Validation Acc:  0.8906497622820919
Testing Acc:  0.8181818181818182

===Exp No. 4===

Test Subject R085:
VFI-1: [1]
Left-out Test samples:  162
Included Training samples:  0
# of Healthy Samples: 3040
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:07<00:00, 132.96it/s]


P value:  0.0
Training Acc:  0.8834301813699595
Validation Acc:  0.9033280507131537
Testing Acc:  0.35802469135802467

===Exp No. 5===

Test Subject R008:
VFI-1: [3]
Left-out Test samples:  162
Included Training samples:  0
# of Healthy Samples: 3040
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:07<00:00, 133.37it/s]


P value:  0.047
Training Acc:  0.8753301637612255
Validation Acc:  0.8969889064976229
Testing Acc:  0.9938271604938271

===Exp No. 6===

Test Subject R024:
VFI-1: [4]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:07<00:00, 125.46it/s]


P value:  0.001
Training Acc:  0.8798449612403101
Validation Acc:  0.8763866877971473
Testing Acc:  0.8484848484848485

===Exp No. 7===

Test Subject R034:
VFI-1: [0]
Left-out Test samples:  163
Included Training samples:  0
# of Healthy Samples: 3039
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:07<00:00, 126.22it/s]


P value:  0.221
Training Acc:  0.8920394505107432
Validation Acc:  0.884310618066561
Testing Acc:  0.3067484662576687

===Exp No. 8===

Test Subject R029:
VFI-1: [1]
Left-out Test samples:  150
Included Training samples:  0
# of Healthy Samples: 3052
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:07<00:00, 129.25it/s]


P value:  0.182
Training Acc:  0.8774828616628582
Validation Acc:  0.8862559241706162
Testing Acc:  1.0

===Exp No. 9===

Test Subject R052:
VFI-1: [1]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3038
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:07<00:00, 131.96it/s]


P value:  0.004
Training Acc:  0.8937819270741589
Validation Acc:  0.8716323296354992
Testing Acc:  0.4268292682926829

===Exp No. 10===

Test Subject R039:
VFI-1: [4]
Left-out Test samples:  163
Included Training samples:  0
# of Healthy Samples: 3039
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:07<00:00, 127.49it/s]


P value:  0.051
Training Acc:  0.8927439239168722
Validation Acc:  0.8748019017432647
Testing Acc:  0.39263803680981596

===Exp No. 11===

Test Subject R088:
VFI-1: [4]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3038
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:07<00:00, 130.35it/s]


P value:  0.217
Training Acc:  0.8782807821032236
Validation Acc:  0.8637083993660856
Testing Acc:  1.0

===Exp No. 12===

Test Subject R092:
VFI-1: [4]
Left-out Test samples:  158
Included Training samples:  0
# of Healthy Samples: 3044
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:13<00:00, 75.84it/s]


P value:  0.463
Training Acc:  0.8975712777191129
Validation Acc:  0.8860759493670886
Testing Acc:  0.0

===Exp No. 13===

Test Subject R016:
VFI-1: [4]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:15<00:00, 65.07it/s]


P value:  0.072
Training Acc:  0.8780831571529246
Validation Acc:  0.8684627575277337
Testing Acc:  0.9939393939393939

===Exp No. 14===

Test Subject R002:
VFI-1: [0]
Left-out Test samples:  160
Included Training samples:  0
# of Healthy Samples: 3042
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:15<00:00, 64.43it/s]


P value:  0.079
Training Acc:  0.8788732394366198
Validation Acc:  0.8591772151898734
Testing Acc:  1.0

===Exp No. 15===

Test Subject R084:
VFI-1: [1]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:14<00:00, 71.04it/s]


P value:  0.014
Training Acc:  0.8793164200140944
Validation Acc:  0.8684627575277337
Testing Acc:  1.0

===Exp No. 16===

Test Subject R040:
VFI-1: [4]
Left-out Test samples:  163
Included Training samples:  0
# of Healthy Samples: 3039
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:14<00:00, 68.02it/s]


P value:  0.019
Training Acc:  0.8820007044734062
Validation Acc:  0.8510301109350238
Testing Acc:  1.0

===Exp No. 17===

Test Subject R037:
VFI-1: [0]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:14<00:00, 67.14it/s]


P value:  0.002
Training Acc:  0.881430584918957
Validation Acc:  0.8763866877971473
Testing Acc:  0.8727272727272727

===Exp No. 18===

Test Subject R090:
VFI-1: [3]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:15<00:00, 63.90it/s]


P value:  0.003
Training Acc:  0.8793164200140944
Validation Acc:  0.8684627575277337
Testing Acc:  0.8

===Exp No. 19===

Test Subject R061:
VFI-1: [2]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3037
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:15<00:00, 64.64it/s]


P value:  0.006
Training Acc:  0.885306553911205
Validation Acc:  0.8795562599049128
Testing Acc:  0.04242424242424243

===Exp No. 20===

Test Subject R010:
VFI-1: [4]
Left-out Test samples:  109
Included Training samples:  0
# of Healthy Samples: 3093
# of Fatigued Samples: 3270


Permuting: 100%|██████████| 1000/1000 [00:15<00:00, 65.01it/s]


P value:  0.131
Training Acc:  0.8786238211666084
Validation Acc:  0.8775510204081632
Testing Acc:  1.0

===Exp No. 21===

Test Subject R057:
VFI-1: [17]
Left-out Test samples:  152
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3118


Permuting: 100%|██████████| 1000/1000 [00:15<00:00, 63.51it/s]


P value:  0.387
Training Acc:  0.8799226441631505
Validation Acc:  0.865506329113924
Testing Acc:  0.8947368421052632

===Exp No. 22===

Test Subject R058:
VFI-1: [11]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105


Permuting: 100%|██████████| 1000/1000 [00:15<00:00, 66.49it/s]


P value:  0.0
Training Acc:  0.8875968992248062
Validation Acc:  0.8652931854199684
Testing Acc:  0.6

===Exp No. 23===

Test Subject R011:
VFI-1: [23]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105


Permuting: 100%|██████████| 1000/1000 [00:15<00:00, 66.04it/s]


P value:  0.0
Training Acc:  0.9272374911909795
Validation Acc:  0.9096671949286846
Testing Acc:  0.012121212121212121

===Exp No. 24===

Test Subject R019:
VFI-1: [22]
Left-out Test samples:  160
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3110


Permuting: 100%|██████████| 1000/1000 [00:14<00:00, 67.39it/s]


P value:  0.01
Training Acc:  0.8802816901408451
Validation Acc:  0.8575949367088608
Testing Acc:  0.58125

===Exp No. 25===

Test Subject R021:
VFI-1: [18]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3106


Permuting: 100%|██████████| 1000/1000 [00:14<00:00, 69.04it/s]


P value:  0.188
Training Acc:  0.8832129645939757
Validation Acc:  0.838351822503962
Testing Acc:  0.36585365853658536

===Exp No. 26===

Test Subject R030:
VFI-1: [11]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3106


Permuting: 100%|██████████| 1000/1000 [00:14<00:00, 70.65it/s]


P value:  0.0
Training Acc:  0.8936057776994891
Validation Acc:  0.8637083993660856
Testing Acc:  0.06097560975609756

===Exp No. 27===

Test Subject R032:
VFI-1: [11]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105


Permuting: 100%|██████████| 1000/1000 [00:14<00:00, 67.64it/s]


P value:  0.005
Training Acc:  0.8787878787878788
Validation Acc:  0.8557844690966719
Testing Acc:  0.9939393939393939

===Exp No. 28===

Test Subject R043:
VFI-1: [19]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105


Permuting: 100%|██████████| 1000/1000 [00:14<00:00, 70.86it/s]


P value:  0.001
Training Acc:  0.8780831571529246
Validation Acc:  0.8573692551505546
Testing Acc:  0.9878787878787879

===Exp No. 29===

Test Subject R045:
VFI-1: [20]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105


Permuting: 100%|██████████| 1000/1000 [00:13<00:00, 72.55it/s]


P value:  0.075
Training Acc:  0.8835447498238196
Validation Acc:  0.8557844690966719
Testing Acc:  0.8848484848484849

===Exp No. 30===

Test Subject R047:
VFI-1: [24]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105


Permuting: 100%|██████████| 1000/1000 [00:13<00:00, 73.44it/s]


P value:  0.147
Training Acc:  0.883016208597604
Validation Acc:  0.849445324881141
Testing Acc:  0.7090909090909091

===Exp No. 31===

Test Subject R083:
VFI-1: [15]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105


Permuting: 100%|██████████| 1000/1000 [00:14<00:00, 71.39it/s]


P value:  0.038
Training Acc:  0.8957011980267794
Validation Acc:  0.866877971473851
Testing Acc:  0.12121212121212122

===Exp No. 32===

Test Subject R071:
VFI-1: [13]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105


Permuting: 100%|██████████| 1000/1000 [00:13<00:00, 76.10it/s]


P value:  0.0
Training Acc:  0.8800211416490487
Validation Acc:  0.8605388272583201
Testing Acc:  1.0

===Exp No. 33===

Test Subject R055:
VFI-1: [14]
Left-out Test samples:  163
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3107


Permuting: 100%|██████████| 1000/1000 [00:12<00:00, 78.56it/s]


P value:  0.116
Training Acc:  0.8760126805213103
Validation Acc:  0.8557844690966719
Testing Acc:  0.9938650306748467

===Exp No. 34===

Test Subject R050:
VFI-1: [17]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105


Permuting: 100%|██████████| 1000/1000 [00:15<00:00, 63.87it/s]


P value:  0.022
Training Acc:  0.8907681465821
Validation Acc:  0.8763866877971473
Testing Acc:  0.36363636363636365

===Exp No. 35===

Test Subject R063:
VFI-1: [23]
Left-out Test samples:  162
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3108


Permuting: 100%|██████████| 1000/1000 [00:14<00:00, 70.13it/s]


P value:  0.012
Training Acc:  0.8804366966015144
Validation Acc:  0.8748019017432647
Testing Acc:  0.8024691358024691

===Exp No. 36===

Test Subject R056:
VFI-1: [28]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105


Permuting: 100%|██████████| 1000/1000 [00:14<00:00, 69.43it/s]


P value:  0.004
Training Acc:  0.8793164200140944
Validation Acc:  0.8589540412044374
Testing Acc:  1.0

===Exp No. 37===

Test Subject R059:
VFI-1: [20]
Left-out Test samples:  165
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3105


Permuting: 100%|██████████| 1000/1000 [00:13<00:00, 72.45it/s]


P value:  0.362
Training Acc:  0.8809020436927414
Validation Acc:  0.8589540412044374
Testing Acc:  0.9939393939393939

===Exp No. 38===

Test Subject R069:
VFI-1: [11]
Left-out Test samples:  162
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3108


Permuting: 100%|██████████| 1000/1000 [00:13<00:00, 73.20it/s]


P value:  0.0
Training Acc:  0.8806127839408346
Validation Acc:  0.8827258320126783
Testing Acc:  0.6234567901234568

===Exp No. 39===

Test Subject R046:
VFI-1: [18]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3106


Permuting: 100%|██████████| 1000/1000 [00:12<00:00, 77.07it/s]


P value:  0.006
Training Acc:  0.8879689977100581
Validation Acc:  0.8621236133122029
Testing Acc:  0.6585365853658537

===Exp No. 40===

Test Subject R049:
VFI-1: [28]
Left-out Test samples:  164
Included Training samples:  0
# of Healthy Samples: 3202
# of Fatigued Samples: 3106


Permuting: 100%|██████████| 1000/1000 [00:12<00:00, 79.77it/s]

P value:  0.0
Training Acc:  0.8932534789501497
Validation Acc:  0.8748019017432647
Testing Acc:  0.3780487804878049





In [15]:
# Mean training accuracy over all held-out-subject runs.
training_acc.mean()

0.884737559704061

In [16]:
# Mean validation accuracy over all held-out-subject runs.
valid_acc.mean()

0.871453796656055

In [17]:
# Mean accuracy on the held-out subjects.
testing_acc.mean()

0.6618747756070793

In [18]:
# Arithmetic mean of the per-run confound-test p-values.
# NOTE(review): an average of p-values is a descriptive summary only; it is
# not itself a p-value for the combined experiment.
p_value.mean()

0.07797499999999999

In [23]:
# Collect the per-run metrics into one table: one row per held-out subject
# (in loop order), one column per metric. pandas is imported in the
# notebook's top import cell rather than here.
data_array = np.column_stack([training_acc, valid_acc, testing_acc, p_value])
df = pd.DataFrame(data_array, columns = ['Train', 'Valid', 'Test', 'P-Value'])
print(df)

       Train     Valid      Test  P-Value
0   0.889359  0.904913  0.369697    0.106
1   0.889730  0.879556  0.225610    0.003
2   0.876674  0.890650  0.818182    0.125
3   0.883430  0.903328  0.358025    0.000
4   0.875330  0.896989  0.993827    0.047
5   0.879845  0.876387  0.848485    0.001
6   0.892039  0.884311  0.306748    0.221
7   0.877483  0.886256  1.000000    0.182
8   0.893782  0.871632  0.426829    0.004
9   0.892744  0.874802  0.392638    0.051
10  0.878281  0.863708  1.000000    0.217
11  0.897571  0.886076  0.000000    0.463
12  0.878083  0.868463  0.993939    0.072
13  0.878873  0.859177  1.000000    0.079
14  0.879316  0.868463  1.000000    0.014
15  0.882001  0.851030  1.000000    0.019
16  0.881431  0.876387  0.872727    0.002
17  0.879316  0.868463  0.800000    0.003
18  0.885307  0.879556  0.042424    0.006
19  0.878624  0.877551  1.000000    0.131
20  0.879923  0.865506  0.894737    0.387
21  0.887597  0.865293  0.600000    0.000
22  0.927237  0.909667  0.012121  

In [25]:
# Column-wise mean of the results table (one value per metric).
df.mean(axis='index')

Train      0.884738
Valid      0.871454
Test       0.661875
P-Value    0.077975
dtype: float64

In [24]:
# Persist the results table; with pandas' defaults the row index is written
# as the first CSV column.
df.to_csv(path_or_buf='Linear_SVM_LOO.csv')