In [37]:
import warnings
warnings.filterwarnings("ignore")

In [38]:
#from google.colab import drive
#drive.mount('/content/drive')

In [39]:
#import sys
#sys.path.insert(0,'/content/drive/My Drive/AStudyOfASDClassification')

In [40]:
from sklearn.metrics import make_scorer
from sklearn.metrics import accuracy_score, recall_score
scoring = {
    'accuracy':    make_scorer(accuracy_score),
    'sensitivity': make_scorer(recall_score),
    'specificity': make_scorer(recall_score,pos_label=0)
}

In [41]:
# Logistic Regression
import numpy as np
from sklearn.linear_model import LogisticRegression

lr_model = LogisticRegression()

param_grid_lr = [
        {
         'C' : [0.01,0.1,1,10,100],
         'solver' : ['liblinear']
        }
]

In [42]:
# Linear SVM

from sklearn.svm import LinearSVC
lsvm_model = LinearSVC(random_state=0, tol=1e-5)

param_grid_lsvm = [
        {
         'C' : [10,100,300,500]
        }
]


In [43]:
from sklearn.svm import SVC
# Kernel SVM
ksvm_model = SVC(probability=True)

param_grid_ksvm = [
        {'kernel' : ['rbf'],
         'C' : [0.01,0.1,1,3]
        }
]

# Load Dataset

In [9]:
from numpy import loadtxt
y_basc444 = loadtxt('Y.csv', delimiter=' ')

In [10]:
import numpy as np
time_series_basc444 = np.load('BASC444.npy', allow_pickle=True)

====================================================================
## Calculate Pearson and Spearman Connectivity
====================================================================

In [11]:
pearson_connectivity = np.zeros((len(time_series_basc444),time_series_basc444[0].shape[1], time_series_basc444[0].shape[1]))
spearman_connectivity = np.zeros((len(time_series_basc444),time_series_basc444[0].shape[1], time_series_basc444[0].shape[1]))

print(pearson_connectivity.shape)
print(spearman_connectivity.shape)

(871, 444, 444)
(871, 444, 444)


In [50]:
time_series_basc444[0].T.shape

(444, 196)

In [12]:
from nilearn.connectome import cov_to_corr
import scipy.stats as stats

for i in range(len(time_series_basc444)):
     cov = np.cov(time_series_basc444[i].T)
     pearson_connectivity[i,:,:] = cov_to_corr(cov)
     rho, pvalue = stats.spearmanr(time_series_basc444[i])
     spearman_connectivity[i,:,:] = rho

In [13]:
pearson_lower = np.zeros((len(time_series_basc444),(444*444-444)//2))
spearman_lower = np.zeros((len(time_series_basc444),(444*444-444)//2))
combine_lower = np.zeros((len(time_series_basc444),pearson_lower.shape[1]+spearman_lower.shape[1]))
print(pearson_lower.shape)
print(spearman_lower.shape)
print(combine_lower.shape)                         

(871, 98346)
(871, 98346)
(871, 196692)


In [14]:
for i in range(pearson_connectivity.shape[0]):
    
    temp = pearson_connectivity[i,:,:]
    v = temp[np.triu_indices(temp.shape[0], k = 1)] 
    pearson_lower[i,:] = v
    
    temp = spearman_connectivity[i,:,:]
    v = temp[np.triu_indices(temp.shape[0], k = 1)] 
    spearman_lower[i,:] = v
    
    combine_lower[i,:] = np.concatenate((pearson_lower[i,:],spearman_lower[i,:]),axis=0)

In [15]:
print(np.isnan(pearson_lower).any())
print(np.isnan(spearman_lower).any())
print(np.isnan(combine_lower).any())

where_are_NaNs = np.isnan(pearson_lower)
pearson_lower[where_are_NaNs] = 0

where_are_NaNs = np.isnan(spearman_lower)
spearman_lower[where_are_NaNs] = 0

where_are_NaNs = np.isnan(combine_lower)
combine_lower[where_are_NaNs] = 0

print(np.isnan(pearson_lower).any())
print(np.isnan(spearman_lower).any())
print(np.isnan(combine_lower).any())


True
True
True
False
False
False


In [16]:
pearson_lower_z = np.arctanh(pearson_lower)
spearman_lower_z = np.arctanh(spearman_lower)

## ================================
## 1. Logistic Regression
## ================================

In [37]:
from sklearn.model_selection import GridSearchCV
gs_lr = GridSearchCV(lr_model, param_grid_lr, cv = 5, verbose = True, n_jobs=-1)
best_lr_train = gs_lr.fit(pearson_lower, y_basc444)

Fitting 5 folds for each of 5 candidates, totalling 25 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 out of  25 | elapsed:  5.9min finished


In [38]:
best_lr_train.best_estimator_

LogisticRegression(C=0.01, solver='liblinear')

## Without Feature Selection (Logistric Regression)

In [39]:
## Without Feature Selection

from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.metrics import confusion_matrix
import time    
from sklearn.model_selection import cross_validate
from sklearn.model_selection import RepeatedStratifiedKFold

pipe = make_pipeline( 
    best_lr_train.best_estimator_
)
    
t = time.time()

cv = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 10, random_state = 1)

lr_corr_scores_0  = cross_validate(pipe, pearson_lower,  y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)
lr_spear_scores_0 = cross_validate(pipe, spearman_lower, y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)

elapsed = time.time() - t
print("Time = " + str(elapsed/60) + " minutes")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 11.0min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 25.3min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  8.4min


Time = 46.97749478419622 minutes


[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 21.6min finished


In [60]:
import numpy as np

lr_corr_acc_0 = np.mean(lr_corr_scores_0['test_accuracy'])
lr_corr_acc_std_0 = np.std(lr_corr_scores_0['test_accuracy'])
lr_corr_sen_0 = np.mean(lr_corr_scores_0['test_sensitivity'])
lr_corr_sen_std_0 = np.std(lr_corr_scores_0['test_sensitivity'])
lr_corr_spe_0 = np.mean(lr_corr_scores_0['test_specificity'])
lr_corr_spe_std_0 = np.std(lr_corr_scores_0['test_specificity'])
    
lr_spear_acc_0 = np.mean(lr_spear_scores_0['test_accuracy'])
lr_spear_acc_std_0 = np.std(lr_spear_scores_0['test_accuracy'])
lr_spear_sen_0 = np.mean(lr_spear_scores_0['test_sensitivity'])
lr_spear_sen_std_0 = np.std(lr_spear_scores_0['test_sensitivity'])
lr_spear_spe_0 = np.mean(lr_spear_scores_0['test_specificity'])
lr_spear_spe_std_0 = np.std(lr_spear_scores_0['test_specificity'])


print("Correlation:")
print("=================================")
print("corr_acc = " + str(lr_corr_acc_0) + " std = " + str(lr_corr_acc_std_0))
print("corr_sen = " + str(lr_corr_sen_0) + " std = " + str(lr_corr_sen_std_0))
print("corr_spe = " + str(lr_corr_spe_0) + " std = " + str(lr_corr_spe_std_0))

print("\nSpearman:")
print("=================================")
print("spear_acc = " + str(lr_spear_acc_0) + " std = " + str(lr_spear_acc_std_0))
print("spear_sen = " + str(lr_spear_sen_0) + " std = " + str(lr_spear_sen_std_0))
print("spear_spe = " + str(lr_spear_spe_0) + " std = " + str(lr_spear_spe_std_0))


Correlation:
corr_acc = 0.6967633228840125 std = 0.044273445045771256
corr_sen = 0.6324939024390244 std = 0.07214789649992212
corr_spe = 0.7520536540240519 std = 0.06553340681871049

Spearman:
spear_acc = 0.7018181818181819 std = 0.04509101861589183
spear_sen = 0.6388780487804877 std = 0.07892562519808748
spear_spe = 0.7559111933395006 std = 0.06266196562472093


In [40]:
lr_corr_scores_0['test_accuracy']

array([0.73863636, 0.79310345, 0.66666667, 0.65517241, 0.67816092,
       0.68965517, 0.79310345, 0.72413793, 0.65517241, 0.64367816,
       0.76136364, 0.5862069 , 0.75862069, 0.64367816, 0.67816092,
       0.70114943, 0.70114943, 0.77011494, 0.66666667, 0.66666667,
       0.64772727, 0.67816092, 0.67816092, 0.67816092, 0.77011494,
       0.75862069, 0.59770115, 0.68965517, 0.68965517, 0.67816092,
       0.76136364, 0.70114943, 0.70114943, 0.66666667, 0.67816092,
       0.63218391, 0.77011494, 0.70114943, 0.74712644, 0.66666667,
       0.69318182, 0.64367816, 0.67816092, 0.71264368, 0.74712644,
       0.67816092, 0.73563218, 0.77011494, 0.66666667, 0.75862069,
       0.77272727, 0.71264368, 0.68965517, 0.64367816, 0.67816092,
       0.71264368, 0.66666667, 0.72413793, 0.64367816, 0.68965517,
       0.70454545, 0.73563218, 0.72413793, 0.64367816, 0.75862069,
       0.66666667, 0.67816092, 0.67816092, 0.73563218, 0.5862069 ,
       0.64772727, 0.59770115, 0.72413793, 0.70114943, 0.66666

In [41]:
lr_corr_scores_0['test_sensitivity']

array([0.6097561 , 0.80487805, 0.56097561, 0.6       , 0.65      ,
       0.65      , 0.7       , 0.6       , 0.6       , 0.525     ,
       0.70731707, 0.51219512, 0.75609756, 0.525     , 0.55      ,
       0.7       , 0.6       , 0.7       , 0.65      , 0.55      ,
       0.6097561 , 0.65853659, 0.68292683, 0.625     , 0.7       ,
       0.75      , 0.6       , 0.525     , 0.45      , 0.65      ,
       0.63414634, 0.6097561 , 0.58536585, 0.55      , 0.65      ,
       0.675     , 0.75      , 0.625     , 0.7       , 0.6       ,
       0.63414634, 0.6097561 , 0.63414634, 0.525     , 0.625     ,
       0.525     , 0.75      , 0.725     , 0.575     , 0.75      ,
       0.65853659, 0.65853659, 0.65853659, 0.5       , 0.6       ,
       0.675     , 0.6       , 0.725     , 0.525     , 0.675     ,
       0.65853659, 0.73170732, 0.63414634, 0.55      , 0.725     ,
       0.6       , 0.625     , 0.575     , 0.75      , 0.6       ,
       0.53658537, 0.48780488, 0.70731707, 0.625     , 0.675  

In [42]:
lr_corr_scores_0['test_specificity']

array([0.85106383, 0.7826087 , 0.76086957, 0.70212766, 0.70212766,
       0.72340426, 0.87234043, 0.82978723, 0.70212766, 0.74468085,
       0.80851064, 0.65217391, 0.76086957, 0.74468085, 0.78723404,
       0.70212766, 0.78723404, 0.82978723, 0.68085106, 0.76595745,
       0.68085106, 0.69565217, 0.67391304, 0.72340426, 0.82978723,
       0.76595745, 0.59574468, 0.82978723, 0.89361702, 0.70212766,
       0.87234043, 0.7826087 , 0.80434783, 0.76595745, 0.70212766,
       0.59574468, 0.78723404, 0.76595745, 0.78723404, 0.72340426,
       0.74468085, 0.67391304, 0.7173913 , 0.87234043, 0.85106383,
       0.80851064, 0.72340426, 0.80851064, 0.74468085, 0.76595745,
       0.87234043, 0.76086957, 0.7173913 , 0.76595745, 0.74468085,
       0.74468085, 0.72340426, 0.72340426, 0.74468085, 0.70212766,
       0.74468085, 0.73913043, 0.80434783, 0.72340426, 0.78723404,
       0.72340426, 0.72340426, 0.76595745, 0.72340426, 0.57446809,
       0.74468085, 0.69565217, 0.73913043, 0.76595745, 0.65957

In [43]:
lr_spear_scores_0['test_accuracy']

array([0.71590909, 0.77011494, 0.68965517, 0.65517241, 0.65517241,
       0.71264368, 0.7816092 , 0.67816092, 0.67816092, 0.64367816,
       0.76136364, 0.6091954 , 0.8045977 , 0.65517241, 0.68965517,
       0.66666667, 0.68965517, 0.79310345, 0.74712644, 0.65517241,
       0.64772727, 0.71264368, 0.71264368, 0.68965517, 0.7816092 ,
       0.73563218, 0.6091954 , 0.68965517, 0.66666667, 0.71264368,
       0.76136364, 0.72413793, 0.70114943, 0.65517241, 0.70114943,
       0.65517241, 0.75862069, 0.70114943, 0.72413793, 0.68965517,
       0.69318182, 0.64367816, 0.68965517, 0.70114943, 0.73563218,
       0.67816092, 0.75862069, 0.72413793, 0.68965517, 0.81609195,
       0.75      , 0.73563218, 0.67816092, 0.62068966, 0.68965517,
       0.73563218, 0.67816092, 0.74712644, 0.71264368, 0.64367816,
       0.71590909, 0.73563218, 0.67816092, 0.64367816, 0.77011494,
       0.66666667, 0.68965517, 0.65517241, 0.73563218, 0.62068966,
       0.65909091, 0.62068966, 0.71264368, 0.72413793, 0.66666

In [44]:
lr_spear_scores_0['test_sensitivity']

array([0.58536585, 0.80487805, 0.56097561, 0.55      , 0.55      ,
       0.675     , 0.725     , 0.525     , 0.6       , 0.55      ,
       0.75609756, 0.51219512, 0.82926829, 0.55      , 0.55      ,
       0.725     , 0.625     , 0.725     , 0.675     , 0.525     ,
       0.63414634, 0.70731707, 0.65853659, 0.625     , 0.725     ,
       0.75      , 0.6       , 0.55      , 0.425     , 0.725     ,
       0.65853659, 0.65853659, 0.58536585, 0.525     , 0.65      ,
       0.7       , 0.725     , 0.6       , 0.65      , 0.625     ,
       0.68292683, 0.56097561, 0.65853659, 0.525     , 0.625     ,
       0.525     , 0.775     , 0.625     , 0.65      , 0.8       ,
       0.65853659, 0.68292683, 0.65853659, 0.525     , 0.6       ,
       0.7       , 0.625     , 0.725     , 0.6       , 0.6       ,
       0.65853659, 0.73170732, 0.56097561, 0.55      , 0.725     ,
       0.65      , 0.625     , 0.575     , 0.75      , 0.6       ,
       0.51219512, 0.53658537, 0.68292683, 0.625     , 0.7    

In [45]:
lr_spear_scores_0['test_specificity']

array([0.82978723, 0.73913043, 0.80434783, 0.74468085, 0.74468085,
       0.74468085, 0.82978723, 0.80851064, 0.74468085, 0.72340426,
       0.76595745, 0.69565217, 0.7826087 , 0.74468085, 0.80851064,
       0.61702128, 0.74468085, 0.85106383, 0.80851064, 0.76595745,
       0.65957447, 0.7173913 , 0.76086957, 0.74468085, 0.82978723,
       0.72340426, 0.61702128, 0.80851064, 0.87234043, 0.70212766,
       0.85106383, 0.7826087 , 0.80434783, 0.76595745, 0.74468085,
       0.61702128, 0.78723404, 0.78723404, 0.78723404, 0.74468085,
       0.70212766, 0.7173913 , 0.7173913 , 0.85106383, 0.82978723,
       0.80851064, 0.74468085, 0.80851064, 0.72340426, 0.82978723,
       0.82978723, 0.7826087 , 0.69565217, 0.70212766, 0.76595745,
       0.76595745, 0.72340426, 0.76595745, 0.80851064, 0.68085106,
       0.76595745, 0.73913043, 0.7826087 , 0.72340426, 0.80851064,
       0.68085106, 0.74468085, 0.72340426, 0.72340426, 0.63829787,
       0.78723404, 0.69565217, 0.73913043, 0.80851064, 0.63829

## Feature Selection Select from Model (Logistic Regression)

In [46]:
## Feature Selection

from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.metrics import confusion_matrix
import time    
from sklearn.model_selection import cross_validate
from sklearn.model_selection import RepeatedStratifiedKFold

pipe = make_pipeline( 
    SelectFromModel(best_lr_train.best_estimator_),
    best_lr_train.best_estimator_
)
    
t = time.time()

cv = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 10, random_state = 1)

lr_corr_scores  = cross_validate(pipe, pearson_lower,  y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)
lr_spear_scores = cross_validate(pipe, spearman_lower, y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)

elapsed = time.time() - t
print("Time = " + str(elapsed/60) + " minutes")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 11.0min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 22.8min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  7.6min


Time = 41.74854173262914 minutes


[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 19.0min finished


In [61]:
import numpy as np

lr_corr_acc = np.mean(lr_corr_scores['test_accuracy'])
lr_corr_acc_std = np.std(lr_corr_scores['test_accuracy'])
lr_corr_sen = np.mean(lr_corr_scores['test_sensitivity'])
lr_corr_sen_std = np.std(lr_corr_scores['test_sensitivity'])
lr_corr_spe = np.mean(lr_corr_scores['test_specificity'])
lr_corr_spe_std = np.std(lr_corr_scores['test_specificity'])
    
lr_spear_acc = np.mean(lr_spear_scores['test_accuracy'])
lr_spear_acc_std = np.std(lr_spear_scores['test_accuracy'])
lr_spear_sen = np.mean(lr_spear_scores['test_sensitivity'])
lr_spear_sen_std = np.std(lr_spear_scores['test_sensitivity'])
lr_spear_spe = np.mean(lr_spear_scores['test_specificity'])
lr_spear_spe_std = np.std(lr_spear_scores['test_specificity'])


print("Correlation:")
print("=================================")
print("corr_acc = " + str(lr_corr_acc) + " std = " + str(lr_corr_acc_std))
print("corr_sen = " + str(lr_corr_sen) + " std = " + str(lr_corr_sen_std))
print("corr_spe = " + str(lr_corr_spe) + " std = " + str(lr_corr_spe_std))

print("\nSpearman:")
print("=================================")
print("spear_acc = " + str(lr_spear_acc) + " std = " + str(lr_spear_acc_std))
print("spear_sen = " + str(lr_spear_sen) + " std = " + str(lr_spear_sen_std))
print("spear_spe = " + str(lr_spear_spe) + " std = " + str(lr_spear_spe_std))


Correlation:
corr_acc = 0.6957301462904912 std = 0.043468536703426965
corr_sen = 0.6332134146341464 std = 0.06918184526418936
corr_spe = 0.7494819611470861 std = 0.06598254021834118

Spearman:
spear_acc = 0.7025 std = 0.04378095210516275
spear_sen = 0.6396341463414634 std = 0.07660822829484257
spear_spe = 0.7565587419056431 std = 0.0627504938627806


In [47]:
lr_corr_scores['test_accuracy']

array([0.73863636, 0.7816092 , 0.66666667, 0.64367816, 0.67816092,
       0.68965517, 0.8045977 , 0.71264368, 0.66666667, 0.62068966,
       0.76136364, 0.6091954 , 0.74712644, 0.65517241, 0.67816092,
       0.66666667, 0.70114943, 0.7816092 , 0.68965517, 0.65517241,
       0.63636364, 0.66666667, 0.67816092, 0.67816092, 0.75862069,
       0.75862069, 0.59770115, 0.66666667, 0.67816092, 0.68965517,
       0.76136364, 0.70114943, 0.70114943, 0.65517241, 0.68965517,
       0.63218391, 0.77011494, 0.70114943, 0.74712644, 0.66666667,
       0.68181818, 0.65517241, 0.68965517, 0.71264368, 0.74712644,
       0.67816092, 0.72413793, 0.75862069, 0.65517241, 0.74712644,
       0.76136364, 0.72413793, 0.67816092, 0.64367816, 0.71264368,
       0.70114943, 0.66666667, 0.72413793, 0.66666667, 0.68965517,
       0.70454545, 0.73563218, 0.72413793, 0.64367816, 0.75862069,
       0.67816092, 0.67816092, 0.68965517, 0.73563218, 0.5862069 ,
       0.64772727, 0.6091954 , 0.71264368, 0.71264368, 0.66666

In [48]:
lr_corr_scores['test_sensitivity']

array([0.6097561 , 0.7804878 , 0.56097561, 0.575     , 0.65      ,
       0.65      , 0.725     , 0.575     , 0.6       , 0.525     ,
       0.70731707, 0.56097561, 0.75609756, 0.525     , 0.55      ,
       0.7       , 0.6       , 0.725     , 0.65      , 0.55      ,
       0.6097561 , 0.63414634, 0.68292683, 0.625     , 0.7       ,
       0.725     , 0.6       , 0.525     , 0.425     , 0.65      ,
       0.65853659, 0.6097561 , 0.58536585, 0.55      , 0.65      ,
       0.675     , 0.75      , 0.625     , 0.7       , 0.6       ,
       0.6097561 , 0.63414634, 0.65853659, 0.525     , 0.625     ,
       0.525     , 0.75      , 0.725     , 0.575     , 0.725     ,
       0.65853659, 0.68292683, 0.63414634, 0.5       , 0.625     ,
       0.675     , 0.6       , 0.725     , 0.575     , 0.675     ,
       0.65853659, 0.73170732, 0.63414634, 0.55      , 0.725     ,
       0.6       , 0.625     , 0.6       , 0.75      , 0.6       ,
       0.53658537, 0.51219512, 0.68292683, 0.625     , 0.675  

In [49]:
lr_corr_scores['test_specificity']

array([0.85106383, 0.7826087 , 0.76086957, 0.70212766, 0.70212766,
       0.72340426, 0.87234043, 0.82978723, 0.72340426, 0.70212766,
       0.80851064, 0.65217391, 0.73913043, 0.76595745, 0.78723404,
       0.63829787, 0.78723404, 0.82978723, 0.72340426, 0.74468085,
       0.65957447, 0.69565217, 0.67391304, 0.72340426, 0.80851064,
       0.78723404, 0.59574468, 0.78723404, 0.89361702, 0.72340426,
       0.85106383, 0.7826087 , 0.80434783, 0.74468085, 0.72340426,
       0.59574468, 0.78723404, 0.76595745, 0.78723404, 0.72340426,
       0.74468085, 0.67391304, 0.7173913 , 0.87234043, 0.85106383,
       0.80851064, 0.70212766, 0.78723404, 0.72340426, 0.76595745,
       0.85106383, 0.76086957, 0.7173913 , 0.76595745, 0.78723404,
       0.72340426, 0.72340426, 0.72340426, 0.74468085, 0.70212766,
       0.74468085, 0.73913043, 0.80434783, 0.72340426, 0.78723404,
       0.74468085, 0.72340426, 0.76595745, 0.72340426, 0.57446809,
       0.74468085, 0.69565217, 0.73913043, 0.78723404, 0.65957

In [50]:
lr_spear_scores['test_accuracy']

array([0.73863636, 0.75862069, 0.68965517, 0.65517241, 0.65517241,
       0.70114943, 0.79310345, 0.67816092, 0.67816092, 0.64367816,
       0.76136364, 0.63218391, 0.7816092 , 0.66666667, 0.68965517,
       0.63218391, 0.70114943, 0.77011494, 0.73563218, 0.64367816,
       0.64772727, 0.70114943, 0.71264368, 0.68965517, 0.77011494,
       0.77011494, 0.62068966, 0.68965517, 0.66666667, 0.71264368,
       0.78409091, 0.71264368, 0.68965517, 0.66666667, 0.70114943,
       0.64367816, 0.77011494, 0.71264368, 0.73563218, 0.68965517,
       0.70454545, 0.65517241, 0.71264368, 0.68965517, 0.73563218,
       0.67816092, 0.74712644, 0.73563218, 0.70114943, 0.8045977 ,
       0.77272727, 0.74712644, 0.67816092, 0.6091954 , 0.71264368,
       0.72413793, 0.67816092, 0.74712644, 0.72413793, 0.64367816,
       0.71590909, 0.72413793, 0.67816092, 0.65517241, 0.75862069,
       0.66666667, 0.67816092, 0.67816092, 0.73563218, 0.63218391,
       0.65909091, 0.62068966, 0.71264368, 0.72413793, 0.66666

In [51]:
lr_spear_scores['test_sensitivity']

array([0.6097561 , 0.7804878 , 0.56097561, 0.55      , 0.575     ,
       0.675     , 0.75      , 0.525     , 0.625     , 0.55      ,
       0.73170732, 0.53658537, 0.80487805, 0.55      , 0.55      ,
       0.65      , 0.625     , 0.7       , 0.675     , 0.525     ,
       0.65853659, 0.68292683, 0.65853659, 0.625     , 0.725     ,
       0.75      , 0.6       , 0.55      , 0.425     , 0.725     ,
       0.70731707, 0.63414634, 0.56097561, 0.55      , 0.65      ,
       0.7       , 0.775     , 0.625     , 0.65      , 0.625     ,
       0.68292683, 0.58536585, 0.68292683, 0.5       , 0.625     ,
       0.525     , 0.75      , 0.65      , 0.675     , 0.8       ,
       0.68292683, 0.68292683, 0.65853659, 0.525     , 0.625     ,
       0.675     , 0.625     , 0.725     , 0.65      , 0.6       ,
       0.65853659, 0.73170732, 0.56097561, 0.55      , 0.7       ,
       0.65      , 0.6       , 0.6       , 0.75      , 0.625     ,
       0.51219512, 0.51219512, 0.68292683, 0.625     , 0.7    

In [63]:
lr_spear_scores['test_specificity']

array([0.85106383, 0.73913043, 0.80434783, 0.74468085, 0.72340426,
       0.72340426, 0.82978723, 0.80851064, 0.72340426, 0.72340426,
       0.78723404, 0.7173913 , 0.76086957, 0.76595745, 0.80851064,
       0.61702128, 0.76595745, 0.82978723, 0.78723404, 0.74468085,
       0.63829787, 0.7173913 , 0.76086957, 0.74468085, 0.80851064,
       0.78723404, 0.63829787, 0.80851064, 0.87234043, 0.70212766,
       0.85106383, 0.7826087 , 0.80434783, 0.76595745, 0.74468085,
       0.59574468, 0.76595745, 0.78723404, 0.80851064, 0.74468085,
       0.72340426, 0.7173913 , 0.73913043, 0.85106383, 0.82978723,
       0.80851064, 0.74468085, 0.80851064, 0.72340426, 0.80851064,
       0.85106383, 0.80434783, 0.69565217, 0.68085106, 0.78723404,
       0.76595745, 0.72340426, 0.76595745, 0.78723404, 0.68085106,
       0.76595745, 0.7173913 , 0.7826087 , 0.74468085, 0.80851064,
       0.68085106, 0.74468085, 0.74468085, 0.72340426, 0.63829787,
       0.78723404, 0.7173913 , 0.73913043, 0.80851064, 0.63829

## Feature Selection Recursive Feature Elimination. (Logistric Regression)

In [35]:
from sklearn.feature_selection import RFE
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
import time    
from sklearn.model_selection import cross_validate
from sklearn.model_selection import RepeatedStratifiedKFold

pipe_lr_rfe = make_pipeline( 
    RFE(best_lr_train.best_estimator_, step = 0.1),
    best_lr_train.best_estimator_
)
    
t = time.time()

cv = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 10, random_state = 1)

lr_corr_scores_1  = cross_validate(pipe_lr_rfe, pearson_lower,  y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)
lr_spear_scores_1 = cross_validate(pipe_lr_rfe, spearman_lower, y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)

elapsed = time.time() - t
print("Time = " + str(elapsed/60) + " minutes")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 50.7min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 120.2min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 43.9min


Time = 665.0803770184517 minutes


[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 544.9min finished


In [36]:
import numpy as np

lr_corr_acc_1 = np.mean(lr_corr_scores_1['test_accuracy'])
lr_corr_acc_std_1 = np.std(lr_corr_scores_1['test_accuracy'])
lr_corr_sen_1 = np.mean(lr_corr_scores_1['test_sensitivity'])
lr_corr_sen_std_1 = np.std(lr_corr_scores_1['test_sensitivity'])
lr_corr_spe_1 = np.mean(lr_corr_scores_1['test_specificity'])
lr_corr_spe_std_1 = np.std(lr_corr_scores_1['test_specificity'])
    
lr_spear_acc_1 = np.mean(lr_spear_scores_1['test_accuracy'])
lr_spear_acc_std_1 = np.std(lr_spear_scores_1['test_accuracy'])
lr_spear_sen_1 = np.mean(lr_spear_scores_1['test_sensitivity'])
lr_spear_sen_std_1 = np.std(lr_spear_scores_1['test_sensitivity'])
lr_spear_spe_1 = np.mean(lr_spear_scores_1['test_specificity'])
lr_spear_spe_std_1 = np.std(lr_spear_scores_1['test_specificity'])


print("Correlation:")
print("=================================")
print("corr_acc = " + str(lr_corr_acc_1) + " std = " + str(lr_corr_acc_std_1))
print("corr_sen = " + str(lr_corr_sen_1) + " std = " + str(lr_corr_sen_std_1))
print("corr_spe = " + str(lr_corr_spe_1) + " std = " + str(lr_corr_spe_std_1))

print("\nSpearman:")
print("=================================")
print("spear_acc = " + str(lr_spear_acc_1) + " std = " + str(lr_spear_acc_std_1))
print("spear_sen = " + str(lr_spear_sen_1) + " std = " + str(lr_spear_sen_std_1))
print("spear_spe = " + str(lr_spear_spe_1) + " std = " + str(lr_spear_spe_std_1))


Correlation:
corr_acc = 0.6982588819226752 std = 0.045055856609241204
corr_sen = 0.6339756097560976 std = 0.07279711933164881
corr_spe = 0.7535476410730806 std = 0.06499710386863451

Spearman:
spear_acc = 0.7021577847439918 std = 0.04367019227305481
spear_sen = 0.6389024390243903 std = 0.07618565101502371
spear_spe = 0.756554116558742 std = 0.06177039714643876


=========================================================
# Linear SVM
=========================================================

In [27]:
from sklearn.model_selection import GridSearchCV
gs_lsvm = GridSearchCV(lsvm_model, param_grid_lsvm, cv = 5, verbose = True, n_jobs = -1)
best_lsvm_train = gs_lsvm.fit(pearson_lower, y_basc444)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed: 32.9min finished


In [31]:
best_lsvm_train.best_estimator_

LinearSVC(C=10, random_state=0, tol=1e-05)

## Without Feature Selection (Linear SVM)

In [53]:
## Without Feature Selection

from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.metrics import confusion_matrix
import time    
from sklearn.model_selection import cross_validate
from sklearn.model_selection import RepeatedStratifiedKFold
    
pipe = make_pipeline( 
    LinearSVC(C=10, random_state=0, tol=1e-05)
)
    
t = time.time()

cv = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 10, random_state = 1)

lsvm_corr_scores_0  = cross_validate(pipe, pearson_lower,  y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)
lsvm_spear_scores_0 = cross_validate(pipe, spearman_lower, y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)

elapsed = time.time() - t
print("Time = " + str(elapsed/60) + " minutes")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 58.1min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 135.2min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 46.2min


Time = 260.49739653666813 minutes


[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 125.3min finished


In [62]:
import numpy as np

lsvm_corr_acc_0 = np.mean(lsvm_corr_scores_0['test_accuracy'])
lsvm_corr_acc_std_0 = np.std(lsvm_corr_scores_0['test_accuracy'])
lsvm_corr_sen_0 = np.mean(lsvm_corr_scores_0['test_sensitivity'])
lsvm_corr_sen_std_0 = np.std(lsvm_corr_scores_0['test_sensitivity'])
lsvm_corr_spe_0 = np.mean(lsvm_corr_scores_0['test_specificity'])
lsvm_corr_spe_std_0 = np.std(lsvm_corr_scores_0['test_specificity'])
    
lsvm_spear_acc_0 = np.mean(lsvm_spear_scores_0['test_accuracy'])
lsvm_spear_acc_std_0 = np.std(lsvm_spear_scores_0['test_accuracy'])
lsvm_spear_sen_0 = np.mean(lsvm_spear_scores_0['test_sensitivity'])
lsvm_spear_sen_std_0 = np.std(lsvm_spear_scores_0['test_sensitivity'])
lsvm_spear_spe_0 = np.mean(lsvm_spear_scores_0['test_specificity'])
lsvm_spear_spe_std_0 = np.std(lsvm_spear_scores_0['test_specificity'])


print("Correlation:")
print("=================================")
print("corr_acc = " + str(lsvm_corr_acc_0) + " std = " + str(lsvm_corr_acc_std_0))
print("corr_sen = " + str(lsvm_corr_sen_0) + " std = " + str(lsvm_corr_sen_std_0))
print("corr_spe = " + str(lsvm_corr_spe_0) + " std = " + str(lsvm_corr_spe_std_0))

print("\nSpearman:")
print("=================================")
print("spear_acc = " + str(lsvm_spear_acc_0) + " std = " + str(lsvm_spear_acc_std_0))
print("spear_sen = " + str(lsvm_spear_sen_0) + " std = " + str(lsvm_spear_sen_std_0))
print("spear_spe = " + str(lsvm_spear_spe_0) + " std = " + str(lsvm_spear_spe_std_0))


Correlation:
corr_acc = 0.6911428944618601 std = 0.04374094333553315
corr_sen = 0.6404146341463414 std = 0.0720541884127271
corr_spe = 0.7347317298797411 std = 0.06329751305962833

Spearman:
spear_acc = 0.7027468652037618 std = 0.046037581572157614
spear_sen = 0.6530365853658537 std = 0.07370939832576778
spear_spe = 0.7454209065679928 std = 0.06908710832570167


In [54]:
lsvm_corr_scores_0['test_accuracy']

array([0.72727273, 0.79310345, 0.63218391, 0.67816092, 0.64367816,
       0.67816092, 0.77011494, 0.71264368, 0.70114943, 0.65517241,
       0.70454545, 0.6091954 , 0.72413793, 0.64367816, 0.68965517,
       0.66666667, 0.68965517, 0.7816092 , 0.66666667, 0.63218391,
       0.67045455, 0.70114943, 0.65517241, 0.67816092, 0.73563218,
       0.72413793, 0.6091954 , 0.67816092, 0.67816092, 0.68965517,
       0.73863636, 0.68965517, 0.71264368, 0.65517241, 0.65517241,
       0.65517241, 0.77011494, 0.72413793, 0.72413793, 0.66666667,
       0.67045455, 0.63218391, 0.65517241, 0.71264368, 0.73563218,
       0.64367816, 0.72413793, 0.75862069, 0.70114943, 0.77011494,
       0.75      , 0.71264368, 0.70114943, 0.59770115, 0.66666667,
       0.68965517, 0.70114943, 0.71264368, 0.67816092, 0.73563218,
       0.72727273, 0.66666667, 0.70114943, 0.66666667, 0.71264368,
       0.65517241, 0.65517241, 0.68965517, 0.75862069, 0.59770115,
       0.65909091, 0.59770115, 0.73563218, 0.65517241, 0.66666

In [55]:
lsvm_corr_scores_0['test_sensitivity']

array([0.6097561 , 0.80487805, 0.56097561, 0.625     , 0.6       ,
       0.65      , 0.775     , 0.675     , 0.65      , 0.575     ,
       0.68292683, 0.53658537, 0.75609756, 0.55      , 0.55      ,
       0.675     , 0.575     , 0.725     , 0.65      , 0.5       ,
       0.6097561 , 0.63414634, 0.65853659, 0.675     , 0.675     ,
       0.725     , 0.575     , 0.575     , 0.45      , 0.65      ,
       0.65853659, 0.68292683, 0.63414634, 0.6       , 0.625     ,
       0.725     , 0.7       , 0.65      , 0.725     , 0.6       ,
       0.63414634, 0.6097561 , 0.65853659, 0.55      , 0.6       ,
       0.5       , 0.775     , 0.75      , 0.6       , 0.725     ,
       0.65853659, 0.65853659, 0.68292683, 0.45      , 0.6       ,
       0.675     , 0.6       , 0.7       , 0.6       , 0.75      ,
       0.70731707, 0.68292683, 0.63414634, 0.65      , 0.725     ,
       0.6       , 0.525     , 0.575     , 0.775     , 0.625     ,
       0.53658537, 0.53658537, 0.73170732, 0.575     , 0.725  

In [56]:
lsvm_corr_scores_0['test_specificity']

array([0.82978723, 0.7826087 , 0.69565217, 0.72340426, 0.68085106,
       0.70212766, 0.76595745, 0.74468085, 0.74468085, 0.72340426,
       0.72340426, 0.67391304, 0.69565217, 0.72340426, 0.80851064,
       0.65957447, 0.78723404, 0.82978723, 0.68085106, 0.74468085,
       0.72340426, 0.76086957, 0.65217391, 0.68085106, 0.78723404,
       0.72340426, 0.63829787, 0.76595745, 0.87234043, 0.72340426,
       0.80851064, 0.69565217, 0.7826087 , 0.70212766, 0.68085106,
       0.59574468, 0.82978723, 0.78723404, 0.72340426, 0.72340426,
       0.70212766, 0.65217391, 0.65217391, 0.85106383, 0.85106383,
       0.76595745, 0.68085106, 0.76595745, 0.78723404, 0.80851064,
       0.82978723, 0.76086957, 0.7173913 , 0.72340426, 0.72340426,
       0.70212766, 0.78723404, 0.72340426, 0.74468085, 0.72340426,
       0.74468085, 0.65217391, 0.76086957, 0.68085106, 0.70212766,
       0.70212766, 0.76595745, 0.78723404, 0.74468085, 0.57446809,
       0.76595745, 0.65217391, 0.73913043, 0.72340426, 0.61702

In [57]:
lsvm_spear_scores_0['test_accuracy']

array([0.70454545, 0.77011494, 0.68965517, 0.71264368, 0.63218391,
       0.70114943, 0.77011494, 0.68965517, 0.73563218, 0.65517241,
       0.71590909, 0.66666667, 0.75862069, 0.67816092, 0.68965517,
       0.62068966, 0.66666667, 0.81609195, 0.72413793, 0.67816092,
       0.64772727, 0.68965517, 0.71264368, 0.66666667, 0.77011494,
       0.72413793, 0.63218391, 0.73563218, 0.66666667, 0.72413793,
       0.77272727, 0.70114943, 0.71264368, 0.71264368, 0.67816092,
       0.64367816, 0.79310345, 0.72413793, 0.75862069, 0.70114943,
       0.68181818, 0.63218391, 0.66666667, 0.71264368, 0.72413793,
       0.66666667, 0.75862069, 0.72413793, 0.72413793, 0.8045977 ,
       0.73863636, 0.68965517, 0.74712644, 0.6091954 , 0.68965517,
       0.64367816, 0.72413793, 0.73563218, 0.73563218, 0.71264368,
       0.71590909, 0.71264368, 0.67816092, 0.63218391, 0.72413793,
       0.67816092, 0.68965517, 0.63218391, 0.75862069, 0.6091954 ,
       0.65909091, 0.62068966, 0.75862069, 0.71264368, 0.64367

In [58]:
lsvm_spear_scores_0['test_sensitivity']

array([0.6097561 , 0.82926829, 0.58536585, 0.625     , 0.575     ,
       0.675     , 0.775     , 0.6       , 0.7       , 0.55      ,
       0.70731707, 0.58536585, 0.82926829, 0.6       , 0.55      ,
       0.65      , 0.6       , 0.75      , 0.625     , 0.575     ,
       0.65853659, 0.63414634, 0.65853659, 0.625     , 0.675     ,
       0.7       , 0.65      , 0.575     , 0.475     , 0.725     ,
       0.70731707, 0.68292683, 0.63414634, 0.625     , 0.6       ,
       0.675     , 0.725     , 0.65      , 0.725     , 0.625     ,
       0.68292683, 0.53658537, 0.68292683, 0.55      , 0.625     ,
       0.55      , 0.8       , 0.675     , 0.725     , 0.75      ,
       0.63414634, 0.65853659, 0.73170732, 0.475     , 0.65      ,
       0.675     , 0.65      , 0.7       , 0.625     , 0.7       ,
       0.68292683, 0.75609756, 0.56097561, 0.6       , 0.7       ,
       0.675     , 0.6       , 0.55      , 0.75      , 0.6       ,
       0.53658537, 0.58536585, 0.73170732, 0.6       , 0.7    

In [59]:
lsvm_spear_scores_0['test_specificity']

array([0.78723404, 0.7173913 , 0.7826087 , 0.78723404, 0.68085106,
       0.72340426, 0.76595745, 0.76595745, 0.76595745, 0.74468085,
       0.72340426, 0.73913043, 0.69565217, 0.74468085, 0.80851064,
       0.59574468, 0.72340426, 0.87234043, 0.80851064, 0.76595745,
       0.63829787, 0.73913043, 0.76086957, 0.70212766, 0.85106383,
       0.74468085, 0.61702128, 0.87234043, 0.82978723, 0.72340426,
       0.82978723, 0.7173913 , 0.7826087 , 0.78723404, 0.74468085,
       0.61702128, 0.85106383, 0.78723404, 0.78723404, 0.76595745,
       0.68085106, 0.7173913 , 0.65217391, 0.85106383, 0.80851064,
       0.76595745, 0.72340426, 0.76595745, 0.72340426, 0.85106383,
       0.82978723, 0.7173913 , 0.76086957, 0.72340426, 0.72340426,
       0.61702128, 0.78723404, 0.76595745, 0.82978723, 0.72340426,
       0.74468085, 0.67391304, 0.7826087 , 0.65957447, 0.74468085,
       0.68085106, 0.76595745, 0.70212766, 0.76595745, 0.61702128,
       0.76595745, 0.65217391, 0.7826087 , 0.80851064, 0.59574

## Feature Selection select from model (linear SVM)

In [28]:
## Feature Selection

from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.metrics import confusion_matrix
import time    
from sklearn.model_selection import cross_validate
from sklearn.model_selection import RepeatedStratifiedKFold

pipe = make_pipeline( 
    SelectFromModel(best_lsvm_train.best_estimator_),
    best_lsvm_train.best_estimator_
)
    
t = time.time()

cv = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 10, random_state = 1)

lsvm_corr_scores  = cross_validate(pipe, pearson_lower,  y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)
lsvm_spear_scores = cross_validate(pipe, spearman_lower, y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)

elapsed = time.time() - t
print("Time = " + str(elapsed/60) + " minutes")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 87.1min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 209.7min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 70.0min


Time = 405.37570828199387 minutes


[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 195.7min finished


In [29]:
import numpy as np

lsvm_corr_acc = np.mean(lsvm_corr_scores['test_accuracy'])
lsvm_corr_acc_std = np.std(lsvm_corr_scores['test_accuracy'])
lsvm_corr_sen = np.mean(lsvm_corr_scores['test_sensitivity'])
lsvm_corr_sen_std = np.std(lsvm_corr_scores['test_sensitivity'])
lsvm_corr_spe = np.mean(lsvm_corr_scores['test_specificity'])
lsvm_corr_spe_std = np.std(lsvm_corr_scores['test_specificity'])
    
lsvm_spear_acc = np.mean(lsvm_spear_scores['test_accuracy'])
lsvm_spear_acc_std = np.std(lsvm_spear_scores['test_accuracy'])
lsvm_spear_sen = np.mean(lsvm_spear_scores['test_sensitivity'])
lsvm_spear_sen_std = np.std(lsvm_spear_scores['test_sensitivity'])
lsvm_spear_spe = np.mean(lsvm_spear_scores['test_specificity'])
lsvm_spear_spe_std = np.std(lsvm_spear_scores['test_specificity'])


print("Correlation:")
print("=================================")
print("corr_acc = " + str(lsvm_corr_acc) + " std = " + str(lsvm_corr_acc_std))
print("corr_sen = " + str(lsvm_corr_sen) + " std = " + str(lsvm_corr_sen_std))
print("corr_spe = " + str(lsvm_corr_spe) + " std = " + str(lsvm_corr_spe_std))

print("\nSpearman:")
print("=================================")
print("spear_acc = " + str(lsvm_spear_acc) + " std = " + str(lsvm_spear_acc_std))
print("spear_sen = " + str(lsvm_spear_sen) + " std = " + str(lsvm_spear_sen_std))
print("spear_spe = " + str(lsvm_spear_spe) + " std = " + str(lsvm_spear_spe_std))


Correlation:
corr_acc = 0.6896512539184954 std = 0.044116357861898645
corr_sen = 0.6384085365853659 std = 0.0742747751886721
corr_spe = 0.7336725254394081 std = 0.06660181537274533

Spearman:
spear_acc = 0.7020559038662487 std = 0.04524300930539694
spear_sen = 0.652060975609756 std = 0.07455797663451869
spear_spe = 0.7449861239592971 std = 0.0677302173672148


## Feature Selection Recursive Feature Elimination. (linear SVM)

In [17]:
## Feature Selection
from sklearn.feature_selection import RFE
from sklearn.pipeline import make_pipeline
import time    
from sklearn.model_selection import cross_validate
from sklearn.model_selection import RepeatedStratifiedKFold

pipe = make_pipeline( 
    #RFE(best_lsvm_train.best_estimator_, step = 0.1),
    #best_lsvm_train.best_estimator_
    
    RFE(LinearSVC(C=10, random_state=0, tol=1e-05), step = 0.1),
    LinearSVC(C=10, random_state=0, tol=1e-05)
)
    
t = time.time()

cv = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 10, random_state = 1)

lsvm_corr_scores_1  = cross_validate(pipe, pearson_lower,  y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)
lsvm_spear_scores_1 = cross_validate(pipe, spearman_lower, y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)

elapsed = time.time() - t
print("Time = " + str(elapsed/60) + " minutes")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 269.1min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 706.2min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 271.4min


Time = 1475.9961933175723 minutes


[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 769.8min finished


In [19]:
import numpy as np

lsvm_corr_acc_1 = np.mean(lsvm_corr_scores_1['test_accuracy'])
lsvm_corr_acc_std_1 = np.std(lsvm_corr_scores_1['test_accuracy'])
lsvm_corr_sen_1 = np.mean(lsvm_corr_scores_1['test_sensitivity'])
lsvm_corr_sen_std_1 = np.std(lsvm_corr_scores_1['test_sensitivity'])
lsvm_corr_spe_1 = np.mean(lsvm_corr_scores_1['test_specificity'])
lsvm_corr_spe_std_1 = np.std(lsvm_corr_scores_1['test_specificity'])
    
lsvm_spear_acc_1 = np.mean(lsvm_spear_scores_1['test_accuracy'])
lsvm_spear_acc_std_1 = np.std(lsvm_spear_scores_1['test_accuracy'])
lsvm_spear_sen_1 = np.mean(lsvm_spear_scores_1['test_sensitivity'])
lsvm_spear_sen_std_1 = np.std(lsvm_spear_scores_1['test_sensitivity'])
lsvm_spear_spe_1 = np.mean(lsvm_spear_scores_1['test_specificity'])
lsvm_spear_spe_std_1 = np.std(lsvm_spear_scores_1['test_specificity'])


print("Correlation:")
print("=================================")
print("corr_acc = " + str(lsvm_corr_acc_1) + " std = " + str(lsvm_corr_acc_std_1))
print("corr_sen = " + str(lsvm_corr_sen_1) + " std = " + str(lsvm_corr_sen_std_1))
print("corr_spe = " + str(lsvm_corr_spe_1) + " std = " + str(lsvm_corr_spe_std_1))

print("\nSpearman:")
print("=================================")
print("spear_acc = " + str(lsvm_spear_acc_1) + " std = " + str(lsvm_spear_acc_std_1))
print("spear_sen = " + str(lsvm_spear_sen_1) + " std = " + str(lsvm_spear_sen_std_1))
print("spear_spe = " + str(lsvm_spear_spe_1) + " std = " + str(lsvm_spear_spe_std_1))


Correlation:
corr_acc = 0.6903369905956113 std = 0.045203400414910264
corr_sen = 0.6389207317073171 std = 0.07286930061241846
corr_spe = 0.7345189639222943 std = 0.0644119523702747

Spearman:
spear_acc = 0.70332157784744 std = 0.0451817240376329
spear_sen = 0.6535853658536586 std = 0.07462384752791244
spear_spe = 0.7460545790934322 std = 0.06727049752913697


In [22]:
lsvm_corr_scores_1['test_accuracy']

array([0.72727273, 0.79310345, 0.63218391, 0.68965517, 0.64367816,
       0.68965517, 0.77011494, 0.70114943, 0.70114943, 0.63218391,
       0.69318182, 0.6091954 , 0.73563218, 0.65517241, 0.68965517,
       0.65517241, 0.67816092, 0.7816092 , 0.67816092, 0.64367816,
       0.67045455, 0.66666667, 0.67816092, 0.68965517, 0.73563218,
       0.71264368, 0.62068966, 0.67816092, 0.67816092, 0.67816092,
       0.75      , 0.68965517, 0.71264368, 0.66666667, 0.63218391,
       0.64367816, 0.7816092 , 0.72413793, 0.73563218, 0.68965517,
       0.68181818, 0.63218391, 0.65517241, 0.71264368, 0.73563218,
       0.62068966, 0.71264368, 0.74712644, 0.68965517, 0.77011494,
       0.76136364, 0.71264368, 0.66666667, 0.5862069 , 0.66666667,
       0.68965517, 0.70114943, 0.70114943, 0.68965517, 0.73563218,
       0.71590909, 0.68965517, 0.68965517, 0.66666667, 0.72413793,
       0.65517241, 0.66666667, 0.68965517, 0.75862069, 0.5862069 ,
       0.64772727, 0.57471264, 0.73563218, 0.67816092, 0.65517

In [23]:
lsvm_corr_scores_1['test_sensitivity']

array([0.6097561 , 0.80487805, 0.56097561, 0.65      , 0.6       ,
       0.65      , 0.775     , 0.65      , 0.65      , 0.55      ,
       0.68292683, 0.53658537, 0.7804878 , 0.575     , 0.55      ,
       0.65      , 0.575     , 0.725     , 0.65      , 0.5       ,
       0.6097561 , 0.58536585, 0.65853659, 0.675     , 0.675     ,
       0.725     , 0.575     , 0.575     , 0.45      , 0.625     ,
       0.68292683, 0.68292683, 0.63414634, 0.6       , 0.6       ,
       0.725     , 0.725     , 0.65      , 0.725     , 0.625     ,
       0.65853659, 0.6097561 , 0.65853659, 0.55      , 0.6       ,
       0.5       , 0.775     , 0.75      , 0.6       , 0.725     ,
       0.65853659, 0.65853659, 0.63414634, 0.45      , 0.625     ,
       0.675     , 0.6       , 0.7       , 0.6       , 0.75      ,
       0.70731707, 0.68292683, 0.63414634, 0.65      , 0.725     ,
       0.6       , 0.525     , 0.575     , 0.775     , 0.6       ,
       0.53658537, 0.51219512, 0.73170732, 0.575     , 0.7    

In [24]:
lsvm_corr_scores_1['test_specificity']

array([0.82978723, 0.7826087 , 0.69565217, 0.72340426, 0.68085106,
       0.72340426, 0.76595745, 0.74468085, 0.74468085, 0.70212766,
       0.70212766, 0.67391304, 0.69565217, 0.72340426, 0.80851064,
       0.65957447, 0.76595745, 0.82978723, 0.70212766, 0.76595745,
       0.72340426, 0.73913043, 0.69565217, 0.70212766, 0.78723404,
       0.70212766, 0.65957447, 0.76595745, 0.87234043, 0.72340426,
       0.80851064, 0.69565217, 0.7826087 , 0.72340426, 0.65957447,
       0.57446809, 0.82978723, 0.78723404, 0.74468085, 0.74468085,
       0.70212766, 0.65217391, 0.65217391, 0.85106383, 0.85106383,
       0.72340426, 0.65957447, 0.74468085, 0.76595745, 0.80851064,
       0.85106383, 0.76086957, 0.69565217, 0.70212766, 0.70212766,
       0.70212766, 0.78723404, 0.70212766, 0.76595745, 0.72340426,
       0.72340426, 0.69565217, 0.73913043, 0.68085106, 0.72340426,
       0.70212766, 0.78723404, 0.78723404, 0.74468085, 0.57446809,
       0.74468085, 0.63043478, 0.73913043, 0.76595745, 0.61702

In [29]:
lsvm_spear_scores_1['test_accuracy']

array([0.71590909, 0.75862069, 0.70114943, 0.71264368, 0.64367816,
       0.72413793, 0.77011494, 0.70114943, 0.74712644, 0.65517241,
       0.72727273, 0.66666667, 0.74712644, 0.67816092, 0.68965517,
       0.63218391, 0.66666667, 0.81609195, 0.72413793, 0.68965517,
       0.63636364, 0.70114943, 0.70114943, 0.66666667, 0.75862069,
       0.71264368, 0.64367816, 0.70114943, 0.66666667, 0.72413793,
       0.77272727, 0.68965517, 0.68965517, 0.71264368, 0.66666667,
       0.64367816, 0.79310345, 0.72413793, 0.77011494, 0.70114943,
       0.67045455, 0.62068966, 0.66666667, 0.71264368, 0.72413793,
       0.67816092, 0.75862069, 0.74712644, 0.72413793, 0.79310345,
       0.73863636, 0.68965517, 0.74712644, 0.6091954 , 0.68965517,
       0.64367816, 0.72413793, 0.72413793, 0.73563218, 0.71264368,
       0.71590909, 0.72413793, 0.66666667, 0.63218391, 0.72413793,
       0.70114943, 0.67816092, 0.63218391, 0.75862069, 0.6091954 ,
       0.65909091, 0.5862069 , 0.75862069, 0.72413793, 0.68965

In [30]:
lsvm_spear_scores_1['test_sensitivity']

array([0.6097561 , 0.80487805, 0.58536585, 0.625     , 0.575     ,
       0.7       , 0.775     , 0.6       , 0.7       , 0.55      ,
       0.70731707, 0.58536585, 0.7804878 , 0.6       , 0.55      ,
       0.65      , 0.625     , 0.75      , 0.625     , 0.6       ,
       0.63414634, 0.63414634, 0.63414634, 0.625     , 0.65      ,
       0.7       , 0.65      , 0.575     , 0.475     , 0.725     ,
       0.70731707, 0.68292683, 0.6097561 , 0.625     , 0.6       ,
       0.675     , 0.725     , 0.65      , 0.725     , 0.625     ,
       0.65853659, 0.51219512, 0.68292683, 0.55      , 0.625     ,
       0.55      , 0.8       , 0.725     , 0.725     , 0.725     ,
       0.63414634, 0.65853659, 0.75609756, 0.475     , 0.65      ,
       0.675     , 0.65      , 0.675     , 0.625     , 0.7       ,
       0.70731707, 0.75609756, 0.56097561, 0.6       , 0.7       ,
       0.7       , 0.6       , 0.575     , 0.75      , 0.6       ,
       0.53658537, 0.53658537, 0.73170732, 0.6       , 0.8    

In [31]:
lsvm_spear_scores_1['test_specificity']

array([0.80851064, 0.7173913 , 0.80434783, 0.78723404, 0.70212766,
       0.74468085, 0.76595745, 0.78723404, 0.78723404, 0.74468085,
       0.74468085, 0.73913043, 0.7173913 , 0.74468085, 0.80851064,
       0.61702128, 0.70212766, 0.87234043, 0.80851064, 0.76595745,
       0.63829787, 0.76086957, 0.76086957, 0.70212766, 0.85106383,
       0.72340426, 0.63829787, 0.80851064, 0.82978723, 0.72340426,
       0.82978723, 0.69565217, 0.76086957, 0.78723404, 0.72340426,
       0.61702128, 0.85106383, 0.78723404, 0.80851064, 0.76595745,
       0.68085106, 0.7173913 , 0.65217391, 0.85106383, 0.80851064,
       0.78723404, 0.72340426, 0.76595745, 0.72340426, 0.85106383,
       0.82978723, 0.7173913 , 0.73913043, 0.72340426, 0.72340426,
       0.61702128, 0.78723404, 0.76595745, 0.82978723, 0.72340426,
       0.72340426, 0.69565217, 0.76086957, 0.65957447, 0.74468085,
       0.70212766, 0.74468085, 0.68085106, 0.76595745, 0.61702128,
       0.76595745, 0.63043478, 0.7826087 , 0.82978723, 0.59574

=========================================================
# Kernal SVM
=========================================================

In [32]:
from sklearn.model_selection import GridSearchCV
gs_ksvm = GridSearchCV(ksvm_model, param_grid_ksvm, cv = 5, verbose=True, n_jobs=-1)
best_ksvm_train = gs_ksvm.fit(pearson_lower, y_basc444)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  20 out of  20 | elapsed: 42.7min finished


In [33]:
best_ksvm_train.best_estimator_

SVC(C=3, probability=True)

## Without Feature Selection (RBF kernal SVM)

In [20]:
## Without Feature Selection

from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectFromModel
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from sklearn.metrics import confusion_matrix
import time    
from sklearn.model_selection import cross_validate
from sklearn.model_selection import RepeatedStratifiedKFold
    
pipe = make_pipeline( 
    best_ksvm_train.best_estimator_
)
    
t = time.time()

cv = RepeatedStratifiedKFold(n_splits = 10, n_repeats = 10, random_state = 1)

ksvm_corr_scores_0  = cross_validate(pipe, pearson_lower,  y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)
ksvm_spear_scores_0 = cross_validate(pipe, spearman_lower, y_basc444, cv = cv, scoring = scoring, return_train_score = True, verbose = True, n_jobs = -1)

elapsed = time.time() - t
print("Time = " + str(elapsed/60) + " minutes")

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 478.2min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 636.6min finished
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed: 100.4min


Time = 907.9705512682597 minutes


[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 271.4min finished


In [24]:
import numpy as np

ksvm_corr_acc_0 = np.mean(ksvm_corr_scores_0['test_accuracy'])
ksvm_corr_acc_std_0 = np.std(ksvm_corr_scores_0['test_accuracy'])
ksvm_corr_sen_0 = np.mean(ksvm_corr_scores_0['test_sensitivity'])
ksvm_corr_sen_std_0 = np.std(ksvm_corr_scores_0['test_sensitivity'])
ksvm_corr_spe_0 = np.mean(ksvm_corr_scores_0['test_specificity'])
ksvm_corr_spe_std_0 = np.std(ksvm_corr_scores_0['test_specificity'])
    
ksvm_spear_acc_0 = np.mean(ksvm_spear_scores_0['test_accuracy'])
ksvm_spear_acc_std_0 = np.std(ksvm_spear_scores_0['test_accuracy'])
ksvm_spear_sen_0 = np.mean(ksvm_spear_scores_0['test_sensitivity'])
ksvm_spear_sen_std_0 = np.std(ksvm_spear_scores_0['test_sensitivity'])
ksvm_spear_spe_0 = np.mean(ksvm_spear_scores_0['test_specificity'])
ksvm_spear_spe_std_0 = np.std(ksvm_spear_scores_0['test_specificity'])


print("Correlation:")
print("=================================")
print("corr_acc = " + str(ksvm_corr_acc_0) + " std = " + str(ksvm_corr_acc_std_0))
print("corr_sen = " + str(ksvm_corr_sen_0) + " std = " + str(ksvm_corr_sen_std_0))
print("corr_spe = " + str(ksvm_corr_spe_0) + " std = " + str(ksvm_corr_spe_std_0))

print("\nSpearman:")
print("=================================")
print("spear_acc = " + str(ksvm_spear_acc_0) + " std = " + str(ksvm_spear_acc_std_0))
print("spear_sen = " + str(ksvm_spear_sen_0) + " std = " + str(ksvm_spear_sen_std_0))
print("spear_spe = " + str(ksvm_spear_spe_0) + " std = " + str(ksvm_spear_spe_std_0))


Correlation:
corr_acc = 0.6735553814002089 std = 0.048818488462254506
corr_sen = 0.6053780487804877 std = 0.07947442148537015
corr_spe = 0.7321739130434785 std = 0.07232723203502549

Spearman:
spear_acc = 0.6710462382445143 std = 0.04654402891042204
spear_sen = 0.6026524390243901 std = 0.07611707140250153
spear_spe = 0.7298057354301575 std = 0.07037242155792159


# Summary Results

In [None]:
from IPython.display import HTML, display

test=[["","","Correlation","","Spearman",""],
      
      ["Classifier","Sensitivity","Specificity", "Accuracy",\
       "Sensitivity","Specificity", "Accuracy"],
      
      ["LR",lr_corr_acc_0,lr_corr_spe_0,lr_corr_sen_0,\
       lr_spear_acc_0,lr_spear_spe_0,lr_spear_sen_0
      ],
      
      ["lSVM",lsvm_corr_acc_0,lsvm_corr_spe_0,lsvm_corr_sen_0,\
       lsvm_spear_acc_0,lsvm_spear_spe_0,lsvm_spear_sen_0
      ],
      
      ["kSVM",ksvm_corr_acc_0,ksvm_corr_spe_0,ksvm_corr_sen_0,\
       ksvm_spear_acc_0,ksvm_spear_spe_0,ksvm_spear_sen_0
      ]
     ]

print("Test:")
print("===========================================================================================")
display(HTML(
   '<table><tr>{}</tr></table>'.format(
       '</tr><tr>'.join(
           '<td>{}</td>'.format('</td><td>'.join(str(_) for _ in row)) for row in test))
))