In [0]:
!pip install --upgrade nilearn

from nilearn.datasets import fetch_abide_pcp
# Fetch the full data and update phenotypic data and cross_validation
abide = fetch_abide_pcp(derivatives = ['rois_aal'], pipeline = 'cpac', quality_checked = False)

y = abide.phenotypic['DX_GROUP']

=====================================
Logistic Regression:
=====================================

In [2]:
measure = 'correlation'
C = [0.01, 0.1, 1]
lr_params = {'C': C}

from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.linear_model import LogisticRegression

skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 10)
lr = LogisticRegression()

y[y==2] = 0

from nilearn.connectome import ConnectivityMeasure
from nilearn.connectome import sym_matrix_to_vec

conn_est = ConnectivityMeasure(kind = measure)
conn_matrices = conn_est.fit_transform(abide.rois_aal)
X = sym_matrix_to_vec(conn_matrices)

gcv = GridSearchCV(lr, lr_params, n_jobs = -1, cv = skf, verbose = 1)
gcv.fit(X, y)
best_estimators = gcv.best_estimator_
best_scores = gcv.best_score_


print("best_estimators:",best_estimators)
print("best_scores:",best_scores)

'''
best_estimators: LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)
best_scores: 0.6618357487922706
'''

  return f(*args, **kwds)


Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:   29.0s finished


best_estimators: LogisticRegression(C=0.1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=None, solver='warn',
          tol=0.0001, verbose=0, warm_start=False)
best_scores: 0.6618357487922706


"\nbest_estimators: [LogisticRegression(C=0.01, class_weight=None, dual=False, fit_intercept=True,\n          intercept_scaling=1, max_iter=100, multi_class='warn',\n          n_jobs=None, penalty='l2', random_state=None, solver='warn',\n          tol=0.0001, verbose=0, warm_start=False)]\nbest_scores: [0.6908212560386473]\n"

In [3]:
from sklearn.model_selection import cross_val_score
import numpy as np
#best parameter for each measure
lr = LogisticRegression(C = 0.1)

#recall == sensitivity
recall = cross_val_score(lr, X, y, scoring = 'recall',cv = skf, verbose = 1)
precision = cross_val_score(lr, X, y, scoring = 'precision',cv = skf, verbose = 1)
    
cross_recall = np.mean(recall)
cross_precision = np.mean(precision)
   
print("cross_recall:",cross_recall)
print("cross_precision:",cross_precision)  

'''
cross_recall: 0.6356435643564357
cross_precision: 0.6604963294651136
'''

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   12.4s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


cross_recall: 0.6356435643564357
cross_precision: 0.6604963294651136


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   12.4s finished


'\ncross_recall: [0.6673267326732674]\ncross_precision: [0.6904341921519341]\n'

==============================================
Ridge
==============================================

In [4]:
measure = 'correlation'
alpha = [10,100,1000]
rc_params = {'alpha': alpha}

from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.linear_model import RidgeClassifier


skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 10)
rc = RidgeClassifier()

y[y==2] = 0

from nilearn.connectome import ConnectivityMeasure
from nilearn.connectome import sym_matrix_to_vec

conn_est = ConnectivityMeasure(kind = measure)
conn_matrices = conn_est.fit_transform(abide.rois_aal)
X = sym_matrix_to_vec(conn_matrices)

gcv = GridSearchCV(rc, rc_params, n_jobs = -1, cv = skf, verbose = 1)
gcv.fit(X, y)
best_estimators = gcv.best_estimator_
best_scores = gcv.best_score_


print("best_estimators:",best_estimators)
print("best_scores:",best_scores)

'''
best_estimators: RidgeClassifier(alpha=100, class_weight=None, copy_X=True, fit_intercept=True,
        max_iter=None, normalize=False, random_state=None, solver='auto',
        tol=0.001)
best_scores: 0.659903381642512
'''


Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:    9.1s finished


best_estimators: RidgeClassifier(alpha=100, class_weight=None, copy_X=True, fit_intercept=True,
        max_iter=None, normalize=False, random_state=None, solver='auto',
        tol=0.001)
best_scores: 0.659903381642512


"\nbest_estimators: [RidgeClassifier(alpha=1000, class_weight=None, copy_X=True,\n        fit_intercept=True, max_iter=None, normalize=False,\n        random_state=None, solver='auto', tol=0.001)]\nbest_scores: [0.6927536231884058]\n"

In [5]:
from sklearn.model_selection import cross_val_score
import numpy as np
#best parameter for each measure
lr = RidgeClassifier(alpha=100)

#recall == sensitivity
recall = cross_val_score(lr, X, y, scoring = 'recall',cv = skf, verbose = 1)
precision = cross_val_score(lr, X, y, scoring = 'precision',cv = skf, verbose = 1)
    
cross_recall = np.mean(recall)
cross_precision = np.mean(precision)
  
print("cross_recall:",cross_recall)
print("cross_precision:",cross_precision)

'''
cross_recall: 0.6257425742574257
cross_precision: 0.661253353553753
'''

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    2.4s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


cross_recall: 0.6257425742574257
cross_precision: 0.661253353553753


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    2.5s finished


'\ncross_recall: [0.6594059405940593]\ncross_precision: [0.6960889851629156]\n'

===========================================
linearSVC l2
===========================================

In [7]:
measure = 'correlation'
C = [0.0001,0.001,0.01]
svc_params = {'C': C}

from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.svm import LinearSVC

skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 10)
svc_l2 = LinearSVC()

y[y==2] = 0

from nilearn.connectome import ConnectivityMeasure
from nilearn.connectome import sym_matrix_to_vec

conn_est = ConnectivityMeasure(kind = measure)
conn_matrices = conn_est.fit_transform(abide.rois_aal)
X = sym_matrix_to_vec(conn_matrices)

gcv = GridSearchCV(svc_l2, svc_params, n_jobs = -1, cv = skf, verbose = 1)
gcv.fit(X, y)
best_estimators = gcv.best_estimator_
best_scores = gcv.best_score_

print("best_estimators:",best_estimators)
print("best_scores:",best_scores)

'''
best_estimators: LinearSVC(C=0.001, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)
best_scores: 0.6541062801932367
'''

Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:   14.6s finished


best_estimators: LinearSVC(C=0.001, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)
best_scores: 0.6541062801932367


"\nbest_estimators: LinearSVC(C=0.001, class_weight=None, dual=True, fit_intercept=True,\n     intercept_scaling=1, loss='squared_hinge', max_iter=1000,\n     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,\n     verbose=0)\nbest_scores: 0.6541062801932367\n"

In [8]:
from sklearn.model_selection import cross_val_score
import numpy as np
#best parameter for each measure
lr = LinearSVC(C=0.001)

#recall == sensitivity
recall = cross_val_score(lr, X, y, scoring = 'recall',cv = skf, verbose = 1)
precision = cross_val_score(lr, X, y, scoring = 'precision',cv = skf, verbose = 1)
    
cross_recall = np.mean(recall)
cross_precision = np.mean(precision)

print("cross_recall:",cross_recall)
print("cross_precision:",cross_precision)

'''
cross_recall: 0.6138613861386139
cross_precision: 0.6567528434298621
'''

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   10.0s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


cross_recall: 0.6138613861386139
cross_precision: 0.6567528434298621


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:    9.3s finished


'\ncross_recall: [0.6613861386138613]\ncross_precision: [0.6856377307555428]\n'

=====================================
svm rbf
=====================================

In [9]:
measure = 'correlation'
C = [10,100,1000]
svc_params = {'C': C}

from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.svm import SVC

skf = StratifiedKFold(n_splits = 5, shuffle = True, random_state = 10)
svc_rbf = SVC(gamma='auto')

y[y==2] = 0

from nilearn.connectome import ConnectivityMeasure
from nilearn.connectome import sym_matrix_to_vec

conn_est = ConnectivityMeasure(kind = measure)
conn_matrices = conn_est.fit_transform(abide.rois_aal)
X = sym_matrix_to_vec(conn_matrices)

gcv = GridSearchCV(svc_rbf, svc_params, n_jobs = -1, cv = skf, verbose = 1)
gcv.fit(X, y)
best_estimators = gcv.best_estimator_
best_scores = gcv.best_score_

print("best_estimators:",best_estimators)
print("best_scores:",best_scores)

'''
best_estimators: SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
best_scores: 0.6560386473429952
'''

Fitting 5 folds for each of 3 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  2.4min finished


best_estimators: SVC(C=100, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)
best_scores: 0.6560386473429952


"\nbest_estimators: SVC(C=10, cache_size=200, class_weight=None, coef0=0.0,\n  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',\n  max_iter=-1, probability=False, random_state=None, shrinking=True,\n  tol=0.001, verbose=False)\nbest_scores: 0.6811594202898551\n"

In [10]:
from sklearn.model_selection import cross_val_score
import numpy as np
#best parameter for each measure
lr = SVC(gamma='auto',C=100)

#recall == sensitivity
recall = cross_val_score(lr, X, y, scoring = 'recall',cv = skf, verbose = 1)
precision = cross_val_score(lr, X, y, scoring = 'precision',cv = skf, verbose = 1)
    
cross_recall = np.mean(recall)
cross_precision = np.mean(precision)

print("cross_recall:",cross_recall)
print("cross_precision:",cross_precision)

'''
cross_recall: 0.6396039603960396
cross_precision: 0.6510164829335571
'''

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   47.9s finished
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


cross_recall: 0.6396039603960396
cross_precision: 0.6510164829335571


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:   46.8s finished


'\ncross_recall: [0.6613861386138613]\ncross_precision: [0.6856377307555428]\n'