In [91]:
import nibabel as nib
import matplotlib.pyplot as plt
import numpy as np
from sklearn.utils import shuffle
from scipy.stats import uniform
from scipy.stats import randint

from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

from extract_data import extract_data

np.random.seed(52384)

In [68]:
# extract train and test data
x_train, y_train, x_test, y_test = extract_data('../ds000105_R2.0.2')
x_train2 = x_train.reshape(40*64*x_train.shape[2], x_train.shape[3])
x_test2 = x_test.reshape(40*64*x_train.shape[2], x_test.shape[3])
x_train2 = x_train2.T
x_test2 = x_test2.T
mean_train = np.mean(x_train2, axis=0)
std_train = np.std(x_train2, axis=0)
x_train_norm = (x_train2 - mean_train) / std_train
x_test_norm = (x_test2 - mean_train) / std_train

In [69]:
x_train_combined = np.column_stack((x_train_norm, y_train))
x_train_combined_shuffled = shuffle(x_train_combined, random_state=42)
x_train_norm_shuffled = x_train_combined_shuffled[:, :-1]
y_train_shuffled = x_train_combined_shuffled[:, -1]

x_test_combined = np.column_stack((x_test_norm, y_test))
x_test_combined_shuffled = shuffle(x_test_combined, random_state=42)
x_test_norm_shuffled = x_test_combined_shuffled[:, :-1]
y_test_shuffled = x_test_combined_shuffled[:, -1]

In [78]:
svm = SVC(random_state=0)
hyper_param = {
    'C': np.logspace(-3, 5, 9),
    'kernel': ['poly','rbf'],
    'degree': [1, 2, 3],
    'gamma': ['scale', 'auto'] + list(np.logspace(-8, 3, 12)),
}
model_cv = GridSearchCV(svm, 
                        param_grid = hyper_param,
                        # param_distributions = hyper_param,
                        # n_iter = 100,
                        scoring = 'accuracy',
                        cv = 10, 
                        n_jobs = -1)
search = model_cv.fit(x_train_norm_shuffled, y_train_shuffled)
search.best_params_

{'C': 0.001, 'degree': 3, 'gamma': 0.001, 'kernel': 'poly'}

In [83]:
print('The average BER over CV is:')
print(1-search.cv_results_['mean_test_score'][search.cv_results_['rank_test_score']==1][0])

The average BER over CV is:
0.03552631578947363


In [84]:
model = SVC(kernel=search.best_params_.get('kernel'), gamma=search.best_params_.get('gamma'), C=search.best_params_.get('C'), degree=search.best_params_.get('degree'))
model.fit(x_train_norm_shuffled, y_train_shuffled)

In [85]:
y_pred = model.predict(x_train_norm_shuffled)
print(accuracy_score(y_train_shuffled, y_pred))
print(classification_report(y_train_shuffled, y_pred))

1.0
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00        99
         1.0       1.00      1.00      1.00        99

    accuracy                           1.00       198
   macro avg       1.00      1.00      1.00       198
weighted avg       1.00      1.00      1.00       198



In [86]:
y_pred = model.predict(x_test_norm_shuffled)
print(accuracy_score(y_test_shuffled, y_pred))
print(classification_report(y_test_shuffled, y_pred))

0.3888888888888889
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         9
         1.0       0.44      0.78      0.56         9

    accuracy                           0.39        18
   macro avg       0.22      0.39      0.28        18
weighted avg       0.22      0.39      0.28        18



# Tree Method

In [92]:
rf = RandomForestClassifier(random_state=0)
hyper_param = {
    'n_estimators': randint(50, 500),
    'max_depth': [None] + list(range(5, 30, 5)),
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 20),
    'max_features': ['sqrt', 'log2'] + [x_train_norm_shuffled.shape[1]],
    'criterion': ['gini', 'entropy'],
    'bootstrap': [True, False]
}
model_cv = RandomizedSearchCV(rf, 
                        param_distributions = hyper_param,
                        n_iter = 100,
                        scoring = 'accuracy',
                        cv = 10, 
                        n_jobs = -1)
search1 = model_cv.fit(x_train_norm_shuffled, y_train_shuffled)
search1.best_params_

{'bootstrap': False,
 'criterion': 'entropy',
 'max_depth': None,
 'max_features': 'sqrt',
 'min_samples_leaf': 3,
 'min_samples_split': 18,
 'n_estimators': 331}

In [93]:
print('The average balanced accuracy of the validation sets using the best hyperparameters is:')
print(search1.cv_results_['mean_test_score'][search1.cv_results_['rank_test_score']==1][0])

The average balanced accuracy of the validation sets using the best hyperparameters is:
0.9494736842105264


In [94]:
y_pred = model.predict(x_test_norm_shuffled)
print(accuracy_score(y_test_shuffled, y_pred))
print(classification_report(y_test_shuffled, y_pred))

0.3888888888888889
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         9
         1.0       0.44      0.78      0.56         9

    accuracy                           0.39        18
   macro avg       0.22      0.39      0.28        18
weighted avg       0.22      0.39      0.28        18

