# SVM Predictor for Cognitive Score

In [1]:
import pandas as pd
# import sys
# import os
import numpy as np
from scipy.io import loadmat

# scikit-learn modules
from sklearn.model_selection import train_test_split # for splitting the data
from sklearn.ensemble import RandomForestRegressor # for building the model
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR, NuSVR
from sklearn import metrics
from sklearn.model_selection import GridSearchCV

In [2]:
ADSP_DATA = 'data/ADSP_PHC_COGN_Dec2023_FILTERED_wfiles.csv'

df_mem = pd.read_csv(ADSP_DATA)
df_exf = pd.read_csv(ADSP_DATA)
df_lan = pd.read_csv(ADSP_DATA)
df_vsp = pd.read_csv(ADSP_DATA)

df_mem = df_mem.drop(columns=['RID', 'VISCODE2', 'PHC_Diagnosis', 'PHC_EXF', 'PHC_LAN', 'PHC_VSP'])
df_exf = df_exf.drop(columns=['RID', 'VISCODE2', 'PHC_Diagnosis', 'PHC_MEM', 'PHC_LAN', 'PHC_VSP'])
df_lan = df_lan.drop(columns=['RID', 'VISCODE2', 'PHC_Diagnosis', 'PHC_EXF', 'PHC_MEM', 'PHC_VSP'])
df_vsp = df_vsp.drop(columns=['RID', 'VISCODE2', 'PHC_Diagnosis', 'PHC_EXF', 'PHC_LAN', 'PHC_MEM'])
df_vsp.head()

Unnamed: 0.1,Unnamed: 0,PHC_VSP,FC_DATA
0,10,0.963,../FMRI_ADNI_DATA/fc/sub-ADNI011S0021_ses-M144...
1,91,0.264,../FMRI_ADNI_DATA/fc/sub-ADNI027S0074_ses-M156...
2,119,-0.041,../FMRI_ADNI_DATA/fc/sub-ADNI027S0120_ses-M132...
3,132,0.264,../FMRI_ADNI_DATA/fc/sub-ADNI068S0127_ses-M138...
4,133,0.963,../FMRI_ADNI_DATA/fc/sub-ADNI068S0127_ses-M150...


## Cognitive Score Predictor (SVM)

In [3]:
# df_mem.dropna(subset=['PHC_MEM'], inplace=True)
# df_mem.shape

(1342, 3)

In [50]:
# df_exf.dropna(subset=['PHC_EXF'], inplace=True)
# df_exf.shape

(1343, 3)

In [82]:
# df_lan.dropna(subset=['PHC_LAN'], inplace=True)
# df_lan.shape

(1343, 3)

In [3]:
df_vsp.dropna(subset=['PHC_VSP'], inplace=True)
df_vsp.shape

(1343, 3)

In [4]:
train, test = train_test_split(df_vsp, test_size=0.2, random_state=42)

In [5]:
# # Get targets for training + testing each predictor
# y_train, y_test = train['PHC_MEM'], test['PHC_MEM']

In [5]:
# Get targets for training + testing each predictor
y_train, y_test = train['PHC_VSP'], test['PHC_VSP']

In [6]:
train.shape

(1074, 3)

In [7]:
def normalise_correlate_fc(fc):
    fc_emp = fc / np.max(fc)
    fc_emp = np.corrcoef(fc_emp)
    return fc_emp

In [8]:
# Get features for training and testing

# Get the FC data as numpy arrays
dim_x = len(train)
x_train = []
x_test = []

for i, file in enumerate(train['FC_DATA'].values):
    arr = loadmat(file)['ROI_activity'][:100, :] # get the first 100 regions
    fc = normalise_correlate_fc(arr)
    x_train.append(fc)

for i, file in enumerate(test['FC_DATA'].values):
    arr = loadmat(file)['ROI_activity'][:100, :] # get the first 100 regions
    fc = normalise_correlate_fc(arr)
    x_test.append(fc)

In [9]:
x_train = np.array(x_train)
x_test = np.array(x_test)

x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

In [10]:
# Feature scaling

scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [11]:
# evaluate 
def eval(model, x_test_scaled, y_test):
    y_pred = model.predict(x_test_scaled)
    r2_score = round(metrics.r2_score(y_test, y_pred),2)

    print(f'r2: {r2_score}')

In [140]:
# fit linear svm:
svr_lin = SVR(kernel = 'linear')
svr_lin.fit(x_train_scaled, y_train)

eval(svr_lin, x_test_scaled, y_test)

r2: -12.69


In [141]:
# fit rbf svm:
svr_rbf = SVR(kernel = 'rbf')
svr_rbf.fit(x_train_scaled, y_train)

eval(svr_rbf, x_test_scaled, y_test)

r2: -0.05


In [142]:
# fit poly svm:
svr_poly = SVR(kernel = 'poly', degree=3, gamma='scale')
svr_poly.fit(x_train_scaled, y_train)

eval(svr_poly, x_test_scaled, y_test)

r2: -0.08


In [55]:
# Grid search for RBF Kernel
svr_rbf = SVR(kernel = 'rbf', C=100, gamma=0.1)
svr_rbf.fit(x_train_scaled, y_train)

eval(svr_rbf, x_test_scaled, y_test)

r2: -0.06


In [12]:
# defining parameter range 
r2_scorer = metrics.make_scorer(metrics.r2_score)

param_grid = {'C': [0.1, 1, 10, 100],  
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001, 'scale'], 
              'kernel': ['rbf']}  
  
grid = GridSearchCV(SVR(), param_grid, refit = True, verbose = 3, scoring=r2_scorer) 
  
# fitting the model for grid search 
grid.fit(x_train_scaled, y_train) 

print(
    "The best parameters are %s with a score of %0.2f"
    % (grid.best_params_, grid.best_score_)
)

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV 1/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.009 total time=   8.6s
[CV 2/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.005 total time=   9.5s
[CV 3/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.001 total time=   9.5s
[CV 4/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.015 total time=   9.0s
[CV 5/5] END .......C=0.1, gamma=1, kernel=rbf;, score=-0.031 total time=   9.9s
[CV 1/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.010 total time=   9.0s
[CV 2/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.005 total time=   8.9s
[CV 3/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.002 total time=   8.3s
[CV 4/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.016 total time=   8.9s
[CV 5/5] END .....C=0.1, gamma=0.1, kernel=rbf;, score=-0.031 total time=   9.1s
[CV 1/5] END ....C=0.1, gamma=0.01, kernel=rbf;, score=-0.007 total time=   8.2s
[CV 2/5] END ....C=0.1, gamma=0.01, kernel=rbf;

In [13]:
eval(grid.best_estimator_, x_test_scaled, y_test)

r2: -0.05


In [16]:
# Search for Epsilon
svr_rbf = SVR(kernel = 'rbf', C=0.1, gamma=0.0001, epsilon=0.001)
svr_rbf.fit(x_train_scaled, y_train)

eval(svr_rbf, x_test_scaled, y_test)

r2: -0.02


In [101]:
# Try Nu
model = NuSVR(nu=0.9, C=1, kernel='rbf', gamma=0.0001)
model.fit(x_train_scaled, y_train)

eval(model, x_test_scaled, y_test)

r2: -0.02


## Try Dimensional Reduction (PCA)

In [115]:
from sklearn.decomposition import PCA

print('x_train shape: ', x_train.shape)
print('x_test shape: ', x_test.shape)

pca = PCA(n_components=200)  # Specify the number of components (desired dimensionality)
x_train_reduced = pca.fit_transform(x_train)
print('x_train shape (reduced): ', x_train_reduced.shape)

x_test_reduced = pca.fit_transform(x_test)
print('x_train shape (reduced): ', x_test_reduced.shape)

x_train shape:  (1074, 10000)
x_test shape:  (269, 10000)
x_train shape (reduced):  (1074, 200)
x_train shape (reduced):  (269, 200)


In [124]:
# fit rbf svm:
svr_rbf = SVR(kernel = 'rbf', C=0.001, gamma='scale')
svr_rbf.fit(x_train_reduced, y_train)

eval(svr_rbf, x_test_reduced, y_test)

r2: -0.01
