In [1]:
# Import libraries

# Patch scikit-learn BEFORE importing it. The Intel extension swaps the
# estimator classes inside the sklearn modules, so any name bound with
# `from sklearn... import ...` prior to patching (e.g. DBSCAN below) would
# keep pointing at the stock, unaccelerated implementation.
from sklearnex import patch_sklearn, config_context

patch_sklearn()

import matplotlib as mpl
import matplotlib.pyplot as plt
import sklearn.svm as svm
import sklearn.pipeline as spl
import sklearn.kernel_ridge as skr
import sklearn.model_selection as sms
import sklearn.linear_model as slm
import sklearn.preprocessing as skp
import sklearn.neural_network as snn
import sklearn.metrics as sme
import sklearn.decomposition as sdc
import sklearn.cross_decomposition as skd
import sklearn.feature_selection as skf
import sklearn.ensemble as ske
import sklearn.utils as sku
from sklearn.utils import resample
from sklearn.cluster import DBSCAN
import numpy as np
import scipy.stats as stats
from IPython.display import HTML
import util

Intel(R) Extension for Scikit-learn* enabled (https://github.com/intel/scikit-learn-intelex)


In [2]:
# Stylesheet overriding the matplotlib widget background, the widget/header
# label colour and the button colours.
widget_css = '''
<style>
.jupyter-matplotlib {
    background-color: #000;
}

.widget-label, .jupyter-matplotlib-header{
    color: #fff;
}

.jupyter-button {
    background-color: #333;
    color: #fff;
}
</style>
'''
# Keep the HTML object as the cell's final expression so it is rendered.
HTML(widget_css)

In [3]:
# Maybe y should also be scaled? 
#     Performed poorly using Standard and MinMax scalers. Trying with LOOCV to see if predictions stabilize.
#     Does not appear to stabilize predictions with LOOCV (using StandardScaler())
# Perhaps a transform would be more effective, or scaling implemented with consistent cross-validation
# Different scaling methods? 
#     This seems most important for noise-sensitive models like LARS. All others use StandardScaler()
# Transformers?
# Model-specific scaling methods?
#     Yes, see above
# Common cross-validation function?
#     Use built-in functions wherever possible and `util.gridsearch_pickparams()` elsewhere
# Quantile loss
# RANSAC
# Data augmentation? (Mixup)
# Data generation? (SMOGN)

In [4]:
# Get case IDs
# The path is handed straight to np.loadtxt so that no file handle is left
# open (previously the file was opened, read into an unused variable and
# never closed).
case_list_path = '/home/ali/RadDBS-QSM/data/docs/cases_90'
lists = np.loadtxt(case_list_path,comments="#", delimiter=",",unpack=False,dtype=str)
# Characters [-9:-7] of every entry are used as the case ID (cast to int below)
case_id = [entry[-9:-7] for entry in lists]

# Load scores
file_dir = '/home/ali/RadDBS-QSM/data/docs/QSM anonymus- 6.22.2023-1528.csv'
motor_df = util.filter_scores(file_dir,'pre-dbs updrs','stim','CORNELL ID')
# Find cases with all required scores
subs_init,pre_imp_init,post_imp_init,pre_updrs_off_init = util.get_full_cases(motor_df,
                                                          'CORNELL ID',
                                                          'OFF (pre-dbs updrs)',
                                                          'ON (pre-dbs updrs)',
                                                          'OFF meds ON stim 6mo')
# Load extracted features
npy_dir = '/home/ali/RadDBS-QSM/data/npy/slices/'
phi_dir = '/home/ali/RadDBS-QSM/data/phi/slices/'
roi_path = '/data/Ali/atlas/mcgill_pd_atlas/PD25-subcortical-labels.csv'
n_rois = 6
Phi_all, X_all, R_all, K_all, ID_all = util.load_featstruct(phi_dir,npy_dir+'X/',npy_dir+'R/',npy_dir+'K/',n_rois,939,True)
ids = np.asarray(ID_all).astype(int)
# Find overlap between scored subjects and feature extraction cases
c_cases = np.intersect1d(np.asarray(case_id).astype(int),np.asarray(subs_init).astype(int))
# Complete case indices with respect to feature matrix
c_cases_idx = np.in1d(ids,c_cases)
X_all_c = X_all[c_cases_idx,:,:]
# K_all_c = K_all[c_cases_idx,:,:]
# R_all_c = R_all[c_cases_idx,:,:]
# Re-index the scored subjects with respect to complete cases
s_cases_idx = np.in1d(subs_init,ids[c_cases_idx])
subs_init = subs_init[s_cases_idx]
pre_imp_init = pre_imp_init[s_cases_idx]
post_imp_init = post_imp_init[s_cases_idx]
pre_updrs_off_init = pre_updrs_off_init[s_cases_idx]
# The regression target is the post-implant score itself (same array object)
per_change_init = post_imp_init
# IDs of the feature rows that belong to a scored subject
subs = np.asarray(ID_all,dtype=float)[np.in1d(np.asarray(ID_all,dtype=float),subs_init)]
subsc = subs
# Column vectors with one row per entry of `subs`
pre_imp = np.zeros((1,len(subs))).T
post_imp = np.zeros((1,len(subs))).T
pre_updrs_off = np.zeros((1,len(subs))).T
per_change = np.zeros((1,len(subs))).T
# Copy each subject's scores onto every row carrying that subject's ID
for j in np.arange(len(subs)):
    pre_imp[j] = pre_imp_init[subs_init == subs[j]]
    post_imp[j] = post_imp_init[subs_init == subs[j]]
    pre_updrs_off[j] = pre_updrs_off_init[subs_init == subs[j]]
    per_change[j] = per_change_init[subs_init == subs[j]]

# Prediction buffers shaped like the target, filled in by later cells
results_ls = np.zeros_like(per_change)
results_en = np.zeros_like(per_change)
results_mlp = np.zeros_like(per_change)

Appended 1000 slices
Appended 2000 slices
Appended 3000 slices
Appended 4000 slices
Appended 5000 slices
Appended 6000 slices
Allocated arrays


In [9]:
# Leave-one-case-out evaluation of LassoCV and ElasticNetCV.
# NOTE(review): rows sharing an ID yield the same train/test split, so both
# models are refit once per row of a case; iterating over unique IDs would
# avoid the repeated fits - confirm the fits are deterministic before changing.
n_rows = len(subsc)
for j in np.arange(n_rows):
    # Split the data: every row carrying the current ID is held out together
    test_id = subsc[j]
    test_index = subsc == test_id
    train_index = subsc != test_id
    X_train = X_all_c[train_index,:,:]
    X_test = X_all_c[test_index,:,:]
    y_train = per_change[train_index]
    y_test = per_change[test_index]

    # Scale the features with StandardScaler through the project helper
    X0_ss0,scaler_ss,X_test_ss0 = util.model_scale(skp.StandardScaler(),
                                                X_train,train_index,X_test,test_index,pre_updrs_off.ravel())

    # Feature selection: fit on the training rows only, then apply the same
    # selection to the held-out rows (flattened to 2-D first).
    # errstate silences divide/invalid floating-point warnings inside this block.
    with np.errstate(divide='ignore', invalid='ignore'):
        sel = skf.SelectKBest(skf.r_regression,k=2000)
        X0_ss = sel.fit_transform(X0_ss0,y_train.ravel())
        X_test_ss = sel.transform(X_test_ss0.reshape([X_test_ss0.shape[0],
                                                X_test_ss0.shape[1]*X_test_ss0.shape[2]]))
    
    # Nx = np.random.normal(0,np.var(y_train),X0_ss.shape)
    # Ny = np.random.normal(0,np.var(y_train),y_train.shape)
    # LASSO
    # max_iter must be an int: recent scikit-learn validates it and rejects 1e4
    lasso = slm.LassoCV(max_iter=10000,n_jobs=-1,verbose=False) 
    est_ls = lasso.fit(X0_ss,y_train.ravel())
    # One prediction per case: the mean over all of its held-out rows
    results_ls[j] = np.mean(est_ls.predict(X_test_ss))

    # ElasticNet
    en = slm.ElasticNetCV(max_iter=10000,n_jobs=-1,verbose=False) 
    est_en = en.fit(X0_ss,y_train.ravel())
    results_en[j] = np.mean(est_en.predict(X_test_ss))

    # Output results once per case: on the very last row, or when the next
    # row belongs to a different case. The explicit last-row check avoids
    # indexing subsc[j+1] past the end of the array.
    is_last_row = j == n_rows - 1
    if is_last_row or test_id != subsc[j+1]:
        print('Lasso predicts',str(results_ls[j]),'with regularization',
            str(est_ls.alpha_),'for case',str(test_id),'with actual improvement',str(per_change[j]))



Lasso predicts [0.72594711] with regularization 0.006823313583042418 for case 1.0 with actual improvement [0.83076923]
Lasso predicts [0.52012964] with regularization 0.010003793771010532 for case 2.0 with actual improvement [0.90909091]
Lasso predicts [0.64509633] with regularization 0.004742222809976233 for case 3.0 with actual improvement [0.55555556]
Lasso predicts [0.71107172] with regularization 0.006553959885338278 for case 6.0 with actual improvement [0.95238095]
Lasso predicts [0.71451823] with regularization 0.006722728380966948 for case 9.0 with actual improvement [0.88888889]
Lasso predicts [0.55999933] with regularization 0.006257328043364847 for case 10.0 with actual improvement [0.59259259]
Lasso predicts [0.69572006] with regularization 0.00620317436730984 for case 11.0 with actual improvement [0.73684211]
Lasso predicts [0.63133886] with regularization 0.005033319874095925 for case 12.0 with actual improvement [0.5]
Lasso predicts [0.68303593] with regularization 0.005

In [None]:
# Stack the three prediction vectors in the same order as their labels
stacked_predictions = np.vstack((pre_imp, results_en, results_ls))
series_labels = ['LCT', 'ElasticNet', 'Lasso']
figure_size = (30, 5)

# Evaluate every series against the observed target via the project helper
util.eval_prediction(stacked_predictions, per_change, series_labels, figure_size)

# Clamp both axes to the same [0, 2] window
plt.ylim([0, 2])
plt.xlim([0, 2])