In [1]:
# Import libraries
import matplotlib.pyplot as plt
import sklearn.model_selection as sms
import sklearn.linear_model as slm
import sklearn.preprocessing as skp
import sklearn.feature_selection as skf
from sklearn.cluster import DBSCAN
import numpy as np
from IPython.display import HTML
import util
from scipy.spatial import cKDTree
from lassonet import LassoNetRegressor, LassoNetRegressorCV
import os
from torch.optim import SGD
from functools import partial
import torch
import torch.nn.functional as F


In [2]:
# Get case IDs
case_list = open('/home/ali/RadDBS-QSM/data/docs/cases_90','r')
lines = case_list.read()
lists = np.loadtxt(case_list.name,comments="#", delimiter=",",unpack=False,dtype=str)
case_id = []
for lines in lists:     
    case_id.append(lines[-9:-7])

# Load scores
file_dir = '/home/ali/RadDBS-QSM/data/docs/QSM anonymus- 6.22.2023-1528.csv'
motor_df = util.filter_scores(file_dir,'pre-dbs updrs','stim','CORNELL ID')
# Find cases with all required scores
subs,pre_imp,post_imp,pre_updrs_off = util.get_full_cases(motor_df,
                                                          'CORNELL ID',
                                                          'OFF (pre-dbs updrs)',
                                                          'ON (pre-dbs updrs)',
                                                          'OFF meds ON stim 6mo')
# Load extracted features
npy_dir = '/home/ali/RadDBS-QSM/data/npy/'
phi_dir = '/home/ali/RadDBS-QSM/data/phi/phi/'
roi_path = '/data/Ali/atlas/mcgill_pd_atlas/PD25-subcortical-labels.csv'
n_rois = 6
all_rois = False
Phi_all, X_all, R_all, K_all, ID_all = util.load_featstruct(phi_dir,npy_dir+'X/',npy_dir+'R/',npy_dir+'K/',n_rois,1595,all_rois)
ids = np.asarray(ID_all).astype(int)
# Find overlap between scored subjects and feature extraction cases
c_cases = np.intersect1d(np.asarray(case_id).astype(int),np.asarray(subs).astype(int))
# Complete case indices with respect to feature matrix
c_cases_idx = np.in1d(ids,c_cases)

X_all_c, K, R, subsc, pre_imp, pre_updrs_off, per_change = util.re_index(X_all,K_all,R_all,c_cases_idx,subs,ids,all_rois,pre_imp,pre_updrs_off,post_imp)

Allocated arrays
Created feature matrix
Created ROI matrix
Created feature label matrix
['Left red nucleus' 'Left substantia nigra' 'Left subthalamic nucleus'
 'Right Substantia nigra' 'Right red nucleus' 'Right subthalamic nucleus']


In [8]:
# Training parameters
scoring = 'r2'
results_bls = np.zeros_like(per_change)
results_ls = np.zeros_like(per_change)
all_rois = False
scale_together = False
epsilon = 1e-1
# Train
for j in np.arange(len(subsc)):
    test_id = subsc[j]
    test_index = subsc == test_id
    train_index = subsc != test_id
    X_train = X_all_c[train_index,:,:]
    X_test = X_all_c[test_index,:,:]
    y_train = per_change[train_index]
    y_test = per_change[test_index]
    # Cross validation
    X0_ss0,scaler_ss,X_test_ss0 = util.model_scale(skp.StandardScaler(),
                                                X_train,train_index,X_test,
                                                test_index,pre_updrs_off,all_rois,scale_together)
    with np.errstate(divide='ignore', invalid='ignore'):
      # Feature selection
      sel = skf.SelectKBest(skf.r_regression,k=1700)
      X0_ss = sel.fit_transform(X0_ss0,y_train)
      X_test_ss = sel.transform(X_test_ss0)
      y_n = cKDTree(X0_ss).query(X_test_ss, k=1)[1]
    
    # LASSO
    lasso = slm.LassoCV(max_iter=int(1e4),n_jobs=-1)
    est_ls = lasso.fit(X0_ss,y_train)
    results_ls[j] = est_ls.predict(X_test_ss).item()
  
    # LASSONet
    # Xy = np.dot(X0_ss.T,y_train)
    # if Xy.ndim == 1:
    #     Xy = Xy[:, np.newaxis]
    # alpha_max = np.sqrt(np.sum(Xy**2, axis=1)).max()/len(y_train) 
    # alphas = np.linspace(alpha_max*1e-2,alpha_max,100)
    lr = 1e-3
    lassonet = LassoNetRegressor(#CV(
    #M=10, # Approximate standard LASSO
    #batch_size=len(y_train), # Leads to gradient descent optimization
    #hidden_dims=(100,),
    #n_iters=(int(1e4),int(1e2)), # Iterations for objective function and path
    #lambda_seq=alphas, # Path multipler and initial lambda disregarded
    #optim=partial(SGD,lr=lr,momentum=0.9,weight_decay=0), # Approximate coordinate descent
    #backtrack=False, # Approximate monotonic coordinate descent path
    #verbose=1,
    torch_seed=0,
    random_state=0,
    #gamma=0, # No L2 regularization
    )

    est_lsn = lassonet.fit(X0_ss,y_train)
    #print(est_lsn.best_cv_score_)
    print(est_ls.score(X0_ss,y_train))
    results_bls[j] = est_lsn.predict(X_test_ss).item()

    # Training status
    print('Lasso predicts',str(np.round(results_ls[j],2)),
          'with regularization',str(np.round(est_ls.alpha_,5)),
          'and',str(np.sum(est_ls.coef_!=0)),'nonzero coefficients',
          'and LassoNet predicts',str(np.round(results_bls[j],2)),
          #'with regularization',str(np.round(est_lsn.best_lambda_,5)),
          #'and',str(np.sum(np.asarray(est_lsn.feature_importances_ != 0.0))),'nonzero coefficients',
          'for case',str(int(subsc[j])),'with',str(np.round(per_change[j],2)))



Features start to disappear at current_lambda=8.478.


0.9999950063229057
Lasso predicts 0.44 with regularization 0.0001 and 60 nonzero coefficients and LassoNet predicts 0.63 for case 67 with 0.48


Features start to disappear at current_lambda=8.478.


0.9999951804904189
Lasso predicts 0.54 with regularization 0.0001 and 50 nonzero coefficients and LassoNet predicts 0.62 for case 75 with 0.97


KeyboardInterrupt: 

In [None]:
util.eval_prediction(np.vstack((pre_imp,
                               results_ls,
                               results_bls,
                               )),
                               per_change,
                               ['LCT',
                                'Lasso',
                                'LassoNet'
                                ],(30,5))
plt.ylim([0,2])
plt.xlim([0,2])
plt.style.use('default')

