In [None]:
# Preliminary data notebook for
# NIH: Imaging Guided Intervention Surgery Study Section

# Exploratory aim: evaluate presurgical scans between STN and GPi targets
#   Given retrospective GPi acquisitions?
#   Search for radiomic differentiators for STN versus GPi selection in presurgical scans

In [None]:
# Import libraries
from scipy.cluster.hierarchy import dendrogram, linkage
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import nibabel as nib
from sklearn.svm import SVR
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Lasso
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import ElasticNetCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.feature_selection import SelectFromModel
from sklearn.metrics import r2_score
import SimpleITK as sitk
import six
from radiomics import featureextractor 
import numpy as np
import os
import pickle
import pandas as pd
import logging
from scipy.stats import linregress
from sklearn.linear_model import QuantileRegressor
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import r_regression
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import RANSACRegressor
import smogn
import pandas
from collections import Counter

In [None]:
# Notebook-wide matplotlib defaults: dark theme and a very wide default figure
# size (45 x 5); individual plotting cells override the figure size as needed.
plt.style.use('dark_background')
plt.rcParams["figure.figsize"] = (45,5)
def remove_keymap_conflicts(new_keys_set):
    """Unbind the given keys from every matplotlib ``keymap.*`` shortcut.

    Each ``keymap.*`` entry of ``plt.rcParams`` is edited in place so that none
    of the keys in ``new_keys_set`` remains bound to a built-in action.

    Parameters
    ----------
    new_keys_set : set
        Key names (e.g. ``{'j', 'k'}``) that should be freed.
    """
    keymap_props = [name for name in plt.rcParams if name.startswith('keymap.')]
    for name in keymap_props:
        bound_keys = plt.rcParams[name]
        # Work from a separate set so the list can be edited while we walk it
        for clash in set(bound_keys) & new_keys_set:
            bound_keys.remove(clash)

def multi_slice_viewer(volume):
    """Open a figure showing the middle slice of ``volume`` with j/k navigation.

    The volume and the current slice index are stored as attributes on the
    axes so that the key-press handler can step through slices along axis 0.

    Parameters
    ----------
    volume : array-like
        Image stack indexed along its first axis.
    """
    # Free 'j' and 'k' from matplotlib's default shortcuts before using them
    remove_keymap_conflicts({'j', 'k'})
    fig, ax = plt.subplots()
    middle = volume.shape[0]//2
    ax.volume = volume
    ax.index = middle
    ax.imshow(volume[middle])
    fig.canvas.mpl_connect('key_press_event', process_key)

def process_key(event):
    """Key-press callback: 'j' shows the previous slice, 'k' the next one.

    The first axes of the figure that received the event is updated, and the
    canvas is redrawn after every key press.
    """
    figure = event.canvas.figure
    axis = figure.axes[0]
    pressed = event.key
    if pressed == 'j':
        previous_slice(axis)
    elif pressed == 'k':
        next_slice(axis)
    figure.canvas.draw()
    

def previous_slice(ax):
    """Step the viewer one slice back, wrapping from the first slice to the last."""
    n_slices = ax.volume.shape[0]
    ax.index = (ax.index - 1) % n_slices
    ax.images[0].set_array(ax.volume[ax.index])

def next_slice(ax):
    """Step the viewer one slice forward, wrapping from the last slice to the first."""
    n_slices = ax.volume.shape[0]
    ax.index = (ax.index + 1) % n_slices
    ax.images[0].set_array(ax.volume[ax.index])

In [None]:
# Set window level: the viewer cell clips QSM intensities to [m1, m2]
level = 0
window = 500
m1=level-window/2
m2=level+window/2
# Flags: `visualize` gates the slice-viewer cell; `reload` = 1 re-reads the NIfTI
# volumes and refreshes the pickle cache, any other value loads the cached lists
visualize = 1
reload = 0
# Load data
nrows = 256
ncols = 256
nslices = 160
segs = []
qsms = []
laros = []
voxel_sizes = []
trackers = []
q_directory = '/media/mts_dbs/dbs/complete_cases/nii/qsm/'
seg_directory = '/media/mts_dbs/dbs/complete_cases/nii/seg/'
# NOTE: s_directory holds the sorted list of segmentation file names (not a
# path); later cells iterate over it to recover the case IDs
s_directory = sorted(os.listdir(seg_directory))

case_list = []
d_count = 0
if reload == 1:
    for seg_filename in s_directory:
        # Characters 12-13 of the segmentation file name are used as the case ID
        case_id = seg_filename[12:14]
        qsm_filename = 'qsm_'+str(case_id)+'.nii.gz'
        seg = nib.load(seg_directory+seg_filename)
        # pixdim[0] is the NIfTI qfac field, not a voxel dimension; get_zooms()
        # returns the spatial voxel sizes (pixdim[1:4])
        voxel_size = np.asarray(seg.header.get_zooms()[:3])
        voxel_sizes.append(voxel_size)
        segs.append(seg.get_fdata())
        qsm = nib.load(q_directory+qsm_filename)
        qsms.append(qsm.get_fdata())
        print('Appending arrays with segmentation',seg_filename,'and QSM',qsm_filename)
        case_list.append(qsm_filename)
        d_count = d_count+1

    n_cases = len(segs)
    qsms_wl = np.asarray(qsms)
    segs_wl = np.asarray(segs)
    # Write the cache once, after every volume is loaded (rewriting it on each
    # iteration repeats the full dump for every case). Skip the write when
    # nothing was found so an existing cache is not replaced by empty lists.
    if segs:
        os.makedirs('./pickles', exist_ok=True)
        with open('./pickles/segs', 'wb') as fp:
            pickle.dump(segs, fp)

        with open('./pickles/qsms', 'wb') as fp:
            pickle.dump(qsms, fp)

else:
    # SECURITY NOTE: pickle.load can execute arbitrary code; only load cache
    # files that were written by this notebook.
    with open('./pickles/segs', "rb") as fp:
        segs = pickle.load(fp)

    with open('./pickles/qsms', "rb") as fp:
        qsms = pickle.load(fp)

In [None]:
# NOTE(review): the viewer runs when `visualize == 0`, which reads as inverted
# relative to the flag name - confirm the intended polarity before changing it.
if visualize == 0:
    # Clip QSM intensities to the display window [m1, m2]
    qsms_wl = np.clip(np.asarray(qsms), m1, m2)
    segs_wl = np.asarray(segs)
    n_cases = len(segs)
    half = n_cases//2
    # Tile the first and last `half` cases side by side; the segmentation term
    # is multiplied by zero, so it adds nothing for finite label values
    first_half = np.vstack(qsms_wl[:half,:,:,:]/1000+0*segs_wl[:half,:,:,:]).T
    second_half = np.vstack(qsms_wl[(n_cases-half):,:,:,:]/1000+0*segs_wl[(n_cases-half):,:,:,:]).T
    multi_slice_viewer(np.hstack((first_half, second_half)))

    # Use the last cached segmentation array: the `seg` image object only exists
    # when the volumes were re-read from disk, so referencing it here raised a
    # NameError whenever the pickle cache was used instead.
    last_seg = np.asarray(segs[-1])
    # Second-smallest distinct label value
    label_min = np.partition(np.unique(last_seg.ravel()), 1)[1]
    label_max = np.amax(last_seg)

In [None]:
file_dir = '/data/Ali/RadDBS-QSM/src/csv'
# Load patient data
# NOTE: os.chdir changes the working directory for the rest of the session, so
# every relative path used after this cell resolves against file_dir.
os.chdir(file_dir)
df = pd.read_csv('QSM anonymus- 6.22.2023-1528.csv')

In [None]:
# Make a copy
dfd = df.copy()
# Drop blank columns. The names are collected first and dropped in one call, so
# the frame is not modified while it is being iterated. DataFrame.items() is
# used because DataFrame.iteritems() was removed in pandas 2.0.
blank_columns = [name for name, column in dfd.items() if column.isnull().all()]
for name in blank_columns:
    print('Dropping NaN column at',name)
dfd = dfd.drop(columns=blank_columns)
# Add relevant column names from headers: the first data row holds the
# sub-header text, which is appended to each column label as '<label>: <text>'
dfd = dfd.rename(columns={name: name+': '+column.values[0] for name, column in dfd.items()})

def drop_prefix(self, prefix):
    """Remove a literal leading ``prefix`` from the column labels, in place.

    Only labels that actually start with ``prefix`` are shortened, and only the
    prefix itself is removed. (``str.lstrip(prefix)`` treats its argument as a
    set of characters and would also eat any leading characters of unrelated
    labels that happen to be in that set, e.g. turning ``'name'`` into ``''``.)

    Parameters
    ----------
    self : pandas.DataFrame
        Frame whose ``columns`` are rewritten.
    prefix : str
        Exact text to remove from the start of each label.

    Returns
    -------
    pandas.DataFrame
        The same frame, to allow chaining.
    """
    self.columns = pd.Index(
        [label[len(prefix):] if isinstance(label, str) and label.startswith(prefix) else label
         for label in self.columns],
        name=self.columns.name,
    )
    return self

# Expose drop_prefix as a DataFrame method so it can be called as dfd.drop_prefix(...)
pd.core.frame.DataFrame.drop_prefix = drop_prefix

dfd.drop_prefix('Unnamed:')
# Labels whose second character is a digit still carry a leading column-index
# stub; drop their first four characters. The mapping is built up front so the
# frame is not renamed while being iterated, and labels shorter than two
# characters are skipped instead of raising IndexError.
dfd = dfd.rename(columns={name: name[4:] for name in dfd.columns
                          if len(name) > 1 and name[1].isdigit()})

# Make a copy for motor symptoms, keeping only the motor (III) columns: those
# whose label mentions the pre-DBS UPDRS, stimulation, or the patient ID
motor_keywords = ('pre-dbs updrs', 'stim', 'CORNELL ID')
keep_column = [any(keyword in name for keyword in motor_keywords) for name in dfd.columns]
motor_df = dfd.loc[:, keep_column].copy()

# Drop subheader
motor_df = motor_df.tail(-1)
# 'na' entries mark missing scores
motor_df = motor_df.replace('na',np.nan)
motor_df

In [None]:
# Display the sorted list of segmentation file names
s_directory

In [None]:
# Case IDs: characters 12-13 of each segmentation file name, in directory order
# (the name `id` shadows the builtin but is read by later cells, so it is kept)
id = [seg_filename[12:14] for seg_filename in s_directory]

In [None]:
# Display the case IDs sliced from the segmentation file names
id

In [None]:
# NOTE(review): `subs` is first assigned in a later cell, so on a fresh kernel
# (Restart & Run All) this cell raises NameError - consider moving or removing it.
subs

In [None]:
# UPDRS-III motor scores as float arrays, one entry per row of motor_df
df_post_dbs_off_meds_on_stim = motor_df['OFF meds ON stim 6mo'].to_numpy().astype('float')
df_post_dbs_off_meds_off_stim = motor_df[' off stim off med 6mo'].to_numpy().astype('float')
df_pre_dbs_off_meds = motor_df['OFF (pre-dbs updrs)'].to_numpy().astype('float')
df_pre_dbs_on_meds = motor_df['ON (pre-dbs updrs)'].to_numpy().astype('float')

# Keep rows where all three scores used below are present (a NaN in any term
# makes the sum NaN)
cases = ~np.isnan(df_pre_dbs_off_meds+df_pre_dbs_on_meds+df_post_dbs_off_meds_on_stim)
# Fractional improvement relative to the pre-DBS OFF-medication score
pre_dbs_meds_improvement = (df_pre_dbs_off_meds[cases]-df_pre_dbs_on_meds[cases])/df_pre_dbs_off_meds[cases]
dbs_off_meds_improvement = (df_pre_dbs_off_meds[cases]-df_post_dbs_off_meds_on_stim[cases])/df_pre_dbs_off_meds[cases]
# Assign the cleaned column back explicitly: an inplace replace on the selected
# column (motor_df[col].replace(..., inplace=True)) may act on a temporary copy
# and leave motor_df unchanged, after which the float conversion below fails on
# the placeholder text.
motor_df['CORNELL ID'] = motor_df['CORNELL ID'].replace('only Ct data ', np.nan)
pids = motor_df['CORNELL ID'].to_numpy().astype('float')
subs = pids[cases]
# IDs that have both complete scores and a segmentation file (sorted, unique)
subs_in = (np.intersect1d(subs,np.asarray(id).astype(float)))


In [None]:
# Pre-DBS OFF-medication scores restricted to the rows selected by `cases`
df_pre_dbs_off_meds_in = df_pre_dbs_off_meds[cases]

In [None]:
# Display the patient IDs as floats (one per motor_df row)
pids

In [None]:
# Display the pre-DBS OFF-medication scores for every motor_df row
df_pre_dbs_off_meds

In [None]:
# Keep the scores of subjects that also have a segmentation file. np.isin
# replaces np.in1d, which is deprecated in NumPy 2.0; for 1-D input the result
# is the same boolean mask.
# NOTE(review): the mask preserves the row order of `subs`, whereas `subs_in`
# (from np.intersect1d) is sorted - confirm both orderings agree before pairing
# them by position.
pre_dbs_off_meds_in = df_pre_dbs_off_meds_in[np.isin(subs,np.asarray(id).astype(float))]

In [None]:
# Display the IDs found both among the complete clinical cases and the segmentation files
subs_in

In [None]:
# Display the pre-DBS OFF-medication scores of the subjects that have imaging
pre_dbs_off_meds_in

In [None]:
# Display the segmentation file names again (repeats an earlier cell)
s_directory

In [None]:
# Display the selected pre-DBS OFF-medication scores again (repeats the cell above)
pre_dbs_off_meds_in

In [None]:
# Plot the data and fit like Zaidel et al. Figure 3C:
# stimulation improvement (x) against medication improvement (y) with a linear fit
plt.rcParams["figure.figsize"] = (7,7)
lr_rho_med = linregress(dbs_off_meds_improvement,pre_dbs_meds_improvement)
plt.scatter(dbs_off_meds_improvement,pre_dbs_meds_improvement)
plt.plot(dbs_off_meds_improvement,dbs_off_meds_improvement*lr_rho_med.slope+lr_rho_med.intercept,'-r')
# '\\;' is the mathtext thin-space command; the backslash is escaped because
# '\;' is an invalid escape sequence in a non-raw string (SyntaxWarning on
# Python 3.12+). The rendered text is unchanged.
text = f"$y={lr_rho_med.slope:0.3f}\\;x{lr_rho_med.intercept:+0.3f}$\n$r = {lr_rho_med.rvalue:0.3f}$\n$p = {lr_rho_med.pvalue:0.3f}$"
plt.gca().text(0.05, 0.95, text,transform=plt.gca().transAxes,
     fontsize=14, verticalalignment='top')
plt.xlabel(r'$\rho_{stim}$')
plt.ylabel(r'$\rho_{med}$')
plt.ylim([0,1.25])
plt.xlim([0,1.25])
plt.title('UPDRS-III improvement')
plt.style.use('dark_background')
# Call show(): the bare attribute reference `plt.show;` never invoked it
plt.show()

In [None]:
# Silence everything below ERROR from the radiomics logger
logger = logging.getLogger("radiomics")
logger.setLevel(logging.ERROR)
# 1 = run the extraction and write the caches; otherwise load the cached arrays
reextract = 1
# Assume all voxel sizes are identical
voxel_size = (0.5,0.5,0.5)
fv_count = 0
if reextract == 1:
    # Generate feature structure Phi from all ROIs and all cases
    extractor = featureextractor.RadiomicsFeatureExtractor()
    extractor.enableAllFeatures()
    extractor.enableAllImageTypes()
    extractor.enableFeatureClassByName('shape2D',enabled = False)

    seg_labels_all = [0,1,2,3,4,5,6,7]
    Phi_gt = []
    seg_labels = []
    reextract = 0
    x_row_gt = []

    keylib = []
    roilib = []
    loop_count = 1
    n_rois = 6
    roi_names = []
    roi_txt = pd.read_csv("/data/Ali/atlas/mcgill_pd_atlas/PD25-subcortical-labels.csv")
    roi_df = roi_txt.astype(str)
    # NOTE(review): segs[i]/qsms[i] are paired with subs_in[i] and
    # pre_dbs_off_meds_in[i] purely by position - this assumes the cached
    # volumes, the sorted IDs and the score array share one ordering and
    # length; confirm before relying on the labels.
    for i in range(len(subs_in)):
        seg_sitk = sitk.GetImageFromArray(segs[i])
        seg_sitk.SetSpacing(voxel_size)
        qsm_sitk_gt = sitk.GetImageFromArray(qsms[i])
        qsm_sitk_gt.SetSpacing(voxel_size)
        for j in seg_labels_all:
            # Only labels 1..6 are extracted
            if not 0 < j < 7:
                continue
            fv_count = 0
            featureVector_gt = extractor.execute(qsm_sitk_gt,seg_sitk,label=int(j))
            Phi_gt.append(featureVector_gt)
            # Atlas name for this label, looked up once per ROI instead of once
            # per feature: the first atlas row in which any column contains the
            # label text (np.vstack replaces the deprecated alias np.row_stack)
            mask = np.vstack([roi_df[col].str.contains(str(int(j)), na = False) for col in roi_df])
            roi_name = np.asarray(roi_df.iloc[mask.any(axis=0),1])[0]
            for key, value in featureVector_gt.items():
                # Skip the extractor's diagnostic entries; keep feature values only
                if 'diagnostic' in key:
                    continue
                x_row_gt.append(value)
                fv_count = fv_count+1
                keylib.append(key)
                roilib.append(j)
                roi_names.append(roi_name)
            # The subject's pre-DBS OFF-medication score closes every ROI vector
            x_row_gt.append(pre_dbs_off_meds_in[i])
            fv_count = fv_count+1
            # Report the score that was actually appended (the message used to
            # index the unfiltered df_pre_dbs_off_meds array by i)
            print('Extracting features for subject',subs_in[i],'ROI',j,'and appending feature matrix with vector of length',fv_count,'with UPDRS score',pre_dbs_off_meds_in[i])

    X0_gt = np.array(x_row_gt)
    np.save('./npy/X0_gt_msw_rois.npy',X0_gt)
    K = np.asarray(keylib)
    R = np.asarray(roi_names)
    np.save('./npy/K_msw.npy',K)
    # NOTE(review): R is not written here although the cached branch below
    # loads './npy/R_msw.npy' - that file must come from elsewhere; confirm.
    print('Saving ground truth feature vector')
    with open('./phi/Phi_mcl_gt_roi_msw', 'wb') as fp:
        pickle.dump(Phi_gt, fp)

else:
    X0_gt = np.load('./npy/X0_gt_msw_rois.npy')
    K = np.load('./npy/K_msw.npy')
    R = np.load('./npy/R_msw.npy')
    # NOTE(review): n_rois is fixed at 6 in the extraction branch but derived
    # from the length of R here - confirm the two agree.
    n_rois = R.shape[0]-1
    # SECURITY NOTE: pickle.load can execute arbitrary code; only load files
    # written by this notebook.
    with open('./phi/Phi_mcl_gt_roi_msw', "rb") as fp:
        Phi_gt = pickle.load(fp)


    

In [None]:
# Row indices of the included cases. The previous expression indexed `pids` at
# len(cases), which is one past the end of the array (both have one entry per
# motor_df row) and therefore always raised IndexError.
np.arange(len(pids[cases]))

In [None]:
# Number of loaded segmentation volumes
len(segs)

In [None]:
# Derive the feature-matrix dimensions from the flat feature vector
# NOTE(review): n_cases is the length of the boolean `cases` mask (all rows),
# not the number of True entries - confirm this matches the extracted subjects.
n_cases = len(cases)
# ROIs per case implied by the number of extracted feature vectors (sanity check)
n_roisc = len(Phi_gt)/n_cases
# Values stored per case, and per ROI within a case
L = int(len(X0_gt)/n_cases)
n_features = int(L/n_rois)

In [None]:
# Arrange the flat feature vector as (case, feature, roi). The earlier
# zero-allocated X was immediately overwritten and has been removed.
x_row_gt = X0_gt.tolist()
X = X0_gt.reshape((n_cases,n_rois,n_features)).transpose((0,2,1))
# Per-case prediction buffers filled by the cross-validation cell
ut_ls = np.zeros(len(cases))
ut_qr = np.zeros(len(cases))

# Normalize testing and training cases together
#   Set with_mean=False to preserve data sparsity
#   And with_std=False
#   However, need a significant number of samples to do this
# Columns of X_all are ordered feature-major: column = feature*n_rois + roi
X_all = X.reshape(n_cases,((n_features)*n_rois))
scaler = StandardScaler()
# Standardize the feature matrix. A UPDRS column used to be concatenated into
# X_all_t just before this line, but that result was overwritten here without
# being used (each ROI vector in X_all already ends with the UPDRS score), so
# the dead concatenation has been dropped.
X_all_t = scaler.fit_transform(X_all)

In [None]:
# Build the SMOGN input frame: standardized features with the DBS improvement
# as the last column; SMOGN is given string column labels
labels_column = dbs_off_meds_improvement.reshape(len(dbs_off_meds_improvement),1)
D = pd.DataFrame(np.hstack((X_all_t,labels_column)))
D.columns = [str(col) for col in D.columns]

# Specify phi relevance values as control points [value, relevance, slope]
Rm = [
    [np.min(dbs_off_meds_improvement), 1, 0],     ## over-sample ("minority")
    [np.median(dbs_off_meds_improvement), 0, 0],  ## under-sample ("majority")
]

# Conduct SMOGN
print('Prior to SMOGN sampling, mean is',X_all_t.mean(),'standard deviation is',X_all_t.std())
X_smogn = smogn.smoter(data = D, y = str(D.columns[-1]),rel_method = 'manual',rel_ctrl_pts_rg = Rm)

# Split the resampled frame into features (all but last column) and labels
smogn_values = np.array(X_smogn)
smogn_labels = smogn_values[:,-1]
X_in_s = smogn_values[:,:-1]
print('After SMOGN sampling, mean is',X_in_s.mean(),'standard deviation is',X_in_s.std())
X_in_s = scaler.fit_transform(X_in_s)
print('Standardizing the SMOGN dataset gives, mean',X_in_s.mean(),'standard deviation',X_in_s.std())

# Leakage check: report any feature column identical to the label column
for j in np.arange(X_in_s.shape[1]):
    if np.array_equal(X_in_s[:,j],smogn_labels):
        print('Labels detected at column',j)

In [None]:
# Per-fold LASSO coefficients: Cs for the SMOGN-trained model, C for the model
# trained on the original cases
Cs = np.zeros_like(X_in_s)
C = np.zeros_like(X_all_t)
# Convert the SMOGN frame once instead of on every fold
smogn_values = np.asarray(X_smogn)
smogn_features = smogn_values[:,:-1]   # drop the label column
smogn_labels = smogn_values[:,-1]
for j in np.arange(len(cases)):
    # Training features: every original case except the held-out row j
    X_in = np.delete(X_all_t,j,axis=0)
    if j < smogn_features.shape[0]:
        # NOTE(review): row j of the SMOGN set is removed as the "test case";
        # whether SMOGN rows correspond one-to-one with the original cases is
        # not established here - confirm.
        X_in_s = np.delete(smogn_features,j,axis=0)
        smogn_per_change_in = np.delete(smogn_labels,j,axis=0)
        # Train LASSO on SMOGN and record the features it uses
        clf_s = Lasso(alpha=1e-4,max_iter=10000).fit(X_in_s,smogn_per_change_in)
        Cs[j] = clf_s.coef_

    # Training labels without the held-out case
    per_change_in = np.delete(dbs_off_meds_improvement,j,axis=0)

    # Identify most important features
    clf_ls = Lasso(alpha=1e-4,max_iter=10000).fit(X_in,per_change_in)
    print('Fit complete')
    held_out = X_all_t[j,:].reshape(1, -1)
    # predict() returns a length-1 array; take element 0 explicitly, because
    # assigning an array with ndim > 0 to a scalar slot is deprecated in NumPy
    ut_ls[j] = clf_ls.predict(held_out)[0]
    ut_qr[j] = clf_s.predict(held_out)[0]
    C[j] = clf_ls.coef_

    # The printed values are the DBS and medication improvement fractions (the
    # message used to label them as patient ID and UPDRS score)
    print('Feature matrix row',str(j),'with DBS improvement',str(dbs_off_meds_improvement[j]),'and pre-surgical medication improvement',str(pre_dbs_meds_improvement[int(j)]))

In [None]:
%matplotlib inline
plt.rcParams["figure.figsize"] = (25,5)
# Cross validation results
[fig,ax] = plt.subplots(1,3,sharex=True, sharey=True)
lr_prepost = linregress(pre_dbs_meds_improvement,dbs_off_meds_improvement)
ax[0].scatter(pre_dbs_meds_improvement,dbs_off_meds_improvement,)
ax[0].plot(pre_dbs_meds_improvement,pre_dbs_meds_improvement*lr_prepost.slope+lr_prepost.intercept,'-r')
ax[0].set_title('LCT')
ax[0].set_ylabel("DBS improvement")
ax[0].set_xlabel("Prediction")
ax[0].set_ylim([0, 2])
text = f"$y={lr_prepost.slope:0.3f}\;x{lr_prepost.intercept:+0.3f}$\n$r = {lr_prepost.rvalue:0.3f}$\n$p = {lr_prepost.pvalue:0.3f}$"
ax[0].text(0.05, 0.95, text,transform=ax[0].transAxes,
     fontsize=14, verticalalignment='top')
ax[0].hlines(0.4,0,1,linestyle='dashed',color='white')
ax[0].vlines(0.4,0,2,linestyle='dashed',color='white')

lr_pred_ls = linregress(ut_ls,dbs_off_meds_improvement)
ax[1].scatter(ut_ls,dbs_off_meds_improvement)
ax[1].plot(ut_ls,ut_ls*lr_pred_ls.slope+lr_pred_ls.intercept,'-r')
ax[1].set_title('Fully-sampled LASSO')
ax[1].set_ylabel("DBS improvement")
ax[1].set_xlabel("Prediction")
text = f"$y={lr_pred_ls.slope:0.3f}\;x{lr_pred_ls.intercept:+0.3f}$\n$r = {lr_pred_ls.rvalue:0.3f}$\n$p = {lr_pred_ls.pvalue:0.3f}$"
ax[1].text(0.05, 0.95, text,transform=ax[1].transAxes,
     fontsize=14, verticalalignment='top')
ax[1].hlines(0.4,0,1,linestyle='dashed',color='white')
ax[1].vlines(0.4,0,2,linestyle='dashed',color='white')


lr_pred_qr = linregress(ut_qr,dbs_off_meds_improvement)
ax[2].scatter(ut_qr,dbs_off_meds_improvement)
ax[2].plot(ut_qr,ut_qr*lr_pred_qr.slope+lr_pred_qr.intercept,'-r')
ax[2].set_title('Fully-sampled LASSO with SMOGN')
ax[2].set_ylabel("DBS improvement")
ax[2].set_xlabel("Prediction")
text = f"$y={lr_pred_qr.slope:0.3f}\;x{lr_pred_qr.intercept:+0.3f}$\n$r = {lr_pred_qr.rvalue:0.3f}$\n$p = {lr_pred_qr.pvalue:0.10f}$"
ax[2].text(0.05, 0.95, text,transform=ax[2].transAxes,
     fontsize=14, verticalalignment='top')
ax[2].hlines(0.4,0,1,linestyle='dashed',color='white')
ax[2].vlines(0.4,0,2,linestyle='dashed',color='white')


plt.show

In [None]:
# Names of the features with non-zero LASSO coefficients in each fold
rfs = []
rfss = []
# Feature names arranged as (case, feature, roi); computed once, not per fold
Kr = K.reshape((n_cases,n_rois,(n_features-1))).transpose((0,2,1))
# Append the UPDRS entry by concatenation so the string width follows the data.
# Pre-allocating with np.zeros(...).astype('str') gives a fixed-width '<U32'
# array that silently truncates feature names longer than 32 characters.
updrs_row = np.full((n_cases,1,n_rois),'po_updrs')
Kr_extended = np.concatenate((Kr,updrs_row),axis=1)
for j in np.arange(len(cases)):
    # The model's columns come from reshaping a (case, feature, roi) array to
    # 2-D, so coefficient k belongs to feature k // n_rois and roi k % n_rois.
    # The mask must therefore be reshaped to (n_features, n_rois) directly;
    # reshaping to (n_rois, n_features) and transposing pairs coefficients
    # with the wrong names.
    rfs.append(Kr_extended[j,np.asarray(C[j]!=0).reshape((n_features,n_rois))])
    if j < Cs.shape[0]:
        rfss.append(Kr_extended[j,np.asarray(Cs[j]!=0).reshape((n_features,n_rois))])

In [None]:
plt.rcParams["figure.figsize"] = (30,15)
fig, axes = plt.subplots(3,1,sharey=True)
plt.subplots_adjust(left=0.1,
                    bottom=0.1,
                    right=0.9,
                    top=0.9,
                    wspace=0.4,
                    hspace=0.8)


def _plot_feature_counts(counts, axis, title):
    """Draw a sorted bar chart of selection counts on ``axis``.

    ``counts`` maps a feature name to the number of folds that selected it.
    Returns the one-column count frame (empty when nothing was selected, in
    which case only the title is drawn).
    """
    frame = pandas.DataFrame.from_dict(counts, orient='index')
    if frame.empty:
        axis.set_title(title+' (no features selected)')
        return frame
    frame.sort_values(0, inplace=True)
    frame.plot(ax=axis,y=0, kind='bar', legend=False)
    axis.set_title(title)
    return frame


# NOTE: `df` and `R` are rebound here; they no longer hold the raw patient
# table / ROI names after this cell runs.
# Features selected by the LASSO trained on the original cases
R = [item for sublist in rfs for item in sublist]
letter_counts = Counter(R)
df = _plot_feature_counts(letter_counts, axes[0], 'LASSO')

# Features selected by the LASSO trained on the SMOGN set. This panel now plots
# its own counter; it previously re-plotted letter_counts from the first panel.
Rs = [item for sublist in rfss for item in sublist]
letter_countss = Counter(Rs)
dfs = _plot_feature_counts(letter_countss, axes[1], 'LASSO with SMOGN')

# Features selected by both models. The previous expression indexed the list R
# with a 0/1 flag and iterated over the characters of a single name.
smogn_selected = set(Rs)
Ru = [item for item in R if item in smogn_selected]
letter_countsu = Counter(Ru)
dfu = _plot_feature_counts(letter_countsu, axes[2], 'Selected by both models')