In [1]:
# Import built-in and custom functions

import numpy as np, pandas as pd

import Code.misc_splitting as ms
import Code.lesion_selection as ls
import Code.lesion_aggregation as la
import Code.feature_handling as fh
import Code.survival_analysis as sa

In [2]:
# Load the data
# Read the SARC021 radiomics and clinical CSV files into DataFrames.
# Both paths are relative, so they resolve against the notebook's working directory.
radiomics, clinical = (
    pd.read_csv(csv_path)
    for csv_path in (
        'Data/SARC021/SARC021_radiomics.csv',
        'Data/SARC021/SARC021_clinical.csv',
    )
)

In [4]:
# Run configuration -- modify these two values as necessary.
#   aggName      : key used to look up an entry of func_dict.
#   inclMetsFlag : forwarded as numMetsFlag to the selection/aggregation
#                  functions and to the feature-handling helpers.
aggName, inclMetsFlag = 'smallest', False

# Work on copies so the frames loaded from disk are left untouched.
df_imaging, df_clinical = radiomics.copy(), clinical.copy()

# Split into train and test; each result is later handed to
# USUBJID.isin(...) to filter the imaging and clinical rows.
# NOTE(review): 0.8 and False are positional arguments whose meaning is not
# visible from this notebook (presumably the training fraction and an
# on/off option) -- confirm against Code.misc_splitting.randomSplit.
train, test = ms.randomSplit(df_imaging, df_clinical, 0.8, False)


# Per-split settings: [split returned by ms.randomSplit, boolean flag].
# Element 0 is used to filter rows by USUBJID; element 1 is forwarded as the
# fourth positional argument of the sa.*_bootstrap calls (True for the
# training split, False for the testing split).
pipe_dict = dict(
    train=[train, True],
    test=[test, False],
)

def _reduce_then_select(x, numFeatures=10):
    """Run feature reduction followed by feature selection on ``x``.

    Single shared post-processing step for every func_dict entry that
    previously carried its own copy of the same lambda, so the pipeline
    only has to be edited in one place.

    Parameters
    ----------
    x : object
        Output of a lesion selection/aggregation function after the caller
        has dropped the 'USUBJID' column. Passed straight to
        fh.featureReduction (presumably a DataFrame -- confirm against
        Code.feature_handling).
    numFeatures : int, default 10
        Forwarded to fh.featureSelection as ``numFeatures``.

    Returns
    -------
    Whatever fh.featureSelection returns.

    Notes
    -----
    inclMetsFlag is looked up in the notebook namespace when this function
    is called, not when this cell runs -- the same late lookup the replaced
    lambdas performed.
    """
    reduced = fh.featureReduction(x, numMetsFlag=inclMetsFlag, scaleFlag=True)
    return fh.featureSelection(reduced, numFeatures=numFeatures, numMetsFlag=inclMetsFlag)


# Maps an aggregation name (the value of aggName) to
# [lesion selection/aggregation function, feature post-processing callable].
func_dict = {
                'largest'  : [ls.selectLargestLesion, _reduce_then_select],
                'smallest' : [ls.selectSmallestLesion, _reduce_then_select],
                #'primary'  : [ls.selectPrimaryTumor, _reduce_then_select],
                #'lung'     : [ls.selectLargestLungLesion, _reduce_then_select],
                'UWA'      : [la.calcUnweightedAverage, _reduce_then_select],
                'VWA'      : [la.calcVolumeWeightedAverage, _reduce_then_select],
                'VWANLrg'  : [la.calcVolumeWeightedAverageNLargest, _reduce_then_select],
                # 'cosine' output is passed through unchanged.
                'cosine'   : [la.calcCosineMetrics, lambda x: x],
                # 'concat' skips featureReduction and goes straight to selection.
                'concat'   : [la.concatenateNLargest, lambda x: fh.featureSelection(x,scaleFlag=True)]
            }
 
    
# ----- TRAINING SET -----
# Keep only the rows whose USUBJID belongs to the training split.
# NOTE(review): reset_index() is called without drop=True, so the previous row
# labels are kept as an extra 'index' column -- confirm that the downstream
# selection/aggregation helpers expect (or ignore) that column.
df_imaging_train = df_imaging.loc[df_imaging['USUBJID'].isin(pipe_dict['train'][0])].reset_index()
df_clinical_train = df_clinical.loc[df_clinical['USUBJID'].isin(pipe_dict['train'][0])].reset_index()

# Look up the two callables registered for the chosen aggregation method:
# the lesion selection/aggregation function and the feature post-processing step.
selectLesions, prepareFeatures = func_dict[aggName]

# Select/aggregate lesions, drop the patient identifier column, then apply
# the feature post-processing step to obtain the training features.
aggregatedTrain = selectLesions(df_imaging_train, df_clinical_train, numMetsFlag=inclMetsFlag)
trainingSet = prepareFeatures(aggregatedTrain.drop('USUBJID', axis=1))




100%|██████████| 10/10 [00:00<00:00, 16.41it/s]

selected features: ['wavelet-LLL_glszm_LargeAreaLowGrayLevelEmphasis', 'exponential_gldm_SmallDependenceHighGrayLevelEmphasis', 'square_firstorder_Minimum', 'exponential_glcm_Idmn', 'square_glszm_SmallAreaLowGrayLevelEmphasis', 'original_shape_VoxelVolume', 'original_glcm_InverseVariance', 'wavelet-HLL_gldm_LargeDependenceLowGrayLevelEmphasis', 'exponential_glrlm_ShortRunHighGrayLevelEmphasis', 'exponential_glcm_Idm']





In [6]:
# ----- TESTING SET -----
# Keep only the rows whose USUBJID belongs to the testing split.
df_imaging_test = df_imaging.loc[df_imaging['USUBJID'].isin(pipe_dict['test'][0])].reset_index()
df_clinical_test = df_clinical.loc[df_clinical['USUBJID'].isin(pipe_dict['test'][0])].reset_index()

# Only the selection/aggregation function is run on the test data (here with
# scaleFlag=True passed to it directly); the second func_dict callable is not
# re-run. Instead the result is restricted to the columns of trainingSet so
# that both sets share the same columns.
selectLesionsTest = func_dict[aggName][0]
aggregatedTest = selectLesionsTest(df_imaging_test, df_clinical_test, scaleFlag=True, numMetsFlag=inclMetsFlag)
testingSet = aggregatedTest.drop('USUBJID', axis=1)[trainingSet.columns]

# ----- MODELLING -----
# Each model is run twice: first on the training set, whose return value is
# kept as best_params_*, then on the testing set with that value passed in
# as param_grid.
# NOTE(review): 'OS' presumably selects overall survival as the endpoint --
# confirm against Code.survival_analysis.
outcomeName = 'OS'
trainFlag = pipe_dict['train'][1]   # True  -> flag stored for the training split
testFlag = pipe_dict['test'][1]     # False -> flag stored for the testing split

# Cox PH
best_params_CPH = sa.CPH_bootstrap(trainingSet, aggName, outcomeName, trainFlag)
sa.CPH_bootstrap(testingSet, aggName, outcomeName, testFlag, param_grid=best_params_CPH)

# Lasso-Cox
best_params_LAS = sa.LASSO_COX_bootstrap(trainingSet, aggName, outcomeName, trainFlag)
sa.LASSO_COX_bootstrap(testingSet, aggName, outcomeName, testFlag, param_grid=best_params_LAS)

# RSF (currently disabled)
#best_params_RSF = sa.RSF_bootstrap(trainingSet,aggName,'OS',pipe_dict['train'][1])
#sa.RSF_bootstrap(testingSet,aggName,'OS',pipe_dict['test'][1],param_grid=best_params_RSF)

smallest CPH training:  0.607 (0.572-0.634)
smallest CPH testing: 0.591
smallest Lasso-Cox training:  0.607 (0.572-0.636)
smallest Lasso-Cox testing: 0.591
