In [1]:
import os 
from multiprocessing import Pool, cpu_count
from radiomics import featureextractor
import pandas as pd

First we extract the 'raw' csv, with all the information from Pyradiomics

# Raw csv extraction

In [2]:
def rad_extraction(nifti_folder: str, patients_list: list, fraction: str, img_name: str, mask_type: str, params: str): 
    assert mask_type in ['GTV', 'PTV'], "mask_type should be either GTV or PTV"
    extractor = featureextractor.RadiomicsFeatureExtractor(params)
    
    k = 0 
    for patient in patients_list:
        
        img_path = nifti_folder + patient + "/" + fraction + '/' + img_name + '.nii.gz'
        mask_path = nifti_folder + patient + "/" + fraction + '/mask_' + mask_type + '.nii.gz'
        try: 
            result = extractor.execute(img_path, mask_path)
        except Exception as e:
            print(f"Error processing {patient}: {e}")
            
        feature_names = list(result.keys())
        if k == 0:
            output_df = pd.DataFrame(index=patients_list, columns=feature_names)

        for feature_name in feature_names:
            output_df.loc[patient, feature_name] = str(result[feature_name])

        k += 1
    return output_df


## GTV extraction
### Simu

In [4]:
params_file_name = 'Params_MRIdian.yaml'
fraction = 'SIMU'
mask_type = 'GTV'
img_name = 'Aniso_N4_filtered_image' # TODO: run!!!! 

output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

nifti_folder = "/home/tachennf/Documents/delta-rad/data/Madrid/nifti_data/"
patients_list = os.listdir(nifti_folder)
output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

In [5]:
output_df.head()


Unnamed: 0,diagnostics_Versions_PyRadiomics,diagnostics_Versions_Numpy,diagnostics_Versions_SimpleITK,diagnostics_Versions_PyWavelet,diagnostics_Versions_Python,diagnostics_Configuration_Settings,diagnostics_Configuration_EnabledImageTypes,diagnostics_Image-original_Hash,diagnostics_Image-original_Dimensionality,diagnostics_Image-original_Spacing,...,original_gldm_LargeDependenceLowGrayLevelEmphasis,original_gldm_LowGrayLevelEmphasis,original_gldm_SmallDependenceEmphasis,original_gldm_SmallDependenceHighGrayLevelEmphasis,original_gldm_SmallDependenceLowGrayLevelEmphasis,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength
Mont10,v3.1.0,1.24.4,2.4.0,1.4.1,3.8.20,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},a68499f96011b0441ac17a3d7452dc01910a9f9b,3D,"(1.5, 1.5, 2.999999761581421)",...,0.0696602164517163,0.0067220936698969,0.3325642792437132,134.7149118700065,0.0028750101636112,0.1699573374975758,0.0093372916718314,1287.9187538004132,0.1525177905721789,4.64362907334703
Mont39,v3.1.0,1.24.4,2.4.0,1.4.1,3.8.20,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},2b091a54974a8ce5a2022f950afa8f777b77ada2,3D,"(1.4939024448394775, 1.5, 2.999999761581421)",...,0.792970557705377,0.0337407771912891,0.1824811242431283,21.017576661333354,0.0074642169710253,0.3035984722848974,0.0172198281977147,245.15415540298164,0.0660453646912064,3.376976425857462
Mont38,v3.1.0,1.24.4,2.4.0,1.4.1,3.8.20,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},cd011890af2c6c4a3f88b2ca66915f9ab8ab0dc3,3D,"(1.5, 1.504424810409546, 3.0)",...,0.2727802595811297,0.0101278770049747,0.1413790355351991,25.48536407447622,0.0017652133558192,0.4201325713635331,0.0073887637885799,301.5510926224392,0.0657086428141856,1.5291746184619035
Mont1,v3.1.0,1.24.4,2.4.0,1.4.1,3.8.20,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},33e1c0f81cfcce860e589e3cad3aef6559d1c543,3D,"(1.5, 1.5, 3.000000238418579)",...,0.1516184582590862,0.0087429776126372,0.2349889606278589,84.8070710646567,0.0019274158774937,1.011215819988191,0.0010779177438026,3133.7395075952204,0.080637261751653,2.376555923864709
Mont32,v3.1.0,1.24.4,2.4.0,1.4.1,3.8.20,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},aaceea91b3836fd2c3eab2a428a129f9a875bc47,3D,"(1.5, 1.5, 2.999999761581421)",...,0.254623775751883,0.0117534756766798,0.1847167656338546,41.83177436330133,0.0022647630257989,3.7769683854291474,0.0006318266377635,823.7748649470784,0.0871453031667772,0.2730129530980905


### F1

In [6]:
fraction = 'F1'
output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

In [7]:
output_df.shape

(37, 154)

### F2

In [8]:
fraction = 'F2'
output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

### F3

In [9]:
fraction = 'F3'
output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

### F4

In [10]:
fraction = 'F4'
output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

### F5

In [11]:
fraction = 'F5'
output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

## PTV extraction

### Simu

In [12]:
params_file_name = 'Params_MRIdian.yaml'
fraction = 'SIMU'
mask_type = 'PTV'
img_name = 'Aniso_N4_filtered_image' 

output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

nifti_folder = "/home/tachennf/Documents/delta-rad/data/Madrid/nifti_data/"
patients_list = os.listdir(nifti_folder)
output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

### F1

In [13]:
fraction = 'F1'
output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

nifti_folder = "/home/tachennf/Documents/delta-rad/data/Madrid/nifti_data/"
patients_list = os.listdir(nifti_folder)
output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

### F2

In [14]:
fraction = 'F2'
output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

nifti_folder = "/home/tachennf/Documents/delta-rad/data/Madrid/nifti_data/"
patients_list = os.listdir(nifti_folder)
output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

### F3

In [15]:
fraction = 'F3'
output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

nifti_folder = "/home/tachennf/Documents/delta-rad/data/Madrid/nifti_data/"
patients_list = os.listdir(nifti_folder)
output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

### F4

In [16]:
fraction = 'F4'
output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

nifti_folder = "/home/tachennf/Documents/delta-rad/data/Madrid/nifti_data/"
patients_list = os.listdir(nifti_folder)
output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

### F5

In [17]:
fraction = 'F5'
output_file_path = '/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/' + fraction + '_' + mask_type + '_extracted_features.csv'

nifti_folder = "/home/tachennf/Documents/delta-rad/data/Madrid/nifti_data/"
patients_list = os.listdir(nifti_folder)
output_df = rad_extraction(nifti_folder, patients_list, fraction, img_name, mask_type, params_file_name)
output_df.to_csv(output_file_path, index=True)

GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

# Filter raw csv
## Repeatability analysis ICM

In [37]:
repeat_file = "/home/tachennf/Documents/delta-rad/data/ICM_0.35T/filtered_features_gtv.xlsx"
repeatable_features_gtv_df = pd.read_excel(repeat_file, index_col=0)
repeatable_features_gtv_df.head()

Unnamed: 0_level_0,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxisLength,original_shape_MajorAxisLength,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,original_shape_Maximum3DDiameter,original_shape_MeshVolume,original_shape_MinorAxisLength,...,original_ngtdm_Coarseness,original_ngtdm_Contrast,Patient Rxmic,Fraction,Volume,Récidive Locale,Récidive Méta,Décès,Récidive,Évènement
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Patient38_IRM_simu_mridian_gtv,0.431793,0.370555,24.678556,66.598953,53.068955,57.813322,47.606047,68.904864,24300.18231,28.756943,...,0.001856,0.175404,Patient 38,Simulation mridian,GTV,0,0,1,0,1
Patient38_mridian_ttt_2_gtv,0.442144,0.371772,24.636047,66.266497,52.185463,57.813322,47.606047,69.633631,24369.47426,29.299306,...,0.001997,0.128942,Patient 38,2,GTV,0,0,1,0,1
Patient38_mridian_ttt_4_gtv,0.442374,0.371341,24.625888,66.316118,53.068955,57.813322,47.606047,68.904864,24464.7507,29.336497,...,0.002107,0.153118,Patient 38,4,GTV,0,0,1,0,1
Patient38_mridian_ttt_5_gtv,0.442374,0.371341,24.625888,66.316118,53.068955,57.813322,47.606047,68.904864,24464.7507,29.336497,...,0.002032,0.169598,Patient 38,5,GTV,0,0,1,0,1
Patient10_IRM_simu_GIE_gtv,0.672158,0.549602,25.401274,46.217551,46.704549,54.477126,38.744658,54.865907,18814.74989,31.065518,...,0.003128,0.828998,Patient 10,Simulation GIE,GTV,0,1,1,1,1


In [38]:
repeatable_features = repeatable_features_gtv_df.columns[:-8]
print('Number of repeatable features:', len(repeatable_features))

Number of repeatable features: 81


## Reproducibility analysis ICM

In [39]:
icc_file = "/home/tachennf/Documents/delta-rad/data/ICM_0.35T/results_icc.csv"
icc_df = pd.read_csv(icc_file)
icc_df.head()

Unnamed: 0,Feature,ICC
0,original_shape_SurfaceArea,0.964
1,original_firstorder_RootMeanSquared,0.316
2,original_shape_SurfaceVolumeRatio,0.985
3,original_shape_Maximum2DDiameterColumn,0.985
4,original_glcm_Imc2,0.826


In [40]:
threshold = 0.75
reproductible_features = icc_df[icc_df['ICC'] >= threshold]

In [41]:
filtered_features = list(set(repeatable_features) & set(reproductible_features['Feature']))


## Filtered 
### SIMU GTV

In [42]:
simu_df = pd.read_csv('/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/SIMU_GTV_extracted_features.csv', index_col=0)
filtered_simu_df = simu_df[filtered_features]
filtered_simu_df.to_csv('/home/tachennf/Documents/delta-rad/data/Madrid/filtered_csv/simu_gtv.csv', index=True)
filtered_simu_df.head(5)

Unnamed: 0,original_shape_Sphericity,original_glrlm_LongRunEmphasis,original_glrlm_RunLengthNonUniformityNormalized,original_gldm_DependenceNonUniformity,original_gldm_GrayLevelNonUniformity,original_glcm_SumEntropy,original_shape_Maximum2DDiameterSlice,original_glcm_Correlation,original_shape_Maximum2DDiameterColumn,original_ngtdm_Coarseness,...,original_shape_LeastAxisLength,original_firstorder_Entropy,original_glcm_Idm,original_glszm_ZoneEntropy,original_glrlm_RunVariance,original_glrlm_RunEntropy,original_glrlm_RunPercentage,original_shape_Maximum3DDiameter,original_glrlm_RunLengthNonUniformity,original_shape_Flatness
Mont10,0.681037,1.240127,0.867236,282.84993,55.507714,5.684689,30.450913,0.749476,38.641659,0.009337,...,17.135989,4.822023,0.229505,6.656038,0.081036,5.189219,0.92928,39.187858,1149.866,0.539026
Mont39,0.774345,1.661803,0.748215,56.367041,65.247191,4.169496,18.584859,0.533214,22.170397,0.01722,...,10.703229,3.493069,0.420449,5.566407,0.251577,4.231849,0.846442,23.844844,339.649095,0.503594
Mont38,0.752608,1.687161,0.733044,135.528213,124.670846,4.389333,26.28296,0.597222,28.466726,0.007389,...,17.172183,3.669701,0.405943,6.152571,0.258689,4.415851,0.840728,30.668267,789.439152,0.720539
Mont1,0.689326,1.37101,0.822783,1577.043466,546.024051,5.475522,49.332752,0.80361,58.316653,0.001078,...,35.181256,4.604248,0.277431,7.105892,0.133726,5.135233,0.900133,63.234757,7676.606275,0.642585
Mont32,0.631497,1.457574,0.792842,2553.441751,1054.022527,5.202837,78.78146,0.712656,63.4026,0.000632,...,40.949026,4.343913,0.311482,7.041696,0.165527,4.944509,0.881327,81.320502,13049.1997,0.654236


### F1

In [43]:
f1_df = pd.read_csv('/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/F1_GTV_extracted_features.csv', index_col=0)
filtered_f1_df = f1_df[filtered_features]
filtered_f1_df.to_csv('/home/tachennf/Documents/delta-rad/data/Madrid/filtered_csv/f1_gtv.csv', index=True)
filtered_f1_df.head(5)

Unnamed: 0,original_shape_Sphericity,original_glrlm_LongRunEmphasis,original_glrlm_RunLengthNonUniformityNormalized,original_gldm_DependenceNonUniformity,original_gldm_GrayLevelNonUniformity,original_glcm_SumEntropy,original_shape_Maximum2DDiameterSlice,original_glcm_Correlation,original_shape_Maximum2DDiameterColumn,original_ngtdm_Coarseness,...,original_shape_LeastAxisLength,original_firstorder_Entropy,original_glcm_Idm,original_glszm_ZoneEntropy,original_glrlm_RunVariance,original_glrlm_RunEntropy,original_glrlm_RunPercentage,original_shape_Maximum3DDiameter,original_glrlm_RunLengthNonUniformity,original_shape_Flatness
Mont10,0.676434,1.366571,0.824211,227.7109,95.866622,4.960418,33.004984,0.612915,39.693094,0.006769,...,16.939371,4.266198,0.296037,6.406298,0.132069,4.778233,0.90063,39.993172,1097.228374,0.551064
Mont39,0.754164,1.266766,0.865918,100.419355,18.407258,5.769836,18.584859,0.841954,21.929407,0.035786,...,9.662655,4.957776,0.247617,6.436004,0.09464,5.284771,0.925868,23.166653,398.692953,0.475863
Mont38,0.737445,1.959555,0.670898,68.720183,123.149083,3.979632,23.338048,0.730832,26.981301,0.010735,...,13.548696,3.31893,0.498227,5.644389,0.363488,4.298019,0.794725,28.837644,466.681449,0.56281
Mont1,0.691686,1.498829,0.782794,1493.445121,948.719036,4.649499,47.717537,0.566427,59.086095,0.000765,...,37.022867,3.900546,0.331288,6.65997,0.182952,4.546411,0.874261,63.066467,7882.479576,0.662501
Mont32,0.643833,1.44195,0.798924,2446.589085,965.085489,5.284571,80.943903,0.732849,62.473883,0.000682,...,40.172991,4.411955,0.307003,6.973166,0.161227,5.001305,0.884831,81.874042,12502.033858,0.648594


### F2

In [44]:
f2_df = pd.read_csv('/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/F2_GTV_extracted_features.csv', index_col=0)
filtered_f2_df = f2_df[filtered_features]
filtered_f2_df.to_csv('/home/tachennf/Documents/delta-rad/data/Madrid/filtered_csv/f2_gtv.csv', index=True)
filtered_f2_df.head(5)

Unnamed: 0,original_shape_Sphericity,original_glrlm_LongRunEmphasis,original_glrlm_RunLengthNonUniformityNormalized,original_gldm_DependenceNonUniformity,original_gldm_GrayLevelNonUniformity,original_glcm_SumEntropy,original_shape_Maximum2DDiameterSlice,original_glcm_Correlation,original_shape_Maximum2DDiameterColumn,original_ngtdm_Coarseness,...,original_shape_LeastAxisLength,original_firstorder_Entropy,original_glcm_Idm,original_glszm_ZoneEntropy,original_glrlm_RunVariance,original_glrlm_RunEntropy,original_glrlm_RunPercentage,original_shape_Maximum3DDiameter,original_glrlm_RunLengthNonUniformity,original_shape_Flatness
Mont10,0.70734,1.260043,0.86507,288.051771,74.53951,5.259862,33.004984,0.580974,39.39073,0.006195,...,16.942794,4.553842,0.232548,6.462259,0.092058,4.948113,0.925854,39.525399,1176.508662,0.559049
Mont39,0.73587,1.226722,0.882091,88.657568,22.255583,5.091404,16.622804,0.524065,21.2526,0.022517,...,11.21242,4.351479,0.228743,5.897988,0.081361,4.655861,0.935866,22.230236,333.186471,0.585944
Mont38,0.774195,1.644071,0.738673,97.853007,84.534521,4.499194,20.423841,0.718811,24.45,0.012692,...,14.907314,3.695888,0.411581,5.914717,0.233181,4.410934,0.846753,26.333456,564.397601,0.656721
Mont1,0.709575,1.68786,0.728806,1131.889952,1106.526785,4.343008,47.044635,0.580109,58.362195,0.000851,...,36.409011,3.56496,0.39825,6.430057,0.254731,4.355652,0.838425,62.410058,6533.616679,0.675162
Mont32,0.651377,1.524802,0.773657,2341.528504,1055.304544,5.228857,80.268191,0.77383,62.473883,0.000692,...,40.613126,4.345792,0.336727,7.015145,0.193831,4.999337,0.868121,84.209656,12553.060148,0.63964


### F3

In [45]:
f3_df = pd.read_csv('/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/F3_GTV_extracted_features.csv', index_col=0)
filtered_f3_df = f3_df[filtered_features]   
filtered_f3_df.to_csv('/home/tachennf/Documents/delta-rad/data/Madrid/filtered_csv/f3_gtv.csv', index=True)
filtered_f3_df.head(5)

Unnamed: 0,original_shape_Sphericity,original_glrlm_LongRunEmphasis,original_glrlm_RunLengthNonUniformityNormalized,original_gldm_DependenceNonUniformity,original_gldm_GrayLevelNonUniformity,original_glcm_SumEntropy,original_shape_Maximum2DDiameterSlice,original_glcm_Correlation,original_shape_Maximum2DDiameterColumn,original_ngtdm_Coarseness,...,original_shape_LeastAxisLength,original_firstorder_Entropy,original_glcm_Idm,original_glszm_ZoneEntropy,original_glrlm_RunVariance,original_glrlm_RunEntropy,original_glrlm_RunPercentage,original_shape_Maximum3DDiameter,original_glrlm_RunLengthNonUniformity,original_shape_Flatness
Mont10,0.696081,1.268186,0.859475,318.878499,85.980345,5.239984,36.629694,0.598448,39.693094,0.005896,...,17.897686,4.517293,0.24241,6.62544,0.093503,4.926966,0.923214,40.521157,1333.175887,0.569298
Mont39,0.785173,1.247085,0.870572,111.702899,27.786232,5.248832,18.584859,0.630953,21.2526,0.018968,...,11.727629,4.515172,0.242834,6.284128,0.08596,4.863974,0.929627,23.109238,447.522391,0.585858
Mont38,0.768245,1.97791,0.665785,63.180645,126.223226,3.839393,24.231692,0.672811,24.88087,0.010922,...,14.042114,3.080811,0.508333,5.434153,0.36608,4.061327,0.792953,27.372355,411.557602,0.596775
Mont1,0.713131,1.448869,0.794542,1370.77069,580.084812,5.130144,45.901217,0.712587,57.02671,0.001093,...,34.782342,4.270143,0.311252,6.958285,0.16045,4.859012,0.883365,61.961444,6860.439607,0.637644
Mont32,0.637624,1.434831,0.801524,2964.780474,1014.137084,5.420063,81.124234,0.772222,64.048838,0.000628,...,42.230031,4.536483,0.302946,7.124642,0.158433,5.112643,0.886368,83.464914,14893.446536,0.655197


### F4

In [46]:
f4_df = pd.read_csv('/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/F4_GTV_extracted_features.csv', index_col=0)
filtered_f4_df = f4_df[filtered_features]
filtered_f4_df.to_csv('/home/tachennf/Documents/delta-rad/data/Madrid/filtered_csv/f4_gtv.csv', index=True)
filtered_f4_df.head(5)

Unnamed: 0,original_shape_Sphericity,original_glrlm_LongRunEmphasis,original_glrlm_RunLengthNonUniformityNormalized,original_gldm_DependenceNonUniformity,original_gldm_GrayLevelNonUniformity,original_glcm_SumEntropy,original_shape_Maximum2DDiameterSlice,original_glcm_Correlation,original_shape_Maximum2DDiameterColumn,original_ngtdm_Coarseness,...,original_shape_LeastAxisLength,original_firstorder_Entropy,original_glcm_Idm,original_glszm_ZoneEntropy,original_glrlm_RunVariance,original_glrlm_RunEntropy,original_glrlm_RunPercentage,original_shape_Maximum3DDiameter,original_glrlm_RunLengthNonUniformity,original_shape_Flatness
Mont10,0.703437,1.600607,0.741491,156.514695,126.76639,4.431575,31.480529,0.618228,37.912833,0.007557,...,16.335832,3.623408,0.402823,6.180073,0.211956,4.335299,0.850733,38.226888,839.145284,0.53794
Mont39,0.769709,1.183192,0.894356,174.131054,28.82906,5.542182,19.078661,0.613196,24.066662,0.013984,...,12.183614,4.792842,0.198425,6.415944,0.061224,5.073095,0.944664,25.409185,593.527116,0.530051
Mont38,0.77962,1.801518,0.707597,61.229318,98.573295,4.005392,26.28296,0.516978,23.733158,0.01039,...,11.30789,3.309615,0.457155,5.651537,0.304276,4.183516,0.820587,27.7579,401.343371,0.469898
Mont1,0.692931,1.541799,0.770094,1190.233647,747.735214,4.746066,47.939738,0.638401,57.629203,0.001012,...,34.631655,3.932964,0.348147,6.645762,0.200435,4.600207,0.866,61.918549,6397.790453,0.636723
Mont32,0.622666,1.489186,0.782775,2516.324634,1063.924697,5.230818,81.5,0.746545,66.491214,0.000665,...,41.557741,4.353876,0.32563,7.079818,0.178008,4.979509,0.874754,83.209864,13087.473285,0.664686


### F5

In [47]:
f5_df = pd.read_csv('/home/tachennf/Documents/delta-rad/data/Madrid/raw_csv/F5_GTV_extracted_features.csv', index_col=0)
filtered_f5_df = f5_df[filtered_features]
filtered_f5_df.to_csv('/home/tachennf/Documents/delta-rad/data/Madrid/filtered_csv/f5_gtv.csv', index=True) 
filtered_f5_df.head(5)

Unnamed: 0,original_shape_Sphericity,original_glrlm_LongRunEmphasis,original_glrlm_RunLengthNonUniformityNormalized,original_gldm_DependenceNonUniformity,original_gldm_GrayLevelNonUniformity,original_glcm_SumEntropy,original_shape_Maximum2DDiameterSlice,original_glcm_Correlation,original_shape_Maximum2DDiameterColumn,original_ngtdm_Coarseness,...,original_shape_LeastAxisLength,original_firstorder_Entropy,original_glcm_Idm,original_glszm_ZoneEntropy,original_glrlm_RunVariance,original_glrlm_RunEntropy,original_glrlm_RunPercentage,original_shape_Maximum3DDiameter,original_glrlm_RunLengthNonUniformity,original_shape_Flatness
Mont10,0.689727,1.385631,0.814441,189.332248,86.728013,4.812427,30.927076,0.622876,37.912833,0.008317,...,15.769588,4.072973,0.317888,6.362329,0.135732,4.599904,0.895828,38.226888,897.461307,0.513664
Mont39,0.782041,1.21212,0.880777,152.708571,36.274286,5.231041,19.078661,0.517001,22.170397,0.013112,...,13.216196,4.472781,0.215604,6.122315,0.070814,4.79678,0.937253,23.677118,578.710673,0.657247
Mont38,0.752497,2.105158,0.644825,56.983957,90.663102,4.345576,26.28296,0.915249,26.981301,0.016515,...,12.438752,3.583452,0.53119,5.695592,0.414775,4.603611,0.776429,31.5227,377.412627,0.460048
Mont1,0.701623,1.69169,0.731953,1148.392684,930.53186,4.654728,50.556283,0.6831,60.661409,0.000973,...,35.311028,3.845724,0.393456,6.621979,0.261411,4.628658,0.839404,62.728533,6722.042876,0.63997
Mont32,0.640953,1.351174,0.828388,3051.322268,785.882518,5.601014,80.268191,0.79263,63.4026,0.000663,...,40.329935,4.712799,0.266644,7.266024,0.124697,5.213823,0.904169,82.488308,13916.71169,0.645964


# Delta-radiomics

## F1/F2

In [48]:
madrid_path = '/home/tachennf/Documents/delta-rad/data/Madrid/filtered_csv/'

f1_df = pd.read_csv(madrid_path + 'f1_gtv.csv', index_col=0) # load first table 
f2_df = pd.read_csv(madrid_path + 'f2_gtv.csv', index_col=0) # load second table
f1_f2_df = pd.DataFrame(index=f1_df.index, columns=f1_df.columns) # create delta rad table with same columns and index as first table
for col in f1_df.columns:
    f1_f2_df[col] = (f2_df[col] - f1_df[col]) / f1_df[col] # calculate the difference and assign it to the delta rad table
f1_f2_df.to_csv(madrid_path + 'f1_f2_gtv.csv', index=True) # save the delta rad table

## F1/F3

In [49]:
f3_df = pd.read_csv(madrid_path + 'f3_gtv.csv', index_col=0) # load third table
f1_f3_df = pd.DataFrame(index=f1_df.index, columns=f1_df.columns) # create delta rad table with same columns and index as first table
for col in f1_df.columns:
    f1_f3_df[col] = (f3_df[col] - f1_df[col]) / f1_df[col] # calculate the difference and assign it to the delta rad table
f1_f3_df.to_csv(madrid_path + 'f1_f3_gtv.csv', index=True) # save the delta rad table

## F1/F4

In [50]:
f4_df = pd.read_csv(madrid_path + 'f4_gtv.csv', index_col=0) # load fourth table
f1_f4_df = pd.DataFrame(index=f1_df.index, columns=f1_df.columns) # create delta rad table with same columns and index as first table
for col in f1_df.columns:
    f1_f4_df[col] = (f4_df[col] - f1_df[col]) / f1_df[col] # calculate the difference and assign it to the delta rad table
f1_f4_df.to_csv(madrid_path + 'f1_f4_gtv.csv', index=True) # save the delta rad table

## F1/F5

In [51]:
f5_df = pd.read_csv(madrid_path + 'f5_gtv.csv', index_col=0) # load fifth table
f1_f5_df = pd.DataFrame(index=f1_df.index, columns=f1_df.columns) # create delta rad table with same columns and index as first table
for col in f1_df.columns:
    f1_f5_df[col] = (f5_df[col] - f1_df[col]) / f1_df[col] # calculate the difference and assign it to the delta rad table
f1_f5_df.to_csv(madrid_path + 'f1_f5_gtv.csv', index=True) # save the delta rad table

## F2/F3

In [52]:
f2_f3_df = pd.DataFrame(index=f2_df.index, columns=f2_df.columns) # create delta rad table with same columns and index as first table
for col in f2_df.columns:
    f2_f3_df[col] = (f3_df[col] - f2_df[col]) / f2_df[col] # calculate the difference and assign it to the delta rad table
f2_f3_df.to_csv(madrid_path + 'f2_f3_gtv.csv', index=True) # save the delta rad table

## SIMU/F1

In [53]:
simu_df = pd.read_csv(madrid_path + 'simu_gtv.csv', index_col=0) # load first table
simu_f1_df = pd.DataFrame(index=f1_df.index, columns=f1_df.columns) # create delta rad table with same columns and index as first table
for col in f1_df.columns:
    simu_f1_df[col] = (f1_df[col] - simu_df[col]) / simu_df[col] # calculate the difference and assign it to the delta rad table
simu_f1_df.to_csv(madrid_path + 'simu_f1_gtv.csv', index=True) # save the delta rad table

## SIMU/F3

In [54]:
simu_f3_df = pd.DataFrame(index=f3_df.index, columns=f3_df.columns) # create delta rad table with same columns and index as first table
for col in f3_df.columns:
    simu_f3_df[col] = (f3_df[col] - simu_df[col]) / simu_df[col] # calculate the difference and assign it to the delta rad table
simu_f3_df.to_csv(madrid_path + 'simu_f3_gtv.csv', index=True) # save the delta rad table

## SIMU/F5

In [55]:
simu_f5_df = pd.DataFrame(index=f5_df.index, columns=f5_df.columns) # create delta rad table with same columns and index as first table
for col in f5_df.columns:
    simu_f5_df[col] = (f5_df[col] - simu_df[col]) / simu_df[col] # calculate the difference and assign it to the delta rad table
simu_f5_df.to_csv(madrid_path + 'simu_f5_gtv.csv', index=True) # save the delta rad table