# Radiomics csv extraction
Create csv file by fraction (or delta-fraction) from 'filtered_features_gtv.xlsx'

In [48]:
import pandas as pd 
import numpy as np

# Clinical data

In [49]:
clinical_data_df = pd.read_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/clinical_data_clean.csv', index_col=0, sep=';')
clinical_data_df

Unnamed: 0,Date diagnostic,Date debut chimio,date fin de ttt RT,Date de recidive locale,Situation de la recidive locale,Date de recidive méta,Date de deces,Cause de Deces,Date dernières nouvelles
1403778,13/11/2018,01/01/2019,28/10/2019,,,18/12/2019,28/05/2020,2,
1906503,21/03/2019,01/04/2019,19/11/2019,01/07/2020,1.0,15/04/2021,27/08/2021,2,
1904130,04/07/2019,10/07/2019,11/12/2019,,,18/09/2020,,,06/04/2023
1905976,22/01/2019,01/02/2019,16/10/2019,,,06/03/2020,06/08/2020,5,
1907649,28/05/2019,13/06/2019,08/01/2020,,,30/04/2020,01/08/2020,2,
...,...,...,...,...,...,...,...,...,...
2204364,17/01/2022,05/02/2022,27/07/2022,,,,,,04/04/2023
2201397,16/03/2022,06/04/2022,05/08/2022,,,27/09/2022,,,28/02/2023
2204013,02/12/2021,15/01/2022,05/08/2022,,,,,,06/03/2023
2102092,23/02/2021,25/03/2021,01/09/2021,02/12/2021,,,06/04/2022,1,


In [50]:
print(clinical_data_df['Situation de la recidive locale '].value_counts())

Situation de la recidive locale 
1.0    3
2.0    3
Name: count, dtype: int64


## Transform to date format

In [51]:
date_outcome_df = pd.DataFrame(index=clinical_data_df.index, columns=['date fin de ttt RT', 'Date de recidive locale '])
date_outcome_df['date fin de ttt RT'] = pd.to_datetime(clinical_data_df['date fin de ttt RT'], format="%d/%m/%Y", errors='coerce')
date_outcome_df['Date de recidive locale '] = pd.to_datetime(clinical_data_df['Date de recidive locale '], format="%d/%m/%Y", errors='coerce')
date_outcome_df['Date dernières nouvelles'] = pd.to_datetime(clinical_data_df['Date dernières nouvelles'], format="%d/%m/%Y", errors='coerce')
date_outcome_df.head()

Unnamed: 0,date fin de ttt RT,Date de recidive locale,Date dernières nouvelles
1403778,2019-10-28,NaT,NaT
1906503,2019-11-19,2020-07-01,NaT
1904130,2019-12-11,NaT,2023-04-06
1905976,2019-10-16,NaT,NaT
1907649,2020-01-08,NaT,NaT


## One year LC status

In [52]:
date_outcome_df['difference end RT and local recurrence'] = (date_outcome_df['Date de recidive locale '] - date_outcome_df['date fin de ttt RT']).dt.days
one_year_in_days = 365 

In [53]:
date_outcome_df['one_year_status'] = date_outcome_df['difference end RT and local recurrence'] <= one_year_in_days
date_outcome_df

Unnamed: 0,date fin de ttt RT,Date de recidive locale,Date dernières nouvelles,difference end RT and local recurrence,one_year_status
1403778,2019-10-28,NaT,NaT,,False
1906503,2019-11-19,2020-07-01,NaT,225.0,True
1904130,2019-12-11,NaT,2023-04-06,,False
1905976,2019-10-16,NaT,NaT,,False
1907649,2020-01-08,NaT,NaT,,False
...,...,...,...,...,...
2204364,2022-07-27,NaT,2023-04-04,,False
2201397,2022-08-05,NaT,2023-02-28,,False
2204013,2022-08-05,NaT,2023-03-06,,False
2102092,2021-09-01,2021-12-02,NaT,92.0,True


In [54]:
print(date_outcome_df['one_year_status'].value_counts())

one_year_status
False    77
True     14
Name: count, dtype: int64


### Follow-up

In [55]:
date_outcome_df['difference today and end RT'] = (date_outcome_df['Date dernières nouvelles'] - date_outcome_df['date fin de ttt RT']).dt.days
date_outcome_df

Unnamed: 0,date fin de ttt RT,Date de recidive locale,Date dernières nouvelles,difference end RT and local recurrence,one_year_status,difference today and end RT
1403778,2019-10-28,NaT,NaT,,False,
1906503,2019-11-19,2020-07-01,NaT,225.0,True,
1904130,2019-12-11,NaT,2023-04-06,,False,1212.0
1905976,2019-10-16,NaT,NaT,,False,
1907649,2020-01-08,NaT,NaT,,False,
...,...,...,...,...,...,...
2204364,2022-07-27,NaT,2023-04-04,,False,251.0
2201397,2022-08-05,NaT,2023-02-28,,False,207.0
2204013,2022-08-05,NaT,2023-03-06,,False,213.0
2102092,2021-09-01,2021-12-02,NaT,92.0,True,


In [56]:
date_outcome_df.sort_index(inplace=True)
date_outcome_df

Unnamed: 0,date fin de ttt RT,Date de recidive locale,Date dernières nouvelles,difference end RT and local recurrence,one_year_status,difference today and end RT
400444,2022-04-20,NaT,NaT,,False,
601828,2022-07-19,NaT,NaT,,False,
1000305,2022-05-23,2022-10-05,2023-04-04,135.0,True,316.0
1104589,2020-05-25,NaT,NaT,,False,
1403778,2019-10-28,NaT,NaT,,False,
...,...,...,...,...,...,...
8203368,2021-09-17,NaT,2023-02-07,,False,508.0
8403324,2021-09-08,NaT,NaT,,False,
8502745,2021-04-14,NaT,NaT,,False,
9604419,2021-08-27,NaT,NaT,,False,


In [57]:
date_outcome_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/date_outcome_df.csv', sep=';', index=True)

### Patient ID

In [58]:
liste_patients_df = pd.read_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/liste_id_patients.csv', index_col=0, sep=';')
liste_patients_df

Unnamed: 0,ID ICM
Patient 1,1403778
Patient 2,1906503
Patient 3,1904130
Patient 4,1905976
Patient 5,1904380
...,...
Patient 82,1705579
Patient 83,2204364
Patient 84,2201397
Patient 85,2204013


In [59]:
liste_patients_df.sort_values(by='ID ICM', inplace=True)
liste_patients_df

Unnamed: 0,ID ICM
Patient 73,400444
Patient 81,601828
Patient 76,1000305
Patient 16,1104589
Patient 1,1403778
...,...
Patient 85,2204013
Patient 83,2204364
Patient 52,8203368
Patient 50,8403324


In [63]:
# remove patient in clinical_data_df that are not in liste_patients_df
filtered_date_outcome_df = date_outcome_df[date_outcome_df.index.isin(liste_patients_df['ID ICM'])]
filtered_date_outcome_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/filtered_date_outcome_df.csv', sep=';', index=True)

In [64]:
# remove the patients of liste_patients_df that are not in clinical_data_df
filtered_liste_patients_df = liste_patients_df[liste_patients_df['ID ICM'].isin(filtered_date_outcome_df.index)]

In [65]:
# for each patient in date_outcome_df, get his index name in liste_patients_df
filtered_date_outcome_df.index = filtered_liste_patients_df.index 
filtered_date_outcome_df

Unnamed: 0,date fin de ttt RT,Date de recidive locale,Date dernières nouvelles,difference end RT and local recurrence,one_year_status,difference today and end RT
Patient 73,2022-04-20,NaT,NaT,,False,
Patient 81,2022-07-19,NaT,NaT,,False,
Patient 76,2022-05-23,2022-10-05,2023-04-04,135.0,True,316.0
Patient 16,2020-05-25,NaT,NaT,,False,
Patient 1,2019-10-28,NaT,NaT,,False,
...,...,...,...,...,...,...
Patient 85,2022-08-05,NaT,2023-03-06,,False,213.0
Patient 83,2022-07-27,NaT,2023-04-04,,False,251.0
Patient 52,2021-09-17,NaT,2023-02-07,,False,508.0
Patient 50,2021-09-08,NaT,NaT,,False,


In [66]:
# create one dataframe with the index filtered_date_outcome_df and the one_year_status (but binary 0 or 1 instead of True and False) 
one_year_status_df = pd.DataFrame(index=filtered_date_outcome_df.index, columns=['one_year_status'])
one_year_status_df['one_year_status'] = filtered_date_outcome_df['one_year_status'].astype(int)
one_year_status_df

Unnamed: 0,one_year_status
Patient 73,0
Patient 81,0
Patient 76,1
Patient 16,0
Patient 1,0
...,...
Patient 85,0
Patient 83,0
Patient 52,0
Patient 50,0


In [67]:
print(one_year_status_df['one_year_status'].value_counts())

one_year_status
0    71
1    14
Name: count, dtype: int64


In [68]:
one_year_status_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/outcomes.csv', sep=',', index=True)

## Repeatability analysis

In [2]:
repeat_file = "/home/tachennf/Documents/delta-rad/data/ICM_0.35T/filtered_features_gtv.xlsx"
repeatable_features_gtv_df = pd.read_excel(repeat_file, index_col=0)
repeatable_features_gtv_df.head()

Unnamed: 0_level_0,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxisLength,original_shape_MajorAxisLength,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,original_shape_Maximum3DDiameter,original_shape_MeshVolume,original_shape_MinorAxisLength,...,original_ngtdm_Coarseness,original_ngtdm_Contrast,Patient Rxmic,Fraction,Volume,Récidive Locale,Récidive Méta,Décès,Récidive,Évènement
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Patient38_IRM_simu_mridian_gtv,0.431793,0.370555,24.678556,66.598953,53.068955,57.813322,47.606047,68.904864,24300.18231,28.756943,...,0.001856,0.175404,Patient 38,Simulation mridian,GTV,0,0,1,0,1
Patient38_mridian_ttt_2_gtv,0.442144,0.371772,24.636047,66.266497,52.185463,57.813322,47.606047,69.633631,24369.47426,29.299306,...,0.001997,0.128942,Patient 38,2,GTV,0,0,1,0,1
Patient38_mridian_ttt_4_gtv,0.442374,0.371341,24.625888,66.316118,53.068955,57.813322,47.606047,68.904864,24464.7507,29.336497,...,0.002107,0.153118,Patient 38,4,GTV,0,0,1,0,1
Patient38_mridian_ttt_5_gtv,0.442374,0.371341,24.625888,66.316118,53.068955,57.813322,47.606047,68.904864,24464.7507,29.336497,...,0.002032,0.169598,Patient 38,5,GTV,0,0,1,0,1
Patient10_IRM_simu_GIE_gtv,0.672158,0.549602,25.401274,46.217551,46.704549,54.477126,38.744658,54.865907,18814.74989,31.065518,...,0.003128,0.828998,Patient 10,Simulation GIE,GTV,0,1,1,1,1


In [3]:
repeatable_features = repeatable_features_gtv_df.columns[:-8]
print('Number of repeatable features:', len(repeatable_features))

Number of repeatable features: 81


In [4]:
print(repeatable_features)

Index(['original_shape_Elongation', 'original_shape_Flatness',
       'original_shape_LeastAxisLength', 'original_shape_MajorAxisLength',
       'original_shape_Maximum2DDiameterColumn',
       'original_shape_Maximum2DDiameterRow',
       'original_shape_Maximum2DDiameterSlice',
       'original_shape_Maximum3DDiameter', 'original_shape_MeshVolume',
       'original_shape_MinorAxisLength', 'original_shape_Sphericity',
       'original_shape_SurfaceArea', 'original_shape_SurfaceVolumeRatio',
       'original_shape_VoxelVolume', 'original_firstorder_10Percentile',
       'original_firstorder_90Percentile', 'original_firstorder_Energy',
       'original_firstorder_Entropy', 'original_firstorder_InterquartileRange',
       'original_firstorder_Kurtosis', 'original_firstorder_Maximum',
       'original_firstorder_MeanAbsoluteDeviation', 'original_firstorder_Mean',
       'original_firstorder_Median', 'original_firstorder_Range',
       'original_firstorder_RobustMeanAbsoluteDeviation',
   

## Reproductibility analysis

In [7]:
icc_file = "/home/tachennf/Documents/delta-rad/data/ICM_0.35T/results_icc.csv"
icc_df = pd.read_csv(icc_file)
icc_df.head()

Unnamed: 0,Feature,ICC
0,original_shape_SurfaceArea,0.964
1,original_firstorder_RootMeanSquared,0.316
2,original_shape_SurfaceVolumeRatio,0.985
3,original_shape_Maximum2DDiameterColumn,0.985
4,original_glcm_Imc2,0.826


In [8]:
threshold = 0.75
reproductible_features = icc_df[icc_df['ICC'] >= threshold]
print('Number of reproductible features (with ICC >', threshold, ') :', len(reproductible_features))


Number of reproductible features (with ICC > 0.75 ) : 58


## Repeatable and reproducible features

In [9]:
# intersection between repeatable features and reproductible features
filtered_features = list(set(repeatable_features) & set(reproductible_features['Feature']))
print('Number of repetable and reproducible features:', len(filtered_features))

Number of repetable and reproducible features: 49


Out of 107 features, only 49 are kept so we remove 58 features. 

In [10]:
print(filtered_features)

['original_glcm_JointEntropy', 'original_glcm_Idn', 'original_glcm_Id', 'original_shape_Maximum2DDiameterRow', 'original_glcm_MCC', 'original_glrlm_ShortRunEmphasis', 'original_shape_VoxelVolume', 'original_glrlm_GrayLevelNonUniformity', 'original_glcm_Imc1', 'original_glcm_InverseVariance', 'original_shape_SurfaceVolumeRatio', 'original_glcm_Imc2', 'original_gldm_LargeDependenceEmphasis', 'original_shape_Maximum3DDiameter', 'original_glrlm_RunLengthNonUniformityNormalized', 'original_gldm_DependenceNonUniformity', 'original_shape_Flatness', 'original_shape_Maximum2DDiameterColumn', 'original_glcm_Correlation', 'original_shape_MajorAxisLength', 'original_glcm_MaximumProbability', 'original_shape_MeshVolume', 'original_shape_LeastAxisLength', 'original_firstorder_Entropy', 'original_glcm_Idmn', 'original_firstorder_Uniformity', 'original_glrlm_RunEntropy', 'original_glcm_SumEntropy', 'original_shape_Elongation', 'original_shape_SurfaceArea', 'original_glszm_ZoneEntropy', 'original_glszm

## Filter table

In [11]:
filtered_gtv_df = repeatable_features_gtv_df[repeatable_features_gtv_df.columns[:-8]].loc[:, filtered_features]
filtered_gtv_df

Unnamed: 0_level_0,original_glcm_JointEntropy,original_glcm_Idn,original_glcm_Id,original_shape_Maximum2DDiameterRow,original_glcm_MCC,original_glrlm_ShortRunEmphasis,original_shape_VoxelVolume,original_glrlm_GrayLevelNonUniformity,original_glcm_Imc1,original_glcm_InverseVariance,...,original_glrlm_RunPercentage,original_glcm_Idm,original_shape_Sphericity,original_glrlm_GrayLevelNonUniformityNormalized,original_gldm_GrayLevelNonUniformity,original_shape_Maximum2DDiameterSlice,original_firstorder_Kurtosis,original_gldm_DependenceEntropy,original_glcm_JointEnergy,original_glszm_LargeAreaHighGrayLevelEmphasis
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Patient38_IRM_simu_mridian_gtv,9.296847,0.912290,0.286330,57.813322,0.657849,0.952777,24468.72055,190.926217,-0.111253,0.198082,...,0.936841,0.194175,0.607379,0.036070,205.558584,47.606047,2.792427,7.284733,0.002152,5938.112207
Patient38_mridian_ttt_2_gtv,8.881243,0.918255,0.322940,57.813322,0.689003,0.940297,24538.01250,217.844776,-0.122338,0.232458,...,0.920226,0.232382,0.609360,0.041780,239.276386,47.606047,3.013965,7.324008,0.002954,12300.061020
Patient38_mridian_ttt_4_gtv,8.965991,0.917098,0.329904,57.813322,0.759629,0.937656,24633.28894,200.358813,-0.146088,0.237727,...,0.916220,0.239814,0.610461,0.038445,219.398031,47.606047,2.555804,7.459851,0.002655,11179.429970
Patient38_mridian_ttt_5_gtv,9.331947,0.915969,0.298745,57.813322,0.730941,0.948086,24633.28894,179.600406,-0.135938,0.210590,...,0.930150,0.208133,0.610461,0.033948,193.812588,47.606047,2.595478,7.451735,0.002042,6318.359375
Patient10_IRM_simu_GIE_gtv,11.692852,0.914240,0.165475,54.477126,0.806036,0.980282,18968.67186,45.638871,-0.251375,0.095591,...,0.973497,0.091445,0.606230,0.010718,47.090535,38.744658,2.281913,8.129849,0.000381,18557.740680
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Patient21_mridian_ttt_1_gtv,9.417531,0.925365,0.292583,67.541819,0.698756,0.949313,46473.24606,343.800646,-0.115514,0.202774,...,0.932217,0.201275,0.533882,0.034367,372.545429,70.373728,2.937300,7.431735,0.002050,9207.704603
Patient21_mridian_ttt_2_gtv,9.673882,0.928125,0.291257,67.541819,0.735026,0.947406,49574.06091,327.195701,-0.131991,0.203683,...,0.929239,0.202278,0.636310,0.030759,357.278850,70.373728,3.333079,7.645271,0.001867,11987.990760
Patient21_mridian_ttt_3_gtv,9.363763,0.928359,0.303768,67.541819,0.722325,0.944065,50310.28790,371.783880,-0.126162,0.217034,...,0.924673,0.214623,0.638639,0.034608,409.237238,70.373728,3.497868,7.531429,0.002321,13047.518340
Patient21_mridian_ttt_4_gtv,9.384238,0.924847,0.301061,67.797028,0.716552,0.946298,49179.96293,358.299533,-0.122955,0.213320,...,0.928144,0.211062,0.534827,0.033993,391.036280,70.373728,2.693550,7.489155,0.002147,9061.905427


## Get patient list

In [12]:
patient_list = np.unique(repeatable_features_gtv_df['Patient Rxmic'].values)
print(patient_list.shape)

(83,)


3 patients were ruled out 

In [13]:
def extract_rad(extracted_feat_df, fraction: str, patient_list: list, features_list: list): 
    data = []
    error_patients = set()
    for patient in patient_list: 
        row_data = []
        try:
            line = extracted_feat_df.loc[(extracted_feat_df['Patient Rxmic'] == patient) & (extracted_feat_df['Fraction'] == fraction)].iloc[0]
            for feature in features_list:
                feature_value = line[feature]
                row_data.append(feature_value) # add the value to raw_data 
        except IndexError:
            for feature in features_list: 
                row_data.append(None)
            error_patients.add(patient)
            print("Error with patient ", patient)

        data.append(row_data)

    return data, error_patients

# Get features

## Simu features

In [14]:
data_simu, error_patients = extract_rad(repeatable_features_gtv_df, fraction='Simulation mridian', patient_list=patient_list, features_list=filtered_features)

Error with patient  Patient 17
Error with patient  Patient 18
Error with patient  Patient 19
Error with patient  Patient 21
Error with patient  Patient 22
Error with patient  Patient 23
Error with patient  Patient 24
Error with patient  Patient 32
Error with patient  Patient 57
Error with patient  Patient 66


Those are patients who did not have the MRIdian simulation. They got simulation at 1.5T instead. 

In [15]:
simu_gtv_df = pd.DataFrame(data_simu, index=patient_list, columns=filtered_features)
simu_gtv_df.head()

Unnamed: 0,original_glcm_JointEntropy,original_glcm_Idn,original_glcm_Id,original_shape_Maximum2DDiameterRow,original_glcm_MCC,original_glrlm_ShortRunEmphasis,original_shape_VoxelVolume,original_glrlm_GrayLevelNonUniformity,original_glcm_Imc1,original_glcm_InverseVariance,...,original_glrlm_RunPercentage,original_glcm_Idm,original_shape_Sphericity,original_glrlm_GrayLevelNonUniformityNormalized,original_gldm_GrayLevelNonUniformity,original_shape_Maximum2DDiameterSlice,original_firstorder_Kurtosis,original_gldm_DependenceEntropy,original_glcm_JointEnergy,original_glszm_LargeAreaHighGrayLevelEmphasis
Patient 1,9.459772,0.925074,0.319977,69.193452,0.850239,0.936791,57035.93799,412.358534,-0.161965,0.232486,...,0.914187,0.233873,0.620984,0.034244,465.737737,67.797028,2.86773,7.777353,0.002607,30809.85499
Patient 10,9.121655,0.913871,0.316878,56.582369,0.73692,0.944792,21173.02208,164.281722,-0.142684,0.231669,...,0.92583,0.226624,0.58077,0.036296,177.880753,36.629694,2.489548,7.393406,0.002365,8648.95427
Patient 11,7.260484,0.946192,0.435826,69.995168,0.647232,0.891533,51224.07552,829.237301,-0.109078,0.348995,...,0.853821,0.362001,0.657315,0.082086,1027.238079,49.332752,7.305087,7.068595,0.011753,547640.9543
Patient 12,8.203637,0.931228,0.379405,53.962602,0.769851,0.917521,29791.20861,325.866594,-0.150878,0.294375,...,0.890257,0.295542,0.728817,0.053204,379.236953,39.693094,4.218738,7.28995,0.005702,52709.72484
Patient 13,8.900018,0.926487,0.326043,60.133526,0.729234,0.937422,42164.15279,375.681497,-0.121388,0.241209,...,0.916069,0.237198,0.700884,0.042118,419.164955,49.494058,3.847257,7.376553,0.003239,18177.12628


In [16]:
simu_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/simu_mridian_gtv.csv')

## F1 features

In [17]:
data_f1, error_patients = extract_rad(repeatable_features_gtv_df, fraction='1', patient_list=patient_list, features_list=filtered_features)

Error with patient  Patient 38
Error with patient  Patient 80


In [18]:
f1_gtv_df = pd.DataFrame(data_f1, index=patient_list, columns=filtered_features)
f1_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/f1_gtv.csv')

## F2 features

In [19]:
data_f2, error_patients = extract_rad(repeatable_features_gtv_df, fraction='2', patient_list=patient_list, features_list=filtered_features)

Error with patient  Patient 62
Error with patient  Patient 74
Error with patient  Patient 80
Error with patient  Patient 82
Error with patient  Patient 84
Error with patient  Patient 85


In [20]:
f2_gtv_df = pd.DataFrame(data_f2, index=patient_list, columns=filtered_features)
f2_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/f2_gtv.csv')

## F3 features

In [21]:
data_f3, error_patients = extract_rad(repeatable_features_gtv_df, fraction='3', patient_list=patient_list, features_list=filtered_features)

Error with patient  Patient 38
Error with patient  Patient 56
Error with patient  Patient 63
Error with patient  Patient 74


In [22]:
f3_gtv_df = pd.DataFrame(data_f3, index=patient_list, columns=filtered_features)
f3_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/f3_gtv.csv')

## F4 features

In [23]:
data_f4, error_patients = extract_rad(repeatable_features_gtv_df, fraction='4', patient_list=patient_list, features_list=filtered_features)    

Error with patient  Patient 70
Error with patient  Patient 74
Error with patient  Patient 80
Error with patient  Patient 81


In [24]:
f4_gtv_df = pd.DataFrame(data_f4, index=patient_list, columns=filtered_features)
f4_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/f4_gtv.csv')

## F5 features

In [25]:
data_f5, error_patients = extract_rad(repeatable_features_gtv_df, fraction='5', patient_list=patient_list, features_list=filtered_features)

Error with patient  Patient 54
Error with patient  Patient 61
Error with patient  Patient 64
Error with patient  Patient 66
Error with patient  Patient 71
Error with patient  Patient 79
Error with patient  Patient 86


In [26]:
f5_gtv_df = pd.DataFrame(data_f5, index=patient_list, columns=filtered_features)
f5_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/f5_gtv.csv')

# Get delta-rad features

In [27]:
def extract_delta_rad(extracted_feat_df, fraction1: str, fraction2: str, patient_list: list, features_list: list): 
    '''Compute the delta radiomics features by computing ratio between two fractions. '''

    data = []
    error_patients = set()
    for patient in patient_list: 
        row_data = []
        try:
            line1 = extracted_feat_df.loc[(extracted_feat_df['Patient Rxmic'] == patient) & (extracted_feat_df['Fraction'] == fraction1)].iloc[0]
            line2 = extracted_feat_df.loc[(extracted_feat_df['Patient Rxmic'] == patient) & (extracted_feat_df['Fraction'] == fraction2)].iloc[0]
            for feature in features_list:
                feature_value1 = line1[feature]
                feature_value2 = line2[feature]
                delta_feature = feature_value2 / feature_value1 
                row_data.append(delta_feature) # add the value to raw_data 
        except IndexError:
            for feature in features_list: 
                row_data.append(None)
            error_patients.add(patient)
            print("Error with patient ", patient)

        data.append(row_data)

    return data, error_patients

def extract_delta_rad2(extracted_feat_df, fraction1: str, fraction2: str, patient_list: list, features_list: list): 
    '''Compute the delta radiomics features by computing the relative difference between two fractions. '''
    data = []
    error_patients = set()
    for patient in patient_list: 
        row_data = []
        try:
            line1 = extracted_feat_df.loc[(extracted_feat_df['Patient Rxmic'] == patient) & (extracted_feat_df['Fraction'] == fraction1)].iloc[0]
            line2 = extracted_feat_df.loc[(extracted_feat_df['Patient Rxmic'] == patient) & (extracted_feat_df['Fraction'] == fraction2)].iloc[0]
            for feature in features_list:
                feature_value1 = line1[feature]
                feature_value2 = line2[feature]
                delta_feature = (feature_value2 - feature_value1) / feature_value1
                row_data.append(delta_feature) # add the value to raw_data 
        except IndexError:
            for feature in features_list: 
                row_data.append(None)
            error_patients.add(patient)
            print("Error with patient ", patient)

        data.append(row_data)

    return data, error_patients

## Simu/F1

In [28]:
# relative difference technique
data_simu_f1, error_patients = extract_delta_rad2(repeatable_features_gtv_df, fraction1='Simulation mridian', fraction2='1', patient_list=patient_list, features_list=filtered_features)
simu_f1_gtv_df = pd.DataFrame(data_simu_f1, index=patient_list, columns=filtered_features)
simu_f1_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/rd_simu_f1_gtv.csv')

Error with patient  Patient 17
Error with patient  Patient 18
Error with patient  Patient 19
Error with patient  Patient 21
Error with patient  Patient 22
Error with patient  Patient 23
Error with patient  Patient 24
Error with patient  Patient 32
Error with patient  Patient 38
Error with patient  Patient 57
Error with patient  Patient 66
Error with patient  Patient 80


## Simu/F3

In [29]:
data_simu_f3, error_patients = extract_delta_rad2(repeatable_features_gtv_df, fraction1='Simulation mridian', fraction2='3', patient_list=patient_list, features_list=filtered_features)
simu_f3_gtv_df = pd.DataFrame(data_simu_f3, index=patient_list, columns=filtered_features)
simu_f3_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/rd_simu_f3_gtv.csv')

Error with patient  Patient 17
Error with patient  Patient 18
Error with patient  Patient 19
Error with patient  Patient 21
Error with patient  Patient 22
Error with patient  Patient 23
Error with patient  Patient 24
Error with patient  Patient 32
Error with patient  Patient 38
Error with patient  Patient 56
Error with patient  Patient 57
Error with patient  Patient 63
Error with patient  Patient 66
Error with patient  Patient 74


## Simu/F5

In [30]:
data_simu_f5, error_patients = extract_delta_rad2(repeatable_features_gtv_df, fraction1='Simulation mridian', fraction2='5', patient_list=patient_list, features_list=filtered_features)
simu_f5_gtv_df = pd.DataFrame(data_simu_f5, index=patient_list, columns=filtered_features)
simu_f5_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/rd_simu_f5_gtv.csv')

Error with patient  Patient 17
Error with patient  Patient 18
Error with patient  Patient 19
Error with patient  Patient 21
Error with patient  Patient 22
Error with patient  Patient 23
Error with patient  Patient 24
Error with patient  Patient 32
Error with patient  Patient 54
Error with patient  Patient 57
Error with patient  Patient 61
Error with patient  Patient 64
Error with patient  Patient 66
Error with patient  Patient 71
Error with patient  Patient 79
Error with patient  Patient 86


## F1/F2

In [31]:
# relative difference technique
data_f1_f2, error_patients = extract_delta_rad2(repeatable_features_gtv_df, fraction1='1', fraction2='2', patient_list=patient_list, features_list=filtered_features)
f1_f2_gtv_df = pd.DataFrame(data_f1_f2, index=patient_list, columns=filtered_features)
f1_f2_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/rd_f1_f2_gtv.csv')

Error with patient  Patient 38
Error with patient  Patient 62
Error with patient  Patient 74
Error with patient  Patient 80
Error with patient  Patient 82
Error with patient  Patient 84
Error with patient  Patient 85


## F2/F3

In [32]:
# relative difference technique
data_f2_f3, error_patients = extract_delta_rad2(repeatable_features_gtv_df, fraction1='2', fraction2='3', patient_list=patient_list, features_list=filtered_features)
f2_f3_gtv_df = pd.DataFrame(data_f2_f3, index=patient_list, columns=filtered_features)
f2_f3_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/rd_f2_f3_gtv.csv')

Error with patient  Patient 38
Error with patient  Patient 56
Error with patient  Patient 62
Error with patient  Patient 63
Error with patient  Patient 74
Error with patient  Patient 80
Error with patient  Patient 82
Error with patient  Patient 84
Error with patient  Patient 85


## F1/F3

In [33]:
# relative differnce technique
data_f1_f3, error_patients = extract_delta_rad2(repeatable_features_gtv_df, fraction1='1', fraction2='3', patient_list=patient_list, features_list=filtered_features)
f1_f3_gtv_df = pd.DataFrame(data_f1_f3, index=patient_list, columns=filtered_features)
f1_f3_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/rd_f1_f3_gtv.csv')

Error with patient  Patient 38
Error with patient  Patient 56
Error with patient  Patient 63
Error with patient  Patient 74
Error with patient  Patient 80


## F1/F4

In [34]:
# relative difference technique
data_f1_f4, error_patients = extract_delta_rad2(repeatable_features_gtv_df, fraction1='1', fraction2='4', patient_list=patient_list, features_list=filtered_features)
f1_f4_gtv_df = pd.DataFrame(data_f1_f4, index=patient_list, columns=filtered_features)
f1_f4_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/rd_f1_f4_gtv.csv')

Error with patient  Patient 38
Error with patient  Patient 70
Error with patient  Patient 74
Error with patient  Patient 80
Error with patient  Patient 81


## F1/F5

In [35]:
# relative difference technique
data_f1_f5, error_patients = extract_delta_rad2(repeatable_features_gtv_df, fraction1='1', fraction2='5', patient_list=patient_list, features_list=filtered_features)
f1_f5_gtv_df = pd.DataFrame(data_f1_f5, index=patient_list, columns=filtered_features)
f1_f5_gtv_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/rd_f1_f5_gtv.csv')

Error with patient  Patient 38
Error with patient  Patient 54
Error with patient  Patient 61
Error with patient  Patient 64
Error with patient  Patient 66
Error with patient  Patient 71
Error with patient  Patient 79
Error with patient  Patient 80
Error with patient  Patient 86


# Get patient outcomes

In [36]:
outcome_features = list(repeatable_features_gtv_df.columns)[-5:]
outcome_features

['Récidive Locale', 'Récidive Méta', 'Décès', 'Récidive', 'Évènement']

In [37]:
def extract_outcomes(extracted_feat_df, patient_list: list, features_list: list): 
    data = []
    error_patients = set()
    for patient in patient_list: 
        row_data = []
        try:
            line = extracted_feat_df.loc[(extracted_feat_df['Patient Rxmic'] == patient)].iloc[0]
            for feature in features_list:
                outcome_value = line[feature]
                row_data.append(outcome_value) # add the value to raw_data 
        except IndexError:
            for feature in features_list: 
                row_data.append(None)
            error_patients.add(patient)
            print("Error with patient ", patient)

        data.append(row_data)

    return data, error_patients

In [38]:
outcome_data, error_patients = extract_outcomes(repeatable_features_gtv_df, patient_list, outcome_features)

In [39]:
outcome_df = pd.DataFrame(outcome_data, index=patient_list, columns=outcome_features)
outcome_df.to_csv('/home/tachennf/Documents/delta-rad/data/ICM_0.35T/extracted_radiomics/outcomes.csv')

In [40]:
outcome_df

Unnamed: 0,Récidive Locale,Récidive Méta,Décès,Récidive,Évènement
Patient 1,0,1,1,1,1
Patient 10,0,1,1,1,1
Patient 11,0,1,1,1,1
Patient 12,0,0,0,0,0
Patient 13,0,1,0,1,1
...,...,...,...,...,...
Patient 83,0,0,0,0,0
Patient 84,0,1,0,1,1
Patient 85,0,0,0,0,0
Patient 86,0,0,1,0,1


## Outcomes statistics

In [41]:
outcome_df["Récidive Locale"].value_counts()

Récidive Locale
0    63
1    20
Name: count, dtype: int64

In [42]:
outcome_df["Récidive Méta"].value_counts()

Récidive Méta
1    62
0    21
Name: count, dtype: int64

In [43]:
outcome_df["Décès"].value_counts()

Décès
0    48
1    35
Name: count, dtype: int64