# Radiomics csv extraction
Create one CSV file per fraction (and one per pair of fractions for the delta-radiomics ratios) from 'filtered_features_gtv.xlsx', plus one CSV file with the patient outcomes.

In [1]:
import pandas as pd 
import csv
import pingouin
import matplotlib.pyplot as plt 
import numpy as np
import seaborn as sns

### Load the Excel feature table

In [2]:
# Location of the Excel workbook with the filtered GTV radiomic features.
# NOTE(review): machine-specific absolute path — adjust before running elsewhere.
patient_file = "/mnt/c/Users/tachenne/delta-rad/filtered_features_gtv.xlsx"
# The first spreadsheet column (scan identifiers such as
# 'Patient38_mridian_ttt_2_gtv') becomes the row index.
filtered_features_gtv_df = pd.read_excel(patient_file, index_col=0)
filtered_features_gtv_df.head()

Unnamed: 0_level_0,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxisLength,original_shape_MajorAxisLength,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,original_shape_Maximum3DDiameter,original_shape_MeshVolume,original_shape_MinorAxisLength,...,original_ngtdm_Coarseness,original_ngtdm_Contrast,Patient Rxmic,Fraction,Volume,Récidive Locale,Récidive Méta,Décès,Récidive,Évènement
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Patient38_IRM_simu_mridian_gtv,0.431793,0.370555,24.678556,66.598953,53.068955,57.813322,47.606047,68.904864,24300.18231,28.756943,...,0.001856,0.175404,Patient 38,Simulation mridian,GTV,0,0,1,0,1
Patient38_mridian_ttt_2_gtv,0.442144,0.371772,24.636047,66.266497,52.185463,57.813322,47.606047,69.633631,24369.47426,29.299306,...,0.001997,0.128942,Patient 38,2,GTV,0,0,1,0,1
Patient38_mridian_ttt_4_gtv,0.442374,0.371341,24.625888,66.316118,53.068955,57.813322,47.606047,68.904864,24464.7507,29.336497,...,0.002107,0.153118,Patient 38,4,GTV,0,0,1,0,1
Patient38_mridian_ttt_5_gtv,0.442374,0.371341,24.625888,66.316118,53.068955,57.813322,47.606047,68.904864,24464.7507,29.336497,...,0.002032,0.169598,Patient 38,5,GTV,0,0,1,0,1
Patient10_IRM_simu_GIE_gtv,0.672158,0.549602,25.401274,46.217551,46.704549,54.477126,38.744658,54.865907,18814.74989,31.065518,...,0.003128,0.828998,Patient 10,Simulation GIE,GTV,0,1,1,1,1


### Get patient list

In [6]:
# One entry per distinct 'Patient Rxmic' label. np.unique returns the labels
# sorted as strings, so 'Patient 10' comes right after 'Patient 1'.
patient_list = np.unique(filtered_features_gtv_df['Patient Rxmic'].values)
print(patient_list.shape)

(83,)


### Get features

In [46]:
# Radiomic feature columns = every column except the last 8, which hold
# metadata and outcomes: Patient Rxmic, Fraction, Volume, Récidive Locale,
# Récidive Méta, Décès, Récidive, Évènement.
features = list(filtered_features_gtv_df.columns)[:-8]
features 


['original_shape_Elongation',
 'original_shape_Flatness',
 'original_shape_LeastAxisLength',
 'original_shape_MajorAxisLength',
 'original_shape_Maximum2DDiameterColumn',
 'original_shape_Maximum2DDiameterRow',
 'original_shape_Maximum2DDiameterSlice',
 'original_shape_Maximum3DDiameter',
 'original_shape_MeshVolume',
 'original_shape_MinorAxisLength',
 'original_shape_Sphericity',
 'original_shape_SurfaceArea',
 'original_shape_SurfaceVolumeRatio',
 'original_shape_VoxelVolume',
 'original_firstorder_10Percentile',
 'original_firstorder_90Percentile',
 'original_firstorder_Energy',
 'original_firstorder_Entropy',
 'original_firstorder_InterquartileRange',
 'original_firstorder_Kurtosis',
 'original_firstorder_Maximum',
 'original_firstorder_MeanAbsoluteDeviation',
 'original_firstorder_Mean',
 'original_firstorder_Median',
 'original_firstorder_Range',
 'original_firstorder_RobustMeanAbsoluteDeviation',
 'original_firstorder_RootMeanSquared',
 'original_firstorder_Skewness',
 'origina

In [47]:
print(len(features))

81


3 patients were ruled out.

In [63]:
def extract_rad(extracted_feat_df, fraction: str, patient_list: list, features_list: list):
    """Collect the feature values of a single fraction for every patient.

    For each patient, the first row of ``extracted_feat_df`` whose
    'Patient Rxmic' equals the patient and whose 'Fraction' equals
    ``fraction`` is used.

    Args:
        extracted_feat_df: DataFrame holding the 'Patient Rxmic' and
            'Fraction' columns plus the columns named in ``features_list``.
        fraction: Value to match in the 'Fraction' column.
        patient_list: Patients to process; the output keeps this order.
        features_list: Names of the feature columns to read.

    Returns:
        A ``(data, error_patients)`` tuple. ``data`` is a list with one row
        per patient, each row listing the values in ``features_list`` order
        (all ``None`` when the patient has no matching row).
        ``error_patients`` is the set of patients without a matching row.
    """
    rows = []
    missing = set()
    for patient in patient_list:
        same_patient = extracted_feat_df['Patient Rxmic'] == patient
        same_fraction = extracted_feat_df['Fraction'] == fraction
        matches = extracted_feat_df.loc[same_patient & same_fraction]

        # No acquisition for this patient/fraction: emit a placeholder row.
        if len(matches) == 0:
            missing.add(patient)
            print("Error with patient ", patient)
            rows.append([None for _ in features_list])
            continue

        first_match = matches.iloc[0]
        rows.append([first_match[name] for name in features_list])

    return rows, missing

## Simu features

In [64]:
# Only rows whose 'Fraction' is exactly 'Simulation mridian' are matched; rows
# labelled differently (e.g. 'Simulation GIE', visible in the head() output of
# the loaded table) are not selected, and a patient without a matching row
# gets an all-None line.
data_simu, error_patients = extract_rad(filtered_features_gtv_df, fraction='Simulation mridian', patient_list=patient_list, features_list=features)

Error with patient  Patient 17
Error with patient  Patient 18
Error with patient  Patient 19
Error with patient  Patient 21
Error with patient  Patient 22
Error with patient  Patient 23
Error with patient  Patient 24
Error with patient  Patient 32
Error with patient  Patient 57
Error with patient  Patient 66


In [66]:
# One row per patient (same order as patient_list), one column per feature.
simu_gtv_df = pd.DataFrame(data_simu, index=patient_list, columns=features)
simu_gtv_df.head()

Unnamed: 0,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxisLength,original_shape_MajorAxisLength,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,original_shape_Maximum3DDiameter,original_shape_MeshVolume,original_shape_MinorAxisLength,...,original_gldm_DependenceNonUniformity,original_gldm_DependenceNonUniformityNormalized,original_gldm_DependenceVariance,original_gldm_GrayLevelNonUniformity,original_gldm_LargeDependenceEmphasis,original_gldm_LargeDependenceHighGrayLevelEmphasis,original_gldm_SmallDependenceEmphasis,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Contrast
Patient 1,0.809098,0.540257,32.780488,60.675736,68.556955,69.193452,67.797028,77.197323,56816.33303,49.092636,...,2257.463629,0.17141,4.637846,465.737737,15.078056,6698.875171,0.308881,1.301331,0.001009,0.212588
Patient 10,0.579832,0.477183,25.195758,52.80104,52.083538,56.582369,36.629694,56.956781,21011.88221,30.615753,...,947.524033,0.193807,2.838401,177.880753,11.413991,4109.477194,0.311449,0.54222,0.002563,0.167525
Patient 11,0.610618,0.589149,36.211498,61.464026,63.29775,69.995168,49.332752,76.67933,51006.63593,37.53102,...,1279.819919,0.108203,8.149638,1027.238079,31.195807,5265.488502,0.170264,1.678677,0.000794,0.037831
Patient 12,0.670671,0.593019,28.770337,48.515066,52.666912,53.962602,39.693094,54.865907,29645.58725,32.537632,...,961.93909,0.139837,4.871202,379.236953,19.719291,3075.038668,0.217825,1.131467,0.001644,0.093174
Patient 13,0.875893,0.645176,31.084952,48.180582,55.443965,60.133526,49.494058,60.683304,41996.87769,42.201017,...,1690.589975,0.173643,3.605871,419.164955,13.732334,3086.406738,0.29025,1.174338,0.001131,0.121467


In [67]:
# NOTE(review): machine-specific absolute output path.
simu_gtv_df.to_csv('/mnt/c/Users/tachenne/delta-rad/extracted_radiomics/simu_gtv.csv')

## F1 features

In [68]:
# Fraction labels are matched as strings ('1', not 1). This call rebinds
# error_patients, replacing the set returned by the previous extraction.
data_f1, error_patients = extract_rad(filtered_features_gtv_df, fraction='1', patient_list=patient_list, features_list=features)

Error with patient  Patient 38
Error with patient  Patient 80


In [69]:
# Fraction-1 features, one row per patient, written to f1_gtv.csv.
f1_gtv_df = pd.DataFrame(data_f1, index=patient_list, columns=features)
f1_gtv_df.to_csv('/mnt/c/Users/tachenne/delta-rad/extracted_radiomics/f1_gtv.csv')

## F2 features

In [70]:
# Same extraction for the rows labelled fraction '2'; error_patients is rebound again.
data_f2, error_patients = extract_rad(filtered_features_gtv_df, fraction='2', patient_list=patient_list, features_list=features)

Error with patient  Patient 62
Error with patient  Patient 74
Error with patient  Patient 80
Error with patient  Patient 82
Error with patient  Patient 84
Error with patient  Patient 85


In [71]:
# Fraction-2 features, one row per patient, written to f2_gtv.csv.
f2_gtv_df = pd.DataFrame(data_f2, index=patient_list, columns=features)
f2_gtv_df.to_csv('/mnt/c/Users/tachenne/delta-rad/extracted_radiomics/f2_gtv.csv')

## F3 features

In [72]:
# Same extraction for the rows labelled fraction '3'; error_patients is rebound again.
data_f3, error_patients = extract_rad(filtered_features_gtv_df, fraction='3', patient_list=patient_list, features_list=features)

Error with patient  Patient 38
Error with patient  Patient 56
Error with patient  Patient 63
Error with patient  Patient 74


In [73]:
# Fraction-3 features, one row per patient, written to f3_gtv.csv.
f3_gtv_df = pd.DataFrame(data_f3, index=patient_list, columns=features)
f3_gtv_df.to_csv('/mnt/c/Users/tachenne/delta-rad/extracted_radiomics/f3_gtv.csv')

## Delta - Rad features

In [74]:
def extract_delta_rad(extracted_feat_df, fraction1: str, fraction2: str, patient_list: list, features_list: list):
    """Compute, per patient, the ratio of every feature between two fractions.

    For each patient the first row matching ``fraction1`` and the first row
    matching ``fraction2`` are looked up, and each feature is reported as
    ``value at fraction2 / value at fraction1``.

    Args:
        extracted_feat_df: DataFrame holding the 'Patient Rxmic' and
            'Fraction' columns plus the columns named in ``features_list``.
        fraction1: 'Fraction' label of the baseline acquisition (denominator).
        fraction2: 'Fraction' label of the follow-up acquisition (numerator).
        patient_list: Patients to process; the output keeps this order.
        features_list: Names of the feature columns to compare.

    Returns:
        A ``(data, error_patients)`` tuple. ``data`` is a list with one row
        per patient, each row listing the ratios in ``features_list`` order.
        A row is all ``None`` when the patient lacks either fraction, and a
        single ratio is NaN when its baseline value is zero or missing.
        ``error_patients`` is the set of patients lacking either fraction.
    """
    data = []
    error_patients = set()
    for patient in patient_list:
        patient_rows = extracted_feat_df.loc[extracted_feat_df['Patient Rxmic'] == patient]
        rows1 = patient_rows.loc[patient_rows['Fraction'] == fraction1]
        rows2 = patient_rows.loc[patient_rows['Fraction'] == fraction2]

        # Both acquisitions are required to form a ratio.
        if rows1.empty or rows2.empty:
            error_patients.add(patient)
            print("Error with patient ", patient)
            data.append([None for _ in features_list])
            continue

        line1 = rows1.iloc[0]
        line2 = rows2.iloc[0]
        row_data = []
        for feature in features_list:
            baseline = line1[feature]
            if pd.isna(baseline) or baseline == 0:
                # The ratio is undefined: report NaN instead of letting the
                # division produce inf (numpy scalars) or raise
                # ZeroDivisionError (plain Python numbers).
                row_data.append(float('nan'))
            else:
                row_data.append(line2[feature] / baseline)
        data.append(row_data)

    return data, error_patients

## F1/F2

In [76]:
# extract_delta_rad divides the fraction2 value by the fraction1 value, so
# each entry here is (feature at F2) / (feature at F1).
data_f1_f2, error_patients = extract_delta_rad(filtered_features_gtv_df, fraction1='1', fraction2='2', patient_list=patient_list, features_list=features)

Error with patient  Patient 38
Error with patient  Patient 62
Error with patient  Patient 74
Error with patient  Patient 80
Error with patient  Patient 82
Error with patient  Patient 84
Error with patient  Patient 85


In [77]:
# F1-to-F2 ratios, one row per patient, written to f1_f2_gtv.csv.
f1_f2_gtv_df = pd.DataFrame(data_f1_f2, index=patient_list, columns=features)
f1_f2_gtv_df.to_csv('/mnt/c/Users/tachenne/delta-rad/extracted_radiomics/f1_f2_gtv.csv')

## F2/F3

In [78]:
# Each entry is (feature at F3) / (feature at F2): fraction2 is the numerator.
data_f2_f3, error_patients = extract_delta_rad(filtered_features_gtv_df, fraction1='2', fraction2='3', patient_list=patient_list, features_list=features)

Error with patient  Patient 38
Error with patient  Patient 56
Error with patient  Patient 62
Error with patient  Patient 63
Error with patient  Patient 74
Error with patient  Patient 80
Error with patient  Patient 82
Error with patient  Patient 84
Error with patient  Patient 85


In [79]:
# F2-to-F3 ratios, one row per patient, written to f2_f3_gtv.csv.
f2_f3_gtv_df = pd.DataFrame(data_f2_f3, index=patient_list, columns=features)
f2_f3_gtv_df.to_csv('/mnt/c/Users/tachenne/delta-rad/extracted_radiomics/f2_f3_gtv.csv')

## F1/F3

In [80]:
# Each entry is (feature at F3) / (feature at F1): fraction2 is the numerator.
data_f1_f3, error_patients = extract_delta_rad(filtered_features_gtv_df, fraction1='1', fraction2='3', patient_list=patient_list, features_list=features)

Error with patient  Patient 38
Error with patient  Patient 56
Error with patient  Patient 63
Error with patient  Patient 74
Error with patient  Patient 80


In [81]:
# F1-to-F3 ratios, one row per patient, written to f1_f3_gtv.csv.
f1_f3_gtv_df = pd.DataFrame(data_f1_f3, index=patient_list, columns=features)
f1_f3_gtv_df.to_csv('/mnt/c/Users/tachenne/delta-rad/extracted_radiomics/f1_f3_gtv.csv')

## Get patient outcomes

In [3]:
# Outcome labels = the last 5 columns of the loaded table.
outcome_features = list(filtered_features_gtv_df.columns)[-5:]
outcome_features

['Récidive Locale', 'Récidive Méta', 'Décès', 'Récidive', 'Évènement']

In [5]:
def extract_outcomes(extracted_feat_df, patient_list: list, features_list: list):
    """Read the outcome columns of every patient from their first row.

    No fraction filter is applied: the first row of ``extracted_feat_df``
    whose 'Patient Rxmic' equals the patient supplies all the values.

    Args:
        extracted_feat_df: DataFrame holding the 'Patient Rxmic' column plus
            the columns named in ``features_list``.
        patient_list: Patients to process; the output keeps this order.
        features_list: Names of the outcome columns to read.

    Returns:
        A ``(data, error_patients)`` tuple. ``data`` is a list with one row
        per patient, each row listing the values in ``features_list`` order
        (all ``None`` when the patient has no row at all).
        ``error_patients`` is the set of patients that were not found.
    """
    collected = []
    not_found = set()
    for patient in patient_list:
        patient_rows = extracted_feat_df.loc[extracted_feat_df['Patient Rxmic'] == patient]
        if len(patient_rows) > 0:
            first_row = patient_rows.iloc[0]
            values = [first_row[name] for name in features_list]
        else:
            # Unknown patient: keep the row so the output stays aligned.
            values = [None for _ in features_list]
            not_found.add(patient)
            print("Error with patient ", patient)
        collected.append(values)

    return collected, not_found

In [10]:
# Outcomes are taken from the first row found for each patient.
# NOTE(review): presumably the outcome columns are identical on all rows of a
# patient — confirm against the source table.
outcome_data, error_patients = extract_outcomes(filtered_features_gtv_df, patient_list, outcome_features)

In [12]:
# One row per patient, one column per outcome, written to outcomes.csv.
outcome_df = pd.DataFrame(outcome_data, index=patient_list, columns=outcome_features)
outcome_df.to_csv('/mnt/c/Users/tachenne/delta-rad/extracted_radiomics/outcomes.csv')

In [13]:
outcome_df

Unnamed: 0,Récidive Locale,Récidive Méta,Décès,Récidive,Évènement
Patient 1,0,1,1,1,1
Patient 10,0,1,1,1,1
Patient 11,0,1,1,1,1
Patient 12,0,0,0,0,0
Patient 13,0,1,0,1,1
...,...,...,...,...,...
Patient 83,0,0,0,0,0
Patient 84,0,1,0,1,1
Patient 85,0,0,0,0,0
Patient 86,0,0,1,0,1


## Outcomes statistics

In [14]:
outcome_df["Récidive Locale"].value_counts()

Récidive Locale
0    63
1    20
Name: count, dtype: int64

In [26]:
# 20 is the hard-coded number of patients with Récidive Locale == 1 (see the
# value_counts output); 0.3 is presumably the test-set share — TODO confirm.
print('We need {} patients with Récidive locale in the test set. '.format(round(20*0.3)))

We need 6 patients with Récidive locale in the test set. 


In [15]:
outcome_df["Récidive Méta"].value_counts()

Récidive Méta
1    62
0    21
Name: count, dtype: int64

In [27]:
# 62 is the hard-coded number of patients with Récidive Méta == 1 (see the
# value_counts output).
# NOTE(review): int() truncates 18.6 to 18, whereas the Récidive Locale cell
# uses round() — confirm which rounding is intended.
print('We need {} patients with Récidive Méta in the test set. '.format(int(62*0.3)))

We need 18 patients with Récidive Méta in the test set. 


In [16]:
outcome_df["Décès"].value_counts()

Décès
0    48
1    35
Name: count, dtype: int64

In [28]:
# 35 is the hard-coded number of patients with Décès == 1 (see the
# value_counts output); int() truncates 10.5 to 10.
print('We need {} patients with Décès in the test set. '.format(int(35*0.3)))

We need 10 patients with Décès in the test set. 
