# Features extraction

#### Note: run inside an environment with the most recent version of numpy and pyradiomics

In [16]:
import os
import six
import pandas as pd
import radiomics
import SimpleITK as sitk
from radiomics import featureextractor
from functools import reduce
import numpy as np
from skimage.measure import find_contours
import pylidc as pl
from pylidc.utils import consensus
import matplotlib.pyplot as plt

### 2D features extration

In [20]:
# Path were the images are stored
input_directory = "/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/Imagens/manifest-1600709154662/LIDC-IDRI"

# Path to the setup file from radiomics
params_file = "/home/cmonteiro/pyradiomics-master/pyradiomics-master/examples/exampleSettings/Params.yaml"

# Initialization of the extractor of features
extractor = featureextractor.RadiomicsFeatureExtractor(params_file)

# List to store the features
results_list = []

# Ordered list of all the subfolders
patient_folders = sorted(os.listdir(input_directory))


for patient_folder in patient_folders:
    patient_folder_path = os.path.join(input_directory, patient_folder)
    print(patient_folder_path)
    
    # Id of the patient - 'LIDC-IDRI-xxxx'
    patient_id = os.path.basename(patient_folder_path)

    
    for root, _, files in os.walk(patient_folder_path):
        for file in files:
            if file.endswith('.dcm'):
                # Path to the DICOM image
                image_path = os.path.join(root, file)
                
                patient_id = os.path.basename(patient_folder_path)
    
                # Construction of the path to the folder patient_id_mask
                mask_folder_path = os.path.join(input_directory, f"{patient_id}_mask")
            
                with open(mask_folder_path, 'rb') as file:
                    mask = pickle.load(file)
                
                # Extraction of the features using the mask
                result = extractor.execute(image_path, mask)
                
                print('Result type:', type(result))
                print('')
                print('Calculated features')
                for key, value in six.iteritems(result):
                    print('\t', key, ':', value)

                result['PatientID'] = patient_folder
                results_list.append(result)


# Create a dataframe from the results_list list
features_2D = pd.DataFrame(results_list)

features_2D = features_2D.rename(columns={'PatientID' : 'Patient_ID'})

# Set "PatientID" as the index of the dataframe
features_2D.set_index('Patient_ID', inplace=True)

# Exportation of the DataFrame to a .csv file so it can be used on another notebook
features_2D.to_csv('features_2D.csv', index=False)

/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/Imagens/manifest-1600709154662/LIDC-IDRI/LICENSE
/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/Imagens/manifest-1600709154662/LIDC-IDRI/LICENSE_mask
/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/Imagens/manifest-1600709154662/LIDC-IDRI/LIDC-IDRI-0068


FileNotFoundError: [Errno 2] No such file or directory: '/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/Imagens/manifest-1600709154662/LIDC-IDRI/LIDC-IDRI-0068_mask'

In [21]:
df.head()

Unnamed: 0,diagnostics_Versions_PyRadiomics,diagnostics_Versions_Numpy,diagnostics_Versions_SimpleITK,diagnostics_Versions_PyWavelet,diagnostics_Versions_Python,diagnostics_Configuration_Settings,diagnostics_Configuration_EnabledImageTypes,diagnostics_Image-original_Hash,diagnostics_Image-original_Dimensionality,diagnostics_Image-original_Spacing,...,original_gldm_GrayLevelNonUniformity,original_gldm_GrayLevelVariance,original_gldm_HighGrayLevelEmphasis,original_gldm_LargeDependenceEmphasis,original_gldm_LargeDependenceHighGrayLevelEmphasis,original_gldm_LargeDependenceLowGrayLevelEmphasis,original_gldm_LowGrayLevelEmphasis,original_gldm_SmallDependenceEmphasis,original_gldm_SmallDependenceHighGrayLevelEmphasis,original_gldm_SmallDependenceLowGrayLevelEmphasis
0,v3.1.0,1.26.0,2.3.0,1.1.1,3.9.5,"{'minimumROIDimensions': 2, 'minimumROISize': ...",{'Original': {}},5c9ce3ca174f0f8324aa4d277e0fef82dc5ac566,3D,"(0.7812499999999999, 0.7812499999999999, 6.499...",...,186.8143582306019,39.19271419906397,280.4065748126662,8.661590524534686,2335.051970026589,0.0765059073671082,0.0086002740947983,0.379601671307114,110.305639457282,0.0035453562622343


### 3D feature extraction

In [None]:
# Path were the images are stored
input_directory = "/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/Imagens/manifest-1600709154662/LIDC-IDRI"

# Path to the setup file from radiomics
params_file = "/home/cmonteiro/pyradiomics-master/pyradiomics-master/examples/exampleSettings/Params.yaml"

# Initialization of the extractor of features
extractor = featureextractor.RadiomicsFeatureExtractor(params_file)

# List to store the features
results_list = []

# Ordered list of all the subfolders
patient_folders = sorted(os.listdir(input_directory))

for root, dirs, files in os.walk(input_folder):
    for dir in dirs:
        # Path to the folder with the 3D images
        image_folder = os.path.join(root, dir)
        
        # Extraction of the features of the volume 3D
        result = extractor.execute(image_folder)
        
        results_list.append(result)

# Create a dataframe from the results_list list
df = pd.DataFrame(results_list)