# XML Feature Extraction - 2D and 3D

#### Note: run inside an environment with numpy==1.20.3 and pylidc==0.2.2

In [8]:
import os
import six
import pickle
import pandas as pd
import SimpleITK as sitk
from functools import reduce
import numpy as np
from scipy.stats import mode
from skimage.measure import find_contours
from skimage import io
import pydicom
import pylidc as pl
from pylidc.utils import consensus
import matplotlib.pyplot as plt

#### std_limit = 0.5

In [9]:
# Switch to the project folder and then to the sub-folder for this threshold.
# The extraction cell below saves its .npy masks and its CSV with relative
# file names, so they are written into this working directory.
%cd /home/cmonteiro/Aulas/Laboratório\ de\ IA\ e\ CD/Projeto\ 1
%cd std_limit_0.5

/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1
/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/std_limit_0.5


In [10]:
# Extract the radiologists' semantic ratings for every LIDC-IDRI nodule,
# keep only the nodules whose annotators agree (per-feature standard
# deviation <= std_limit), save the 50% consensus mask of each kept nodule
# and write the kept ratings to a CSV file.

# Path where the images are stored (one sub-folder per patient)
input_directory = "/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/Imagens/LIDC-IDRI"

# Path to the setup file from radiomics (not used in this cell)
params_file = "/home/cmonteiro/pyradiomics-master/pyradiomics-master/examples/exampleSettings/Params.yaml"

# Ordered list of all the subfolders
patient_folders = sorted(os.listdir(input_directory))

# Running counter: incremented for every nodule seen, kept or rejected,
# so the ids of the kept nodules are not contiguous.
nodule_id = 0

# Column name -> list of values, one entry per kept nodule
nodule_data = {
    'Nodule_id': [],
    'Patient_id': [],
    'Subtlety': [],
    'Internalstructure': [],
    'Calcification': [],
    'Sphericity': [],
    'Margin': [],
    'Lobulation': [],
    'Spiculation': [],
    'Texture': [],
    'Malignancy': [],
}

# Human-readable label for each malignancy rating
malignancy_names = {
    1: "1-Highly Unlikely",
    2: "2-Moderately Unlikely",
    3: "3-Indeterminate",
    4: "4-Moderately Suspicious",
    5: "5-Highly Suspicious",
}

# Largest per-feature standard deviation tolerated among the annotators
std_limit = 0.5

for patient_folder in patient_folders:
    patient_folder_path = os.path.join(input_directory, patient_folder)

    # Id of the patient - 'LIDC-IDRI-xxxx'
    patient_id = os.path.basename(patient_folder_path)
    print(patient_id)
    patient_scans = pl.query(pl.Scan).filter(pl.Scan.patient_id == patient_id, pl.Scan.annotations.any()).all()

    for scan in patient_scans:
        nods = scan.cluster_annotations()

        # Iteration for each nodule (each item groups the annotations of one nodule)
        for anns in nods:
            nodule_id += 1

            # 50% consensus
            cmask, cbbox, masks = consensus(anns, clevel=0.5, pad=[(20, 20), (20, 20), (0, 0)])

            # One vector of feature ratings per annotation of this nodule
            nodule_characteristics = [ann.feature_vals() for ann in anns]

            # If no characteristics were extracted the nodule is not included
            if not nodule_characteristics:
                continue

            # Standard deviation of each feature across the annotators
            std_deviations = [np.std(characteristic) for characteristic in zip(*nodule_characteristics)]

            # Skip this nodule when the annotators disagree on any feature
            if any(std > std_limit for std in std_deviations):
                print(f"Nodule ID {nodule_id}")
                continue

            # Save the consensus mask under the nodule id for later use with radiomics
            filename = f"{nodule_id}_cmask.npy"
            np.save(filename, cmask)

            # Feature values are taken from the first annotation of the nodule
            first = nodule_characteristics[0]
            feature_values = {
                'Nodule_id': nodule_id,
                'Patient_id': patient_id,
                'Subtlety': first[0],
                'Internalstructure': first[1],
                'Calcification': first[2],
                'Sphericity': first[3],
                'Margin': first[4],
                'Lobulation': first[5],
                'Spiculation': first[6],
                'Texture': first[7],
                'Malignancy': malignancy_names.get(first[8]),
            }

            # Append the feature values to the nodule_data dictionary
            for key, value in feature_values.items():
                nodule_data[key].append(value)

features = pd.DataFrame(nodule_data)

# Set nodule_id as the index
features.set_index('Nodule_id', inplace=True)

# Write the index too: it now holds Nodule_id, and with index=False the
# column would be dropped, leaving no way to match rows to the
# '<nodule_id>_cmask.npy' files saved above.
features.to_csv('features_pylidc_0.5.csv')

LIDC-IDRI-0001
Nodule ID 1
LIDC-IDRI-0002
Nodule ID 2
LIDC-IDRI-0003
Nodule ID 4
Nodule ID 5
Nodule ID 6
LIDC-IDRI-0004
Nodule ID 7
LIDC-IDRI-0005
Nodule ID 8
Nodule ID 9
LIDC-IDRI-0006
Nodule ID 12
Nodule ID 14
LIDC-IDRI-0007
Nodule ID 15
LIDC-IDRI-0008
Nodule ID 17
Nodule ID 18
LIDC-IDRI-0009
LIDC-IDRI-0010
Nodule ID 22


KeyboardInterrupt: 

#### std_limit = 1.0

In [4]:
# Switch to the project folder and then to the sub-folder for this threshold.
# The extraction cell below saves its .npy masks and its CSV with relative
# file names, so they are written into this working directory.
%cd /home/cmonteiro/Aulas/Laboratório\ de\ IA\ e\ CD/Projeto\ 1
%cd std_limit_1.0

/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1
/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/std_limit_1.0


In [11]:
# Extract the radiologists' semantic ratings for every LIDC-IDRI nodule,
# keep only the nodules whose annotators agree (per-feature standard
# deviation <= std_limit), save the 50% consensus mask of each kept nodule
# and write the kept ratings to a CSV file.

# Path where the images are stored (one sub-folder per patient)
input_directory = "/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/Imagens/LIDC-IDRI"

# Path to the setup file from radiomics (not used in this cell)
params_file = "/home/cmonteiro/pyradiomics-master/pyradiomics-master/examples/exampleSettings/Params.yaml"

# Ordered list of all the subfolders
patient_folders = sorted(os.listdir(input_directory))

# Running counter: incremented for every nodule seen, kept or rejected,
# so the ids of the kept nodules are not contiguous.
nodule_id = 0

# Column name -> list of values, one entry per kept nodule
nodule_data = {
    'Nodule_id': [],
    'Patient_id': [],
    'Subtlety': [],
    'Internalstructure': [],
    'Calcification': [],
    'Sphericity': [],
    'Margin': [],
    'Lobulation': [],
    'Spiculation': [],
    'Texture': [],
    'Malignancy': [],
}

# Human-readable label for each malignancy rating
malignancy_names = {
    1: "1-Highly Unlikely",
    2: "2-Moderately Unlikely",
    3: "3-Indeterminate",
    4: "4-Moderately Suspicious",
    5: "5-Highly Suspicious",
}

# Largest per-feature standard deviation tolerated among the annotators
std_limit = 1.0

for patient_folder in patient_folders:
    patient_folder_path = os.path.join(input_directory, patient_folder)

    # Id of the patient - 'LIDC-IDRI-xxxx'
    patient_id = os.path.basename(patient_folder_path)
    print(patient_id)
    patient_scans = pl.query(pl.Scan).filter(pl.Scan.patient_id == patient_id, pl.Scan.annotations.any()).all()

    for scan in patient_scans:
        nods = scan.cluster_annotations()

        # Iteration for each nodule (each item groups the annotations of one nodule)
        for anns in nods:
            nodule_id += 1

            # 50% consensus
            cmask, cbbox, masks = consensus(anns, clevel=0.5, pad=[(20, 20), (20, 20), (0, 0)])

            # One vector of feature ratings per annotation of this nodule
            nodule_characteristics = [ann.feature_vals() for ann in anns]

            # If no characteristics were extracted the nodule is not included
            if not nodule_characteristics:
                continue

            # Standard deviation of each feature across the annotators
            std_deviations = [np.std(characteristic) for characteristic in zip(*nodule_characteristics)]

            # Skip this nodule when the annotators disagree on any feature
            if any(std > std_limit for std in std_deviations):
                print(f"Nodule ID {nodule_id}")
                continue

            # Save the consensus mask under the nodule id for later use with radiomics
            filename = f"{nodule_id}_cmask.npy"
            np.save(filename, cmask)

            # Feature values are taken from the first annotation of the nodule
            first = nodule_characteristics[0]
            feature_values = {
                'Nodule_id': nodule_id,
                'Patient_id': patient_id,
                'Subtlety': first[0],
                'Internalstructure': first[1],
                'Calcification': first[2],
                'Sphericity': first[3],
                'Margin': first[4],
                'Lobulation': first[5],
                'Spiculation': first[6],
                'Texture': first[7],
                'Malignancy': malignancy_names.get(first[8]),
            }

            # Append the feature values to the nodule_data dictionary
            for key, value in feature_values.items():
                nodule_data[key].append(value)

features = pd.DataFrame(nodule_data)

# Set nodule_id as the index
features.set_index('Nodule_id', inplace=True)

# Write the index too: it now holds Nodule_id, and with index=False the
# column would be dropped, leaving no way to match rows to the
# '<nodule_id>_cmask.npy' files saved above.
features.to_csv('features_pylidc_1.0.csv')

LIDC-IDRI-0001
Nodule ID 1
LIDC-IDRI-0002
LIDC-IDRI-0003
Nodule ID 4
Nodule ID 5
Nodule ID 6
LIDC-IDRI-0004
Nodule ID 7
LIDC-IDRI-0005
Nodule ID 8
LIDC-IDRI-0006
Nodule ID 14
LIDC-IDRI-0007
Nodule ID 15
LIDC-IDRI-0008
Nodule ID 17
Nodule ID 18
LIDC-IDRI-0009
LIDC-IDRI-0010


KeyboardInterrupt: 

#### std_limit = 1.5

In [6]:
# Switch to the project folder and then to the sub-folder for this threshold.
# The extraction cell below saves its .npy masks and its CSV with relative
# file names, so they are written into this working directory.
%cd /home/cmonteiro/Aulas/Laboratório\ de\ IA\ e\ CD/Projeto\ 1
%cd std_limit_1.5

/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1
/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/std_limit_1.5


In [12]:
# Extract the radiologists' semantic ratings for every LIDC-IDRI nodule,
# keep only the nodules whose annotators agree (per-feature standard
# deviation <= std_limit), save the 50% consensus mask of each kept nodule
# and write the kept ratings to a CSV file.

# Path where the images are stored (one sub-folder per patient)
input_directory = "/home/cmonteiro/Aulas/Laboratório de IA e CD/Projeto 1/Imagens/LIDC-IDRI"

# Path to the setup file from radiomics (not used in this cell)
params_file = "/home/cmonteiro/pyradiomics-master/pyradiomics-master/examples/exampleSettings/Params.yaml"

# Ordered list of all the subfolders
patient_folders = sorted(os.listdir(input_directory))

# Running counter: incremented for every nodule seen, kept or rejected,
# so the ids of the kept nodules are not contiguous.
nodule_id = 0

# Column name -> list of values, one entry per kept nodule
nodule_data = {
    'Nodule_id': [],
    'Patient_id': [],
    'Subtlety': [],
    'Internalstructure': [],
    'Calcification': [],
    'Sphericity': [],
    'Margin': [],
    'Lobulation': [],
    'Spiculation': [],
    'Texture': [],
    'Malignancy': [],
}

# Human-readable label for each malignancy rating
malignancy_names = {
    1: "1-Highly Unlikely",
    2: "2-Moderately Unlikely",
    3: "3-Indeterminate",
    4: "4-Moderately Suspicious",
    5: "5-Highly Suspicious",
}

# Largest per-feature standard deviation tolerated among the annotators
std_limit = 1.5

for patient_folder in patient_folders:
    patient_folder_path = os.path.join(input_directory, patient_folder)

    # Id of the patient - 'LIDC-IDRI-xxxx'
    patient_id = os.path.basename(patient_folder_path)
    print(patient_id)
    patient_scans = pl.query(pl.Scan).filter(pl.Scan.patient_id == patient_id, pl.Scan.annotations.any()).all()

    for scan in patient_scans:
        nods = scan.cluster_annotations()

        # Iteration for each nodule (each item groups the annotations of one nodule)
        for anns in nods:
            nodule_id += 1

            # 50% consensus
            cmask, cbbox, masks = consensus(anns, clevel=0.5, pad=[(20, 20), (20, 20), (0, 0)])

            # One vector of feature ratings per annotation of this nodule
            nodule_characteristics = [ann.feature_vals() for ann in anns]

            # If no characteristics were extracted the nodule is not included
            if not nodule_characteristics:
                continue

            # Standard deviation of each feature across the annotators
            std_deviations = [np.std(characteristic) for characteristic in zip(*nodule_characteristics)]

            # Skip this nodule when the annotators disagree on any feature
            if any(std > std_limit for std in std_deviations):
                print(f"Nodule ID {nodule_id}")
                continue

            # Save the consensus mask under the nodule id for later use with radiomics
            filename = f"{nodule_id}_cmask.npy"
            np.save(filename, cmask)

            # Feature values are taken from the first annotation of the nodule
            first = nodule_characteristics[0]
            feature_values = {
                'Nodule_id': nodule_id,
                'Patient_id': patient_id,
                'Subtlety': first[0],
                'Internalstructure': first[1],
                'Calcification': first[2],
                'Sphericity': first[3],
                'Margin': first[4],
                'Lobulation': first[5],
                'Spiculation': first[6],
                'Texture': first[7],
                'Malignancy': malignancy_names.get(first[8]),
            }

            # Append the feature values to the nodule_data dictionary
            for key, value in feature_values.items():
                nodule_data[key].append(value)

features = pd.DataFrame(nodule_data)

# Set nodule_id as the index
features.set_index('Nodule_id', inplace=True)

# Write the index too: it now holds Nodule_id, and with index=False the
# column would be dropped, leaving no way to match rows to the
# '<nodule_id>_cmask.npy' files saved above.
features.to_csv('features_pylidc_1.5.csv')

LIDC-IDRI-0001
LIDC-IDRI-0002
LIDC-IDRI-0003
LIDC-IDRI-0004
LIDC-IDRI-0005
LIDC-IDRI-0006
LIDC-IDRI-0007
LIDC-IDRI-0008


KeyboardInterrupt: 