In [1]:
import numpy as np
import pandas as pd
import radiomics
from radiomics import featureextractor
import os
import xml.etree.ElementTree as ET
import SimpleITK as sitk
from __future__ import print_function
import yaml
import csv
import shutil


In [3]:

def move_and_rename_xml_files(base_dir, folders, destination_dir):
    """Move every ``.xml`` file found in the given folders into one directory.

    Files are renamed to a running number (``1.xml``, ``2.xml``, ...) in the
    order the folders are given and, within each folder, in sorted file-name
    order.

    Args:
        base_dir: Directory that contains the source folders.
        folders: Iterable of folder names (relative to ``base_dir``) to scan.
        destination_dir: Directory that receives the renamed files; created
            when it does not exist.

    Raises:
        OSError: If a source folder cannot be listed or a file cannot be moved.
    """
    os.makedirs(destination_dir, exist_ok=True)

    file_counter = 1

    # Walk the source folders in the order supplied by the caller
    for folder in folders:
        folder_path = os.path.join(base_dir, folder)

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # numbering reproducible from one run to the next
        for xml_file in sorted(os.listdir(folder_path)):
            if not xml_file.endswith(".xml"):
                continue

            source_path = os.path.join(folder_path, xml_file)
            destination_path = os.path.join(destination_dir, f"{file_counter}.xml")

            # Never overwrite a file that is already in the destination
            # (e.g. left there by an earlier, partial run): skip taken numbers
            while os.path.exists(destination_path):
                file_counter += 1
                destination_path = os.path.join(destination_dir, f"{file_counter}.xml")

            # Move and rename the file
            shutil.move(source_path, destination_path)
            print(f"Arquivo {xml_file} movido e renomeado para {file_counter}.xml")

            file_counter += 1
    print("Concluido com sucesso!")

# Base directory that contains the source folders
base_dir = "LIDC_XML_only/tcia-lidc-xml"
folders = ['157'] + [str(folder_id) for folder_id in range(185, 190)]

# Destination directory for the renamed files
# NOTE(review): the UID-export cell reads from "LIDC_XML_only/renamed_files",
# which is not this path unless the working directory differs — confirm.
destination_dir = "renamed_files"

# Move and rename every XML file into the destination directory
move_and_rename_xml_files(
    base_dir=base_dir,
    folders=folders,
    destination_dir=destination_dir,
)

NameError: name 'os' is not defined

In [16]:

def extract_uids_from_xml(xml_file):
    """Read the study and series UIDs from one XML annotation file.

    Args:
        xml_file: Path (or file object) of the XML document to parse.

    Returns:
        A ``(study_uid, series_uid)`` tuple of whitespace-stripped strings, or
        ``(None, None)`` when the document cannot be parsed or when either
        tag is missing or empty.
    """
    try:
        root = ET.parse(xml_file).getroot()
    except ET.ParseError as e:
        print(f"Erro ao analisar o arquivo XML {xml_file}: {e}")
        return None, None  # Malformed XML: report it and let the caller skip the file

    # The tags live in this XML namespace. Note the differing capitalisation
    # of the two tag names ("UID" vs "Uid").
    ns = {'ns': 'http://www.nih.gov'}
    study_uid = (root.findtext('.//ns:StudyInstanceUID', default='', namespaces=ns) or '').strip()
    series_uid = (root.findtext('.//ns:SeriesInstanceUid', default='', namespaces=ns) or '').strip()

    # Surrounding whitespace would break exact-match lookups later on, and a
    # half-populated pair is useless to callers, so treat it as "not found"
    if not study_uid or not series_uid:
        return None, None

    return study_uid, series_uid

def iterate_xml_files_and_store_uids_csv(base_dir, output_file):
    """Write the UIDs of every XML file in a directory to a CSV file.

    Each ``.xml`` file directly inside ``base_dir`` is passed to
    ``extract_uids_from_xml``; files for which both UIDs are found produce one
    row with the columns ``Arquivo``, ``StudyInstanceUID`` and
    ``SeriesInstanceUID``. Rows are written in sorted file-name order.

    Args:
        base_dir: Directory that contains the XML files.
        output_file: Path of the CSV file to create (overwritten if present).
    """
    fieldnames = ['Arquivo', 'StudyInstanceUID', 'SeriesInstanceUID']

    # Explicit encoding so the file does not depend on the platform default
    with open(output_file, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        # os.listdir returns entries in arbitrary order; sort for a
        # reproducible CSV
        for xml_file in sorted(os.listdir(base_dir)):
            if not xml_file.endswith(".xml"):
                continue

            xml_path = os.path.join(base_dir, xml_file)
            study_uid, series_uid = extract_uids_from_xml(xml_path)

            # Only write a row when both UIDs were found
            if study_uid and series_uid:
                writer.writerow({
                    'Arquivo': xml_file,
                    'StudyInstanceUID': study_uid,
                    'SeriesInstanceUID': series_uid
                })

# Directory holding the renamed XML files, and the CSV that receives their UIDs
base_dir = "LIDC_XML_only/renamed_files"
output_file = "uids.csv"

# Walk the directory and store the UIDs in the CSV
iterate_xml_files_and_store_uids_csv(base_dir=base_dir, output_file=output_file)

print(f"UIDs salvos em {output_file}")

UIDs salvos em uids.csv


Máscara dos arquivos XML

In [2]:

def _xml_local_name(element):
    """Return an element's tag without its ``{namespace}`` prefix."""
    tag = element.tag
    return tag.rsplit('}', 1)[-1] if isinstance(tag, str) else ''


def _xml_first_child(parent, name):
    """Return the first direct child of ``parent`` whose local tag is ``name``, else None."""
    for child in parent:
        if _xml_local_name(child) == name:
            return child
    return None


def create_mask_from_xml(dicom_image, xml_file):
    """Build a binary mask image from the ROI edge points of an XML file.

    Every ``edgeMap`` found under a ``roi`` element contributes one pixel at
    its (``xCoord``, ``yCoord``) position. Tags are matched by local name, so
    documents that declare a default XML namespace are handled as well.

    Args:
        dicom_image: SimpleITK image that defines the size and geometry of
            the mask.
        xml_file: Path of the XML annotation file.

    Returns:
        A SimpleITK ``uint8`` image with the geometry of ``dicom_image`` in
        which edge pixels are 1 and everything else is 0.

    Raises:
        xml.etree.ElementTree.ParseError: If the XML is malformed.
        ValueError, TypeError: If a coordinate is not an integer string.
    """
    root = ET.parse(xml_file).getroot()

    size = dicom_image.GetSize()
    width, height = size[0], size[1]

    # SimpleITK sizes are (x, y[, z]) whereas numpy arrays handed to
    # GetImageFromArray are indexed ([z,] y, x): allocate with reversed axes
    # so the resulting image has the same size as dicom_image.
    mask = np.zeros(size[::-1], dtype=np.uint8)

    # NOTE(review): the previous version only looked below un-namespaced
    # <lesion> elements; the annotation files parsed elsewhere in this
    # notebook use the "http://www.nih.gov" namespace, and LIDC files appear
    # to nest <roi> under <unblindedReadNodule> — confirm against the data.
    # NOTE(review): ROIs are not filtered by slice (z position / SOP UID) and
    # only contour pixels are set, the outline is not filled — confirm this
    # is what the feature extraction needs.
    for roi in root.iter():
        if _xml_local_name(roi) != 'roi':
            continue
        for edge_map in roi.iter():
            if _xml_local_name(edge_map) != 'edgeMap':
                continue

            x_coord = _xml_first_child(edge_map, 'xCoord')
            y_coord = _xml_first_child(edge_map, 'yCoord')
            if x_coord is None or y_coord is None:
                print("Coordenadas não encontradas em edgeMap.")
                continue

            x = int(x_coord.text)
            y = int(y_coord.text)

            # Ignore points that fall outside the image
            if 0 <= x < width and 0 <= y < height:
                # The ellipsis covers the optional leading slice axis
                mask[..., y, x] = 1

    mask_image = sitk.GetImageFromArray(mask)
    mask_image.CopyInformation(dicom_image)

    return mask_image

Extração de features dos arquivos DICOM

In [3]:

def feature_extraction(base_dir=r'pre_processing/LIDC-IDRI-files',
                       out_path=r'pre_processing/feature_extraction',
                       xml_csv_path=r'pre_processing/uids.csv',
                       xml_dir=None):
    """Extract radiomics features for every patient folder and save them as CSV.

    For each sub-folder of ``base_dir`` the first ``.dcm`` file (in sorted
    order) located directly inside it is read, its study/series UIDs are
    looked up in the UID table, a mask is built from the matching XML file
    with ``create_mask_from_xml`` and the features are computed. Patients
    without a matching XML, or for which the mask or the extraction fails,
    are reported and skipped. The result is written to
    ``<out_path>/radiomics_features.csv``.

    Args:
        base_dir: Directory with one sub-folder per patient.
        out_path: Output directory; created when missing. A parameter file at
            ``<out_path>/exampleSettings/Params.yaml`` is used if it exists.
        xml_csv_path: CSV with the columns ``Arquivo``, ``StudyInstanceUID``
            and ``SeriesInstanceUID``. Defaults to a path relative to the
            working directory instead of a user-specific absolute path.
        xml_dir: Directory that contains the XML files named in the CSV.
            Defaults to ``<base_dir>/path_to_xml_files``.

    Raises:
        FileNotFoundError: If ``base_dir`` or ``xml_csv_path`` does not exist.
    """
    if xml_dir is None:
        # NOTE(review): 'path_to_xml_files' looks like a placeholder that was
        # never replaced; it is kept as the default, pass xml_dir to override.
        xml_dir = os.path.join(base_dir, 'path_to_xml_files')

    # Make sure the output directory exists
    os.makedirs(out_path, exist_ok=True)

    output_filepath = os.path.join(out_path, 'radiomics_features.csv')
    params = os.path.join(out_path, 'exampleSettings', 'Params.yaml')

    if os.path.isfile(params):
        extractor = featureextractor.RadiomicsFeatureExtractor(params)
    else:
        settings = {
            'binWidth': 25,
            'resampledPixelSpacing': None,
            'interpolator': sitk.sitkBSpline,
            'enableCExtensions': True
        }
        extractor = featureextractor.RadiomicsFeatureExtractor(**settings)

    print('Tipos de imagem habilitados:', extractor.enabledImagetypes)
    print('Features habilitadas:', extractor.enabledFeatures)
    print('Configurações atuais:', extractor.settings)

    # Read the UIDs as text so they compare equal to the DICOM metadata strings
    uids_df = pd.read_csv(xml_csv_path, dtype=str)

    # Collect one record per patient and build the frame once at the end
    # (DataFrame.append was removed in pandas 2.0 and was quadratic anyway)
    records = []

    # Sorted so the row order of the output is reproducible
    for patient_folder in sorted(os.listdir(base_dir)):
        patient_path = os.path.join(base_dir, patient_folder)
        if not os.path.isdir(patient_path):
            continue

        dicom_files = sorted(
            os.path.join(patient_path, f)
            for f in os.listdir(patient_path)
            if f.endswith('.dcm')
        )
        if not dicom_files:
            continue

        imageFilepath = dicom_files[0]

        # Find the XML that corresponds to this image in the UID table
        # NOTE(review): SimpleITK metadata keys may be lower-case
        # ("0020|000d") — confirm these upper-case keys resolve.
        dicom_metadata = sitk.ReadImage(imageFilepath)
        study_uid = dicom_metadata.GetMetaData("0020|000D")
        series_uid = dicom_metadata.GetMetaData("0020|000E")

        xml_row = uids_df[(uids_df['StudyInstanceUID'] == study_uid) &
                          (uids_df['SeriesInstanceUID'] == series_uid)]

        if xml_row.empty:
            print(f"XML correspondente não encontrado para o paciente {patient_folder}. Pulando.")
            continue

        xml_file = xml_row['Arquivo'].values[0]
        xml_file_path = os.path.join(xml_dir, xml_file)

        try:
            mask_image = create_mask_from_xml(dicom_metadata, xml_file_path)
        except Exception as e:
            print(f"Erro ao criar a máscara para o paciente {patient_folder}: {e}")
            continue

        print(f"Processando Paciente: {patient_folder}, Imagem: {os.path.basename(imageFilepath)}, Máscara: {os.path.basename(xml_file)}")

        try:
            # Feature extraction
            record = dict(extractor.execute(imageFilepath, mask_image))
        except Exception as e:
            # The exception must be bound here, otherwise the message itself
            # raises NameError and hides the real failure
            print(f'FALHA NA EXTRAÇÃO DE FEATURES para o paciente {patient_folder}: {e}')
            continue

        record['PatientID'] = patient_folder
        record['Image'] = os.path.basename(imageFilepath)
        record['Mask'] = os.path.basename(xml_file)
        records.append(record)

    results = pd.DataFrame(records)

    print('Extração completa, escrevendo CSV')
    results.to_csv(output_filepath, index=False, na_rep='NaN')
    print('Gravação do CSV completa')


# Run the feature extraction with its built-in default paths
feature_extraction()

Tipos de imagem habilitados: {'Original': {}}
Features habilitadas: {'firstorder': [], 'glcm': [], 'gldm': [], 'glrlm': [], 'glszm': [], 'ngtdm': [], 'shape': []}
Configurações atuais: {'minimumROIDimensions': 2, 'minimumROISize': None, 'normalize': False, 'normalizeScale': 1, 'removeOutliers': None, 'resampledPixelSpacing': None, 'interpolator': 23, 'preCrop': False, 'padDistance': 5, 'distances': [1], 'force2D': False, 'force2Ddimension': 0, 'resegmentRange': None, 'label': 1, 'additionalInfo': True, 'binWidth': 25, 'enableCExtensions': True}


FileNotFoundError: [Errno 2] No such file or directory: 'pre_processing/uids.csv'