In [1]:
import os
import pandas as pd

In [2]:
os.getcwd()

'/Users/osama/GP-2025-Strain/Code/Data_scripts'

In [3]:
# Define the base directory path
# base_dir = "Data/ACDC/database"
base_dir = "../../Data/ACDC/database"
training_dir = os.path.join(base_dir, "training")
testing_dir = os.path.join(base_dir, "testing")

In [4]:
# Function to parse .cfg file and extract features
def parse_cfg_file(cfg_file_path):
    features = {}
    
    # Read the .cfg file line by line and extract the relevant information
    with open(cfg_file_path, 'r') as file:
        for line in file:
            key, value = line.split(':')
            key = key.strip()
            value = value.strip()
            features[key] = value
    
    return features

# Function to check for files in patient folders
def check_patient_files(base_path):
    patient_data = []
    
    # List the subdirectories (patients) in the base path
    for folder in os.listdir(base_path):
        patient_dir = os.path.join(base_path, folder)
        
        # Ensure it is a directory (patient folder)
        if os.path.isdir(patient_dir):
            # Get the list of files in the patient directory
            patient_files = os.listdir(patient_dir)
            
            # Check for 4D and cfg files
            has_4d_file = any(f.endswith('_4d.nii') for f in patient_files)
            cfg_file = next((f for f in patient_files if f.endswith('.cfg')), None)
            
            if cfg_file:
                cfg_file_path = os.path.join(patient_dir, cfg_file)
                # Parse the .cfg file
                cfg_data = parse_cfg_file(cfg_file_path)
                
                # Prepare the data row
                row = {
                    'Patient': folder,
                    'ED': cfg_data.get('ED'),
                    'ES': cfg_data.get('ES'),
                    'Group': cfg_data.get('Group'),
                    'Height': float(cfg_data.get('Height', 0)),
                    'NbFrame': int(cfg_data.get('NbFrame', 0)),
                    'Weight': float(cfg_data.get('Weight', 0)),
                    'Has_4D_File': 'Yes' if has_4d_file else 'No'
                }
                
                # Add the row to patient_data list
                patient_data.append(row)
            else:
                row = {
                    'Patient': folder,
                    'ED': None,
                    'ES': None,
                    'Group': None,
                    'Height': None,
                    'NbFrame': None,
                    'Weight': None,
                    'Has_4D_File': 'Yes' if has_4d_file else 'No'
                }
    
    # Create a DataFrame from the collected patient data
    df = pd.DataFrame(patient_data)
    return df

# Call the function and create the DataFrame
df_training_patients = check_patient_files(training_dir)
df_testing_patients = check_patient_files(testing_dir)

In [5]:
df_training_patients.head()

Unnamed: 0,Patient,ED,ES,Group,Height,NbFrame,Weight,Has_4D_File
0,patient015,1,10,DCM,158.0,21,57.0,Yes
1,patient012,1,13,DCM,160.0,30,59.0,Yes
2,patient024,1,9,HCM,175.0,28,85.0,No
3,patient023,1,9,HCM,166.0,25,74.0,No
4,patient048,1,8,MINF,167.0,28,73.0,Yes


In [6]:
df_testing_patients.head()

Unnamed: 0,Patient,ED,ES,Group,Height,NbFrame,Weight,Has_4D_File
0,patient104,1,11,HCM,180.0,30,74.0,Yes
1,patient103,1,11,MINF,175.0,30,107.0,Yes
2,patient135,1,10,MINF,184.0,20,95.0,Yes
3,patient132,1,15,DCM,163.0,30,80.0,Yes
4,patient150,1,12,NOR,158.0,30,56.0,Yes
