In [None]:
# THIS CODE ASSUMES THE FOLLOWING:
# 1. All folders of every reviewer are in the same directory, and each folder contains subfolders for each center.
# 2. The directory structure is as follows:
#    Higher_directory
#    ├── First_reviewer
#    │   ├── first_center
#    │   └── second_center...
#    Directory structure in this case is: directory_structure = {'First_reviewer': ['first_center', 'second_center', ...]}
# 3. The NIFTI files are named such that NCCT files do not end with '_l.nii', '_r.nii', '_b.nii', '_vl.nii' or '_vr.nii'.
# 4. The mask files are NIFTI files that do end with '_l.nii', '_r.nii', '_b.nii', '_vl.nii' or '_vr.nii'.
# 5. The digits in a patient ID must be unique per patient, because non-numeric characters (such as - or _) are ignored. For example, 'patient 123' and 'patient1-2-3' are considered the same patient.
import os
import re
import pandas as pd
import nibabel as nib
import numpy as np
from nilearn.image import resample_img

# Top-level directory that contains one folder per reviewer.
# NOTE: this is a raw string, so the value holds two backslashes after "D:".
Higher_directory = r"D:\\"

# Maps each reviewer folder name to the list of center subfolders inside it.
directory_structure = {
    'Lucas': [
        'FIRST_CENTER',
        'SECOND_CENTER',
        'THIRD_CENTER',
    ],
}

def calculate_volume(ncct_file_path, mask_file_path):
    """Calculate the volume of voxels where the mask is 1 and NCCT > 130 HU.

    Args:
        ncct_file_path: Path to the NCCT NIfTI file.
        mask_file_path: Path to the segmentation mask NIfTI file.

    Returns:
        The number of voxels with ``mask == 1`` and ``NCCT > 130`` multiplied
        by the mask's voxel volume (product of its three spatial zooms, so the
        unit is the header's spatial unit cubed -- presumably mm^3, confirm
        against the data). Returns 0 when loading or processing fails; the
        error is printed instead of raised, so a 0 in the output can also
        mean "could not be computed".
    """
    try:
        ncct_img = nib.load(ncct_file_path)
        mask_img = nib.load(mask_file_path)

        # Only the spatial axes define the voxel grid; resample_img expects a
        # 3-element target_shape even when the mask carries a 4th axis.
        mask_spatial_shape = tuple(mask_img.shape[:3])

        # The images share a grid only if BOTH the affine and the spatial
        # shape agree. The affine is compared with a tolerance so that
        # float round-off written by different tools does not force an
        # unnecessary (value-altering) interpolation of the NCCT.
        same_grid = (
            tuple(ncct_img.shape[:3]) == mask_spatial_shape
            and np.allclose(ncct_img.affine, mask_img.affine)
        )
        if not same_grid:
            print("NCCT and mask grids differ. Resampling NCCT to match mask grid.")
            ncct_img = resample_img(
                ncct_img,
                target_affine=mask_img.affine,
                target_shape=mask_spatial_shape,
            )

        ncct_data = ncct_img.get_fdata()
        mask_data = mask_img.get_fdata()

        # get_zooms() also returns the step of a 4th (time) axis when the
        # image has one; only the three spatial zooms belong in a volume.
        spatial_zooms = np.asarray(mask_img.header.get_zooms()[:3], dtype=float)
        voxel_volume = float(np.prod(spatial_zooms))

        voxel_count = int(np.count_nonzero((ncct_data > 130) & (mask_data == 1)))
        return voxel_count * voxel_volume
    except Exception as e:
        # Deliberate best-effort: one unreadable file must not abort the
        # whole batch, so report the problem and fall back to 0.
        print(f"Error processing {ncct_file_path} or {mask_file_path}: {e}")
        return 0


def extract_numeric_patient_id(filename):
    """Return every digit found in *filename*, concatenated in order.

    All non-digit characters (letters, separators, the extension) are
    dropped, so '1-2-3.nii' and '123.nii' both give '123'. A name without
    any digit gives an empty string.
    """
    # Deleting everything that is not a digit leaves the digits joined.
    return re.sub(r'\D', '', filename)

# Filename endings that mark a NIfTI file as a segmentation mask.
_MASK_SUFFIXES = ('_l.nii', '_r.nii', '_vr.nii', '_vl.nii', '_b.nii')


def _mask_label(seg_filename):
    """Return the mask label ('l', 'r', 'vr', 'vl' or 'b') encoded in a mask filename."""
    for suffix in _MASK_SUFFIXES:
        if seg_filename.endswith(suffix):
            # Drop the leading underscore and the '.nii' extension.
            return suffix[1:-len('.nii')]
    raise ValueError(f"Not a segmentation filename: {seg_filename}")


def process_directory(Higher_directory, directory_structure):
    """Collect mask volumes for every reviewer/center folder.

    Args:
        Higher_directory: Directory that contains one folder per reviewer.
        directory_structure: Mapping of reviewer folder name to the list of
            center subfolder names inside it.

    Returns:
        A list of dicts, one per NCCT file plus one per segmentation file
        that has no NCCT file with the same numeric patient ID. Every dict
        has the keys 'PatientID', 'NCCT_present', 'center' and 'Examiner'.
        NCCT rows additionally map each mask label ('l', 'r', 'vr', 'vl',
        'b') found for that patient to the value of calculate_volume();
        rows for segmentations without NCCT map the label to the string
        'No NCCT file found'. Subfolders that do not exist are reported and
        skipped.
    """
    results = []

    for head_folder, subfolders_list in directory_structure.items():
        head_folder_path = os.path.join(Higher_directory, head_folder)
        print(f"Processing folder: {head_folder_path}")
        for subfolder_name in subfolders_list:
            subfolder_path = os.path.join(head_folder_path, subfolder_name)
            print(f"Processing subfolder: {subfolder_path}")
            if not os.path.isdir(subfolder_path):
                # Say so explicitly; otherwise a typo in the configuration
                # silently produces an incomplete spreadsheet.
                print(f"Subfolder not found, skipping: {subfolder_path}")
                continue

            # All NIfTI files in the subfolder, excluding hidden files.
            # Sorted because os.listdir() order is arbitrary and the row
            # order of the output should be reproducible.
            files = sorted(
                f for f in os.listdir(subfolder_path)
                if f.endswith('.nii') and not f.startswith('.')
            )
            print(f"Found {len(files)} NIfTI files in {subfolder_path}")
            print(f"Files: {files}")

            # Split into NCCT scans and segmentation masks by filename ending.
            ncct_files = [f for f in files if not f.endswith(_MASK_SUFFIXES)]
            segmentation_files = [f for f in files if f.endswith(_MASK_SUFFIXES)]

            # Extract each mask's patient ID once and index masks by it.
            seg_patient_ids = {f: extract_numeric_patient_id(f) for f in segmentation_files}
            segmentations_by_patient = {}
            for seg_file in segmentation_files:
                segmentations_by_patient.setdefault(seg_patient_ids[seg_file], []).append(seg_file)

            ncct_patient_ids = set()
            for ncct_file in ncct_files:
                patient_id = extract_numeric_patient_id(ncct_file)
                ncct_patient_ids.add(patient_id)
                ncct_file_path = os.path.join(subfolder_path, ncct_file)
                print(f"Processing NCCT file: {ncct_file_path} for patient ID: {patient_id}")

                corresponding_segmentations = segmentations_by_patient.get(patient_id, [])
                print(f"Found {corresponding_segmentations} segmentations for patient ID: {patient_id}")
                patient_result = {
                    'PatientID': patient_id,
                    'NCCT_present': 1,
                    'center': subfolder_name,
                    'Examiner': head_folder,
                }

                for seg_file in corresponding_segmentations:
                    seg_file_path = os.path.join(subfolder_path, seg_file)
                    # The label comes from the known filename ending, not from
                    # splitting on the patient ID: the ID is the concatenation
                    # of all digits and need not occur verbatim (or at all)
                    # in the filename.
                    patient_result[_mask_label(seg_file)] = calculate_volume(ncct_file_path, seg_file_path)

                results.append(patient_result)

            # Report segmentations whose patient has no NCCT file.
            for seg_file in segmentation_files:
                seg_patient_id = seg_patient_ids[seg_file]
                if seg_patient_id in ncct_patient_ids:
                    continue
                results.append({
                    'PatientID': seg_patient_id,
                    'NCCT_present': 0,
                    'center': subfolder_name,
                    'Examiner': head_folder,
                    _mask_label(seg_file): 'No NCCT file found',
                })

    return results

# Walk every configured reviewer/center folder and gather the result rows.
results = process_directory(Higher_directory, directory_structure)

# The spreadsheet is written next to the reviewer folders.
excel_path = os.path.join(Higher_directory, 'NCCT_mask_volumes.xlsx')

# One DataFrame row per result dict; the index is left out of the Excel file.
df = pd.DataFrame(results)
df.to_excel(excel_path, index=False)

print(f"Results saved to {excel_path}")