In [None]:
# Function to get edema voxels
def get_edema_voxels(image_file, segmentation_mask_file):
    """Return the image intensities located inside the edema segmentation.

    Both files are loaded with nibabel, and the image voxels whose
    segmentation label equals 2 (edema) are collected into a column vector.

    Args:
        image_file: Path of the image volume to sample.
        segmentation_mask_file: Path of the label volume.

    Returns:
        A 2D array of shape (n_values, 1) holding the selected intensities,
        or None when a file cannot be read, when the mask cannot be used to
        index the image, or when no voxel carries label 2. A message is
        printed in each of the None cases.
    """
    try:
        # Load the image and segmentation mask; nibabel reads lazily, so
        # get_fdata() must stay inside the try to catch read errors.
        image = nib.load(image_file).get_fdata()
        segmentation_mask = nib.load(segmentation_mask_file).get_fdata()
    except (OSError, EOFError) as e:
        print(f"Error processing file {image_file}: {e}")
        return None

    try:
        # Extract voxels where segmentation mask equals 2 (edema)
        edema_voxels = image[segmentation_mask == 2]
    except IndexError as e:
        # Boolean indexing fails when the mask shape is incompatible with
        # the image; report it the same way as an unreadable file.
        print(
            f"Segmentation {segmentation_mask_file} does not match "
            f"image {image_file}: {e}"
        )
        return None

    if edema_voxels.size == 0:
        # An empty selection cannot be used to fit a scaler downstream, so
        # signal it with None and let the caller skip this file.
        print(f"No edema voxels (label 2) found for {image_file}")
        return None

    return edema_voxels.reshape(-1, 1)  # Reshape to 2D array

def extract_common_prefix(filename):
    """
    Return the leading part of ``filename`` shared by related files.

    The prefix is made of the first two underscore-separated fields, which
    are assumed to be the institution identifier and the date. A name with
    fewer than two fields is returned unchanged.
    """
    # Only the first two fields matter, so stop splitting after them.
    leading_fields = filename.split('_', 2)[:2]
    return '_'.join(leading_fields)

In [None]:
import numpy as np
import nibabel as nib
import os
import matplotlib.pyplot as plt
import glob
import gzip
import shutil
from sklearn.preprocessing import MinMaxScaler, RobustScaler

root_dir = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research'  # Replace with the path to your main directory
institutions = ['Penn', 'CWRU', 'NYU', 'TJU', 'OSU', 'RH']

# The output roots are the same for every institution, so build them once.
min_max_scaled_baseline_dir = os.path.join(root_dir, 'Min-Max_Scaled_Baseline')
robust_scaled_baseline_dir = os.path.join(root_dir, 'Robust_Scaled_Baseline')

# For every institution: unpack each baseline volume, find its segmentation,
# fit both scalers on the edema voxels and save one scaled copy per scaler.
for inst in institutions:
    baseline_dir = os.path.join(root_dir, inst, 'Baseline')
    seg_dir = os.path.join(root_dir, inst, 'Seg')

    # The output roots already contain root_dir; joining root_dir again
    # would duplicate it whenever root_dir is a relative path.
    min_max_inst_dir = os.path.join(min_max_scaled_baseline_dir, 'Edema_Scaled', inst)
    robust_inst_dir = os.path.join(robust_scaled_baseline_dir, 'Edema_Scaled', inst)

    os.makedirs(min_max_inst_dir, exist_ok=True)
    os.makedirs(robust_inst_dir, exist_ok=True)

    # Escape the directory so glob metacharacters in the path are taken
    # literally, and sort so files are processed in a reproducible order.
    baseline_files = sorted(glob.glob(f"{glob.escape(baseline_dir)}/*.nii.gz"))
    for baseline_file in baseline_files:
        # Extract baseline file (drop the trailing ".gz" for the target name)
        extracted_baseline_file = baseline_file[:-3]
        extract_gz_file(baseline_file, extracted_baseline_file)

        baseline_filename = os.path.basename(extracted_baseline_file)
        common_prefix = extract_common_prefix(baseline_filename)

        # Construct segmentation filename with the specific suffix
        segmentation_filename = f"{common_prefix}_LPS_rSRI_segmF.nii"
        segmentation_file = os.path.join(seg_dir, segmentation_filename)

        if not os.path.exists(segmentation_file):
            print(f"No matching segmentation file for {baseline_filename}, skipping.")
            continue

        # Extract edema voxels
        edema_voxels = get_edema_voxels(extracted_baseline_file, segmentation_file)
        if edema_voxels is None:
            print("Skipping", extracted_baseline_file)
            continue

        # Apply Min-Max Scaling based on edema voxels
        min_max_scaler = MinMaxScaler()
        min_max_scaler = min_max_scaler.fit(edema_voxels)
        scale_and_save_image(extracted_baseline_file, min_max_inst_dir, min_max_scaler)

        # Apply Robust Scaling based on edema voxels
        robust_scaler = RobustScaler()
        robust_scaler = robust_scaler.fit(edema_voxels)
        scale_and_save_image(extracted_baseline_file, robust_inst_dir, robust_scaler)
