In [1]:
import numpy as np
import nibabel as nib
import os
import matplotlib.pyplot as plt
import glob
import gzip
import shutil
from sklearn.preprocessing import MinMaxScaler, RobustScaler

In [9]:
def scale_whole_image(input_file, output_dir, scaler):
    try:
        img = nib.load(input_file)
        data = img.get_fdata()

        print("Original data shape:", data.shape, "Type:", data.dtype)
        print("Original data statistics - Mean:", np.mean(data), "Std:", np.std(data))

        # Reshape the data to a 2D array where each row is a voxel
        reshaped_data = data.reshape(-1, 1)

        # Apply the scaler
        scaled_data_reshaped = scaler.fit_transform(reshaped_data)

        # Reshape the scaled data back to its original 3D shape
        scaled_data = scaled_data_reshaped.reshape(data.shape)

        print("Scaler parameters:", scaler.get_params())
        print("Scaler statistics - Center:", scaler.center_, "Scale:", scaler.scale_)
        print("Scaled data statistics - Mean:", np.mean(scaled_data), "Std:", np.std(scaled_data))
        print("Scaling completed")  

        scaled_img = nib.Nifti1Image(scaled_data, affine=img.affine)
        scaled_file_name = os.path.join(output_dir, os.path.basename(input_file).replace('.nii', '_scaled.nii.gz'))
        nib.save(scaled_img, scaled_file_name)
        print(f"File saved: {scaled_file_name}")
    except Exception as e:
        print(f"Error processing file {input_file}: {e}")


In [3]:
def extract_gz_file(gz_file, output_file):
    if not os.path.exists(output_file):
        with gzip.open(gz_file, 'rb') as f_in:
            with open(output_file, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

In [10]:
root_dir = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research'  # Replace with the path to your main directory
#institutions = ['Penn', 'CWRU', 'NYU', 'TJU', 'OSU', 'RH']
institutions = ['Penn']

for inst in institutions:
    
    # Baseline dir in each institution 
    baseline_dir = os.path.join(root_dir, inst, 'Baseline')

    # Dir we're creating for scaling in root dir
    robust_scaled_baseline_dir = os.path.join(root_dir, 'Robust_Scaled_v2')

    # Institution dirs we're creating within Min_Max_Scaled_Baseline
    robust_inst_dir = os.path.join(root_dir, robust_scaled_baseline_dir, inst)

    os.makedirs(robust_scaled_baseline_dir, exist_ok=True)
    os.makedirs(robust_inst_dir, exist_ok=True)

    # Process baseline images with RobustScaler
    baseline_files = glob.glob(f"{baseline_dir}/*.nii.gz")
    for file in baseline_files:
        extracted_file = file[:-3]
        extract_gz_file(file, extracted_file)
        scale_whole_image(extracted_file, robust_inst_dir, RobustScaler())

Original data shape: (240, 240, 155) Type: float64
Original data statistics - Mean: 106.25764588663102 Std: 266.0412124541704
Scaler parameters: {'copy': True, 'quantile_range': (25.0, 75.0), 'unit_variance': False, 'with_centering': True, 'with_scaling': True}
Scaler statistics - Center: [0.] Scale: [1.]
Scaled data statistics - Mean: 106.2576458866309 Std: 266.0412124541705
Scaling completed
File saved: /Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Robust_Scaled_v2/Penn/AAVO_2011.12.31_t2_LPS_rSRI_SSFinal_N3_sus_scaled.nii.gz
Original data shape: (240, 240, 155) Type: float64
Original data statistics - Mean: 22.172980272821363 Std: 57.29117724273793
Scaler parameters: {'copy': True, 'quantile_range': (25.0, 75.0), 'unit_variance': False, 'with_centering': True, 'with_scaling': True}
Scaler statistics - Center: [0.] Scale: [1.]
Scaled data statistics - Mean: 22.1729802728214 Std: 57.291177242738016
Scaling completed
File saved: /Users/yehudadicker/Libr

KeyboardInterrupt: 