In [1]:
import glob
import gzip
import os
import shutil

import nibabel as nib
import numpy as np
from sklearn.preprocessing import MinMaxScaler

In [2]:
def extract_gz_file(gz_file, output_file):
    """Decompress ``gz_file`` into ``output_file`` unless the output already exists.

    The data is first written to ``<output_file>.part`` and only renamed to
    ``output_file`` once decompression has finished. An interrupted or failed
    extraction therefore never leaves a truncated ``output_file`` behind that
    a later run would mistake for a complete one.

    Parameters
    ----------
    gz_file : str
        Path of the gzip-compressed input file.
    output_file : str
        Path the decompressed bytes are written to. If a file already exists
        at this path it is left untouched and nothing is extracted.

    Raises
    ------
    OSError, EOFError
        Propagated from ``gzip``/file I/O when the archive is unreadable,
        corrupt, or truncated. The partial output is removed first.
    """
    if os.path.exists(output_file):
        # An existing output is trusted as-is; delete it manually to force
        # re-extraction.
        return

    partial_file = f"{output_file}.part"
    try:
        with gzip.open(gz_file, 'rb') as f_in, open(partial_file, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        # Rename only after a complete, successful copy.
        os.replace(partial_file, output_file)
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a half-written file.
        if os.path.exists(partial_file):
            os.remove(partial_file)
        raise

In [3]:
def find_global_min_max(files):
    """Return the smallest and largest voxel value found across ``files``.

    Each path is loaded with ``nib.load`` and read through ``get_fdata()``.
    A file that fails to load or read is reported on stdout and skipped; it
    does not contribute to the result.

    Parameters
    ----------
    files : iterable of str
        Paths of the images to scan.

    Returns
    -------
    tuple
        ``(global_min, global_max)``. When no file could be read the result
        is ``(np.inf, -np.inf)``.
    """
    lowest = np.inf
    highest = -np.inf

    for file in files:
        try:
            volume = nib.load(file).get_fdata()
            file_min = volume.min()
            file_max = volume.max()
        except Exception as e:
            print(f"Error processing file {file}: {e}")
            continue

        # Fold this file's extremes into the running dataset-wide extremes.
        lowest = min(lowest, file_min)
        highest = max(highest, file_max)

    return lowest, highest

In [4]:
def scale_and_save_image(input_file, output_dir, scaler, global_min, global_max):
    """Rescale one image with the dataset-wide min/max and save it as ``.nii.gz``.

    The scaler is fitted on the two-point range ``[global_min, global_max]``
    and then applied to the image with ``transform``. Fitting it on the image
    itself (``fit_transform``) would re-scale every image by its *own*
    min/max, which cancels any global normalisation and makes intensities
    incomparable across images.

    Parameters
    ----------
    input_file : str
        Path of the image to load with ``nib.load``.
    output_dir : str
        Directory the result is written to. The output name is the input
        base name with its ``.nii``/``.nii.gz`` suffix replaced by
        ``_inst_scaled.nii.gz``.
    scaler : object
        Scaler exposing ``fit(X)`` and ``transform(X)`` on 2-D column input,
        e.g. ``sklearn.preprocessing.MinMaxScaler``. It is (re)fitted here.
    global_min, global_max : float
        Smallest and largest value across the whole dataset.

    Returns
    -------
    None
        Any error is caught, reported on stdout, and the file is skipped.
    """
    try:
        print(input_file)
        img = nib.load(input_file)
        data = img.get_fdata()

        print("Original data shape:", data.shape, "Type:", data.dtype)
        print("Original data statistics - Mean:", np.mean(data), "Std:", np.std(data))

        # Fit on the dataset-wide range only, so every image shares one mapping.
        scaler.fit(np.array([[global_min], [global_max]], dtype=float))

        # Scalers work on (n_samples, n_features); flatten to one column,
        # then restore the original shape.
        scaled_data = scaler.transform(data.reshape(-1, 1)).reshape(data.shape)
        print("Scaled data statistics - Mean:", np.mean(scaled_data), "Std:", np.std(scaled_data))

        scaled_img = nib.Nifti1Image(scaled_data, affine=img.affine)

        # Strip the extension explicitly: str.replace('.nii', ...) would turn
        # 'x.nii.gz' into 'x_inst_scaled.nii.gz.gz' and would also rewrite a
        # '.nii' occurring in the middle of the name.
        base_name = os.path.basename(input_file)
        for suffix in ('.nii.gz', '.nii'):
            if base_name.endswith(suffix):
                base_name = base_name[:-len(suffix)]
                break
        scaled_file_name = os.path.join(output_dir, base_name + '_inst_scaled.nii.gz')

        nib.save(scaled_img, scaled_file_name)
    except Exception as e:
        print(f"Error processing file {input_file}: {e}")

In [5]:
# Dataset root and the institutions whose 'Baseline' folders are processed.
root_dir = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research'
institutions = ['Penn', 'CWRU', 'NYU', 'TJU', 'OSU', 'RH']

# Destination folder for the rescaled volumes; created if it is missing.
min_max_scaled_dir = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Scaled/Min_Max_Scaled/By_Dataset'
os.makedirs(min_max_scaled_dir, exist_ok=True)

# Collect every compressed baseline volume, one institution at a time.
all_files = []
for inst in institutions:
    baseline_dir = os.path.join(root_dir, inst, 'Baseline')
    baseline_files = glob.glob(f"{baseline_dir}/*.nii.gz")
    all_files += baseline_files

# Each '<name>.nii.gz' is unpacked next to itself as '<name>.nii'
# (the path with its trailing '.gz' removed).
extracted_files = [gz_path[:-3] for gz_path in all_files]
for file, nii_path in zip(all_files, extracted_files):
    extract_gz_file(file, nii_path)

# One pass over the whole dataset to get the overall intensity range.
global_min, global_max = find_global_min_max(extracted_files)

# Second pass: rescale and save each volume, using a fresh scaler per file.
for extracted_file in extracted_files:
    scale_and_save_image(
        extracted_file,
        min_max_scaled_dir,
        MinMaxScaler(),
        global_min,
        global_max,
    )

Error processing file /Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Penn/Baseline/ABGN_2015.02.27_flair_LPS_rSRI_SSFinal_N3_sus.nii: Expected 35712000 bytes, got 19463840 bytes from /Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Penn/Baseline/ABGN_2015.02.27_flair_LPS_rSRI_SSFinal_N3_sus.nii
 - could the file be damaged?
/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Penn/Baseline/AAVO_2011.12.31_t2_LPS_rSRI_SSFinal_N3_sus.nii
Original data shape: (240, 240, 155) Type: float64
Original data statistics - Mean: 106.25764588663102 Std: 266.0412124541704
Scaled data statistics - Mean: 0.05684034406981407 Std: 0.14231327944889116
/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Penn/Baseline/AAMD_2007.07.10_t2_LPS_rSRI_SSFinal_N3_sus.nii
Original data shape: (240, 240, 155) Type: float64
Original data statistics - Mean: 22.172980272821363 Std: 57.29117724273793
Scal