In [1]:
import numpy as np
import nibabel as nib
import os
import matplotlib.pyplot as plt
import glob
import gzip
import shutil
from sklearn.preprocessing import MinMaxScaler, RobustScaler

In [15]:
def extract_gz_file(gz_file, output_file):
    """Decompress ``gz_file`` into ``output_file`` unless the output already exists.

    The data is first written to a temporary sibling file
    (``output_file + '.part'``) and only renamed to ``output_file`` once the
    whole archive has been decompressed. An interrupted or failed extraction
    therefore never leaves a truncated ``output_file`` behind that a later
    run would mistake for a finished one.

    Args:
        gz_file: Path of the gzip-compressed input file.
        output_file: Path the decompressed bytes are written to.

    Raises:
        OSError, EOFError: Propagated from ``gzip``/``open`` when the archive
            is missing, unreadable, or truncated. ``output_file`` is not
            created in that case.
    """
    if os.path.exists(output_file):
        # Treated as a cache hit: an existing output is never overwritten.
        return

    partial_file = output_file + '.part'
    try:
        with gzip.open(gz_file, 'rb') as f_in, open(partial_file, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        # Publish the result only after decompression finished successfully.
        os.replace(partial_file, output_file)
    finally:
        # After a successful os.replace the partial file is gone and this is a
        # no-op; after a failure it removes the incomplete data.
        if os.path.exists(partial_file):
            os.remove(partial_file)
                
def scale_whole_image(input_file, output_dir, scaler):
    """Rescale all voxel intensities of one NIfTI image and save the result.

    The image is flattened so that every voxel is one sample with a single
    feature (its intensity). ``scaler`` is therefore fitted on, and applied
    to, the whole volume at once. The result is written to ``output_dir`` as
    ``<name>_scaled.nii.gz``, where ``<name>`` is the input file name without
    its ``.nii`` / ``.nii.gz`` extension.

    Args:
        input_file: Path of the image to load with nibabel.
        output_dir: Directory that receives the scaled image; created if it
            does not exist.
        scaler: Object providing ``fit_transform`` on a 2-D array (e.g. an
            sklearn ``MinMaxScaler``). It is fitted in place by this call.

    Returns:
        The path of the written file, or ``None`` if anything failed. Errors
        are printed rather than raised so that a batch run can continue.
    """
    try:
        img = nib.load(input_file)
        data = img.get_fdata()

        # One row per voxel, one column for the intensity; afterwards restore
        # the original array shape.
        scaled_data = scaler.fit_transform(data.reshape(-1, 1)).reshape(data.shape)

        # Strip the extension explicitly. A plain str.replace('.nii', ...)
        # rewrites every '.nii' occurrence in the name and turns an
        # 'x.nii.gz' input into 'x_scaled.nii.gz.gz'.
        base_name = os.path.basename(input_file)
        for suffix in ('.nii.gz', '.nii'):
            if base_name.endswith(suffix):
                base_name = base_name[:-len(suffix)]
                break
        scaled_file_name = os.path.join(output_dir, base_name + '_scaled.nii.gz')

        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # NOTE(review): only the affine is carried over; the remaining header
        # fields of the source image are not copied. Confirm this is intended.
        scaled_img = nib.Nifti1Image(scaled_data, affine=img.affine)
        nib.save(scaled_img, scaled_file_name)
        return scaled_file_name
    except Exception as e:
        print(f"Error processing file {input_file}: {e}")
        return None

In [17]:
# NOTE(review): root_dir is an absolute, machine-specific path; anyone else
# running this notebook has to change it here.
root_dir = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research'  
institutions = ['Penn', 'CWRU', 'NYU', 'TJU', 'OSU', 'RH']

# All scaled images go below this directory in root_dir, one sub-directory per
# institution. It does not depend on the institution, so it is built once.
min_max_scaled_baseline_dir = os.path.join(root_dir, 'Min_Max_Scaled_v2')

for inst in institutions:

    # Baseline dir in each institution (input images)
    baseline_dir = os.path.join(root_dir, inst, 'Baseline')

    # Institution dir inside Min_Max_Scaled_v2 (output images).
    # makedirs also creates the Min_Max_Scaled_v2 parent when needed.
    min_max_inst_dir = os.path.join(min_max_scaled_baseline_dir, inst)
    os.makedirs(min_max_inst_dir, exist_ok=True)

    # Process baseline images with MinMaxScaler. The directory part is escaped
    # so that only '*.nii.gz' acts as a wildcard, and the result is sorted so
    # files are processed in a reproducible order.
    baseline_files = sorted(glob.glob(os.path.join(glob.escape(baseline_dir), '*.nii.gz')))
    for file in baseline_files:
        # Drop the trailing '.gz' to get the path of the decompressed '.nii'
        extracted_file = file[:-3]
        try:
            extract_gz_file(file, extracted_file)
        except Exception as e:
            # Same policy as scale_whole_image: report the file and carry on,
            # so one unreadable archive does not abort the whole batch.
            print(f"Error extracting file {file}: {e}")
            continue
        # A fresh scaler per image: each volume is scaled by its own min/max.
        scale_whole_image(extracted_file, min_max_inst_dir, MinMaxScaler())

Error processing file /Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Penn/Baseline/ABGN_2015.02.27_flair_LPS_rSRI_SSFinal_N3_sus.nii: Expected 35712000 bytes, got 19463840 bytes from /Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Penn/Baseline/ABGN_2015.02.27_flair_LPS_rSRI_SSFinal_N3_sus.nii
 - could the file be damaged?


In [18]:
# Load one original (unscaled) volume as a reference for comparison with its
# scaled counterpart.
# NOTE(review): this is a '.nii' inside a Baseline directory -- presumably the
# file decompressed by the scaling loop; confirm it exists before running.
unscaled_image = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Penn/Baseline/AAAB_2006.10.28_adc_LPS_rSRI_SSFinal.nii'

unscaled_img= nib.load(unscaled_image)
unscaled_data = unscaled_img.get_fdata()

# Distinct intensity values in the volume; displayed as the cell output.
unique = np.unique(unscaled_data)
unique

array([0.00000000e+00, 1.35040782e-05, 2.96596281e-05, ...,
       9.81193781e-03, 9.82969254e-03, 9.90697742e-03])

In [36]:
# Load the scaled counterpart of the reference volume.
# scale_whole_image saves its output as '<name>_scaled.nii.gz', so the path
# must carry the '.gz' suffix; a bare '_scaled.nii' only exists if it was
# created outside this notebook. nibabel reads '.nii.gz' files directly.
scaled_image_v2 = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Min_Max_Scaled_v2/Penn/AAAB_2006.10.28_adc_LPS_rSRI_SSFinal_scaled.nii.gz'

scaled_img_v2 = nib.load(scaled_image_v2)
scaled_data_v2 = scaled_img_v2.get_fdata()

# Distinct intensity values after scaling; displayed as the cell output.
# NOTE: this rebinds `unique`, which previously held the unscaled values.
unique = np.unique(scaled_data_v2)
unique

array([0.        , 0.00136309, 0.00299381, ..., 0.9904068 , 0.99219895,
       1.        ])

In [37]:
# Report mean and standard deviation of the reference volume before and after
# scaling, one line per volume.
for _label, _volume in (("Original", unscaled_data), ("Scaled", scaled_data_v2)):
    print(f"{_label} data statistics - Mean:", np.mean(_volume), "Std:", np.std(_volume))


Original data statistics - Mean: 0.00039747165601163476 Std: 0.0011139564081072085
Scaled data statistics - Mean: 0.04012037570676458 Std: 0.11244160165451092
