In [2]:
import numpy as np
import nibabel as nib
import os
import matplotlib.pyplot as plt
import glob
import gzip
import shutil
from sklearn.preprocessing import MinMaxScaler, RobustScaler

In [6]:
def extract_gz_file(gz_file, output_file):
    if not os.path.exists(output_file):
        with gzip.open(gz_file, 'rb') as f_in:
            with open(output_file, 'wb') as f_out:
                shutil.copyfileobj(f_in, f_out)

def scale_and_save_image(input_file, output_dir, scaler):
    try:
        #print(input_file)
        img = nib.load(input_file)
        data = img.get_fdata()
        #print("Original data shape:", data.shape, "Type:", data.dtype)
        #print("Original data statistics - Mean:", np.mean(data), "Std:", np.std(data))

        scaled_data = scaler.fit_transform(data.reshape(-1, 1)).reshape(data.shape)
        #print("Scaler parameters:", scaler.get_params())
        #print("Scaler statistics - Center:", scaler.center_, "Scale:", scaler.scale_)
        #print("Scaled data statistics - Mean:", np.mean(scaled_data), "Std:", np.std(scaled_data))

        scaled_img = nib.Nifti1Image(scaled_data, affine=img.affine)
        scaled_file_name = os.path.join(output_dir, os.path.basename(input_file).replace('.nii', '_whole_image_scaled.nii.gz'))
        nib.save(scaled_img, scaled_file_name)
    except (OSError, EOFError) as e:
        print(f"Error processing file {input_file}: {e}")

In [7]:
root_dir = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research'  # Replace with the path to your main directory
#institutions = ['Penn', 'CWRU', 'NYU', 'TJU', 'OSU', 'RH']
institutions = ['Penn']

for inst in institutions:
    
    # Baseline dir in each institution 
    baseline_dir = os.path.join(root_dir, inst, 'Baseline')

    # Dir we're creating for scaling in root dir
    robust_scaled_baseline_dir = os.path.join(root_dir, 'Robust_Scaled')

    # Institution dirs we're creating within Min_Max_Scaled_Baseline
    robust_inst_dir = os.path.join(root_dir, robust_scaled_baseline_dir, inst)

    os.makedirs(robust_scaled_baseline_dir, exist_ok=True)
    os.makedirs(robust_inst_dir, exist_ok=True)

    # Process baseline images with RobustScaler
    baseline_files = glob.glob(f"{baseline_dir}/*.nii.gz")
    for file in baseline_files:
        extracted_file = file[:-3]
        extract_gz_file(file, extracted_file)
        scale_and_save_image(extracted_file, robust_inst_dir, RobustScaler())

Error processing file /Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Penn/Baseline/ABGN_2015.02.27_flair_LPS_rSRI_SSFinal_N3_sus.nii: Expected 35712000 bytes, got 19463840 bytes from /Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Penn/Baseline/ABGN_2015.02.27_flair_LPS_rSRI_SSFinal_N3_sus.nii
 - could the file be damaged?


In [38]:
unscaled_image = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Penn/Baseline/AAAB_2006.10.28_adc_LPS_rSRI_SSFinal.nii'

unscaled_img= nib.load(unscaled_image)
unscaled_data = unscaled_img.get_fdata()
# Find unique values
unique_values_2 = np.unique(unscaled_data)
unique_values_2

array([0.00000000e+00, 1.35040782e-05, 2.96596281e-05, ...,
       9.81193781e-03, 9.82969254e-03, 9.90697742e-03])

In [40]:
scaled_image = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Robust_Scaled/Penn/AAAB_2006.10.28_adc_LPS_rSRI_SSFinal_whole_image_scaled.nii'

scaled_img = nib.load(scaled_image)
scaled_data = scaled_img.get_fdata()

# Find unique values
unique_values = np.unique(scaled_data)
unique_values

array([0.00000000e+00, 1.35040782e-05, 2.96596281e-05, ...,
       9.81193781e-03, 9.82969254e-03, 9.90697742e-03])

In [41]:
print("Original data statistics - Mean:", np.mean(unscaled_data), "Std:", np.std(unscaled_data))
print("Scaled data statistics - Mean:", np.mean(scaled_data), "Std:", np.std(scaled_data))


Original data statistics - Mean: 0.00039747165601163476 Std: 0.0011139564081072085
Scaled data statistics - Mean: 0.00039747165601163476 Std: 0.0011139564081072085


In [30]:
import os
import glob

def count_gz_files(directory):
    gz_files = glob.glob(f"{directory}/*.gz")  # Finds all .gz files in the directory
    return len(gz_files)

# Define your two directories
dir1 = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Penn/Baseline'
dir2 = '/Users/yehudadicker/Library/Mobile Documents/com~apple~CloudDocs/Penn_Research/Robust_Scaled/Penn'

# Count .gz files in each directory
gz_counts_dir1 = count_gz_files(dir1)
gz_counts_dir2 = count_gz_files(dir2)

print(gz_counts_dir1)
print(gz_counts_dir2)


443
442
