In [1]:
# importing libraries
import nibabel as nib
import nilearn as nil
import scipy.ndimage as ndi
import matplotlib.pyplot as plt
import os
import glob
import numpy as np
import pandas as pd
import torch

In [2]:
import os
import glob

# Define the paths to your data folders
cwd = os.getcwd()  # Get the current working directory


#CRL for healthy brain mri scans
#EA for Alzheimer's Disease brain mri scans
#TB for bipolar disorder brain mri scans
crl_path = os.path.join(cwd, "CRL")
ea_path = os.path.join(cwd, "EA")
tb_path = os.path.join(cwd, "TB")

# Define the file extension you want to search for
file_extension = '*.nii.gz'

# Use glob to get a list of file paths for each category
crl_mri_scans = glob.glob(os.path.join(crl_path, file_extension))
ea_mri_scans = glob.glob(os.path.join(ea_path, file_extension))
tb_mri_scans = glob.glob(os.path.join(tb_path, file_extension))


In [3]:
# Load NIfTI images for each category
crl_images = [np.array(nib.load(scan_path).get_fdata()) for scan_path in crl_mri_scans]
ea_images = [np.array(nib.load(scan_path).get_fdata()) for scan_path in ea_mri_scans]
tb_images = [np.array(nib.load(scan_path).get_fdata()) for scan_path in tb_mri_scans]

In [4]:
crl_images[0].shape

(128, 128, 50, 93)

In [5]:
def normalize_images(images):
    normalized_images = [(img - np.mean(img)) / np.std(img) for img in images]
    return normalized_images

crl_normalized_images = normalize_images(crl_images)
ea_normalized_images = normalize_images(ea_images)
tb_normalized_images = normalize_images(tb_images)


In [6]:
def convert_images_to_double(images):
    double_images = [img.astype(np.float64) for img in images]
    return double_images

crl_double_images = convert_images_to_double(crl_normalized_images)
ea_double_images = convert_images_to_double(ea_normalized_images)
tb_double_images = convert_images_to_double(tb_normalized_images)


In [7]:
import torch
from torch.utils.data import Dataset

class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        image = self.data[idx]
        label = self.labels[idx]
        return image, label

# Combine your padded image data and labels
all_images = crl_double_images + ea_double_images + tb_double_images
all_labels = [0] * len(crl_normalized_images) + [1] * len(ea_normalized_images) + [2] * len(tb_normalized_images)

# Create an instance of your custom dataset
custom_dataset = CustomDataset(all_images, all_labels)


In [8]:
print(custom_dataset.data[0].shape)
print(type(custom_dataset.data[0]))

(128, 128, 50, 93)
<class 'numpy.ndarray'>


In [9]:
# Split your dataset into training, validation, and test sets
from sklearn.model_selection import train_test_split


train_data, test_data = train_test_split(custom_dataset, test_size=0.2, random_state=42)
train_data, val_data = train_test_split(train_data, test_size=0.2, random_state=42)



In [10]:
print(train_data[0][0].shape)
print(type(train_data[0][0]))

(128, 128, 40, 93)
<class 'numpy.ndarray'>


In [11]:
for index, (img, label) in enumerate(train_data):
    print(img.shape)

(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 45, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 43, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 45, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 47, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 43, 93)


In [12]:
# resize the images
from skimage.transform import resize
target_shape = (128, 128, 40, 93)
def resize_dataset(dataset, target_shape):
    resized_dataset = []
    for item in dataset:
        resized_item = resize(item[0], target_shape, mode='constant', anti_aliasing=True)
        resized_dataset.append((resized_item, item[1]))
    return resized_dataset

# Example usage with the target shape
target_shape = (128, 128, 40, 93)
resized_train_data = resize_dataset(train_data, target_shape)
resized_test_data = resize_dataset(test_data, target_shape)
resized_val_data = resize_dataset(val_data, target_shape)


In [13]:
for index, (img, label) in enumerate(resized_train_data):
    print(img.shape)

(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)
(128, 128, 40, 93)


In [14]:
print(resized_train_data[0])

(array([[[[-0.45346027, -0.45346027, -0.45346027, ..., -0.45346027,
          -0.45346027, -0.45346027],
         [-0.45346027, -0.45346027, -0.45346027, ..., -0.45346027,
          -0.45346027, -0.45346027],
         [-0.45346027, -0.45346027, -0.45346027, ..., -0.45346027,
          -0.45346027, -0.45346027],
         ...,
         [-0.45346027, -0.45346027, -0.45346027, ..., -0.45346027,
          -0.45346027, -0.45346027],
         [-0.45346027, -0.45346027, -0.45346027, ..., -0.45346027,
          -0.45346027, -0.45346027],
         [-0.45346027, -0.45346027, -0.45346027, ..., -0.45346027,
          -0.45346027, -0.45346027]],

        [[-0.45346027, -0.45346027, -0.45346027, ..., -0.45346027,
          -0.45346027, -0.45346027],
         [-0.45346027, -0.45346027, -0.45346027, ..., -0.45346027,
          -0.45346027, -0.45346027],
         [-0.45346027, -0.45346027, -0.45346027, ..., -0.45346027,
          -0.45346027, -0.45346027],
         ...,
         [-0.45346027, -0.4534602

In [15]:
import torch

# Convert the resized datasets into PyTorch tensors
tensor_train_data = [(torch.tensor(data), label) for data, label in resized_train_data]
tensor_test_data = [(torch.tensor(data), label) for data, label in resized_test_data]
tensor_val_data = [(torch.tensor(data), label) for data, label in resized_val_data]


In [16]:
import torch

# Define file paths for saving
train_data_path = 'tensor_train_data.pt'
test_data_path = 'tensor_test_data.pt'
val_data_path = 'tensor_val_data.pt'

# Save the tensor datasets
torch.save(tensor_train_data, train_data_path)
torch.save(tensor_test_data, test_data_path)
torch.save(tensor_val_data, val_data_path)
