In [1]:
import os
import shutil
import re
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import nibabel as nib

# Data Organization

In [2]:
from IPython.display import clear_output
def printUpdate(count, total):
  clear_output(wait=True)
  percent = 100.*count/total
  print(f"{count}/{total} ({percent:.2f}%)", end="")
  print(f'[{"██"*int(percent//10)}{"--"*int((100-percent)//10)}]')



for i in range(10):
  printUpdate(i, 10)

9/10 (90.00%)[██████████████████--]


In [3]:
def moveRename(source, dest, num_files = 291):
    """
    Moves all .hdr and .img images from source location to a single directory
    """
    rawDirPat = r"(?:[\W\S]+?)OAS2_([0-9]{4})_MR([0-9]{1})/RAW"
    i = 0
    for root, dir, files in os.walk(source):
        r_match = re.findall(rawDirPat, root)
        if len(r_match) > 0:
            subID = r_match[0][0]
            session = r_match[0][1]
            new_name = f"{subID}_{session}"
            for f in files:
                fname, fext = os.path.splitext(f)
                if fext == ".img" or fext == ".hdr":
                    f_match = re.findall(r"mpr-([0-9]{1}).nifti", fname)
                    if len(f_match) > 0:
                        f_num = f_match[0]
                        old_name = os.path.join(root, f)
                        new_name = os.path.join(
                            dest, f"OAS2_{subID}_MR{f_num}_V{session}.nifti{fext}"
                        )

                        printUpdate(i, num_files)
                        i += 1
                        shutil.copy2(old_name, new_name)
    print()

In [4]:
def convertToNii(source, num_files = 291):
    """
    Convert images from .img to .nii format and get rid of .img and .hdr files.
    """

    i = 0
    for root, dir, files in os.walk(source):
        for f in files:
            fbase, fext = os.path.splitext(f)
            if fext == ".img":
                # print(f"Converting {f}")
                printUpdate(i, num_files)
                i += 1
                fname = os.path.join(root, f)
                img = nib.load(fname)
                nib.save(img, fname.replace(".img", ".nii"))
                os.remove(os.path.join(root, fbase + ".hdr"))
                os.remove(os.path.join(root, fbase + ".img"))
    print()

In [5]:
def removeVisits3to5(source):
    """
    Removes all images taken at visits 3-5
    """
    for root, dir, files in os.walk(source):
        for f in files:
            m = re.match(r"OAS2_[0-9]{4}_MR[0-9]{1}_V([0-9]{1})", f)
            session_num = int(m.groups()[0])
            if session_num > 2:
                print(f"Removing {f}")
                os.remove(os.path.join(root, f))

def removeSubjects(subjectIDs:list, source):
    for root, dir, files in os.walk(source):
        for f in files:
            for id in subjectIDs:
                m = re.match(re.compile("("+id+")"), f)
                if m!= None:
                    subjectIDs.remove(id)
                    os.remove(os.path.join(root, f))


In [6]:
# Only run once, to move data
# Set True to move files
move_files = False

In [7]:
if move_files:
  moveRename("datasets/OAS2", "datasets/OAS2_nii", 2733)

In [8]:
if move_files:
  convertToNii("datasets/OAS2_nii", 1366)

In [9]:
if move_files:
  removeVisits3to5("datasets/OAS2_nii")


In [10]:
# Remove subject MRIs who had an age > 95
if move_files:
  removeSubjects(["OAS2_0051_MR3", "OAS2_0087_MR1", "OAS2_0087_MR2"], os.getenv("OAS2NII"))

# Load Data

In [None]:
def moveByClass(df, num_files = 291):
  train_path = os.path.join(os.getenv("OAS2NII"), "train")
  test_path = os.path.join(os.getenv("OAS2NII"), "test")
  dem_path = os.path.join(os.getenv("OAS2NII"), "class_demented")
  ndem_path = os.path.join(os.getenv("OAS2NII"), "class_nondemented")
  if not os.path.exists(train_path):
    os.makedirs(train_path)
    if not os.path.exists(os.path.join(train_path, "class_demented")):
      os.makedirs(os.path.join(train_path, "class_demented"))
    if not os.path.exists(os.path.join(train_path, "class_nondemented")):
      os.makedirs(os.path.join(train_path, "class_nondemented"))
  if not os.path.exists(test_path):
    os.makedirs(test_path)
    if not os.path.exists(os.path.join(test_path, "class_demented")):
      os.makedirs(os.path.join(test_path, "class_demented"))
    if not os.path.exists(os.path.join(test_path, "class_nondemented")):
      os.makedirs(os.path.join(test_path, "class_nondemented"))

  new_paths = []
  i = 0
  for index, row in df.iterrows():
    old_name = row["file"]
    fname = os.path.split(old_name)[1]
    if row["Group"] == 0:
      new_name = os.path.join(os.getenv("OAS2NII"), row["Split"], "class_nondemented", fname)
    else:
      new_name = os.path.join(os.getenv("OAS2NII"), row["Split"], "class_demented", fname)
    new_paths += [new_name]
    printUpdate(i, num_files)
    i += 1
    os.rename(old_name, new_name)
    print(f'Old: {old_name}\nNew:{new_name}\n')

  files = [f for f in os.listdir(os.getenv("OAS2NII")) if os.path.isfile(os.path.join(os.getenv("OAS2NII"), f))]
  # for f in files:
  #   os.remove(os.path.join(os.getenv("OAS2NII"),f))

  df["file"] = new_paths


In [12]:
load_dotenv()
df = pd.read_excel("OAS2-normalized.xlsx")

In [13]:
fnames = []

df.set_index("MRI ID", inplace=True)

for index, row in df.iterrows():
  fname = index + "_V" + str(row["Visit"]+1) +  ".nifti.nii"
  fnames += [os.path.join(os.getenv("OAS2NII"),fname)]

df["file"] = fnames


In [14]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(df.drop(columns=["Group"]), df["Group"], test_size=0.2, stratify=df[['Group',"Sex_F"]])



# x_train["Split"] = ["Train"]*x_train.shape[0]
# # y_train["Split"] = ["Train"]*y_train.shape[0]
# x_test["Split"] = ["Test"]*x_test.shape[0]
# y_test["Split"] = ["Test"]*y_test.shape[0]


In [15]:
train = x_train.copy(deep=True)
train["Split"] = ["train"]*train.shape[0]
train["Group"] = y_train.values

test = x_test.copy(deep=True)
test["Split"] = ["test"]*test.shape[0]
test["Group"] = y_test.values

df = pd.merge(train, test, how="outer")


In [16]:
moveByClass(df)


0/291 (0.00%)[--------------------]


FileNotFoundError: [Errno 2] No such file or directory: 'datasets/OAS2_nii/OAS2_0001_MR1_V1.nifti.nii' -> 'datasets/OAS2_nii/train/class_nondemented/OAS2_0001_MR1_V1.nifti.nii'

# Keras

In [None]:
# Needed to add cuda to path for GPU utilization
# !source ~/.profile

In [None]:
import keras
from keras import layers
from keras import ops
import SimpleITK as sitk

2025-04-03 16:16:21.397187: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-03 16:16:21.529503: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1743718581.581876 1188558 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1743718581.596887 1188558 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1743718581.706000 1188558 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [None]:
reader = sitk.ImageFileReader()
reader.SetImageIO("NiftiImageIO")
reader.SetFileName(df["file"].iloc[0])
image = reader.Execute()
img_size = image.GetSize()
num_images = df.shape[0]


In [None]:
img_inputs = keras.Input(shape=(img_size[0],img_size[1],img_size[2],num_images))


In [None]:
dense = layers.Dense(64, activation="relu")
x = dense(img_inputs)
x = layers.Dense(64, activation="relu")(x)
outputs = layers.Dense(10)(x)
model = keras.Model(inputs=img_inputs, outputs=outputs, name="mnist_model")


I0000 00:00:1743718583.446580 1188558 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 7812 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:01:00.0, compute capability: 8.6


In [None]:
model.summary()

In [None]:
dset = keras.utils.image_dataset_from_directory(directory=os.getenv("OAS2NII"), labels="inferred")

Found 0 files belonging to 0 classes.


ValueError: No images found in directory datasets/OAS2_nii. Allowed formats: ('.bmp', '.gif', '.jpeg', '.jpg', '.png')