In [33]:
import os
import shutil
import re
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import nibabel as nib

# Data Organization

In [34]:
def moveRename(source, dest):
    """
    Copy every raw .hdr/.img scan found under ``source`` into the single flat
    directory ``dest``, renaming each file on the way.

    Only directories whose path contains ``OAS2_<4 digits>_MR<digit>/RAW`` are
    considered, and inside them only ``.img``/``.hdr`` files whose base name
    contains ``mpr-<digit>.nifti``. Each such file is copied (the original is
    left in place) to::

        <dest>/OAS2_<4 digits>_MR<mpr digit>_V<directory MR digit>.nifti<ext>

    Parameters
    ----------
    source : str
        Root of the directory tree to search.
    dest : str
        Output directory. It is created if it does not exist yet.
    """
    # Accept both "/" and "\" so the directory pattern also matches Windows paths.
    rawDirPat = re.compile(r"OAS2_([0-9]{4})_MR([0-9])[\\/]RAW")
    # The dot is escaped so only a literal "mpr-<n>.nifti" is accepted.
    scanPat = re.compile(r"mpr-([0-9])\.nifti")

    # shutil.copy2 does not create missing parent directories.
    os.makedirs(dest, exist_ok=True)

    for root, _dirs, files in os.walk(source):
        r_match = rawDirPat.search(root)
        if r_match is None:
            continue
        subID, session = r_match.groups()
        for f in files:
            fname, fext = os.path.splitext(f)
            if fext not in (".img", ".hdr"):
                continue
            f_match = scanPat.search(fname)
            if f_match is None:
                continue
            f_num = f_match.group(1)
            old_name = os.path.join(root, f)
            new_name = os.path.join(
                dest, f"OAS2_{subID}_MR{f_num}_V{session}.nifti{fext}"
            )
            print(f"Copying {new_name}")
            shutil.copy2(old_name, new_name)

In [35]:
def convertToNii(source):
    """
    Convert every ``.img`` image under ``source`` to a ``.nii`` file written
    next to it, then delete the original ``.img`` file and its ``.hdr`` companion.

    Parameters
    ----------
    source : str
        Root of the directory tree to walk. Files are converted in place.

    Notes
    -----
    The ``.hdr`` file with the same base name is removed unconditionally after
    the save, so a missing header raises ``FileNotFoundError`` from ``os.remove``
    (if loading did not already fail).
    """
    for root, _dirs, files in os.walk(source):
        for f in files:
            fbase, fext = os.path.splitext(f)
            if fext != ".img":
                continue
            print(f"Converting {f}")
            img_path = os.path.join(root, f)
            # Build the output path from the base name instead of a string
            # replace on the full path, which would also rewrite ".img"
            # occurring in a directory name or earlier in the file name.
            nii_path = os.path.join(root, fbase + ".nii")
            img = nib.load(img_path)
            nib.save(img, nii_path)
            # Only delete the source pair once the .nii has been written.
            os.remove(os.path.join(root, fbase + ".hdr"))
            os.remove(img_path)

In [None]:
def removeVisits3to5(source):
    """
    Delete every image under ``source`` whose file name records visit 3 or later.

    A file is considered only if its name starts with
    ``OAS2_<4 digits>_MR<digit>_V<digit>``; the digit after ``_V`` is the visit
    number. Files with a visit number >= 3 are removed. Files that do not follow
    this naming scheme are left untouched.

    Parameters
    ----------
    source : str
        Root of the directory tree to walk.
    """
    visitPat = re.compile(r"OAS2_[0-9]{4}_MR[0-9]_V([0-9])")
    for root, _dirs, files in os.walk(source):
        for f in files:
            m = visitPat.match(f)
            if m is None:
                # Not a renamed scan (e.g. a hidden OS file): skip it rather
                # than crash on m.groups().
                continue
            session_num = int(m.group(1))
            if session_num >= 3:
                print(f"Removing {f}")
                os.remove(os.path.join(root, f))

def removeSubjects(subjectIDs:list, source):
    """
    Delete every file under ``source`` whose name starts with one of ``subjectIDs``.

    Each ID is applied with ``re.match``, i.e. it is treated as a regular
    expression anchored at the start of the file name. All matching files are
    removed (not just the first one found per ID), and the caller's list is
    left unmodified.

    Parameters
    ----------
    subjectIDs : list
        File-name prefixes (e.g. ``"OAS2_0087_MR1"``) identifying files to delete.
    source : str
        Root of the directory tree to walk.
    """
    # Compile once up front; this also snapshots the IDs so the input list is
    # never mutated while it is being iterated.
    patterns = [re.compile("(" + sid + ")") for sid in subjectIDs]
    for root, _dirs, files in os.walk(source):
        for f in files:
            # any() stops at the first matching ID, so each file is removed at
            # most once even if several IDs match it.
            if any(p.match(f) is not None for p in patterns):
                os.remove(os.path.join(root, f))


In [None]:
# Only run once, to move data
# moveRename("datasets/OAS2", "datasets/OAS2_nii")
# convertToNii("datasets/OAS2_nii")
# removeVisits3to5("datasets/OAS2_nii")
# # Remove subject MRIs who had an age > 95
# removeSubjects(["OAS2_0051_MR3", "OAS2_0087_MR1", "OAS2_0087_MR2"], os.getenv("OAS2NII"))

# Load Data

In [38]:
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is where OAS2NII (read with os.getenv in this
# notebook) is defined — confirm.
load_dotenv()
# Read the spreadsheet into a DataFrame; later code reads its "MRI ID" and
# "Visit" columns.
df = pd.read_excel("OAS2-normalized.xlsx")

In [None]:
# Directory that holds the converted .nii images. Read it once (instead of once
# per row) and fail with a clear message rather than the TypeError that
# os.path.join(None, ...) would raise.
nii_dir = os.getenv("OAS2NII")
if nii_dir is None:
    raise RuntimeError(
        "Environment variable OAS2NII is not set; cannot build image file paths."
    )

# One path per row: <OAS2NII>/<MRI ID>_V<Visit + 1>.nifti.nii
# Iterating the two columns directly avoids the per-row Series that
# DataFrame.iterrows() builds.
fnames = [
    os.path.join(nii_dir, mri_id + "_V" + str(visit + 1) + ".nifti.nii")
    for mri_id, visit in zip(df["MRI ID"], df["Visit"])
]

df["file"] = fnames


291
(291, 14)
