# Data Preprocessing Step 


In [1]:
import pandas as pd
from pathlib import Path

df = pd.read_csv('subject_lookup.csv')
mapping = dict(zip(df["OrigCode"], df["SubjectID"]))

# Path to the folder containing subject directories
base_dir = Path("/home/smooi/Desktop/toast/data/toast_pipe_data")  # Change this if needed

# Rename matching folders
for folder in base_dir.iterdir():
    if folder.is_dir() and folder.name in mapping:
        new_name = mapping[folder.name]
        new_path = folder.parent / new_name
        if not new_path.exists():
            print(f"Renaming: {folder.name} → {new_name}")
            folder.rename(new_path)

Renaming: S_aEA_1069A → S016
Renaming: G_Nemours_0020 → G044
Renaming: G_2023-U7487-00427-MG → G003
Renaming: S_Cc_103Pre → S048
Renaming: S_Cc_MreVolHrsb201 → S062
Renaming: S_aU01_006 → S044
Renaming: S_aSTE_005 → S029
Renaming: G_Nemours_0023 → G047
Renaming: S_aEA_1049A → S003
Renaming: S_aEA_1065A → S015
Renaming: S_MreVol_9 → S072
Renaming: G_Nemours_0025 → G049
Renaming: S_MreVol_6 → S069
Renaming: S_aU01_D2002 → S045
Renaming: G_Nemours_0001 → G025
Renaming: S_aU01_004 → S042
Renaming: G_Nemours_0012 → G036
Renaming: G_Nemours_0024 → G048
Renaming: S_aEA_1078B → S023
Renaming: G_2023-U7487-0719-MS → G015
Renaming: G_2023-U7487-0706-VP → G014
Renaming: S_aEA_1076A → S022
Renaming: G_Nemours_0004 → G028
Renaming: G_2023-U7778-0188-JG → G022
Renaming: S_aSTE_018 → S038
Renaming: G_Nemours_0009 → G033
Renaming: G_Nemours_0003 → G027
Renaming: G_2023-U7487-00443-EM → G004
Renaming: G_2023-U7778-0177-EC → G021
Renaming: S_Cc_114 → S059
Renaming: S_aSTE_010 → S034
Renaming: G_2023-U74

In [3]:
from pathlib import Path

def rename_files_by_subject_id(root_dir):
    root = Path(root_dir)
    for subject_folder in root.iterdir():
        if subject_folder.is_dir():
            subject_id = subject_folder.name
            for file in subject_folder.iterdir():
                if file.is_file():
                    new_name = f"{subject_id}_{file.name}"
                    new_path = file.parent / new_name
                    file.rename(new_path)
                    print(f"Renamed: {file.name} → {new_name}")

# Example usage
rename_files_by_subject_id("/home/smooi/Desktop/toast/data/toast_pipe_data")   # e.g., "toast_pipe_data"


Renamed: t2bet.nii → G041_t2bet.nii
Renamed: stiff_prob.mat → G041_stiff_prob.mat
Renamed: mask.mat → G041_mask.mat
Renamed: t2stack.nii → G041_t2stack.nii
Renamed: t2stack.mat → G041_t2stack.mat
Renamed: Mu.mat → G041_Mu.mat
Renamed: t2bet_norm.nii → G041_t2bet_norm.nii
Renamed: t2bet.nii → G053_t2bet.nii
Renamed: stiff_prob.mat → G053_stiff_prob.mat
Renamed: mask.mat → G053_mask.mat
Renamed: t2stack.nii → G053_t2stack.nii
Renamed: t2stack.mat → G053_t2stack.mat
Renamed: Mu.mat → G053_Mu.mat
Renamed: t2bet_norm.nii → G053_t2bet_norm.nii
Renamed: t2bet.nii → G014_t2bet.nii
Renamed: stiff_prob.mat → G014_stiff_prob.mat
Renamed: mask.mat → G014_mask.mat
Renamed: t2stack.nii → G014_t2stack.nii
Renamed: t2stack.mat → G014_t2stack.mat
Renamed: Mu.mat → G014_Mu.mat
Renamed: t2bet_norm.nii → G014_t2bet_norm.nii
Renamed: t2bet.nii → G047_t2bet.nii
Renamed: stiff_prob.mat → G047_stiff_prob.mat
Renamed: mask.mat → G047_mask.mat
Renamed: t2stack.nii → G047_t2stack.nii
Renamed: t2stack.mat → G047_

In [4]:
! pwd 

/home/smooi/Desktop/toast


In [7]:
from pathlib import Path
import shutil

root_dir = Path("/home/smooi/Desktop/toast/data/toast_pipe_data")
mask_dir = root_dir / "mask"
t2stack_dir = root_dir / "t2stack"

# Ensure destination directories exist
mask_dir.mkdir(exist_ok=True)
t2stack_dir.mkdir(exist_ok=True)

# Traverse all subfolders
for subject_folder in root_dir.iterdir():
    if subject_folder.is_dir() and subject_folder.name not in ["mask", "t2stack", "empty_data"]:
        # Find files matching *_mask.mat and *_t2stack.nii
        for file in subject_folder.glob("*_mask.mat"):
            dest = mask_dir / file.name
            shutil.copy2(file, dest)
            print(f"Copied: {file} → {dest}")

        for file in subject_folder.glob("*_t2stack.nii"):
            dest = t2stack_dir / file.name
            shutil.copy2(file, dest)
            print(f"Copied: {file} → {dest}")


Copied: /home/smooi/Desktop/toast/data/toast_pipe_data/G041/G041_mask.mat → /home/smooi/Desktop/toast/data/toast_pipe_data/mask/G041_mask.mat
Copied: /home/smooi/Desktop/toast/data/toast_pipe_data/G041/G041_t2stack.nii → /home/smooi/Desktop/toast/data/toast_pipe_data/t2stack/G041_t2stack.nii
Copied: /home/smooi/Desktop/toast/data/toast_pipe_data/G053/G053_mask.mat → /home/smooi/Desktop/toast/data/toast_pipe_data/mask/G053_mask.mat
Copied: /home/smooi/Desktop/toast/data/toast_pipe_data/G053/G053_t2stack.nii → /home/smooi/Desktop/toast/data/toast_pipe_data/t2stack/G053_t2stack.nii
Copied: /home/smooi/Desktop/toast/data/toast_pipe_data/G014/G014_mask.mat → /home/smooi/Desktop/toast/data/toast_pipe_data/mask/G014_mask.mat
Copied: /home/smooi/Desktop/toast/data/toast_pipe_data/G014/G014_t2stack.nii → /home/smooi/Desktop/toast/data/toast_pipe_data/t2stack/G014_t2stack.nii
Copied: /home/smooi/Desktop/toast/data/toast_pipe_data/G047/G047_mask.mat → /home/smooi/Desktop/toast/data/toast_pipe_dat

In [8]:
# Count the number of files in mask and t2stack directories
mask_count = len(list(mask_dir.glob("*")))
t2stack_count = len(list(t2stack_dir.glob("*")))
print(f"Number of files in mask directory: {mask_count}")
print(f"Number of files in t2stack directory: {t2stack_count}")

Number of files in mask directory: 135
Number of files in t2stack directory: 135
