In [5]:
import os
import shutil
import re

def group_files_by_patient(source_folder):
  """
  Groups .npy files in the specified source folder by patient ID.
  This function scans the given source folder for .npy files, extracts the patient ID
  from each file name using a regex pattern, and moves the files into corresponding
  subfolders named after the patient IDs.
  Parameters:
  source_folder (str): The path to the folder containing the .npy files to be organized.
  Returns:
  None
  Example:
  If the source folder contains files named 'patient001_image.npy', 'patient002_image.npy',
  and 'patient001_label.npy', the function will create subfolders 'patient001' and 'patient002'
  in the source folder and move the files into these subfolders accordingly.
  """
  # List all .npy files in the folder
  files = [f for f in os.listdir(source_folder) if f.endswith(".npy")]

  # Regex pattern to extract patient ID (e.g., patientXXX)
  pattern = re.compile(r"^(patient\d+)_")

  for file in files:
    match = pattern.match(file)
    if match:
      patient_id = match.group(1)  # Extract patientXXX
      patient_folder = os.path.join(source_folder, patient_id)
      
      # Create the patient folder if it doesn't exist
      os.makedirs(patient_folder, exist_ok=True)
      
      # Move the file into the corresponding patient folder
      shutil.move(os.path.join(source_folder, file), os.path.join(patient_folder, file))

  print("Files have been grouped into their respective patient folders.")


In [4]:
group_files_by_patient(os.path.abspath(os.path.join(os.getcwd(), "../../Data/ACDC/database/processed_training")))
group_files_by_patient(os.path.abspath(os.path.join(os.getcwd(), "../../Data/ACDC/database/processed_testing")))

Files have been grouped into their respective patient folders.


FileNotFoundError: [WinError 3] The system cannot find the path specified: 'd:\\study\\graduation project\\GP-2025-Strain\\Data\\ACDC\\database\\processed_testing'