In [3]:
import sys
import pathlib

In [5]:
DATA_ROOT = pathlib.Path("../") / "data" / "20241212_ThyRedo"

In [6]:
# List of folders
folders = [f for f in DATA_ROOT.iterdir() if f.is_dir()]

In [7]:
folders

[PosixPath('../data/20241212_ThyRedo/Cancer'),
 PosixPath('../data/20241212_ThyRedo/Normal')]

In [15]:
OUTPUT_DIRECTORY = pathlib.Path("../") / "data" / "raw2"

In [18]:
for f in folders:
    output_p = 0
    print(f"Processing folder: {f.name}")
    patients = [p for p in f.iterdir() if p.is_dir()]
    print(f"Found {len(patients)} patients in {f.name}")
    for p in patients:
        print(f"  Patient: {p.name}")
        fovs = [fo for fo in p.iterdir() if fo.is_dir()]
        output_p += 1
        # Remove all fovs that do not have a number
        fovs = [fo for fo in fovs if any(char.isdigit() for char in fo.name)]
        print(f"  Found {len(fovs)} FOVs in patient {p.name}")
        for fov in fovs:
            print(f"    FOV: {fov.name}")
            MIP_Folder = fov / "MIP_images" / "A"
            files = list(MIP_Folder.glob("*.tif"))
            print(f"    Found {len(files)} MIP images in FOV {fov.name}")
            for file in files:
                print(f"      File: {file.name}")
                new_file_name = f"{f.name}_P{output_p}_F{fov.name}_I{file.name}"
                output_file = OUTPUT_DIRECTORY / new_file_name
                print(f"      Output File: {output_file}")
                # Copy file to output directory
                output_file.parent.mkdir(parents=True, exist_ok=True)
                file.rename(output_file)
                print(f"      Moved {file.name} to {output_file}")


Processing folder: Cancer
Found 5 patients in Cancer
  Patient: Box1 03a1
  Found 5 FOVs in patient Box1 03a1
    FOV: 1
    Found 9 MIP images in FOV 1
      File: 9.tif
      Output File: ../data/raw2/Cancer_P1_F1_I9.tif
      Moved 9.tif to ../data/raw2/Cancer_P1_F1_I9.tif
      File: 8.tif
      Output File: ../data/raw2/Cancer_P1_F1_I8.tif
      Moved 8.tif to ../data/raw2/Cancer_P1_F1_I8.tif
      File: 3.tif
      Output File: ../data/raw2/Cancer_P1_F1_I3.tif
      Moved 3.tif to ../data/raw2/Cancer_P1_F1_I3.tif
      File: 2.tif
      Output File: ../data/raw2/Cancer_P1_F1_I2.tif
      Moved 2.tif to ../data/raw2/Cancer_P1_F1_I2.tif
      File: 1.tif
      Output File: ../data/raw2/Cancer_P1_F1_I1.tif
      Moved 1.tif to ../data/raw2/Cancer_P1_F1_I1.tif
      File: 5.tif
      Output File: ../data/raw2/Cancer_P1_F1_I5.tif
      Moved 5.tif to ../data/raw2/Cancer_P1_F1_I5.tif
      File: 4.tif
      Output File: ../data/raw2/Cancer_P1_F1_I4.tif
      Moved 4.tif to ../data/raw2

In [21]:
#Inside the raw2 folder move all the cancer files to a separate folder
cancer_folder = OUTPUT_DIRECTORY / "cancerous"
cancer_folder.mkdir(parents=True, exist_ok=True)

# Get all the .tif files in the output directory and move cancer files
files = list(OUTPUT_DIRECTORY.glob("*.tif"))

for f in files:
    if "cancer" in f.name.lower():
        f.rename(cancer_folder / f.name)
        
print(f"Moved cancer files to {cancer_folder}")

Moved cancer files to ../data/raw2/cancerous


In [22]:
#Inside the raw2 folder move all the cancer files to a separate folder
normal_folder = OUTPUT_DIRECTORY / "normal"
normal_folder.mkdir(parents=True, exist_ok=True)

# Get all the .tif files in the output directory and move normal files
files = list(OUTPUT_DIRECTORY.glob("*.tif"))

for f in files:
    if "normal" in f.name.lower():
        f.rename(normal_folder / f.name)

print(f"Moved normal files to {normal_folder}")

Moved normal files to ../data/raw2/normal
