In [1]:
import os
import re
import numpy as np


cubes_dir = "./Data/Images/Data Cubes"
walkthrough = os.walk(cubes_dir)
for root, folders, files in walkthrough:
  break
folders = [zarr for zarr in folders if zarr.endswith(".zarr")]
samples = len(folders)
pattern = re.compile(r"Data_Cube_(\d+)\.zarr")

print(f"[INFO] Samples: {samples}")
print(f"[INFO] Selected for test and for validation: {int(samples * 0.15)}; for training: {int(samples * 0.7)}")


rng = np.random.default_rng(164953)
test = rng.choice(folders, int(samples * 0.15), replace=False)
test_ids = [int(pattern.search(id).group(1)) for id in test]
print()
print(f"[DEBUG] Test size: {len(test)}")
print(f"[DEBUG] Test ids: {test_ids}")
print(f"[INFO] Moving test folders [", end="")
os.makedirs(f"{cubes_dir}/Test", exist_ok=True)
for i, folder in enumerate(test):
  if not (i + 1) % (len(test) // 100): print(".", end="")
  os.rename(f"{os.path.join(cubes_dir, folder)}", f"{cubes_dir}/Test/{folder}")
print("]")

walkthrough = os.walk(cubes_dir)
for root, folders, files in walkthrough:
  break
folders = [zarr for zarr in folders if zarr.endswith(".zarr")]
val = rng.choice(folders, int(samples * 0.15), replace=False)
val_ids = [int(pattern.search(id).group(1)) for id in val]
print()
print(f"[DEBUG] Validation size: {len(val)}")
print(f"[DEBUG] Validation ids: {val_ids}")
print(f"[INFO] Moving validation folders [", end="")
os.makedirs(f"{cubes_dir}/Val", exist_ok=True)
for i, folder in enumerate(val):
  if not (i + 1) % (len(val) // 100): print(".", end="")
  os.rename(f"{os.path.join(cubes_dir, folder)}", f"{cubes_dir}/Val/{folder}")
print("]")


walkthrough = os.walk(cubes_dir)
for root, folders, files in walkthrough:
  break
folders = [zarr for zarr in folders if zarr.endswith(".zarr")]
train = folders
train_ids = [int(pattern.search(id).group(1)) for id in train]
print()
print(f"[DEBUG] Train size: {len(train)}")
print(f"[DEBUG] Train ids: {train_ids}")
print(f"[INFO] Moving train folders [", end="")
os.makedirs(f"{cubes_dir}/Train", exist_ok=True)
for i, folder in enumerate(train):
  if not (i + 1) % (len(train) // 100): print(".", end="")
  os.rename(f"{os.path.join(cubes_dir, folder)}", f"{cubes_dir}/Train/{folder}")
print("]")


log_file = "./Log/Train Test Split.txt"
with open(log_file, "w") as file:
  file.writelines(f"[INFO] Samples: {samples}\n")
  file.writelines(f"[INFO] Selected for test and for validation: {int(samples * 0.15)}; for training: {int(samples * 0.7)}\n\n")
  file.writelines(f"[DEBUG] Test size: {len(test)}\n")
  file.writelines(f"[DEBUG] Test ids: {test_ids}\n\n")
  file.writelines(f"[DEBUG] Validation size: {len(val)}\n")
  file.writelines(f"[DEBUG] Validation ids: {val_ids}\n\n")
  file.writelines(f"[DEBUG] Train size: {len(train)}\n")
  file.writelines(f"[DEBUG] Train ids: {train_ids}\n")


[INFO] Samples: 6609
[INFO] Selected for test and for validation: 991; for training: 4626

[DEBUG] Test size: 991
[DEBUG] Test ids: [13758, 3011, 12383, 10904, 3203, 1141, 5554, 6518, 9141, 10416, 3661, 5456, 6918, 4055, 8583, 10112, 10217, 9921, 4480, 2332, 9755, 2394, 11598, 6120, 13457, 9329, 5742, 5747, 9940, 6885, 5755, 3294, 9359, 4675, 8845, 8148, 3770, 7970, 8558, 13649, 9426, 3081, 8959, 2804, 4001, 11198, 10606, 2525, 10808, 4344, 2016, 12588, 2891, 4352, 1739, 9751, 5241, 6432, 7975, 1237, 3503, 8762, 13740, 13835, 11768, 5042, 3955, 2009, 12285, 11305, 12762, 5944, 2915, 4474, 3804, 13756, 8668, 5363, 13350, 11807, 9936, 856, 4262, 3901, 2717, 11223, 8563, 10830, 4242, 12449, 4088, 12155, 4971, 10213, 8372, 4862, 9041, 12169, 4098, 9425, 7392, 2313, 9320, 9938, 8553, 10816, 3662, 10113, 3209, 2995, 5736, 3010, 3184, 2011, 6899, 14533, 9149, 7864, 2029, 3077, 8386, 8542, 3000, 12181, 9637, 9044, 13353, 5231, 1545, 5338, 7691, 8272, 5454, 2118, 9933, 2110, 6980, 10540, 10406,