# Splitting data into training and test sets
- The data will be split into training and test sets. The training set will consist of 90% of the data, while the test set will consist of the remaining 10%. The split is stratified by object class, so each class keeps the same proportion in both sets.

- Defined path: full_data/train, full_data/test

In [1]:
from google.colab import drive
# Mount Google Drive at /content/drive; project_base (set in a later cell)
# points inside this mount.
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import numpy as np
import os
import shutil
import json

In [4]:
# Root folder of the project on the mounted Drive; the data paths used in this
# notebook are built by joining sub-paths onto it.
project_base = "/content/drive/MyDrive/MLDL/6D-Pose-Estimation"

In [15]:
def load_images(source_path, dest_fold):
  """List the file names stored for every object class.

  Args:
    source_path: Directory holding one sub-directory per class ("01" ... "15").
    dest_fold: Name of the sub-directory inside each class directory whose
      content is listed (the notebook passes "rgb" or "depth").

  Returns:
    A list with one entry per class, in ascending class order. Each entry is
    the sorted list of names found in ``<source_path>/<class>/<dest_fold>``.
    Classes "03" and "07" are skipped.
  """
  excluded = ("03", "07")
  candidates = (f"{num:02d}" for num in range(1, 16))
  class_dirs = [name for name in candidates if name not in excluded]

  def _sorted_listing(class_dir):
    entries = os.listdir(os.path.join(source_path, class_dir, dest_fold))
    entries.sort()
    return entries

  return [_sorted_listing(class_dir) for class_dir in class_dirs]


In [None]:
# List the file names found in the "rgb" sub-folder of every class of the raw dataset.
source_path = os.path.join(project_base, "data/raw/data")
image_labels = load_images(source_path, "rgb")

# Depth

In [16]:
# Same listing as for RGB, but for the "depth" sub-folder of every class.
source_path = os.path.join(project_base, "data/raw/data")
image_depth_labels = load_images(source_path, "depth")

# Naming convention for the copied images: *class*_*picture number*

In [12]:
def move_all_images(image_labels, destination_path, source_subfolder="rgb", images_base_path=None):
  """Copy every listed file into one flat folder, prefixing names with the class.

  Despite the name, files are copied with ``shutil.copy2``, so the originals
  stay in the raw dataset.

  Args:
    image_labels: One list of file names per class, in the order "01", "02",
      "04", ... "15" (classes "03" and "07" are excluded), as produced by
      ``load_images``.
    destination_path: Flat output folder; created if it does not exist.
    source_subfolder: Sub-folder of each class directory to copy from.
      Defaults to "rgb"; pass "depth" to copy the depth files instead.
    images_base_path: Directory holding the per-class folders. Defaults to
      ``<project_base>/data/raw/data``.

  Raises:
    KeyError: If ``image_labels`` has more groups than there are classes.
    OSError: If a source file cannot be read or the copy cannot be written.
  """
  if images_base_path is None:
    images_base_path = os.path.join(project_base, "data/raw/data")

  valid_folders = [f"{i:02d}" for i in range(1, 16) if f"{i:02d}" not in ["03", "07"]]
  class_map = dict(enumerate(valid_folders))

  # shutil.copy2 does not create missing parent folders.
  os.makedirs(destination_path, exist_ok=True)

  for i, image_group in enumerate(image_labels):
    class_name = class_map[i]
    for label in image_group:
      image_path = os.path.join(images_base_path, class_name, source_subfolder, label)
      # The class prefix keeps names unique once all classes share one folder.
      dst_path = os.path.join(destination_path, f"{class_name}_{label}")
      shutil.copy2(image_path, dst_path)

In [14]:
# Copy the files named in the depth listing into the flat full_data/depth folder.
# NOTE(review): when called with these two arguments, move_all_images reads
# from the "rgb" sub-folder of each class, so this copies RGB files (looked up
# by the depth file names) rather than the depth maps — confirm before re-running.
destination_path = os.path.join(project_base, "data/full_data/depth")
move_all_images(image_depth_labels, destination_path)

KeyboardInterrupt: 

In [None]:
from sklearn.model_selection import train_test_split

def split_into_train_test(input_path, input_files):
  """Split file names into stratified 90% train / 10% test portions.

  The class of each file is taken from the part of its name before the first
  underscore, following the ``<class>_<picture number>`` naming used when the
  images were copied into the flat folder.

  Args:
    input_path: Folder the names were listed from. Not used by the split; kept
      so existing calls keep working.
    input_files: File names to split.

  Returns:
    A ``(train_files, test_files)`` pair: the train and test portions of
    ``input_files`` as returned by ``train_test_split``.
  """
  # Derive the labels from the names being split rather than from notebook
  # globals, so labels and files cannot get out of step.
  labels = [name.split("_", 1)[0] for name in input_files]

  # Fixed random_state keeps the split reproducible between runs.
  X_train, X_test, _, _ = train_test_split(
    input_files, labels, test_size=0.1, random_state=42, stratify=labels
  )

  return X_train, X_test



In [None]:
# List the flat folder (sorted by name) and split the names into train and test.
# NOTE(review): the only assignment to destination_path in this notebook points
# at data/full_data/depth, while the move cell further down reads from
# data/full_data/images — confirm which folder was listed here.
all_images = sorted(os.listdir(destination_path))
train_images, test_images = split_into_train_test(destination_path, all_images)

In [None]:
# Show how many file names ended up in each split.
len(train_images), len(test_images)

(14220, 1580)

Next, move the test images into a separate folder.

In [None]:
def move_test_images(X_test, source_root, dest_root):
    """Move the named files from ``source_root`` into ``dest_root``.

    Args:
        X_test: File names (relative to ``source_root``) to move.
        source_root: Folder currently holding the files.
        dest_root: Folder to move them into; created if it does not exist.

    Missing source files are reported with a warning and skipped. A file that
    fails to move is reported and skipped as well, so the remaining files are
    still processed.
    """
    os.makedirs(dest_root, exist_ok=True)

    for img_name in X_test:
        src_path = os.path.join(source_root, img_name)
        dst_path = os.path.join(dest_root, img_name)

        if not os.path.exists(src_path):
            print(f"Warning: {src_path} does not exist!")
            continue

        # Handle errors per file: one failing move must not abort the rest.
        # shutil.Error is a subclass of OSError, so it is covered here too.
        try:
            shutil.move(src_path, dst_path)
        except OSError as e:
            print(str(e))


In [None]:
# Move the test split out of full_data/images into full_data/test/images.
input_path = os.path.join(project_base, "data/full_data/images")
dest_path = os.path.join(project_base, "data/full_data/test/images")
move_test_images(test_images, input_path, dest_path)

In [None]:
def export_to_txt_files(train_images, test_images, dest_root):
  """Write the train and test file names to ``train.txt`` and ``test.txt``.

  Args:
    train_images: Names belonging to the training split.
    test_images: Names belonging to the test split.
    dest_root: Folder that receives the two text files; created if missing.

  Each file contains one name per line. The earlier version wrote
  ``train_images`` twice to ``dest_root`` itself and never wrote
  ``test_images``; ``dest_root`` is now treated as a folder so both lists can
  be stored side by side.
  """
  os.makedirs(dest_root, exist_ok=True)

  for file_name, items in (("train.txt", train_images), ("test.txt", test_images)):
    with open(os.path.join(dest_root, file_name), "w") as f:
      f.writelines(f"{item}\n" for item in items)


In [None]:
def split_json(json_path, train_images, test_images, output_path_train, output_path_test):
  """Split a JSON object into train and test files by image name.

  An entry with key ``k`` goes to the train file when ``"<k>.png"`` is in
  ``train_images`` and to the test file when it is in ``test_images``.
  Entries matching neither list are dropped.

  Args:
    json_path: JSON file whose top level is an object.
    train_images: Image file names of the training split.
    test_images: Image file names of the test split.
    output_path_train: Where to write the training entries.
    output_path_test: Where to write the test entries.
  """
  with open(json_path, "r") as f:
    data = json.load(f)

  # Sets make each membership test O(1) instead of scanning a list per key.
  train_names = set(train_images)
  test_names = set(test_images)

  train_json = {key: value for key, value in data.items() if f"{key}.png" in train_names}
  test_json = {key: value for key, value in data.items() if f"{key}.png" in test_names}

  for output_path, subset in ((output_path_train, train_json), (output_path_test, test_json)):
    # open(..., "w") does not create missing folders.
    parent = os.path.dirname(output_path)
    if parent:
      os.makedirs(parent, exist_ok=True)
    with open(output_path, "w") as f:
      json.dump(subset, f, indent=4)





In [None]:
# Rebuild both splits from disk: the names currently in the train and test image folders.
train_images = os.listdir(os.path.join(project_base, "data/full_data/train/images"))
test_images = os.listdir(os.path.join(project_base, "data/full_data/test/images"))

In [None]:
# Keypoint JSON covering the whole dataset; it is split by split_json in a later cell.
gt_json_path = os.path.join(project_base, "data/key_points_data/2D_keypoints.json")

In [None]:
# Write the train and test portions of the keypoint JSON into their own folders.
output_train_json = os.path.join(project_base, "data/key_points_data/train/2D_keypoints.json")
output_test_json = os.path.join(project_base, "data/key_points_data/test/2D_keypoints.json")

split_json(gt_json_path, train_images, test_images, output_train_json, output_test_json)

In [None]:
# Source and destination folders for the files of the test split.
# NOTE(review): the variable is called cropped_images but points at the
# heatmaps folder — confirm the intended source.
cropped_images = os.path.join(project_base, "data/key_points_data/heatmaps")
dest_root = os.path.join(project_base, "data/key_points_data/test/heatmaps")

In [None]:
# NOTE(review): move_test_images looks for the names in test_images unchanged,
# whereas move_test_heatmaps (defined in the next cell) looks in this same
# folder for the same names with a .pt extension — confirm which of the two
# matches the files actually stored here.
move_test_images(test_images,cropped_images, dest_root)

In [None]:
def move_test_heatmaps(X_test, source_root, dest_root):
    """Move the ``.pt`` file belonging to each named image into ``dest_root``.

    For every image name the extension is replaced by ``.pt`` and the file
    with that name is moved from ``source_root`` to ``dest_root``.

    Args:
        X_test: Image file names of the test split (e.g. ``"01_0000.png"``).
        source_root: Folder currently holding the ``.pt`` files.
        dest_root: Folder to move them into; created if it does not exist.

    Missing source files are reported with a warning and skipped. A file that
    fails to move is reported and skipped as well, so the remaining files are
    still processed.
    """
    os.makedirs(dest_root, exist_ok=True)

    for img_name in X_test:
        # splitext strips whatever extension is present, not just 4 characters.
        heatmap_name = f"{os.path.splitext(img_name)[0]}.pt"
        src_path = os.path.join(source_root, heatmap_name)
        dst_path = os.path.join(dest_root, heatmap_name)

        if not os.path.exists(src_path):
            print(f"Warning: {src_path} does not exist!")
            continue

        # Handle errors per file: one failing move must not abort the rest.
        # shutil.Error is a subclass of OSError, so it is covered here too.
        try:
            shutil.move(src_path, dst_path)
        except OSError as e:
            print(str(e))

In [None]:
# Folder holding the heatmap files and the folder that receives those of the test split.
heatmaps = os.path.join(project_base, "data/key_points_data/heatmaps")
dest_root = os.path.join(project_base, "data/key_points_data/test/heatmaps")

In [None]:
# Move the .pt file of every test image into the test heatmaps folder.
move_test_heatmaps(test_images,heatmaps, dest_root)