## Setup

In [1]:
import os
import shutil
import random

## The table data with labels (copy of the collect data)

In [2]:
# Lookup table of gaze targets, stored as a list of rows.
# Row 0 is the header; every following row holds, in order:
#   [0] the label, [1] the gaze direction, [2] the sentence for that direction.
table_data = [
    ["Label", "Gaze Direction", "Sentence"],
    ["forward", "Forward", "Look forward."],
    ["left", "Left", "Look to the left."],
    ["right", "Right", "Look to the right."],
    ["mirror_interior", "Interior Mirror", "Look at the interior mirror."],
    ["mirror_right", "Right Side Mirror", "Look at the right side mirror."],
    ["mirror_left", "Left Side Mirror", "Look at the left side mirror."],
    ["shoulder_right", "Right Shoulder", "Look over your right shoulder."],
    ["shoulder_left", "Left Shoulder", "Look over your left shoulder."],
    ["dashboard_straight_down", "Dashboard Straight Down", "Look straight down at the dashboard."],
    ["dashboard_down_right", "Dashboard Down Towards Center Console", "Look down towards the center console."],
    ["forward_right", "Forward Right", "Look forward and to the right."],
    ["forward_left", "Forward Left", "Look forward and to the left."]
]

# To access, for example, the row for "forward_right" and its Sentence column:
# the header row occupies index 0, so the 11th data row ("forward_right") sits
# at index 11 of the list (not index 10).
# print(table_data[11][2])  # Output: Look forward and to the right.


### Global Variables

In [3]:
# Share of each directory to copy, expressed as a whole-number percentage
copy_percentage = 25  # Adjust as needed

# Names of the dataset splits
types = ["train", "test", "val"]

# Root directory of the full dataset
base_path = r"C:\GazeDetection\test_recording_correct_upload_3"

# Label names are the first column of table_data; the slice drops the header row
labels = [cells[0] for cells in table_data[1:]]

## Copy a specified percentage of the files from the full dataset (`base_path`) to a new subset (`{base_path}_{copy_percentage}percent`)

In [None]:
# Seed for the subset selection. With a fixed seed and a sorted file listing,
# re-running this cell picks the same files again instead of adding a fresh
# random batch on top of the output of an earlier run.
subset_seed = 42


def _copy_random_subset(source_dir, dest_dir, percentage, rng):
    """Copy a random share of the regular files in source_dir into dest_dir.

    Args:
        source_dir: Directory to sample files from; must exist.
        dest_dir: Directory to copy into; created if it does not exist.
        percentage: Whole-number percentage of the files to copy. At least one
            file is copied whenever source_dir contains any file, and never
            more than the number of files available.
        rng: random.Random instance used for the selection.

    Returns:
        A (total_files, copied_files) tuple.
    """
    os.makedirs(dest_dir, exist_ok=True)

    # Sorted so the selection depends only on the seed, not on the order in
    # which the OS lists the directory. Sub-directories are skipped because
    # shutil.copy2 only copies regular files.
    candidates = sorted(
        name
        for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )
    if not candidates:
        return 0, 0

    num_to_copy = min(len(candidates), max(1, len(candidates) * percentage // 100))
    for name in rng.sample(candidates, num_to_copy):
        shutil.copy2(os.path.join(source_dir, name), os.path.join(dest_dir, name))
    return len(candidates), num_to_copy


# Fail before creating any output directory if the source path is wrong.
if not os.path.isdir(base_path):
    raise FileNotFoundError(f"Source dataset directory not found: {base_path}")

# New main directory based on percentage, with one sub-directory per dataset type
new_base_path = f"{base_path}_{copy_percentage}percent"
for dataset_type in types:
    os.makedirs(os.path.join(new_base_path, dataset_type), exist_ok=True)

subset_rng = random.Random(subset_seed)

for label in labels:
    print(f"Processing label: {label}")
    # Named dataset_type rather than type so the builtin type() is not
    # left rebound to a string in the kernel namespace after this cell runs.
    for dataset_type in types:
        source_dir = os.path.join(base_path, dataset_type, label)
        dest_dir = os.path.join(new_base_path, dataset_type, label)
        if not os.path.isdir(source_dir):
            # A label missing from one split should not abort the whole copy.
            print(f"Skipping {source_dir}: directory not found.")
            continue

        total_files, num_files_copied = _copy_random_subset(
            source_dir, dest_dir, copy_percentage, subset_rng
        )

        print(f"{dataset_type.capitalize()} directory: {total_files} files.")
        print(f"Copied {num_files_copied} files to {dest_dir} for {label}")