In [6]:
# Mount Google Drive into the Colab filesystem at /content/drive; the dataset
# paths used later in this notebook live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Dataset Splitter

**Import Necessary Packages**

In [7]:
import os
import shutil
import numpy as np

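**Splitter: shuffle the image/mask pairs and copy them into train (70%), validation (15%) and test (15%) folders**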

In [8]:
# Set the paths to your dataset directories
dataset_path = '/content/drive/MyDrive/FYP/UNET'  # Adjust this path
images_path = os.path.join(dataset_path, 'frames')
masks_path = os.path.join(dataset_path, 'masks')

# Create directories for the splits (exist_ok keeps this cell re-runnable)
splits = ['train', 'val', 'test']
for split in splits:
    os.makedirs(os.path.join(dataset_path, split, 'images'), exist_ok=True)
    os.makedirs(os.path.join(dataset_path, split, 'masks'), exist_ok=True)

# List images and masks.
# - Keep regular files only: os.listdir also returns sub-directories, which
#   shutil.copy cannot copy.
# - Sort the names: os.listdir returns entries in arbitrary order, so the
#   seeded shuffle below is only reproducible when it starts from a fixed order.
image_files = sorted(
    name for name in os.listdir(images_path)
    if os.path.isfile(os.path.join(images_path, name))
)
mask_files = sorted(
    name for name in os.listdir(masks_path)
    if os.path.isfile(os.path.join(masks_path, name))
)

# An empty dataset would otherwise "split" silently into three empty folders
if not image_files:
    raise ValueError(f"No image files found in {images_path}")

# Ensure both lists are the same length
assert len(image_files) == len(mask_files), "Mismatch between images and masks count!"

# Masks are copied using the image file names, so equal counts are not enough:
# every image needs a mask with exactly the same file name.
unpaired_files = sorted(set(image_files) ^ set(mask_files))
if unpaired_files:
    raise ValueError(
        f"Images and masks are not paired by file name, e.g.: {unpaired_files[:5]}"
    )

# Shuffle the dataset
np.random.seed(42)  # For reproducibility
np.random.shuffle(image_files)

# Define split sizes
total_images = len(image_files)
train_size = int(0.7 * total_images)  # 70% for training
val_size = int(0.15 * total_images)    # 15% for validation
test_size = total_images - train_size - val_size  # Remainder (~15%) for testing

# Split the dataset into three consecutive, non-overlapping slices
train_images = image_files[:train_size]
val_images = image_files[train_size:train_size + val_size]
test_images = image_files[train_size + val_size:]

# Copy images and masks to respective directories
def copy_files(file_list, source_folder, dest_folder):
    """Copy the named files from ``source_folder`` into ``dest_folder``.

    Args:
        file_list: Iterable of file names (not full paths) to copy.
        source_folder: Directory that currently contains the files.
        dest_folder: Directory to copy into; created if it does not exist.

    Raises:
        FileNotFoundError: If a name in ``file_list`` is missing from
            ``source_folder``.
    """
    # Create the destination on demand so the helper also works for a folder
    # that has not been created yet.
    os.makedirs(dest_folder, exist_ok=True)
    for file_name in file_list:
        shutil.copy(os.path.join(source_folder, file_name), os.path.join(dest_folder, file_name))

# Pair each split name with the image file names assigned to it
split_assignments = [
    ('train', train_images),
    ('val', val_images),
    ('test', test_images),
]

# First pass: copy the images of every split
for split_name, split_files in split_assignments:
    copy_files(split_files, images_path, os.path.join(dataset_path, split_name, 'images'))

# Second pass: copy the masks, looked up by the same file names as their images
for split_name, split_files in split_assignments:
    copy_files(split_files, masks_path, os.path.join(dataset_path, split_name, 'masks'))

print("Dataset split into train, val, and test sets successfully!")

Dataset split into train, val, and test sets successfully!
