# In [5]:
import os
import cv2
import numpy as np
import shutil
import random

# In [6]:
def _copy_with_labels(file_names, src_folder, dst_folder):
    """Copy each named file into dst_folder and write a label image beside it.

    For a file ``<stem><ext>`` the label image is saved as
    ``<stem>_label.jpg`` in the same destination folder.

    Raises:
        ValueError: if the first five characters of a file name are not an
            integer.
    """
    for file_name in file_names:
        shutil.copy(os.path.join(src_folder, file_name),
                    os.path.join(dst_folder, file_name))

        # The label is parsed from the first five characters of the name.
        label = int(file_name[:5])
        # 100x100 single-channel image filled with the label value.
        # NOTE(review): uint8 only holds 0..255, so larger labels cannot be
        # stored faithfully, and JPEG is a lossy format -- confirm that
        # downstream readers tolerate both.
        label_image = np.full((100, 100), label, dtype=np.uint8)

        # splitext strips the real extension whatever its length, instead of
        # assuming it is exactly four characters (".jpg").
        stem = os.path.splitext(file_name)[0]
        cv2.imwrite(os.path.join(dst_folder, stem + '_label.jpg'),
                    label_image)


def split_dataset(input_folder, output_folder='CNN_Train', train_ratio=0.8):
    """Copy a random subset of a pre-split dataset and write label images.

    Files are read from ``<input_folder>/Face_Cropped_Split/Train`` and
    ``<input_folder>/Face_Cropped_Split/Test``. Both listings are shuffled
    with the global ``random`` state. The first ``train_ratio`` fraction of
    the train files is copied to ``<output_folder>/Train``; the files after
    the ``train_ratio`` mark of the test listing are copied to
    ``<output_folder>/Test``. Every copied file gets a companion
    ``*_label.jpg`` image in the same output folder.

    Args:
        input_folder: Folder that contains ``Face_Cropped_Split``.
        output_folder: Destination root; ``Train`` and ``Test`` are created
            inside it if missing.
        train_ratio: Fraction used to compute the split index of each
            listing.

    Returns:
        Tuple ``(output_train_folder, output_test_folder)``.

    Raises:
        FileNotFoundError: if either input sub-folder does not exist.
        ValueError: if a file name does not start with a five-character
            integer label.
    """
    # Define output folder paths
    output_train_folder = os.path.join(output_folder, 'Train')
    output_test_folder = os.path.join(output_folder, 'Test')

    print("Input folder:", input_folder)
    print("Output folder:", output_folder)
    print("Train folder:", output_train_folder)
    print("Test folder:", output_test_folder)

    # Create Train and Test folders; exist_ok avoids the check-then-create
    # race of a separate os.path.exists test.
    os.makedirs(output_train_folder, exist_ok=True)
    os.makedirs(output_test_folder, exist_ok=True)

    # Get list of files
    input_train_folder = os.path.join(input_folder, 'Face_Cropped_Split/Train')
    input_test_folder = os.path.join(input_folder, 'Face_Cropped_Split/Test')

    print("Input train folder:", input_train_folder)
    print("Input test folder:", input_test_folder)

    train_files = os.listdir(input_train_folder)
    test_files = os.listdir(input_test_folder)

    print("Train files:", train_files)
    print("Test files:", test_files)

    # Shuffle the files (train first, then test, so a seeded run consumes
    # the random state in a fixed order).
    random.shuffle(train_files)
    random.shuffle(test_files)

    # Each listing gets its own split index. Slicing the test listing with
    # an index derived from the train listing's length selects nothing
    # whenever the test folder has no more files than that index.
    # NOTE(review): the tail of the test listing is kept here; confirm this
    # is the intended selection (as opposed to copying every test file).
    num_train = int(len(train_files) * train_ratio)
    num_test = int(len(test_files) * train_ratio)

    # Copy files to train and test folders and extract labels
    _copy_with_labels(train_files[:num_train],
                      input_train_folder, output_train_folder)
    _copy_with_labels(test_files[num_test:],
                      input_test_folder, output_test_folder)

    # Return the paths to the train and test folders
    return output_train_folder, output_test_folder