In [1]:
import os
import shutil
from sklearn.model_selection import train_test_split
import zipfile

# Build a per-class train/test copy of an image-folder dataset and zip it.
#
# Expected input layout: ``dataset_dir/<class_name>/<file>``. The output is
# ``final_output_dir/{train,test}/<class_name>/<file>`` plus a zip archive
# whose entries are stored relative to ``final_output_dir``.

# Define paths
working_dir = os.getcwd()  # Current working directory
dataset_dir = "/kaggle/input/dfer-facial"  # Source dataset folder
final_output_dir = os.path.join(working_dir, "final_dataset", "Train_Test")
zip_output_path = os.path.join(working_dir, "final_dataset.zip")  # Zipped file in Kaggle working

# Parameters
test_size = 0.2  # 20% test set
random_state = 42  # Seed passed to train_test_split

# Create directories for train-test splits
train_dir = os.path.join(final_output_dir, "train")
test_dir = os.path.join(final_output_dir, "test")
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# List all classes (one subfolder per class). os.listdir() returns entries in
# arbitrary order, so sort to keep the run reproducible.
classes = sorted(
    d for d in os.listdir(dataset_dir)
    if os.path.isdir(os.path.join(dataset_dir, d))
)

# Perform the train-test split for each class independently
for cls in classes:
    cls_path = os.path.join(dataset_dir, cls)

    # Keep regular files only (shutil.copy cannot copy a directory) and sort
    # them: the seeded shuffle is only reproducible if its input order is.
    images = sorted(
        name for name in os.listdir(cls_path)
        if os.path.isfile(os.path.join(cls_path, name))
    )

    # Create class directories in train and test folders
    os.makedirs(os.path.join(train_dir, cls), exist_ok=True)
    os.makedirs(os.path.join(test_dir, cls), exist_ok=True)

    if len(images) < 2:
        # train_test_split raises ValueError when either side of the split
        # would be empty, so keep whatever exists in the training set.
        print(
            f"Warning: class '{cls}' has {len(images)} file(s); "
            "skipping the split and placing them in train."
        )
        train_images, test_images = images, []
    else:
        train_images, test_images = train_test_split(
            images, test_size=test_size, random_state=random_state
        )

    # Copy (not move) each file into its split folder; the source dataset
    # is left untouched.
    for split_dir, split_images in ((train_dir, train_images), (test_dir, test_images)):
        for img in split_images:
            src = os.path.join(cls_path, img)
            dst = os.path.join(split_dir, cls, img)
            shutil.copy(src, dst)

# Create a zip file of the final dataset
with zipfile.ZipFile(zip_output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    for root, dirs, files in os.walk(final_output_dir):
        dirs.sort()  # In-place sort makes os.walk descend in a stable order
        for file in sorted(files):
            file_path = os.path.join(root, file)
            arcname = os.path.relpath(file_path, final_output_dir)  # Relative path for the zip
            zipf.write(file_path, arcname)

print(f"Train-test split completed! The zipped dataset is available at: {zip_output_path}")


Train-test split completed! The zipped dataset is available at: /kaggle/working/final_dataset.zip
