# **Dataset Splitting**

The dataset is divided into **70% training, 15% validation, and 15% testing sets** to ensure proper evaluation.  
- **Training set** is used for model learning.  
- **Validation set** is used to tune hyperparameters and monitor performance during training.  
- **Test set** provides an unbiased evaluation of the final model's accuracy.


In [1]:
import os
import shutil
import random

# Paths
raw_dir = "dataset"
output_dir = "resnet_dataset"

# Split ratios: 70% train, 15% validation. The test split is not sized from
# test_ratio; it receives every image left over after train and val.
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# Fixed seed so that re-running this cell assigns each image to the same split.
split_seed = 42

classes = ["healthy child face", "malnourished child face"]


def split_dataset(src_root, dst_root, class_names, train_frac, val_frac, seed=42):
    """Copy each class folder of *src_root* into train/val/test folders.

    For every class, the regular files directly inside
    ``src_root/<class>`` are shuffled and copied to
    ``dst_root/<split>/<class>`` for split in ``train``, ``val``, ``test``.
    Sub-directories of a class folder are ignored.

    The split is deterministic: file names are sorted before shuffling
    (``os.listdir`` returns entries in arbitrary order) and the shuffle uses
    a private ``random.Random(seed)``. Without this, running the split a
    second time into the same output folder would copy an image into a
    different split while its earlier copy stays behind, so the same image
    could appear in both the training and the evaluation folders.

    Args:
        src_root: Directory containing one sub-folder per class.
        dst_root: Directory under which the split folders are created.
        class_names: Names of the class sub-folders to process.
        train_frac: Fraction of each class copied to ``train``.
        val_frac: Fraction of each class copied to ``val``; the remaining
            files go to ``test``.
        seed: Seed for the shuffle.

    Returns:
        A dict mapping each class name to
        ``{"train": n, "val": n, "test": n}`` file counts.

    Raises:
        ValueError: If a fraction is negative or the two fractions exceed 1.
        FileNotFoundError: If a class folder does not exist under *src_root*.
    """
    if train_frac < 0 or val_frac < 0 or train_frac + val_frac > 1:
        raise ValueError(
            "train_frac and val_frac must be non-negative and sum to at most 1, "
            f"got {train_frac} and {val_frac}"
        )

    rng = random.Random(seed)
    counts = {}

    for cls in class_names:
        src_folder = os.path.join(src_root, cls)
        # Sort first: the shuffle result depends on the input order, and the
        # directory listing order is not guaranteed to be stable.
        images = sorted(
            f for f in os.listdir(src_folder)
            if os.path.isfile(os.path.join(src_folder, f))
        )
        rng.shuffle(images)

        n_train = int(len(images) * train_frac)
        n_val = int(len(images) * val_frac)
        partitions = {
            "train": images[:n_train],
            "val": images[n_train:n_train + n_val],
            "test": images[n_train + n_val:],
        }

        counts[cls] = {}
        for split, names in partitions.items():
            dst_folder = os.path.join(dst_root, split, cls)
            # Create the folder even when the partition is empty so that all
            # three split folders always exist for every class.
            os.makedirs(dst_folder, exist_ok=True)
            for fname in names:
                shutil.copy(
                    os.path.join(src_folder, fname),
                    os.path.join(dst_folder, fname),
                )
            counts[cls][split] = len(names)

    return counts


if __name__ == "__main__":
    split_dataset(
        raw_dir, output_dir, classes, train_ratio, val_ratio, seed=split_seed
    )
    print("✅ Dataset split into train/val/test successfully (70-15-15)!")


✅ Dataset split into train/val/test successfully (70-15-15)!
