Dataset used
The data comes from TrashNet, a collection of pre-sorted images of waste items. Items in the `trash` class are not recyclable; the remaining classes are recyclable materials: paper, cardboard, glass, metal and plastic.

The training set contains 1211 images and the test set contains 508 images.

The images are pre-labelled.


In [1]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt

In [5]:
import os
import shutil
import numpy as np
from sklearn.model_selection import train_test_split

def _copy_images(image_list, dest_dir):
    """Copy every file in *image_list* into the existing directory *dest_dir*."""
    for image in image_list:
        shutil.copy(image, os.path.join(dest_dir, os.path.basename(image)))


def create_splits(data_dir, output_dir, test_size=0.10, val_size=0.15):
    """Copy a class-per-folder image dataset into train/val/test folders.

    Each sub-directory of ``data_dir`` is treated as one class. Its image
    files (``.png``, ``.jpg``, ``.jpeg``, matched case-insensitively) are
    split per class and copied to ``output_dir/<split>/<class_name>/`` where
    ``<split>`` is ``train``, ``val`` or ``test``.

    Args:
        data_dir: Directory containing one sub-directory per class.
        output_dir: Directory under which ``train``, ``val`` and ``test``
            are created (if missing).
        test_size: Fraction of each class's images placed in the test set.
        val_size: Fraction of each class's images (relative to the whole
            class, not to what remains after the test split) placed in the
            validation set.

    Note:
        Files already present in ``output_dir`` are never removed, so
        re-running with different fractions or data can leave stale copies
        behind. Start from an empty ``output_dir`` for a clean split.
    """
    image_extensions = ('.png', '.jpg', '.jpeg')

    split_dirs = {
        split: os.path.join(output_dir, split)
        for split in ('train', 'val', 'test')
    }
    for split_dir in split_dirs.values():
        os.makedirs(split_dir, exist_ok=True)

    # Sorted so classes are processed (and reported) in a stable order.
    for class_name in sorted(os.listdir(data_dir)):
        class_dir = os.path.join(data_dir, class_name)
        if not os.path.isdir(class_dir):
            continue

        # os.listdir returns entries in arbitrary order; sorting makes the
        # random_state-seeded split below reproducible across runs/machines.
        images = [
            os.path.join(class_dir, name)
            for name in sorted(os.listdir(class_dir))
            if name.lower().endswith(image_extensions)
        ]
        if not images:
            # train_test_split raises on an empty list; a stray folder
            # without images should not abort the whole run.
            print(f"Class {class_name}: no images found, skipping")
            continue

        train_val, test_images = train_test_split(
            images, test_size=test_size, random_state=42
        )
        # val_size is a fraction of the full class, so rescale it to the
        # portion that is left once the test images have been removed.
        train_images, val_images = train_test_split(
            train_val, test_size=val_size / (1 - test_size), random_state=42
        )
        print(
            f"Class {class_name}: {len(train_images)} training, "
            f"{len(val_images)} validation, {len(test_images)} test"
        )

        split_images = {
            'train': train_images,
            'val': val_images,
            'test': test_images,
        }
        for split, image_list in split_images.items():
            class_dir_out = os.path.join(split_dirs[split], class_name)
            os.makedirs(class_dir_out, exist_ok=True)
            _copy_images(image_list, class_dir_out)

# Source dataset (one sub-folder per class) and destination for the split copies
original_data_dir = r'C:\Users\vidia\OneDrive\Documents\mlp_proj\mlp_assignment\data\dataset-resized'
output_data_dir = r'C:\Users\vidia\OneDrive\Documents\mlp_proj\mlp_assignment\src\dataset_split'

# Build the train/val/test folders using the default split fractions
create_splits(data_dir=original_data_dir, output_dir=output_data_dir)