In [None]:
import os
import shutil
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
dataset_path = "/content/drive/MyDrive/Waste Classification Project/dataset"
output_path = "/content/drive/MyDrive/Waste Classification Project/dataset_processed"

# Collect the image paths of each class sub-folder into its own list.
# os.listdir returns entries in arbitrary order, so the names are sorted to keep the
# seeded train/val/test split reproducible between runs. Entries that are not regular
# files (e.g. stray sub-folders) are skipped because shutil.copy cannot copy them.
hazardous_dir = os.path.join(dataset_path, "hazardous")
non_hazardous_dir = os.path.join(dataset_path, "non-hazardous")

hazardous_files = [
    os.path.join(hazardous_dir, f)
    for f in sorted(os.listdir(hazardous_dir))
    if os.path.isfile(os.path.join(hazardous_dir, f))
]
non_hazardous_files = [
    os.path.join(non_hazardous_dir, f)
    for f in sorted(os.listdir(non_hazardous_dir))
    if os.path.isfile(os.path.join(non_hazardous_dir, f))
]

all_files = hazardous_files + non_hazardous_files
# One label per entry of all_files (1 for hazardous, 0 for non-hazardous).
# NOTE(review): in this notebook these labels only drive stratification and the choice
# of destination folder. flow_from_directory derives its own class indices from the
# folder names in alphanumeric order (hazardous -> 0, non-hazardous -> 1), which is the
# opposite of this encoding -- check `class_indices` before interpreting predictions.
labels = [1] * len(hazardous_files) + [0] * len(non_hazardous_files)

In [None]:
# Stratified 70/15/15 split: first hold out 30% of the data, then cut that hold-out
# in half to obtain the validation and test sets. Both calls use the same fixed seed.
train_files, temp_files, train_labels, temp_labels = train_test_split(
    all_files,
    labels,
    test_size=0.3,
    stratify=labels,
    random_state=42,
)
val_files, test_files, val_labels, test_labels = train_test_split(
    temp_files,
    temp_labels,
    test_size=0.5,
    stratify=temp_labels,
    random_state=42,
)

In [None]:
# Copy files into the folder that matches their split (train, val, test) and class.
def move_files(files, labels, split, output_dir=None):
  """Copy every file into ``<output_dir>/<split>/<class_name>/``.

  Despite its name, this function copies with ``shutil.copy``; the source files
  are left in place.

  Args:
    files: Paths of the files to copy.
    labels: One label per file. A label equal to 1 selects the "hazardous"
      folder; any other value selects "non-hazardous".
    split: Name of the split sub-folder, e.g. "train", "val" or "test".
    output_dir: Root folder of the processed dataset. When None (the default),
      the module-level ``output_path`` is used.

  Raises:
    ValueError: If ``files`` and ``labels`` do not have the same length.
  """
  files = list(files)
  labels = list(labels)
  # zip() would silently drop the unmatched tail; fail loudly before copying anything.
  if len(files) != len(labels):
    raise ValueError(
        f"files and labels must have the same length, got {len(files)} and {len(labels)}"
    )

  base_dir = output_path if output_dir is None else output_dir
  created_dirs = set()
  for file, label in zip(files, labels):
    class_name = "hazardous" if label == 1 else "non-hazardous"
    split_path = os.path.join(base_dir, split, class_name)
    # Create each destination folder once instead of once per copied file.
    if split_path not in created_dirs:
      os.makedirs(split_path, exist_ok=True)
      created_dirs.add(split_path)
    shutil.copy(file, split_path)

In [None]:
# Populate output_path/<split>/<class>/ for every split. The files are copied,
# so the original dataset folder is left untouched.
for split_name, split_files, split_labels in (
    ("train", train_files, train_labels),
    ("val", val_files, val_labels),
    ("test", test_files, test_labels),
):
  move_files(split_files, split_labels, split_name)

In [None]:
# Generator used for the training data: rescales pixel values and applies random
# augmentation on the fly.
datagen = ImageDataGenerator(
    # --- preprocessing ---
    rescale=1.0 / 255,         # scale pixel values into the 0..1 range
    # --- random geometric augmentation ---
    rotation_range=40,         # rotate by up to 40 degrees
    width_shift_range=0.2,     # shift horizontally
    height_shift_range=0.2,    # shift vertically
    shear_range=0.2,           # apply a shear transformation
    zoom_range=0.2,            # zoom in or out
    horizontal_flip=True,      # mirror images left/right at random
    # --- border handling ---
    fill_mode='nearest',       # fill pixels exposed by a transformation
)

In [None]:
# Stream the training images from disk with preprocessing and augmentation applied.
# NOTE(review): class indices are inferred from the sub-folder names in alphanumeric
# order, so 'hazardous' -> 0 and 'non-hazardous' -> 1 here, which is the opposite of
# the 1/0 labels used for the split. Confirm with `train_generator.class_indices`.
train_generator = datagen.flow_from_directory(
    os.path.join(output_path, 'train'),
    target_size=(224, 224),  # Resize images to 224x224
    batch_size=32,
    class_mode='binary',     # Binary classification: hazardous vs non-hazardous
    seed=42                  # Same seed as the split, for reproducible shuffling/augmentation
)

In [None]:
# Evaluation generators: no augmentation, only the same rescaling as the training data.
# shuffle=False keeps the images in a fixed (alphanumeric) order so that predictions
# can be matched against `.classes` / `.filenames` when computing metrics.
test_val_datagen = ImageDataGenerator(rescale=1./255)
val_generator = test_val_datagen.flow_from_directory(
    os.path.join(output_path, 'val'),
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

# Test generator (no augmentation, fixed order)
test_generator = test_val_datagen.flow_from_directory(
    os.path.join(output_path, 'test'),
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False
)

In [None]:
# Final status line: reaching it means every cell above ran without raising.
done_message = "Dataset preprocessing, augmentation, and splitting completed!"
print(done_message)