In [1]:
import os  # Module for interacting with the operating system

from tensorflow.keras.preprocessing import image_dataset_from_directory  # Load images from a directory and create a dataset



This section prepares the image dataset for training. It covers the following steps:

- Loading images from a specified directory.
- Automatically inferring labels from subdirectory names.
- Shuffling and batching the images.
- Resizing images to a consistent size.
- Splitting the data into training and validation sets.
- Verifying the dataset structure by printing out class names and inspecting a batch of images and labels.

In [2]:
# Configuration: everything a reader might want to tune lives here.
BATCH_SIZE = 32  # Number of images to process in a single batch
IMG_SIZE = (224, 224)  # Every image is resized to 224x224 pixels
VALIDATION_SPLIT = 0.2  # Reserve 20% of the data for validation
SEED = 42  # Random seed for reproducibility; shared by both subsets (see below)

# The dataset root can be overridden without editing the notebook by setting
# the WASTE_IMAGES_DIR environment variable. When it is not set, the original
# location is used, so existing setups keep working unchanged.
directory = os.environ.get("WASTE_IMAGES_DIR", "D:/GROUP6/images/images/")

# Arguments common to the training and validation subsets, written once so the
# two calls cannot drift apart. Keras requires a seed whenever validation_split
# is combined with shuffling, and both subsets must be built with the same
# shuffle/seed/validation_split values so that they do not overlap.
shared_dataset_kwargs = dict(
    labels='inferred',  # Automatically infers labels from subdirectory names
    label_mode='categorical',  # Return labels as one-hot encoded vectors
    shuffle=True,  # Shuffle the dataset to ensure random distribution
    batch_size=BATCH_SIZE,
    image_size=IMG_SIZE,
    validation_split=VALIDATION_SPLIT,
    seed=SEED,
)

# Define the training dataset (the 1 - VALIDATION_SPLIT portion of the files)
train_dataset = image_dataset_from_directory(
    directory,
    subset='training',
    **shared_dataset_kwargs,
)

# Define the validation dataset (the remaining VALIDATION_SPLIT portion)
validation_dataset = image_dataset_from_directory(
    directory,
    subset='validation',
    **shared_dataset_kwargs,
)

Found 15000 files belonging to 30 classes.
Using 12000 files for training.
Found 15000 files belonging to 30 classes.
Using 3000 files for validation.


In [3]:
# Sanity-check the labels Keras inferred from the subdirectory names.
class_names = train_dataset.class_names
num_classes = len(class_names)

# Report how many classes were found, then list them one per line.
print(f"Number of waste classes: {num_classes}")
print("Waste class names:", *class_names, sep="\n")

Number of waste classes: 30
Waste class names:
aerosol_cans
aluminum_food_cans
aluminum_soda_cans
cardboard_boxes
cardboard_packaging
clothing
coffee_grounds
disposable_plastic_cutlery
eggshells
food_waste
glass_beverage_bottles
glass_cosmetic_containers
glass_food_jars
magazines
newspaper
office_paper
paper_cups
plastic_cup_lids
plastic_detergent_bottles
plastic_food_containers
plastic_shopping_bags
plastic_soda_bottles
plastic_straws
plastic_trash_bags
plastic_water_bottles
shoes
steel_food_cans
styrofoam_cups
styrofoam_food_containers
tea_bags


In [4]:
# Pull a single batch from the training dataset and summarise it:
# the image tensor shape, the label tensor shape, and the raw label values.
for image_batch, label_batch in train_dataset.take(1):
    batch_summary = (
        f"Image batch shape: {image_batch.shape}",
        f"Label batch shape: {label_batch.shape}",
        f"Labels: {label_batch.numpy()}",  # .numpy() converts the label tensor to an array
    )
    print(*batch_summary, sep="\n")

Image batch shape: (32, 224, 224, 3)
Label batch shape: (32, 30)
Labels: [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0