In [None]:
import re
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier

from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset, random_split

from datasets import load_dataset

In [7]:
# Fetch TrashNet from the Hugging Face Hub; the repository id is passed by keyword.
ds = load_dataset(path="garythung/trashnet")

### Inspecting the Dataset

This section prints the overall dataset structure and previews the first five samples from the `train` split, which is the only split listed in the printed structure.  
It helps verify that the dataset has been loaded correctly and provides an overview of the data fields, including images and their corresponding labels.


In [8]:
# Show the DatasetDict layout, then the first five rows of the "train" split.
# The printed structure lists "train" as the only split, so there is no
# ready-made "test" split to index here.
print(ds)
train_ds = ds["train"]
print(train_ds[0:5])

DatasetDict({
    train: Dataset({
        features: ['image', 'label'],
        num_rows: 5054
    })
})
{'image': [<PIL.Image.Image image mode=RGB size=3024x4032 at 0x1F2A94F3BD0>, <PIL.Image.Image image mode=RGB size=3024x4032 at 0x1F290BC4E10>, <PIL.Image.Image image mode=RGB size=4032x3024 at 0x1F28FF31FD0>, <PIL.Image.Image image mode=RGB size=3024x4032 at 0x1F2A98E00D0>, <PIL.Image.Image image mode=RGB size=3024x4032 at 0x1F2A97BB7D0>], 'label': [0, 0, 0, 0, 0]}


### CNN Image Classification on TrashNet Dataset

This section trains a convolutional neural network (CNN) on the TrashNet dataset using PyTorch.  
The process includes loading and preprocessing images, splitting the dataset into training and testing sets, defining a CNN architecture, and training the model for three epochs.  
After training, the model is evaluated on the test set to compute overall classification accuracy.


In [None]:
# --- Load dataset ---
# Alias the previously loaded dataset object (ds) under the name used below.
dataset = ds

# --- Preprocessing ---
# Pipeline applied to every PIL image before it reaches the network:
#   * resize to a fixed 64x64 grid,
#   * convert the PIL image to a PyTorch tensor,
#   * normalise with mean 0.5 and std 0.5, i.e. (x - 0.5) / 0.5, which maps
#     values in [0, 1] onto [-1, 1].
transform = transforms.Compose(
    [
        transforms.Resize(size=(64, 64)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Custom PyTorch Dataset class for handling the Hugging Face dataset
class TrashNetTorch(Dataset):
    """Adapter exposing a Hugging Face image dataset as a PyTorch ``Dataset``.

    Each item is an ``(image, label)`` pair built from the ``"image"`` and
    ``"label"`` fields of the corresponding row of the wrapped dataset.
    """

    def __init__(self, hf_dataset, transform=None):
        """Store the wrapped dataset and an optional per-image transform.

        Args:
            hf_dataset: Indexable, sized collection whose rows are mappings
                with ``"image"`` and ``"label"`` keys.
            transform: Optional callable applied to the RGB-converted image.
        """
        self.data = hf_dataset
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the wrapped dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair stored at position ``idx``.

        The image is converted to RGB and, when a transform was supplied,
        passed through it; the label is returned as stored.
        """
        # Fetch the row exactly once. Every lookup materialises the whole row,
        # so indexing separately for the image and for the label would load
        # (and, for Hugging Face datasets, presumably decode) the image twice.
        row = self.data[idx]
        image = row["image"].convert("RGB")  # Ensures image is in RGB format
        label = row["label"]
        if self.transform:
            image = self.transform(image)
        return image, label

# --- Manual split (80% train, 20% test) ---
# Wraps the Hugging Face "train" split so every image goes through `transform`.
full_data = TrashNetTorch(dataset["train"], transform)

# 80% of the samples go to training; the test set takes the remainder so that
# no sample is dropped by the integer truncation above.
train_size = int(0.8 * len(full_data))
test_size = len(full_data) - train_size

# Seed a dedicated generator so the same samples land in the test set on every
# run. Without it the split, and therefore the reported accuracy, changes each
# time the cell is executed.
split_generator = torch.Generator().manual_seed(42)
train_data, test_data = random_split(
    full_data, [train_size, test_size], generator=split_generator
)

# Training batches are reshuffled every epoch; evaluation keeps a fixed order.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
test_loader = DataLoader(test_data, batch_size=32, shuffle=False)

# --- CNN Model ---
class CNN(nn.Module):
    """Small convolutional classifier for 3-channel 64x64 images.

    Two conv -> ReLU -> 2x2 max-pool stages reduce a 64x64 input to 32 feature
    maps of 16x16, which are flattened and passed through two fully connected
    layers that emit one raw score (logit) per class.
    """

    def __init__(self, num_classes=6):
        """Build the layers.

        Args:
            num_classes: Number of output scores; defaults to 6.
        """
        super().__init__()
        # Feature extractor: each pooling layer halves the height and width.
        self.conv = nn.Sequential(
            nn.Conv2d(3, 16, 3, padding=1),   # 3 -> 16 channels, size preserved
            nn.ReLU(),
            nn.MaxPool2d(2, 2),               # 64x64 -> 32x32
            nn.Conv2d(16, 32, 3, padding=1),  # 16 -> 32 channels, size preserved
            nn.ReLU(),
            nn.MaxPool2d(2, 2),               # 32x32 -> 16x16
        )
        # Classifier head: 32 * 16 * 16 matches the flattened conv output for
        # 64x64 inputs; other input sizes would not fit the first Linear layer.
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32 * 16 * 16, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Return class scores for a batch ``x`` of shape (N, 3, 64, 64)."""
        return self.fc(self.conv(x))

# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Build the network, then move its parameters onto the chosen device.
model = CNN()
model = model.to(device)

# Cross-entropy loss on the network outputs, optimised with Adam (lr = 1e-3).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# --- Training ---
# Single source of truth for the epoch count: it drives both the loop bound and
# the "Epoch i/N" progress label, which otherwise have to be edited in lockstep.
NUM_EPOCHS = 3

for epoch in range(NUM_EPOCHS):
    model.train()  # Sets model to training mode
    total_loss = 0
    # Progress bar over the batches of this epoch (removed once the epoch ends)
    progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}", leave=False)
    for images, labels in progress:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()               # Resets gradients before each batch
        outputs = model(images)             # Forward pass
        loss = criterion(outputs, labels)   # Computes loss
        loss.backward()                     # Backpropagation
        optimizer.step()                    # Updates model parameters
        # Read the scalar once and reuse it; each .item() call copies the value
        # back to the host.
        batch_loss = loss.item()
        total_loss += batch_loss            # Accumulates loss for monitoring
        progress.set_postfix(loss=batch_loss)  # Displays current batch loss
    # Average of the per-batch losses for the completed epoch
    print(f"Epoch {epoch+1} done. Avg loss: {total_loss/len(train_loader):.4f}")

# --- Evaluation ---
# Switch to evaluation mode and count how many test samples are classified
# correctly; gradients are not needed, so tracking is disabled.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in tqdm(test_loader, desc="Evaluating"):
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest output score in each row
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Accuracy as a percentage of all evaluated samples
acc = 100 * correct / total
print(f"✅ Final Test Accuracy: {acc:.2f}%")

                                                                        

Epoch 1 done. Avg loss: 1.3705


                                                                        

Epoch 2 done. Avg loss: 1.0448


                                                                        

Epoch 3 done. Avg loss: 0.8892


Evaluating: 100%|██████████| 32/32 [02:49<00:00,  5.29s/it]

✅ Final Test Accuracy: 62.81%



