## Garbage Classifier 🗑️ 🚮 🚯

### ⚙️ Setup

In [1]:
# Import libraries
import wandb
import torch
from glob import glob
import matplotlib.pylab as plt
from torch.utils.data import Dataset
import torch.nn as nn
import numpy as np
from PIL import Image
from torchvision.models import resnet18
from torchvision import transforms, models
import pytorch_lightning as pl
from sklearn.model_selection import StratifiedShuffleSplit

In [2]:
# Constants
EPOCHS = 12
LEARNING_RATE = 2e-4
TEST_SPLIT = 0.2
VAL_SPLIT = 0.2
BATCH_SIZE = 8
NUM_CLASSES = 4
INPUT_SHAPE = (3, 224, 224)
INPUT_SIZE = (1, 3, 256, 256)

In [3]:
# Variables
DATASET_LOCAL_PATH="/Users/redge/Library/CloudStorage/OneDrive-UniversityofCalgary/School/MEng/Winter2024/enel645/ENSF-611-ENEL-645/ENEL 645/A2/small_dataset"
DATASET_REMOTE_PATH="/work/TALC/enel645_2024w/CVPR_2024_dataset"
MODEL_PATH="/Users/redge/Library/CloudStorage/OneDrive-UniversityofCalgary/School/MEng/Winter2024/enel645/ENSF-611-ENEL-645/ENEL 645/A2/best_dataset/garbage_net.pth"

dataset_path = DATASET_LOCAL_PATH
normalized_path = dataset_path
best_model_path = MODEL_PATH

# ↻ Data Loader

In [4]:
# functions from dataset_loader.py

def list_images(images_path: str) -> np.ndarray:
    """
    List all images in the given path.
    """
    images = glob(images_path, recursive=True)
    return np.array(images)

def extract_labels(images: np.ndarray) -> tuple:
    """
    Extract labels from image paths.
    """
    labels = np.array([f.replace("\\", "/").split("/")[-2] for f in images])
    classes = np.unique(labels)
    return labels, classes

def convert_labels_to_int(labels: np.ndarray, classes: np.ndarray) -> np.ndarray:
    """
    Convert string labels to integers.
    """
    label_to_int = {label: i for i, label in enumerate(classes)}
    labels_int = np.array([label_to_int[label] for label in labels])
    return labels_int

def list_data_and_prepare_labels(images_path: str) -> tuple:
    """
    List all images, extract labels, and prepare them for training.
    """
    images = list_images(images_path)
    labels, classes = extract_labels(images)
    labels_int = convert_labels_to_int(labels, classes)
    return images, labels_int, classes

def split_data(images: np.ndarray, labels: np.ndarray, val_split: float, test_split: float, random_state: int = 10) -> tuple:
    """
    Split data into train, validation, and test sets and return them as dictionaries.
    """
    # Splitting the data into dev and test sets
    sss = StratifiedShuffleSplit(n_splits=1, test_size=test_split, random_state=random_state)
    dev_index, test_index = next(sss.split(images, labels))
    dev_images, dev_labels = images[dev_index], labels[dev_index]
    test_images, test_labels = images[test_index], labels[test_index]

    # Splitting the data into train and val sets
    val_size = int(val_split * len(images))
    val_split_adjusted = val_size / len(dev_images)
    sss2 = StratifiedShuffleSplit(n_splits=1, test_size=val_split_adjusted, random_state=random_state)
    train_index, val_index = next(sss2.split(dev_images, dev_labels))

    # Creating train, validation, and test dictionaries
    train_images = images[train_index]
    train_labels = labels[train_index]
    val_images = images[val_index]
    val_labels = labels[val_index]

    train_set = {"X": train_images, "Y": train_labels}
    val_set = {"X": val_images, "Y": val_labels}
    test_set = {"X": test_images, "Y": test_labels}

    return {"Train": train_set, "Validation": val_set, "test": test_set}

In [5]:
# get dataset
images_path = normalized_path + "/**/*.png"
images, labels_int, classes = list_data_and_prepare_labels(images_path)

In [6]:
# split dataset
all_dataset = split_data(images, labels_int, VAL_SPLIT, TEST_SPLIT)
train_set = all_dataset["Train"]
val_set = all_dataset["Validation"]
test_set = all_dataset["Validation"]

### 🤖 Transforms

In [7]:
# from my_transforms.py

torch_vision_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.4120, 0.3768, 0.3407],
        std=[0.2944, 0.2759, 0.2598],
    )
])

torch_vision_transform_test = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.4120, 0.3768, 0.3407],
        std=[0.2944, 0.2759, 0.2598],
    )
])

### 🗑️ Garbage Models

In [8]:
# from garbage_model.py

class GarbageModel(pl.LightningModule):
    def __init__(self, input_shape: tuple, num_classes: int, learning_rate: float = 2e-4, transfer: bool = False):
        super().__init__()

        # log hyperparameters
        self.save_hyperparameters()
        self.learning_rate = learning_rate
        self.input_shape = input_shape

        self.num_classes = num_classes
        
        # transfer learning if pretrained=True
        self.feature_extractor = models.resnet18(pretrained=transfer)

        if transfer:
            # layers are frozen by using eval()
            self.feature_extractor.eval()
            # freeze params
            for param in self.feature_extractor.parameters():
                param.requires_grad = False

        n_features = self._get_conv_output(self.input_shape)
        self.classifier = nn.Linear(n_features, num_classes)
        self.criterion = nn.CrossEntropyLoss()
    
    def _get_conv_output(self, shape):
        batch_size = 1
        tmp_input = torch.autograd.Variable(torch.rand(batch_size, *shape))

        output_feat = self.feature_extractor(tmp_input)
        n_size = output_feat.data.view(batch_size, -1).size(1)
        return n_size

    # will be used during inference
    def forward(self, x):
        x = self.feature_extractor(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)

        return x

In [9]:
# from base_dataset.py
class BaseDataset(Dataset):
    def __init__(self, data_dic: dict, transform: transforms.transforms.Compose = None):
        self.file_paths = data_dic["X"]
        self.labels = data_dic["Y"]
        self.transform = transform
        
    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        label = self.labels[idx]
        file_path = self.file_paths[idx]
        
        # Read an image with PIL and convert it to RGB
        image = Image.open(file_path).convert('RGB')

        if self.transform:
            image = self.transform(image)

        # Convert label to a Long tensor
        label = torch.tensor(label, dtype=torch.long)

        return image, label

In [10]:
# Get the dataset
train_dataset = BaseDataset(train_set, transform=torch_vision_transform)
print(train_dataset.labels)
val_dataset = BaseDataset(val_set, transform=torch_vision_transform)
test_dataset = BaseDataset(test_set,transform= torch_vision_transform_test)

[1 1 3 1 3 1 2 1 2 0 2 3 3 2 2 0 1 1 1 0 1 0 2 2 2 1 1 2 3 0 2 1 2 2 3 0 2
 3 3 3 1 0 1 0 2 2 1 0 1 1 1 0 3 3 0 2 1 0 2 2 2 1 2 0 1 2 2 0 1 2 2 2]


In [11]:
# Get data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)

In [12]:
def get_dataset_stats(data_loader):
    """
    Get mean and std stats.
    """
    mean = 0.
    std = 0.
    nb_samples = 0.
    for data in data_loader:
        data = data[0]  # Get the images to compute the stgatistics
        batch_samples = data.size(0)
        data = data.view(batch_samples, data.size(1), -1)
        mean += data.mean(2).sum(0)
        std += data.std(2).sum(0)
        nb_samples += batch_samples

    mean /= nb_samples
    std /= nb_samples
    return mean, std

In [13]:
print(get_dataset_stats(train_loader))

(tensor([0.3629, 0.3255, 0.3942]), tensor([0.9863, 1.0403, 1.1084]))


In [14]:
# train_iterator = iter(train_loader)
# train_batch = next(train_iterator)

### 🏃‍♂️ Train

In [15]:
# load model
net_18 = GarbageModel(input_shape=INPUT_SHAPE, num_classes=NUM_CLASSES, transfer=True)
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
net_18.to(device)



GarbageModel(
  (feature_extractor): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=

In [16]:
# from train_utils.py

# Custom class
from src.base_dataset import BaseDataset
from src.garbage_model import GarbageModel

wandb.init(
    project="enel-645-garbage-classifier",
    name="test-run",
    config={"learning_rate": 0.02, "architecture": "resnet_18", "dataset": "CVPR_2024_dataset", "epochs": 12}
)

def train_validate(model: GarbageModel, train_loader: BaseDataset, val_loader: BaseDataset, epochs: int, learning_rate: float, best_model_path: str, device: torch.device, verbose: bool = True) -> None:
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    best_loss = 1e+20

    for epoch in range(epochs):
        model.train()
        train_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        if verbose:
            print(f'Epoch {epoch + 1}, Train loss: {train_loss / len(train_loader):.3f}', end=' ')

        scheduler.step()

        model.eval()
        val_loss = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item()
            if verbose:
                print(f'Val loss: {val_loss / len(val_loader):.3f}')

        # Log training and validation loss to wandb
        wandb.log({"epoch": epoch + 1, "train_loss": train_loss, "val_loss": val_loss})

        if val_loss < best_loss:
            if verbose:
                print("Saving model")
            torch.save(model.state_dict(), best_model_path)
            best_loss = val_loss

    if verbose:
        print('Finished Training')

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mredgesantillan[0m ([33menel-645[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [17]:
train_validate(net_18, train_loader, val_loader, EPOCHS, LEARNING_RATE, best_model_path, device)

Epoch 1, Train loss: 1.510 Val loss: 1.592
Saving model
Epoch 2, Train loss: 1.102 Val loss: 1.299
Saving model
Epoch 3, Train loss: 0.937 Val loss: 1.554
Epoch 4, Train loss: 0.894 Val loss: 1.210
Saving model
Epoch 5, Train loss: 0.788 Val loss: 1.222
Epoch 6, Train loss: 0.794 Val loss: 1.062
Saving model
Epoch 7, Train loss: 0.695 Val loss: 1.047
Saving model
Epoch 8, Train loss: 0.625 Val loss: 1.059
Epoch 9, Train loss: 0.546 Val loss: 1.212
Epoch 10, Train loss: 0.722 Val loss: 1.117
Epoch 11, Train loss: 0.580 Val loss: 0.899
Saving model
Epoch 12, Train loss: 0.533 Val loss: 1.044
Finished Training


In [18]:
# Load the best model to be used in the test set
net = GarbageModel((3,224,224), 4, False)
net.load_state_dict(torch.load(MODEL_PATH))

correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in test_loader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = net(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()




In [19]:
print(f'Accuracy of the network on the test images: {100 * correct / total} %')

Accuracy of the network on the test images: 45.833333333333336 %
