Transfer learning

In [1]:
import os
import torch

import torch.nn as nn
import torch.optim as optim

from PIL import Image
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [2]:
class WasteDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [3]:
# Simple preprocessing

input_size = 224

# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [4]:
# Create dataloaders

train_dataset = WasteDataset(
    data_dir='data/train',
    transform=train_transforms
)

val_dataset = WasteDataset(
    data_dir='data/val',
    transform=val_transforms
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [5]:
# Build the model

class WasteClassifierMobileNet(nn.Module):
    def __init__(self, num_classes=6):
        super(WasteClassifierMobileNet, self).__init__()

        # Load pre-trained MobileNetV2
        self.base_model = models.mobilenet_v2(weights='IMAGENET1K_V1')

        # Freeze base model parameters
        for param in self.base_model.parameters():
            param.requires_grad = False

        # Remove original classifier
        self.base_model.classifier = nn.Identity()

        # Add custom layers
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.output_layer = nn.Linear(1280, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.output_layer(x)
        return x

In [6]:
# Train the model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = WasteClassifierMobileNet(num_classes=6)
model.to(device)

optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

In [7]:
# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    # Training phase
    model.train()  # Set the model to training mode
    running_loss = 0.0
    correct = 0
    total = 0

    # Iterate over the training data
    for inputs, labels in train_loader:
        # Move data to the specified device (GPU or CPU)
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the parameter gradients to prevent accumulation
        optimizer.zero_grad()
        # Forward pass
        outputs = model(inputs)
        # Calculate the loss
        loss = criterion(outputs, labels)
        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Accumulate training loss
        running_loss += loss.item()
        # Get predictions
        _, predicted = torch.max(outputs.data, 1)
        # Update total and correct predictions
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Calculate average training loss and accuracy
    train_loss = running_loss / len(train_loader)
    train_acc = correct / total

    # Validation phase
    model.eval()  # Set the model to evaluation mode
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    # Disable gradient calculation for validation
    with torch.no_grad():
        # Iterate over the validation data
        for inputs, labels in val_loader:
            # Move data to the specified device (GPU or CPU)
            inputs, labels = inputs.to(device), labels.to(device)
            # Forward pass
            outputs = model(inputs)
            # Calculate the loss
            loss = criterion(outputs, labels)

            # Accumulate validation loss
            val_loss += loss.item()
            # Get predictions
            _, predicted = torch.max(outputs.data, 1)
            # Update total and correct predictions
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    # Calculate average validation loss and accuracy
    val_loss /= len(val_loader)
    val_acc = val_correct / val_total

    # Print epoch results
    print(f'Epoch {epoch+1}/{num_epochs}')
    print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
    print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

Epoch 1/10
  Train Loss: 1.0373, Train Acc: 0.6351
  Val Loss: 0.6112, Val Acc: 0.8152
Epoch 2/10
  Train Loss: 0.6254, Train Acc: 0.8006
  Val Loss: 0.4829, Val Acc: 0.8567
Epoch 3/10
  Train Loss: 0.5233, Train Acc: 0.8231
  Val Loss: 0.4086, Val Acc: 0.8706
Epoch 4/10
  Train Loss: 0.4609, Train Acc: 0.8433
  Val Loss: 0.3666, Val Acc: 0.8825
Epoch 5/10
  Train Loss: 0.4109, Train Acc: 0.8746
  Val Loss: 0.3256, Val Acc: 0.9027
Epoch 6/10
  Train Loss: 0.3861, Train Acc: 0.8718
  Val Loss: 0.3255, Val Acc: 0.9030
Epoch 7/10
  Train Loss: 0.3801, Train Acc: 0.8765
  Val Loss: 0.2799, Val Acc: 0.9224
Epoch 8/10
  Train Loss: 0.3423, Train Acc: 0.8900
  Val Loss: 0.2547, Val Acc: 0.9311
Epoch 9/10
  Train Loss: 0.3302, Train Acc: 0.8924
  Val Loss: 0.2503, Val Acc: 0.9284
Epoch 10/10
  Train Loss: 0.3353, Train Acc: 0.8880
  Val Loss: 0.2619, Val Acc: 0.9220


Tuning the learning rate

In [8]:
import os
import torch

import torch.nn as nn
import torch.optim as optim

from PIL import Image
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [9]:
class WasteDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [10]:
# Simple preprocessing

input_size = 224

# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [11]:
# Create dataloaders

train_dataset = WasteDataset(
    data_dir='data/train',
    transform=train_transforms
)

val_dataset = WasteDataset(
    data_dir='data/val',
    transform=val_transforms
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [12]:
# Build the model

class WasteClassifierMobileNet(nn.Module):
    def __init__(self, num_classes=6):
        super(WasteClassifierMobileNet, self).__init__()

        # Load pre-trained MobileNetV2
        self.base_model = models.mobilenet_v2(weights='IMAGENET1K_V1')

        # Freeze base model parameters
        for param in self.base_model.parameters():
            param.requires_grad = False

        # Remove original classifier
        self.base_model.classifier = nn.Identity()

        # Add custom layers
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.output_layer = nn.Linear(1280, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.output_layer(x)
        return x

In [13]:
# Train the model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

In [14]:
def train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device):
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = correct / total

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_acc = val_correct / val_total

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

In [15]:
def make_model(learning_rate=0.01):
    model = WasteClassifierMobileNet(num_classes=6)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer

In [17]:
learning_rates = [0.0001, 0.001, 0.01]
num_epochs = 10

for lr in learning_rates:
    print(f'\n=== Learning Rate: {lr} ===')
    model, optimizer = make_model(learning_rate=lr)
    train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)


=== Learning Rate: 0.0001 ===
Epoch 1/10
  Train Loss: 1.5959, Train Acc: 0.3941
  Val Loss: 1.3968, Val Acc: 0.5841
Epoch 2/10
  Train Loss: 1.2892, Train Acc: 0.6284
  Val Loss: 1.1455, Val Acc: 0.6996
Epoch 3/10
  Train Loss: 1.0949, Train Acc: 0.7016
  Val Loss: 0.9849, Val Acc: 0.7499
Epoch 4/10
  Train Loss: 0.9695, Train Acc: 0.7396
  Val Loss: 0.8759, Val Acc: 0.7772
Epoch 5/10
  Train Loss: 0.8790, Train Acc: 0.7590
  Val Loss: 0.7984, Val Acc: 0.7831
Epoch 6/10
  Train Loss: 0.8069, Train Acc: 0.7776
  Val Loss: 0.7430, Val Acc: 0.8065
Epoch 7/10
  Train Loss: 0.7571, Train Acc: 0.7875
  Val Loss: 0.6901, Val Acc: 0.8108
Epoch 8/10
  Train Loss: 0.7163, Train Acc: 0.7982
  Val Loss: 0.6562, Val Acc: 0.8275
Epoch 9/10
  Train Loss: 0.6741, Train Acc: 0.8061
  Val Loss: 0.6168, Val Acc: 0.8346
Epoch 10/10
  Train Loss: 0.6529, Train Acc: 0.8156
  Val Loss: 0.5924, Val Acc: 0.8445

=== Learning Rate: 0.001 ===
Epoch 1/10
  Train Loss: 1.0218, Train Acc: 0.6466
  Val Loss: 0.613

Best:  
=== Learning Rate: 0.01 ===  

Adding inner layers

In [18]:
# import numpy as np
import os
import torch

import torch.nn as nn
import torch.optim as optim

from PIL import Image
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [19]:
class WasteDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [20]:
# Simple preprocessing

input_size = 224

# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.RandomRotation(10),           # Rotate up to 10 degrees
    transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),  # Zoom
    transforms.RandomHorizontalFlip(),       # Horizontal flip
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [21]:
# Create dataloaders

train_dataset = WasteDataset(
    data_dir='data/train',
    transform=train_transforms
)

val_dataset = WasteDataset(
    data_dir='data/val',
    transform=val_transforms
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [22]:
class WasteClassifierMobileNet(nn.Module):
    def __init__(self, size_inner=100, num_classes=6):
        super(WasteClassifierMobileNet, self).__init__()
        
        self.base_model = models.mobilenet_v2(weights='IMAGENET1K_V1')
        
        # Freeze everything
        for param in self.base_model.parameters():
            param.requires_grad = False

        # Unfreeze last block (features[18] is last inverted residual block)
        for param in self.base_model.features[18].parameters():
            param.requires_grad = True
        
        # Remove original classifier
        self.base_model.classifier = nn.Identity()
        
        # Add custom layers
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.inner = nn.Linear(1280, size_inner)  # New inner layer
        self.relu = nn.ReLU()
        self.output_layer = nn.Linear(size_inner, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.inner(x)
        x = self.relu(x)
        x = self.output_layer(x)
        return x

In [23]:
# Train the model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

In [None]:
def train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device):
    best_val_accuracy = 0.0  # Initialize variable to track the best validation accuracy

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = correct / total

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_acc = val_correct / val_total

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        # Checkpoint the model if validation accuracy improved
        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc
            checkpoint_path = f'model/mobilenet_v2_{epoch+1:02d}_{val_acc:.3f}.pth'
            torch.save(model.state_dict(), checkpoint_path)
            print(f'Checkpoint saved: {checkpoint_path}')

In [25]:
def make_model(learning_rate=0.01, size_inner=100):
    model = WasteClassifierMobileNet(
        num_classes=6,
        size_inner=size_inner
    )
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer

In [26]:
learning_rate = 0.01
size_inners = [10, 100, 500]
num_epochs = 10

for size in size_inners:
    print(f'\n=== Size inner: {size} ===')
    model, optimizer = make_model(learning_rate=learning_rate, size_inner=size)
    train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)


=== Size inner: 10 ===
Epoch 1/10
  Train Loss: 1.0438, Train Acc: 0.6003
  Val Loss: 0.7973, Val Acc: 0.7064
Checkpoint saved: mobilenet_v2_01_0.706.pth
Epoch 2/10
  Train Loss: 0.7672, Train Acc: 0.7317
  Val Loss: 0.5686, Val Acc: 0.8239
Checkpoint saved: mobilenet_v2_02_0.824.pth
Epoch 3/10
  Train Loss: 0.6466, Train Acc: 0.7922
  Val Loss: 0.4806, Val Acc: 0.8544
Checkpoint saved: mobilenet_v2_03_0.854.pth
Epoch 4/10
  Train Loss: 0.5538, Train Acc: 0.8128
  Val Loss: 0.4035, Val Acc: 0.8623
Checkpoint saved: mobilenet_v2_04_0.862.pth
Epoch 5/10
  Train Loss: 0.5145, Train Acc: 0.8203
  Val Loss: 0.3507, Val Acc: 0.8773
Checkpoint saved: mobilenet_v2_05_0.877.pth
Epoch 6/10
  Train Loss: 0.4611, Train Acc: 0.8472
  Val Loss: 0.3627, Val Acc: 0.8738
Epoch 7/10
  Train Loss: 0.4090, Train Acc: 0.8607
  Val Loss: 0.2880, Val Acc: 0.9121
Checkpoint saved: mobilenet_v2_07_0.912.pth
Epoch 8/10
  Train Loss: 0.3989, Train Acc: 0.8694
  Val Loss: 0.2206, Val Acc: 0.9410
Checkpoint saved

Best:   
=== Size inner: 100 ===  

Dropout Regularization

In [27]:
# import numpy as np
import os
import torch

import torch.nn as nn
import torch.optim as optim

from PIL import Image
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [28]:
class WasteDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [29]:
# Simple preprocessing

input_size = 224

# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.RandomRotation(10),           # Rotate up to 10 degrees
    transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),  # Zoom
    transforms.RandomHorizontalFlip(),       # Horizontal flip
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [30]:
# Create dataloaders

train_dataset = WasteDataset(
    data_dir='data/train',
    transform=train_transforms
)

val_dataset = WasteDataset(
    data_dir='data/val',
    transform=val_transforms
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [31]:
class WasteClassifierMobileNet(nn.Module):
    def __init__(self, size_inner=100, droprate=0.2, num_classes=6):
        super(WasteClassifierMobileNet, self).__init__()
        
        self.base_model = models.mobilenet_v2(weights='IMAGENET1K_V1')
        
        for param in self.base_model.parameters():
            param.requires_grad = False
        
        self.base_model.classifier = nn.Identity()
        
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.inner = nn.Linear(1280, size_inner)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(droprate)  # Add dropout
        self.output_layer = nn.Linear(size_inner, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.inner(x)
        x = self.relu(x)
        x = self.dropout(x)  # Apply dropout
        x = self.output_layer(x)
        return x



In [32]:
# Train the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

In [None]:
def train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device):
    best_val_accuracy = 0.0  # Initialize variable to track the best validation accuracy

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = correct / total

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_acc = val_correct / val_total

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        # Checkpoint the model if validation accuracy improved
        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc
            checkpoint_path = f'model/mobilenet_v3_{epoch+1:02d}_{val_acc:.3f}.pth'
            torch.save(model.state_dict(), checkpoint_path)
            print(f'Checkpoint saved: {checkpoint_path}')

In [34]:
def make_model(
        learning_rate=0.01,
        size_inner=100,
        droprate=0.2
):
    model = WasteClassifierMobileNet(
        num_classes=6,
        size_inner=size_inner,
        droprate=droprate
    )
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer

In [35]:
learning_rate = 0.01
size_inner = 100
droprate = [0.0, 0.2, 0.5, 0.8]
num_epochs = 10

for dr in droprate:
    print(f'\n=== Drop rate: {dr} ===')
    model, optimizer = make_model(learning_rate=learning_rate, size_inner=size_inner, droprate=dr)
    train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)


=== Drop rate: 0.0 ===
Epoch 1/10
  Train Loss: 1.4191, Train Acc: 0.6035
  Val Loss: 0.5339, Val Acc: 0.8180
Checkpoint saved: mobilenet_v3_01_0.818.pth
Epoch 2/10
  Train Loss: 0.6199, Train Acc: 0.7768
  Val Loss: 0.4657, Val Acc: 0.8417
Checkpoint saved: mobilenet_v3_02_0.842.pth
Epoch 3/10
  Train Loss: 0.5825, Train Acc: 0.7855
  Val Loss: 0.3666, Val Acc: 0.8746
Checkpoint saved: mobilenet_v3_03_0.875.pth
Epoch 4/10
  Train Loss: 0.5132, Train Acc: 0.8116
  Val Loss: 0.3601, Val Acc: 0.8773
Checkpoint saved: mobilenet_v3_04_0.877.pth
Epoch 5/10
  Train Loss: 0.4574, Train Acc: 0.8425
  Val Loss: 0.2847, Val Acc: 0.9030
Checkpoint saved: mobilenet_v3_05_0.903.pth
Epoch 6/10
  Train Loss: 0.5070, Train Acc: 0.8223
  Val Loss: 0.4019, Val Acc: 0.8556
Epoch 7/10
  Train Loss: 0.4028, Train Acc: 0.8587
  Val Loss: 0.3127, Val Acc: 0.9011
Epoch 8/10
  Train Loss: 0.4166, Train Acc: 0.8488
  Val Loss: 0.3263, Val Acc: 0.8765
Epoch 9/10
  Train Loss: 0.4134, Train Acc: 0.8496
  Val Los

Dropout was not considered for the final model

Data Augmentation

In [36]:
# import numpy as np
import os
import torch

import torch.nn as nn
import torch.optim as optim

from PIL import Image
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [37]:
class WasteDataset(Dataset):
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.transform = transform
        self.image_paths = []
        self.labels = []
        self.classes = sorted(os.listdir(data_dir))
        self.class_to_idx = {cls: i for i, cls in enumerate(self.classes)}

        for label_name in self.classes:
            label_dir = os.path.join(data_dir, label_name)
            for img_name in os.listdir(label_dir):
                self.image_paths.append(os.path.join(label_dir, img_name))
                self.labels.append(self.class_to_idx[label_name])

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label

In [38]:
# Simple preprocessing

input_size = 224

# ImageNet normalization values
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Simple transforms - just resize and normalize
train_transforms = transforms.Compose([
    transforms.RandomRotation(10),           # Rotate up to 10 degrees
    transforms.RandomResizedCrop(224, scale=(0.9, 1.0)),  # Zoom
    transforms.RandomHorizontalFlip(),       # Horizontal flip
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

val_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

In [39]:
# Create dataloaders

train_dataset = WasteDataset(
    data_dir='data/train',
    transform=train_transforms
)

val_dataset = WasteDataset(
    data_dir='data/val',
    transform=val_transforms
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [40]:
class WasteClassifierMobileNet(nn.Module):
    def __init__(self, size_inner=100, num_classes=6):
        super(WasteClassifierMobileNet, self).__init__()
        
        self.base_model = models.mobilenet_v2(weights='IMAGENET1K_V1')
        
        for param in self.base_model.parameters():
            param.requires_grad = False

        # Unfreeze last block (features[18] is last inverted residual block)
        for param in self.base_model.features[18].parameters():
            param.requires_grad = True
            
        self.base_model.classifier = nn.Identity()
        
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.inner = nn.Linear(1280, size_inner)
        self.relu = nn.ReLU()
        self.output_layer = nn.Linear(size_inner, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.inner(x)
        x = self.relu(x)
        x = self.output_layer(x)
        return x

In [41]:
# Train the model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

In [45]:
def train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device):
    best_val_accuracy = 0.0  # Initialize variable to track the best validation accuracy

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)

            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = correct / total

        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

        val_loss /= len(val_loader)
        val_acc = val_correct / val_total

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'  Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}')
        print(f'  Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}')

        # Checkpoint the model if validation accuracy improved
        if val_acc > best_val_accuracy:
            best_val_accuracy = val_acc
            checkpoint_path = f'model/mobilenet_v4_{epoch+1:02d}_{val_acc:.3f}.pth'
            torch.save(model.state_dict(), checkpoint_path)
            print(f'Checkpoint saved: {checkpoint_path}')

In [46]:
def make_model(
        learning_rate=0.01,
        size_inner=100,
):
    model = WasteClassifierMobileNet(
        num_classes=6,
        size_inner=size_inner,
    )
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    return model, optimizer

In [47]:
learning_rate = 0.01
size_inner = 100
num_epochs = 50

print(f'\n=== Leaning rate: {learning_rate}, size_inner: {size_inner} ===')
model, optimizer = make_model(learning_rate=learning_rate, size_inner=size_inner)
train_and_evaluate(model, optimizer, train_loader, val_loader, criterion, num_epochs, device)


=== Leaning rate: 0.01, size_inner: 100 ===
Epoch 1/50
  Train Loss: 0.9012, Train Acc: 0.6977
  Val Loss: 0.4433, Val Acc: 0.8457
Checkpoint saved: model/mobilenet_v4_01_0.846.pth
Epoch 2/50
  Train Loss: 0.5011, Train Acc: 0.8184
  Val Loss: 0.3253, Val Acc: 0.8813
Checkpoint saved: model/mobilenet_v4_02_0.881.pth
Epoch 3/50
  Train Loss: 0.4429, Train Acc: 0.8429
  Val Loss: 0.2757, Val Acc: 0.9007
Checkpoint saved: model/mobilenet_v4_03_0.901.pth
Epoch 4/50
  Train Loss: 0.3605, Train Acc: 0.8765
  Val Loss: 0.2135, Val Acc: 0.9272
Checkpoint saved: model/mobilenet_v4_04_0.927.pth
Epoch 5/50
  Train Loss: 0.3139, Train Acc: 0.8896
  Val Loss: 0.1590, Val Acc: 0.9478
Checkpoint saved: model/mobilenet_v4_05_0.948.pth
Epoch 6/50
  Train Loss: 0.2330, Train Acc: 0.9205
  Val Loss: 0.1322, Val Acc: 0.9573
Checkpoint saved: model/mobilenet_v4_06_0.957.pth
Epoch 7/50
  Train Loss: 0.2294, Train Acc: 0.9161
  Val Loss: 0.1161, Val Acc: 0.9624
Checkpoint saved: model/mobilenet_v4_07_0.962.

Using the Trained Model

In [48]:
import glob
import re
import torch

import torch.nn as nn

import torchvision.models as models

In [49]:
class WasteClassifierMobileNet(nn.Module):
    def __init__(self, size_inner=100, num_classes=6):
        super(WasteClassifierMobileNet, self).__init__()
        
        self.base_model = models.mobilenet_v2(weights='IMAGENET1K_V1')
        
        for param in self.base_model.parameters():
            param.requires_grad = False

        # Unfreeze last block (features[18] is last inverted residual block)
        for param in self.base_model.features[18].parameters():
            param.requires_grad = True
            
        self.base_model.classifier = nn.Identity()
        
        self.global_avg_pooling = nn.AdaptiveAvgPool2d((1, 1))
        self.inner = nn.Linear(1280, size_inner)
        self.relu = nn.ReLU()
        self.output_layer = nn.Linear(size_inner, num_classes)

    def forward(self, x):
        x = self.base_model.features(x)
        x = self.global_avg_pooling(x)
        x = torch.flatten(x, 1)
        x = self.inner(x)
        x = self.relu(x)
        x = self.output_layer(x)
        return x

In [50]:
def extract_accuracy(filename):
    """
    Extracts the accuracy value from filenames like:
    mobilenet_v3A_06_0.845.pth
    """
    match = re.search(r"_([0-9]*\.[0-9]+)\.pth$", filename)
    if match:
        return float(match.group(1))
    return -1  # If something goes wrong

In [51]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [53]:
# Find best checkpoint
list_of_files = glob.glob('model/mobilenet_v*.pth')
best_model_file = [x for x in list_of_files if str(max([extract_accuracy(x) for x in list_of_files])) in x][0]
print(f"Loading model from: {best_model_file}")

Loading model from: model/mobilenet_v4_39_0.998.pth


In [54]:
# Architecture used during training
trained_size_inner = 100      # ← change to whatever you trained with
num_classes        = 6

# Load model
model = WasteClassifierMobileNet(
    size_inner=trained_size_inner, 
    num_classes=num_classes
)

model.load_state_dict(torch.load(best_model_file))
model.to(device)
model.eval()



WasteClassifierMobileNet(
  (base_model): MobileNetV2(
    (features): Sequential(
      (0): Conv2dNormActivation(
        (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): ReLU6(inplace=True)
      )
      (1): InvertedResidual(
        (conv): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU6(inplace=True)
          )
          (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
      )
      (2): InvertedResidual(
        (conv): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(16, 96

Making Predictions

In [55]:
from keras_image_helper import create_preprocessor
import numpy as np

def preprocess_pytorch_style(X):
    # X: shape (1, 224, 224, 3), dtype=float32, values in [0, 255]
    X = X / 255.0
    
    mean = np.array([0.485, 0.456, 0.406]).reshape(1, 3, 1, 1)
    std = np.array([0.229, 0.224, 0.225]).reshape(1, 3, 1, 1)
    
    # Convert NHWC → NCHW (batch, height, width, channels → batch, channels, height, width)
    X = X.transpose(0, 3, 1, 2)
    
    # Normalize
    X = (X - mean) / std
    
    return X.astype(np.float32)

preprocessor = create_preprocessor(preprocess_pytorch_style, target_size=(224, 224))



In [None]:
import random

# Classification from random image in data/test
path = 'data/test'
classes = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]
folder = random.choice(classes)
files = os.listdir(f'{path}/{folder}')
file = random.choice(files)
print(f'Classifying {file} from {path}/{folder} folder')

X = preprocessor.from_path(f'{path}/{folder}/{file}')
X = torch.Tensor(X).to(device)

with torch.no_grad():
    pred = model(X).cpu().numpy()[0]

result = dict(zip(classes, pred.tolist()))
sorted_result = dict(sorted(result.items(), key=lambda item: item[1], reverse=True))
print(f'File: {file} Clasified as: {sorted_result}')


Classifying cardboard238.jpg from data/test/cardboard folder
File: cardboard238.jpg Clasified as: {'cardboard': 16.32851791381836, 'paper': -6.687436580657959, 'metal': -7.029561996459961, 'glass': -7.788281440734863, 'trash': -9.7933988571167, 'plastic': -13.322946548461914}


In [57]:
# Classification from an url

url = 'https://storage.googleapis.com/kagglesdsdata/datasets/9083965/14238273/Garbage%20classification/test/cardboard/cardboard112.jpg?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=databundle-worker-v2%40kaggle-161607.iam.gserviceaccount.com%2F20260119%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20260119T094428Z&X-Goog-Expires=345600&X-Goog-SignedHeaders=host&X-Goog-Signature=6efdbb784d786342ee5e566d4562048552db1f0e866da3178949c3064f5cad5a6bb69da0ec9bc5e2a07518269d78ca32d02c3543f76dbc101bd87f0ecbefc6f4e76fa83ca75f325c21061b967902e83a3f273917ed7ac901768d15f2a6118d8817421b8c24d33c7363b161bb8df86b697fc3f3572e813f771561b1fe0da62d36da59e7dad2a4d469f7bcaea3998758ad43530360047a3186bfae2dc04a2ccd6079b2639752f8d1e3152c952b19858e0b34ffbaa0c0bd9828ca2b55dccd44b1fc11adfa72e351acac974bad8d13a51ace5e89c6d43247960041a067cf048d13ae0507188c25b39bc9bfa3fac59ade3383782374cb6d58ae53140361ce96256583'
classes = ["cardboard", "glass", "metal", "paper", "plastic", "trash"]

X = preprocessor.from_url(url)
X = torch.Tensor(X).to(device)

with torch.no_grad():
    pred = model(X).cpu().numpy()[0]

result = dict(zip(classes, pred.tolist()))
sorted_result = dict(sorted(result.items(), key=lambda item: item[1], reverse=True))
print(sorted_result)


{'cardboard': 10.506720542907715, 'paper': -3.291609764099121, 'glass': -5.369290828704834, 'trash': -5.617626190185547, 'metal': -5.633875370025635, 'plastic': -8.832342147827148}


Exporting to ONNX

In [58]:
# Create dummy input
dummy_input = torch.randn(1, 3, 224, 224).to(device)

# Export to ONNX
onnx_path = "model/waste_classifier_mobilenet_v4.onnx"

torch.onnx.export(
    model,
    dummy_input,
    onnx_path,
    verbose=True,
    input_names=['input'],
    output_names=['output'],
    dynamic_axes={
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'}
    }
)

print(f"Model exported to {onnx_path}")


  torch.onnx.export(


[torch.onnx] Obtain model graph for `WasteClassifierMobileNet([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `WasteClassifierMobileNet([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
Applied 104 of general pattern rewrite rules.
Model exported to model/waste_classifier_mobilenet_v4.onnx
