In [1]:
import torch # Pytorch
from torchvision import models, transforms, datasets as ds # Pytorch vision library for import vision models

from torch.utils.data import DataLoader

import os

import torch.nn as nn # neural net library
import torch.nn.functional as F # neural net functions for layers and activations

import torch.optim as optim # optimizer library

from cvpr_dataset import CVPR

def create_model(num_classes=4):
  """Build a pre-trained MobileNetV3-Small set up for transfer learning.

  The torchvision model is loaded with its default pre-trained weights, every
  parameter under ``model.features`` is frozen, and the layer at
  ``model.classifier[3]`` is replaced by a new ``nn.Linear`` with
  ``num_classes`` outputs.

  Args:
    num_classes: Number of outputs of the replacement final layer.
      Defaults to 4, the value this function used before it was a parameter.

  Returns:
    The modified model. It is not moved to any device here; the caller does
    that.
  """
  # Load MobileNetV3-Small with pre-trained weights.
  model = models.mobilenet_v3_small(weights='DEFAULT')

  # Freeze the feature extractor so its parameters receive no gradients.
  for param in model.features.parameters():
    param.requires_grad = False

  # classifier[3] is the layer being swapped out; reuse its input width so the
  # new head plugs into the rest of the classifier unchanged.
  # (If the backbone is switched to models.efficientnet_v2_m, the equivalent
  # layer is classifier[1].)
  num_features_classify = model.classifier[3].in_features
  model.classifier[3] = nn.Linear(num_features_classify, num_classes)

  return model
  

def train(device, model, dataloaders, criterion, optimizer, num_epoch=5,
          checkpoint_path="best_model.pth"):
  """Train ``model`` and checkpoint the weights with the best validation accuracy.

  Every epoch runs a 'train' pass (gradients enabled, optimizer stepped after
  each batch) followed by a 'val' pass (gradients disabled). Loss and accuracy
  for each pass are printed. Whenever the validation accuracy beats the best
  seen so far, ``model.state_dict()`` is saved to ``checkpoint_path``.

  Args:
    device: Device that each batch of inputs and labels is moved to.
    model: Module to train; called as ``model(inputs)``.
    dataloaders: Mapping with 'train' and 'val' entries. Each entry is
      iterated for ``(inputs, labels)`` batches and must expose ``.dataset``
      supporting ``len()``.
    criterion: Called as ``criterion(outputs, labels)``. The batch loss is
      multiplied by the batch size before averaging over the dataset, which
      assumes the criterion returns a per-batch mean.
    optimizer: Optimizer stepped after each training batch.
    num_epoch: Number of train+val epochs to run (default 5).
    checkpoint_path: Where the best state dict is written. Defaults to
      "best_model.pth".

  Returns:
    The same ``model`` object in its state after the final epoch. The best
    weights are only written to ``checkpoint_path``; they are not reloaded
    into the returned model.
  """
  best_acc = 0.0

  for _ in range(num_epoch):
    # Each epoch is a training pass followed by a validation pass.
    for phase in ['train', 'val']:
      is_training = phase == 'train'
      if is_training:
        model.train()
      else:
        model.eval()

      running_loss = 0.0
      running_corrects = 0

      for inputs, labels in dataloaders[phase]:
        inputs, labels = inputs.to(device), labels.to(device)

        # Gradients are only produced in the train phase, so only clear them there.
        if is_training:
          optimizer.zero_grad()

        with torch.set_grad_enabled(is_training):
          outputs = model(inputs)
          loss = criterion(outputs, labels)
          _, preds = torch.max(outputs, 1)

          if is_training:
            loss.backward()
            optimizer.step()

        running_loss += loss.item() * inputs.size(0)
        # Accumulate as a Python int so the counter stays valid even if the
        # loader yields no batches, and so the accuracy is a plain float
        # rather than a tensor living on `device`.
        running_corrects += torch.sum(preds == labels).item()

      dataset_size = len(dataloaders[phase].dataset)
      epoch_loss = running_loss / dataset_size
      epoch_acc = running_corrects / dataset_size

      print(f"{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}")

      # Keep only the weights that did best on the validation split.
      if phase == "val" and epoch_acc > best_acc:
        best_acc = epoch_acc
        torch.save(model.state_dict(), checkpoint_path)

  print(f"Best val Acc: {best_acc:.4f}")
  return model

# Test function
def test_model(device, model, dataloader):
    model.load_state_dict(torch.load("best_model.pth", map_location=device))
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            correct += torch.sum(preds == labels).item()
            total += labels.size(0)
    print(f"Test Accuracy: {100 * correct / total:.2f}%")



def dataset(data_dir=r'/Users/danishshahid/MEng/ENSF617/Assignments/Assignment2/garbage_data',
            batch_size=128, num_workers=2):
  """Create the train/val/test DataLoaders.

  Images are preprocessed with the transforms bundled with the MobileNetV3-Small
  IMAGENET1K_V1 weights; the training split additionally gets a random
  horizontal flip applied first.

  Args:
    data_dir: Root directory handed to ``CVPR``. The default is the
      machine-specific path this notebook was written against; pass your own
      location when running elsewhere.
    batch_size: Batch size used by all three loaders (default 128).
    num_workers: Worker processes per loader (default 2).

  Returns:
    Dict with keys 'train', 'val' and 'test', each mapping to a ``DataLoader``.
    Only the 'train' loader shuffles.
  """
  # Preprocessing that matches the pre-trained backbone. If the backbone is
  # switched (e.g. to EfficientNetV2-M), use that model's weights' transforms.
  preprocess = models.MobileNet_V3_Small_Weights.IMAGENET1K_V1.transforms()

  transform = {
      "train": transforms.Compose([
        transforms.RandomHorizontalFlip(),
        preprocess
      ]),
      "val": preprocess,
      "test": preprocess,
  }

  # CVPR is given the root directory and the split name; presumably it locates
  # the per-split folder itself (TODO confirm against cvpr_dataset.CVPR).
  dataloaders = {}
  for split in ("train", "val", "test"):
    dataloaders[split] = DataLoader(
        CVPR(data_dir, split, transform[split]),
        batch_size=batch_size,
        shuffle=(split == "train"),  # shuffle only the training data
        num_workers=num_workers,
    )

  return dataloaders


In [2]:
# Select the compute device: prefer Apple MPS if it is available,
# otherwise the first CUDA GPU, otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = torch.device('mps')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [3]:
# Build the network first, then move it onto the selected device.
model = create_model()
model = model.to(device)

In [4]:
# Plain SGD over the classifier's parameters only.
optimizer = optim.SGD(model.classifier.parameters(), lr=1e-3)
# Cross-entropy between the model outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()

In [5]:
# Build the DataLoaders once; dataset() returns a dict keyed by 'train', 'val' and 'test'.
dataloaders = dataset()

In [6]:
# Run training with the default number of epochs; train() hands back the same model object.
model = train(
    device=device,
    model=model,
    dataloaders=dataloaders,
    criterion=criterion,
    optimizer=optimizer,
)

train Loss: 1.3545 Acc: 0.3418
val Loss: 1.2664 Acc: 0.4267
train Loss: 1.2447 Acc: 0.4725
val Loss: 1.1840 Acc: 0.5033
train Loss: 1.1694 Acc: 0.5308
val Loss: 1.1300 Acc: 0.5500
train Loss: 1.1094 Acc: 0.5718
val Loss: 1.0894 Acc: 0.5739
train Loss: 1.0692 Acc: 0.5977
val Loss: 1.0558 Acc: 0.5911
Best val Acc: 0.5911


In [8]:

# Score the checkpointed weights on the held-out test split.
test_model(device=device, model=model, dataloader=dataloaders["test"])

Test Accuracy: 51.95%
