In [6]:
# Fetch the project repository; it provides the `mai_project1_optimization` package
# (dataset, utility and trainer modules) that the import cell below relies on.
!git clone https://github.com/LeonLaumeyer/mai_project1_optimization.git

Cloning into 'mai_project1_optimization'...
remote: Enumerating objects: 12, done.[K
remote: Counting objects: 100% (12/12), done.[K
remote: Compressing objects: 100% (12/12), done.[K
remote: Total 12 (delta 0), reused 12 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (12/12), 57.23 KiB | 1.73 MiB/s, done.


In [8]:
!pip3 install -r mai_project1_optimization/requirements.txt

Collecting torchmetrics (from -r mai_project1_optimization/requirements.txt (line 6))
  Downloading torchmetrics-1.7.0-py3-none-any.whl.metadata (21 kB)
Collecting jedi>=0.16 (from ipython->-r mai_project1_optimization/requirements.txt (line 1))
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->-r mai_project1_optimization/requirements.txt (line 5))
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->-r mai_project1_optimization/requirements.txt (line 5))
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->-r mai_project1_optimization/requirements.txt (line 5))
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->-

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import models
from torchvision.models import *
from plotly import express as px
from collections import Counter
import numpy as np
import random

from mai_project1_optimization.modules.dataset import IntelImageClassificationDataset
from mai_project1_optimization.modules.utility import NotebookPlotter, InferenceSession, Evaluator, ISO_time
from mai_project1_optimization.modules.trainer import Trainer

# Seed PyTorch's default RNG right away; set_seed(1) further down seeds it again
# together with the `random`, NumPy and CUDA generators.
torch.manual_seed(1)
# Target device: CUDA when a GPU is available, otherwise the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def set_seed(seed=1):
    """Seed the Python, NumPy and PyTorch RNGs and make cuDNN deterministic.

    Args:
        seed: Value passed unchanged to every random number generator
            (defaults to 1).
    """
    # One seed for all generators: `random`, NumPy, PyTorch, and every CUDA device.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Disable cuDNN autotuning and request deterministic kernels for reproducibility.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Apply seed 1 to every RNG handled by set_seed before any later cell runs.
set_seed(1)

Dataset: [Intel Image Classification (Kaggle)](https://www.kaggle.com/datasets/puneet6060/intel-image-classification)

In [None]:
# labels, values = zip(*Counter([item[1] for item in dataset.train_dataset]).items())
# fig = px.bar(x=labels, y=values, labels={'x': 'Categories', 'y': 'Counts'}, title='Distribution of Classes')
# fig.show()

| n | label |
| --- | --- |
| 0 | buildings |
| 1 | forest |
| 2 | glacier |
| 3 | mountain |
| 4 | sea |
| 5 | street |

NotebookPlotter.plot_dataset_item_interactive(dataset.train_dataset)

In [None]:
# Architecture to train:
#   1 = SqueezeNet 1.1       2 = MobileNetV2          3 = MobileNetV3 Small
#   4 = MobileNetV3 Large    5 = VisionTransformer Base 16
choice = 1

# One output per label in the class table (labels 0-5).
NUM_CLASSES = 6

# Fail fast on an unsupported value: otherwise no branch below would assign `model`,
# and the Trainer would either hit a NameError or silently reuse a model left over
# from an earlier run of this cell.
if choice not in (1, 2, 3, 4, 5):
    raise ValueError(f"Unsupported choice {choice!r}; expected an integer from 1 to 5.")

# Choice 5 (ViT) is given 384x384 images; every other model is given 150x150.
if choice != 5:
    dataset = IntelImageClassificationDataset(resize=(150,150))
else:
    dataset = IntelImageClassificationDataset(resize=(384,384))

# Each branch loads the model with the given weights and replaces its final
# classification layer with a fresh one that has NUM_CLASSES outputs.
# Weight enums are referenced through `models.` so the cell does not depend on the
# wildcard import of torchvision.models.

# SqueezeNet 1.1
if choice == 1:
    model = models.squeezenet1_1(weights=models.SqueezeNet1_1_Weights.DEFAULT)
    # The head is a convolution: keep its input channels and kernel size.
    num_features = model.classifier[1].in_channels
    kernel_size = model.classifier[1].kernel_size
    model.classifier[1] = nn.Conv2d(num_features, NUM_CLASSES, kernel_size)

# MobileNetV2
elif choice == 2:
    model = models.mobilenet_v2(weights=models.MobileNet_V2_Weights.DEFAULT)
    num_features = model.classifier[1].in_features
    model.classifier[1] = nn.Linear(num_features, NUM_CLASSES)

# MobileNetV3 Small
elif choice == 3:
    model = models.mobilenet_v3_small(weights=models.MobileNet_V3_Small_Weights.DEFAULT)
    num_features = model.classifier[3].in_features
    model.classifier[3] = nn.Linear(num_features, NUM_CLASSES)

# MobileNetV3 Large
elif choice == 4:
    model = models.mobilenet_v3_large(weights=models.MobileNet_V3_Large_Weights.DEFAULT)
    num_features = model.classifier[3].in_features
    model.classifier[3] = nn.Linear(num_features, NUM_CLASSES)

# VisionTransformer Base 16
elif choice == 5:
    model = models.vit_b_16(weights=models.ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1)
    num_features = model.heads[0].in_features
    model.heads[0] = nn.Linear(num_features, NUM_CLASSES)

# Shuffled mini-batches of 24 training items; the Trainer wraps the selected model.
dataloader = DataLoader(dataset.train_dataset, batch_size=24, shuffle=True)
trainer = Trainer(model=model, lr=0.001)

In [None]:
# model.load_state_dict(torch.load(f"checkpoints/.pt"))
# Run the Trainer over the training DataLoader for 10 epochs.
trainer.train(dataloader, epochs=10)

Epoch  10/10, Batch  110/110, Loss 0.1916274875402451

In [None]:
session = InferenceSession(model)

# Walk the test split once and keep (image, label) pairs, rather than iterating the
# dataset twice (once for images, once for labels) and fetching every item twice.
test_items = [(item[0], item[1]) for item in dataset.test_dataset]
test_images = torch.stack(tuple(image for image, _ in test_items))
test_labels = torch.tensor(tuple(label for _, label in test_items))

output = session(test_images)
# Last expression of the cell: the Evaluator.acc score on the test split as a Python scalar.
Evaluator.acc(output, test_labels).item()

0.9129151701927185

In [None]:
# torch.save(model.state_dict(), f"checkpoints/{model.__class__.__name__}.pt")

## Initial Results for Model Selection

| model | accuracy | size |
| --- | --- | --- |
| ResNet18 | 0.87 | 44.7 MB |
| ResNet34 | 0.88 | 83.3 MB |
| MobileNet V2 | 0.91 | 13.6 MB |
| MobileNet V3 small | 0.90 | 9.8 MB |
| VGG19 | 0.83 | 548.1 MB |
| SqueezeNet 1.0 | 0.89 | 4.8 MB |
| DenseNet | 0.90 | 30.8 MB |
| EfficientNet B0 | 0.92 | 20.5 MB |
| ViT-b/16 | 0.73 | 330.3 MB |