In [1]:
# Detect whether the notebook is running on Google Colab.
# Only a failed import means "not on Colab"; catching ImportError (instead of a
# bare `except:`) keeps KeyboardInterrupt and unrelated errors visible.
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    IN_COLAB = False


In [2]:
# Colab only: fetch the project repository so that the
# `mai_project1_optimization` package (modules + requirements.txt used by the
# following cells) is available in the working directory.
if IN_COLAB:
    !git clone https://github.com/LeonLaumeyer/mai_project1_optimization.git

In [3]:
if IN_COLAB:
    !pip3 install -r mai_project1_optimization/requirements.txt

In [4]:
import copy
import io
import os
import random
import time
from collections import Counter

import numpy as np
import torch
import torch.ao.quantization.quantizer.xnnpack_quantizer as xq
import torch.nn as nn
import torch.optim as optim
import torch.profiler
from plotly import express as px
from torch.ao.quantization.quantize_pt2e import prepare_pt2e, convert_pt2e
from torch.ao.quantization.quantizer.xnnpack_quantizer import XNNPACKQuantizer, get_symmetric_quantization_config
from torch.utils.data import DataLoader, Subset
from torch.utils.tensorboard import SummaryWriter
from torchvision import models, datasets, transforms
from torchvision.models import *

if(IN_COLAB):
    from mai_project1_optimization.modules.dataset import IntelImageClassificationDataset
    from mai_project1_optimization.modules.utility import NotebookPlotter, InferenceSession, Evaluator, ISO_time, get_model_size_mb, evaluate_time_acc_model
    from mai_project1_optimization.modules.trainer import Trainer
else:
    from modules.dataset import IntelImageClassificationDataset
    from modules.utility import NotebookPlotter, InferenceSession, Evaluator, ISO_time, get_model_size_mb, evaluate_time_acc_model
    from modules.trainer import Trainer

# Seed PyTorch's default generator as early as possible.
torch.manual_seed(1)

# Run on the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(DEVICE)

# QUANT_BOOL: run the post-training quantization cells.
# QOVERWRITE: replace the baseline `model` with the quantized one afterwards
#             (discouraged because of evaluation compatibility).
QUANT_BOOL = True
QOVERWRITE = False

def set_seed(seed=1):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy's global generator, PyTorch's default
    generator and all CUDA generators with ``seed``, then configures cuDNN for
    reproducible runs.

    Args:
        seed: Integer seed passed unchanged to each library (default ``1``).
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    # Reproducibility: request deterministic cuDNN kernels and switch off the
    # auto-tuner that benchmarks and picks algorithms at runtime.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# The explicit TF32 (tensor-core) opt-in was removed; the two switches below are
# kept commented out for reference, so PyTorch's own defaults apply.
# torch.backends.cudnn.allow_tf32 = True
# torch.backends.cuda.matmul.allow_tf32 = True

# Seed Python, NumPy and PyTorch (CPU + CUDA) and enable deterministic cuDNN.
set_seed(1)

Matplotlib created a temporary cache directory at C:\Users\krahf\AppData\Local\Temp\matplotlib-o_koq4e7 because the default path (C:\Users\krahf\.matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


cuda


Dataset: Intel Image Classification (Kaggle) — https://www.kaggle.com/datasets/puneet6060/intel-image-classification

In [5]:
# labels, values = zip(*Counter([item[1] for item in dataset.train_dataset]).items())
# fig = px.bar(x=labels, y=values, labels={'x': 'Categories', 'y': 'Counts'}, title='Distribution of Classes')
# fig.show()

| n | label |
| --- | --- |
| 0 | buildings |
| 1 | forest |
| 2 | glacier |
| 3 | mountain |
| 4 | sea |
| 5 | street |

# Interactive preview of items from the training split.
# NOTE(review): `dataset` is only created in the model-selection cell further
# down, so on a fresh kernel this cell must be run after that one.
NotebookPlotter.plot_dataset_item_interactive(dataset.train_dataset)

In [6]:
# Model selection:
#   1 = SqueezeNet 1.1, 2 = MobileNetV2, 3 = MobileNetV3 Small,
#   4 = MobileNetV3 Large, 5 = VisionTransformer Base 16
choice = 1
freezeLayer = False  # True: train only the newly created classification head
NUM_CLASSES = 6      # buildings, forest, glacier, mountain, sea, street

# Fail early with a clear message instead of a NameError on `model` further down.
if choice not in (1, 2, 3, 4, 5):
    raise ValueError(f"Unsupported model choice {choice!r}; expected an integer from 1 to 5.")


def _freeze_if_requested(net):
    """Disable gradients for all current parameters of ``net`` when ``freezeLayer`` is set.

    Called before the classification head is replaced, so the new head (created
    afterwards) keeps ``requires_grad=True`` and remains trainable.
    """
    if freezeLayer:
        for param in net.parameters():
            param.requires_grad = False


# The ViT variant is fed 384x384 images, every other model 150x150.
# NOTE(review): presumably because the SWAG ViT weights expect 384x384 input — confirm.
image_size = (384, 384) if choice == 5 else (150, 150)
dataset = IntelImageClassificationDataset(resize=image_size)

# SqueezeNet 1.1 (convolutional classification head)
if choice == 1:
    model = models.squeezenet1_1(weights=SqueezeNet1_1_Weights.DEFAULT)
    num_features = model.classifier[1].in_channels
    kernel_size = model.classifier[1].kernel_size
    _freeze_if_requested(model)
    model.classifier[1] = nn.Conv2d(num_features, NUM_CLASSES, kernel_size)

# MobileNetV2
elif choice == 2:
    model = models.mobilenet_v2(weights=MobileNet_V2_Weights.DEFAULT)
    num_features = model.classifier[1].in_features
    _freeze_if_requested(model)
    model.classifier[1] = nn.Linear(num_features, NUM_CLASSES)

# MobileNetV3 Small
elif choice == 3:
    model = models.mobilenet_v3_small(weights=MobileNet_V3_Small_Weights.DEFAULT)
    num_features = model.classifier[3].in_features
    _freeze_if_requested(model)
    model.classifier[3] = nn.Linear(num_features, NUM_CLASSES)

# MobileNetV3 Large
elif choice == 4:
    model = models.mobilenet_v3_large(weights=MobileNet_V3_Large_Weights.DEFAULT)
    num_features = model.classifier[3].in_features
    _freeze_if_requested(model)
    model.classifier[3] = nn.Linear(num_features, NUM_CLASSES)

# VisionTransformer Base 16
else:  # choice == 5 (validated above)
    model = models.vit_b_16(weights=ViT_B_16_Weights.IMAGENET1K_SWAG_E2E_V1)
    num_features = model.heads[0].in_features
    _freeze_if_requested(model)
    model.heads[0] = nn.Linear(num_features, NUM_CLASSES)

# Shuffled training batches and the trainer wrapping the selected model.
dataloader = DataLoader(dataset.train_dataset, batch_size=24, shuffle=True)
trainer = Trainer(model=model, lr=0.001)

In [7]:
# Optional (disabled): restore previously saved weights instead of training.
# NOTE(review): the checkpoint file name in the path below is empty — fill it in before enabling.
# model.load_state_dict(torch.load(f"checkpoints/.pt"))

# Run the trainer over the shuffled training loader for 10 epochs.
trainer.train(dataloader, epochs=10)

  0%|          | 0/10 [00:00<?, ?it/s]

In [8]:
# Post-training quantization (PT2E + XNNPACK quantizer).
# Calibrate model for quantization - subset of 200 examples
if QUANT_BOOL:
    # Keep the original model: nn.Module.to() and .eval() modify a module in
    # place, so a plain `qmodel = model` alias would move/alter the baseline.
    # Work on an independent deep copy instead.
    qmodel = copy.deepcopy(model)

    calibration_subset = Subset(dataset.train_dataset, list(range(200)))
    calibration_loader = DataLoader(calibration_subset, batch_size=24, shuffle=True)

    # Export and calibration are done on the CPU. (The former
    # `if not torch.device("cpu")` guard was removed: a torch.device object is
    # always truthy, so that branch could never run.)
    device = torch.device("cpu")
    qmodel.to(device)
    quantizer = XNNPACKQuantizer().set_global(get_symmetric_quantization_config())
    qmodel.eval()

    # One calibration batch (images only) serves as the example input for export.
    example_inputs = next(iter(calibration_loader))
    example_inputs = (example_inputs[0].to(device),)

    qmodel = torch.export.export_for_training(qmodel, example_inputs).module()
    qmodel = prepare_pt2e(qmodel, quantizer)

    # Calibration: forward passes only, so the inserted observers can record
    # activation statistics.
    with torch.no_grad():
        for images, _ in calibration_loader:
            images = images.to(device)
            qmodel(images)

    quantized_model = convert_pt2e(qmodel)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    quantized_model = torch.export.export_for_training(quantized_model, example_inputs).module() # Double export to remove unused weights
    quantized_model.to(device)

    print(f"Baseline model size: {get_model_size_mb(model):.3f} MB")
    print(f"Quantized model size: {get_model_size_mb(quantized_model):.3f} MB")


Baseline model size: 2.921 MB
Quantized model size: 0.752 MB


In [9]:
# Switch the baseline model to evaluation mode; as the cell's last expression,
# the returned module is also displayed (architecture printout below).
model.eval()

SqueezeNet(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
    (3): Fire(
      (squeeze): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (4): Fire(
      (squeeze): Conv2d(128, 16, kernel_size=(1, 1), stride=(1, 1))
      (squeeze_activation): ReLU(inplace=True)
      (expand1x1): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
      (expand1x1_activation): ReLU(inplace=True)
      (expand3x3): Conv2d(16, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (expand3x3_activation): ReLU(inplace=True)
    )
    (5): MaxPool2d

In [10]:
# Compare accuracy and wall-clock time of the quantized and the baseline model.
if QUANT_BOOL:
    # shuffle=False gives both models the same batches in the same order, so a
    # single loader is built once and reused for both evaluations.
    test_loader = DataLoader(dataset.test_dataset, batch_size=24, shuffle=False, pin_memory=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    acc_q, time_q = evaluate_time_acc_model(quantized_model, test_loader, device, profile_model=True, log_dir="runs/profiler/quant")
    acc_orig, time_orig = evaluate_time_acc_model(model, test_loader, device, profile_model=True, log_dir="runs/profiler/norm")

    print(f"Quantized Accuracy: {acc_q * 100:.2f}%, Time: {time_q:.2f}s")
    print(f"Original Accuracy: {acc_orig * 100:.2f}%, Time: {time_orig:.2f}s")
    # Observation: the quantized model is slightly less accurate and slower than
    # the baseline. The cause is not established yet; it may be overhead or an
    # error in the setup.
    # NOTE(review): the quantized model is timed first, so it may also absorb
    # one-time warm-up cost; and the converted model presumably still executes
    # explicit quantize/dequantize ops unless lowered to a backend — confirm.

# Optionally continue the notebook with the quantized model as `model`.
if QOVERWRITE and QUANT_BOOL:
    model = quantized_model


cuda
Quantized Accuracy: 84.50%, Time: 4.90s
Original Accuracy: 84.57%, Time: 2.92s


In [11]:
# Accuracy of `model` on the whole test split, evaluated as one stacked batch.
session = InferenceSession(model)

# Walk the dataset once and split it into images and labels; iterating it
# separately for each would load/transform every sample twice.
test_images, test_labels = zip(*dataset.test_dataset)

output = session(torch.stack(test_images))
Evaluator.acc(output, torch.tensor(test_labels)).item()


0.8489665389060974

In [12]:
# torch.save(model.state_dict(), f"checkpoints/{model.__class__.__name__}.pt")

## Initial Results for Model Selection

| model | accuracy | size |
| --- | --- | --- |
| ResNet18 | 0.87 | 44.7 MB |
| ResNet34 | 0.88 | 83.3 MB |
| MobileNet V2 | 0.91 | 13.6 MB |
| MobileNet V3 small | 0.90 | 9.8 MB |
| VGG19 | 0.83 | 548.1 MB |
| SqueezeNet 1.0 | 0.89 | 4.8 MB |
| DenseNet | 0.90 | 30.8 MB |
| EfficientNet B0 | 0.92 | 20.5 MB |
| ViT-b/16 | 0.73 | 330.3 MB |