In [2]:
import json
import os
import random
import time

import matplotlib.pyplot as plt
import torch
import torch.optim as optim
import torchvision.models as models
import wandb
from loguru import logger
from PIL import Image
from torch import nn
from torch.utils.data import DataLoader
from torchinfo import summary
from torchvision import datasets, transforms
from torchvision.datasets import ImageFolder

from src.config import PROCESSED_DATA_DIR, RAW_DATA_DIR
from utils import train_validate_model, modify_model_output, test_model

[32m2024-07-25 14:26:29.922[0m | [1mINFO    [0m | [36msrc.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: C:\Git\hamburger-hotdog-pizza-classifier[0m


In [18]:
# Start a Weights & Biases run with default settings (no project, name or
# config is passed here).
wandb.init()

[34m[1mwandb[0m: Currently logged in as: [33mdtiourine[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [4]:
# Load the object pickled at models/vgg16.pth and call .eval() on it, which
# requires that object to be a full nn.Module.
# NOTE(review): a later cell passes torch.load() of what looks like the same
# file to load_state_dict(), which expects a state dict instead of a module.
# The file contents appear to have changed between the two executions —
# confirm which format is current before relying on this cell.
model = torch.load('models/vgg16.pth')
model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [13]:
# Rebuild the VGG16 architecture with a 3-output final layer and restore the
# weights stored in models/vgg16.pth.
#
# weights=None: load_state_dict() is strict by default, so every parameter is
# overwritten by the checkpoint below. Fetching the pretrained ImageNet
# weights first would only cost a large download.
model = models.vgg16(weights=None)
model.classifier[6] = nn.Linear(model.classifier[6].in_features, 3)

filename = 'models/vgg16.pth'
full_path = os.path.join(os.getcwd(), filename)

# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# machine; weights_only=True restricts unpickling to tensors and plain
# containers instead of arbitrary Python objects.
model.load_state_dict(
    torch.load(full_path, map_location='cpu', weights_only=True)
)

<All keys matched successfully>

In [14]:
# Summarize the network for one 3-channel 224x224 image, i.e. an input of
# shape (batch=1, channels=3, height=224, width=224).
summary(model, input_size=(1, 3, 224, 224))

Layer (type:depth-idx)                   Output Shape              Param #
VGG                                      [1, 3]                    --
├─Sequential: 1-1                        [1, 512, 7, 7]            --
│    └─Conv2d: 2-1                       [1, 64, 224, 224]         1,792
│    └─ReLU: 2-2                         [1, 64, 224, 224]         --
│    └─Conv2d: 2-3                       [1, 64, 224, 224]         36,928
│    └─ReLU: 2-4                         [1, 64, 224, 224]         --
│    └─MaxPool2d: 2-5                    [1, 64, 112, 112]         --
│    └─Conv2d: 2-6                       [1, 128, 112, 112]        73,856
│    └─ReLU: 2-7                         [1, 128, 112, 112]        --
│    └─Conv2d: 2-8                       [1, 128, 112, 112]        147,584
│    └─ReLU: 2-9                         [1, 128, 112, 112]        --
│    └─MaxPool2d: 2-10                   [1, 128, 56, 56]          --
│    └─Conv2d: 2-11                      [1, 256, 56, 56]          29

# Experiment 1

Let's aim to achieve 99% accuracy on the train set by training for more epochs. Then we will address overfitting on the validation set.

In [16]:
# Dataset folders: one sub-directory per split under the processed-data root.
image_path = PROCESSED_DATA_DIR / "pizza_hamburger_hotdog_20_percent"
train_dir = image_path / 'train'
test_dir = image_path / 'test'
valid_dir = image_path / 'valid'

batch_size = 64

# Training images are resized and randomly flipped (augmentation).
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# Validation and test images get deterministic preprocessing only. Random
# augmentation here would make the reported accuracies vary between runs and
# would evaluate on altered images.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Kept so that any cell still referring to `transform` keeps working.
transform = train_transform

# NOTE(review): no transforms.Normalize step is applied — confirm this matches
# the preprocessing used when models/vgg16.pth was produced.
train_data = ImageFolder(train_dir, transform=train_transform)
valid_data = ImageFolder(valid_dir, transform=eval_transform)
test_data = ImageFolder(test_dir, transform=eval_transform)

# Only the training loader is shuffled; evaluation order does not need to be
# random, and a fixed order keeps evaluation reproducible.
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.3)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [22]:
# Experiment 1: run 4 epochs of training/validation with the current model,
# loaders, loss and optimizer.
train_validate_model(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=4,
    model_save_path='models/finetuned_vgg16_model.pth',
)

Overall Training Progress: 100%|██████████| 4/4 [00:13<00:00,  3.44s/it, Best Val Accuracy=89.44%, Current Train Accuracy=99.63%, Current Val Accuracy=89.44%]


# Experiment 2

The model achieves 99.63% accuracy on the train set and 89% on the val set, so let's address this overfitting and aim for 95% accuracy on the val set. Let's try this by adding regularization.

In [23]:
# Experiment 2: same loss and SGD settings as before, plus weight decay
# (L2 penalty) as the regularizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    model.parameters(),
    lr=0.001,
    momentum=0.3,
    weight_decay=1e-2,
)

In [24]:
# Experiment 2: run 10 epochs with the weight-decay optimizer.
# NOTE(review): `model` is the same object that was trained in Experiment 1,
# so this run presumably continues from those weights rather than from the
# original checkpoint. It also writes to the same model_save_path as
# Experiment 1 — confirm both are intended.
train_validate_model(
    model=model,
    train_loader=train_loader,
    valid_loader=valid_loader,
    criterion=criterion,
    optimizer=optimizer,
    device=device,
    num_epochs=10,
    model_save_path='models/finetuned_vgg16_model.pth',
)

Overall Training Progress: 100%|██████████| 10/10 [00:36<00:00,  3.66s/it, Best Val Accuracy=90.56%, Current Train Accuracy=100.00%, Current Val Accuracy=90.00%]
