## VGG Models

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt
import torchvision
import torch.optim as optim

In [2]:
USE_GPU = True

if USE_GPU and torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print('using device:', device)

using device: cuda


### Data Preparation

In [24]:
class VehicleDataset(Dataset):
    
    def __init__(self, path):
        data = np.load(path)
        self.images = data["images"]
        self.labels = data["labels"]
        print("Images shape:", self.images.shape)
        print("Labels shape:", self.labels.shape)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        image = self.images[idx]
        label = self.labels[idx]

        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1) / 255.0
        label = torch.tensor(label, dtype=torch.long)

        mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
        std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
        image = (image - mean) / std
        
        return image, label

In [25]:
dataset = VehicleDataset('../dataset/stanford_cars_dataset.npz')

batch_size = 32
train_size = int(0.7 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = int(len(dataset) - train_size - val_size)

train_dataset, val_dataset, test_dataset = random_split(dataset, [train_size, val_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Images shape: (8144, 64, 64, 3)
Labels shape: (8144, 1)


### VGG Models

In [26]:
class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(BasicBlock, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.relu = nn.ReLU(inplace=True)
    def forward(self, x):
        x = self.conv(x)
        x = self.relu(x)
        return x

In [50]:
class VGG11(nn.Module):
    def __init__(self):
        super(VGG11, self).__init__()
        self.layer1 = BasicBlock(3, 64)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer2 = BasicBlock(64, 128)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer3 = BasicBlock(128, 256)
        self.layer4 = BasicBlock(256, 256)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer5 = BasicBlock(256, 512)
        self.layer6 = BasicBlock(512, 512)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer7 = BasicBlock(512, 512)
        self.layer8 = BasicBlock(512, 512)
        self.maxpool5 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.7),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.7),
            nn.Linear(4096, 196)
        )
    def forward(self, x):
        x = self.layer1(x)
        x = self.maxpool1(x)
        x = self.layer2(x)
        x = self.maxpool2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.maxpool3(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.maxpool4(x)
        x = self.layer7(x)
        x = self.layer8(x)
        x = self.maxpool5(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

### Training

In [51]:
# model = VGG11().to(device)
def train_model(model):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    num_epochs = 5
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
    
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels[:,0].to(device) - 1

            # Forward
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Metrics
            running_loss += loss.item() * images.size(0)
            preds = torch.argmax(outputs, dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        epoch_loss = running_loss / total
        epoch_acc = correct / total

        print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Accuracy: {epoch_acc:.4f}")
    
        model.eval()
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                images = images.to(device)
                labels = labels[:,0].to(device) - 1
            
                outputs = model(images)
                preds = torch.argmax(outputs, dim=1)
                correct += (preds == labels).sum().item()
                total += labels.size(0)
        val_acc = correct / total
        print(f'Validation Acc: {val_acc:.4f}')
        # return val_acc

In [52]:
model = VGG11().to(device)
train_model(model)

Epoch 1/5 - Loss: 5.3065 - Accuracy: 0.0079
Validation Acc: 0.0025
Epoch 2/5 - Loss: 5.2797 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 3/5 - Loss: 5.2776 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 4/5 - Loss: 5.2767 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 5/5 - Loss: 5.2750 - Accuracy: 0.0098
Validation Acc: 0.0025


### Testing

In [30]:
def test_model(model, test_loader, device):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels[:,0].to(device) - 1 
            
            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)

            correct += (preds == labels).sum().item()
            total += labels.size(0)

            all_preds.append(preds.cpu())
            all_labels.append(labels.cpu())

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy:.4f}')

    # Concatenate all predictions and labels if needed for further analysis
    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)
    
    return accuracy, all_preds, all_labels

In [31]:
test_model(model, test_loader, device)

Test Accuracy: 0.0098


(0.00980392156862745,
 tensor([118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 1

In [34]:
class VGG13(nn.Module):
    def __init__(self):
        super(VGG13, self).__init__()
        self.layer1 = BasicBlock(3, 64)
        self.layer11 = BasicBlock(64, 64)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer2 = BasicBlock(64, 128)
        self.layer22 = BasicBlock(128, 128)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer3 = BasicBlock(128, 256)
        self.layer4 = BasicBlock(256, 256)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer5 = BasicBlock(256, 512)
        self.layer6 = BasicBlock(512, 512)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer7 = BasicBlock(512, 512)
        self.layer8 = BasicBlock(512, 512)
        self.maxpool5 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 196)
        )
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer11(x)
        x = self.maxpool1(x)
        x = self.layer2(x)
        x = self.layer22(x)
        x = self.maxpool2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.maxpool3(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.maxpool4(x)
        x = self.layer7(x)
        x = self.layer8(x)
        x = self.maxpool5(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [35]:
model = VGG13().to(device)
train_model(model)

Epoch 1/5 - Loss: 5.2804 - Accuracy: 0.0077
Validation Acc: 0.0025
Epoch 2/5 - Loss: 5.2772 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 3/5 - Loss: 5.2761 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 4/5 - Loss: 5.2752 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 5/5 - Loss: 5.2744 - Accuracy: 0.0098
Validation Acc: 0.0025


In [36]:
test_model(model, test_loader, device)

Test Accuracy: 0.0098


(0.00980392156862745,
 tensor([118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 1

In [37]:
class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.layer1 = BasicBlock(3, 64)
        self.layer11 = BasicBlock(64, 64)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer2 = BasicBlock(64, 128)
        self.layer22 = BasicBlock(128, 128)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer3 = BasicBlock(128, 256)
        self.layer4 = BasicBlock(256, 256)
        self.layer41 = BasicBlock(256, 256)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer5 = BasicBlock(256, 512)
        self.layer6 = BasicBlock(512, 512)
        self.layer61 = BasicBlock(512, 512)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer7 = BasicBlock(512, 512)
        self.layer8 = BasicBlock(512, 512)
        self.layer81 = BasicBlock(512, 512)
        self.maxpool5 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 196)
        )
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer11(x)
        x = self.maxpool1(x)
        x = self.layer2(x)
        x = self.layer22(x)
        x = self.maxpool2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer41(x)
        x = self.maxpool3(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer61(x)
        x = self.maxpool4(x)
        x = self.layer7(x)
        x = self.layer8(x)
        x = self.layer81(x)
        x = self.maxpool5(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [38]:
model = VGG16().to(device)
train_model(model)

Epoch 1/5 - Loss: 5.2812 - Accuracy: 0.0075
Validation Acc: 0.0025
Epoch 2/5 - Loss: 5.2773 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 3/5 - Loss: 5.2763 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 4/5 - Loss: 5.2754 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 5/5 - Loss: 5.2738 - Accuracy: 0.0098
Validation Acc: 0.0025


In [39]:
test_model(model, test_loader, device)

Test Accuracy: 0.0098


(0.00980392156862745,
 tensor([118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 1

In [42]:
class VGG19(nn.Module):
    def __init__(self):
        super(VGG19, self).__init__()
        self.layer1 = BasicBlock(3, 64)
        self.layer11 = BasicBlock(64, 64)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer2 = BasicBlock(64, 128)
        self.layer22 = BasicBlock(128, 128)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer3 = BasicBlock(128, 256)
        self.layer4 = BasicBlock(256, 256)
        self.layer41 = BasicBlock(256, 256)
        self.layer42 = BasicBlock(256, 256)
        self.maxpool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer5 = BasicBlock(256, 512)
        self.layer6 = BasicBlock(512, 512)
        self.layer61 = BasicBlock(512, 512)
        self.layer62 = BasicBlock(512, 512)
        self.maxpool4 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.layer7 = BasicBlock(512, 512)
        self.layer8 = BasicBlock(512, 512)
        self.layer81 = BasicBlock(512, 512)
        self.layer82 = BasicBlock(512, 512)
        self.maxpool5 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 196)
        )
    def forward(self, x):
        x = self.layer1(x)
        x = self.layer11(x)
        x = self.maxpool1(x)
        x = self.layer2(x)
        x = self.layer22(x)
        x = self.maxpool2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer41(x)
        x = self.layer42(x)
        x = self.maxpool3(x)
        x = self.layer5(x)
        x = self.layer6(x)
        x = self.layer61(x)
        x = self.layer62(x)
        x = self.maxpool4(x)
        x = self.layer7(x)
        x = self.layer8(x)
        x = self.layer81(x)
        x = self.layer82(x)
        x = self.maxpool5(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

In [43]:
model = VGG19().to(device)
train_model(model)

Epoch 1/5 - Loss: 5.2808 - Accuracy: 0.0053
Validation Acc: 0.0025
Epoch 2/5 - Loss: 5.2774 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 3/5 - Loss: 5.2764 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 4/5 - Loss: 5.2754 - Accuracy: 0.0098
Validation Acc: 0.0025
Epoch 5/5 - Loss: 5.2745 - Accuracy: 0.0098
Validation Acc: 0.0025


In [44]:
test_model(model, test_loader, device)

Test Accuracy: 0.0098


(0.00980392156862745,
 tensor([118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118, 118,
         118, 118, 118, 118, 1