## VGG Models

In [25]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, random_split
import numpy as np
import matplotlib.pyplot as plt
import torchvision
import torch.optim as optim

In [13]:
# Flip to False to force CPU execution even when a CUDA device is present.
USE_GPU = True

# CUDA is used only when it is both requested and actually available.
device = torch.device('cuda' if USE_GPU and torch.cuda.is_available() else 'cpu')

print('using device:', device)

using device: cuda


### Data Preparation

In [14]:
class VehicleDataset(Dataset):
    """Dataset backed by an ``.npz`` archive holding ``images`` and ``labels`` arrays.

    Both arrays are read fully into memory at construction time and the archive
    is closed again immediately afterwards.

    Args:
        path: Location of the ``.npz`` file. It must contain the keys
            ``"images"`` and ``"labels"``.
    """

    def __init__(self, path):
        # np.load on an .npz returns a lazy NpzFile that keeps the underlying
        # file open. Indexing it materialises the arrays, so the handle can be
        # released as soon as both arrays have been pulled out.
        with np.load(path) as data:
            self.images = data["images"]
            self.labels = data["labels"]
        print("Images shape:", self.images.shape)
        print("Labels shape:", self.labels.shape)

    def __len__(self):
        """Return the number of samples (length of the images array)."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at position ``idx`` as tensors.

        The image is converted to float32, its last axis is moved to the front
        (channel-last -> channel-first) and it is divided by 255. The label is
        returned as an int64 tensor.
        """
        image = self.images[idx]
        label = self.labels[idx]

        # NOTE(review): dividing by 255 presumes 8-bit pixel values - confirm
        # against how the archive was written.
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1) / 255.0
        label = torch.tensor(label, dtype=torch.long)

        return image, label

In [None]:
dataset = VehicleDataset('../dataset/vehicle_detection_dataset.npz')

batch_size = 32
split_seed = 42  # fixed so the train/val/test membership is the same on every run

# 70 / 20 / 10 split. The test split takes whatever remains so the three sizes
# always add up to len(dataset); truncating each fraction independently can
# leave samples unassigned, which makes random_split raise.
train_size = int(0.7 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = random_split(
    dataset,
    [train_size, val_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

Images shape: (17760, 64, 64, 3)
Labels shape: (17760,)


### VGG Models

In [20]:
class BasicBlock(nn.Module):
    """A 3x3 convolution followed by an in-place ReLU.

    The convolution uses padding 1, so height and width are preserved, and it
    has no bias term. No normalisation layer is applied.

    Args:
        in_channels: Number of channels in the input feature map.
        out_channels: Number of channels produced by the convolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=3,
            padding=1,
            bias=False,
        )
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Apply the convolution and then the ReLU to ``x``."""
        return self.relu(self.conv(x))

In [28]:
class VGG11(nn.Module):
    """VGG-11-style network producing a single logit per image.

    Eight conv+ReLU ``BasicBlock`` layers are arranged in five stages, each
    stage ending in a 2x2 max-pool, followed by a three-layer fully connected
    head. The head's first layer expects ``512 * 2 * 2`` features, i.e. a 2x2
    map after the five halvings, which corresponds to 64x64 inputs.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 64 channels
        self.layer1 = BasicBlock(3, 64)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        # Stage 2: 64 -> 128 channels
        self.layer2 = BasicBlock(64, 128)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        # Stage 3: 128 -> 256 channels
        self.layer3 = BasicBlock(128, 256)
        self.layer4 = BasicBlock(256, 256)
        self.maxpool3 = nn.MaxPool2d(2, 2)
        # Stage 4: 256 -> 512 channels
        self.layer5 = BasicBlock(256, 512)
        self.layer6 = BasicBlock(512, 512)
        self.maxpool4 = nn.MaxPool2d(2, 2)
        # Stage 5: 512 channels throughout
        self.layer7 = BasicBlock(512, 512)
        self.layer8 = BasicBlock(512, 512)
        self.maxpool5 = nn.MaxPool2d(2, 2)
        # Fully connected head ending in one raw logit (no sigmoid here).
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 1),
        )

    def forward(self, x):
        """Run the feature stages in order, flatten, and apply the classifier head."""
        feature_stages = (
            self.layer1, self.maxpool1,
            self.layer2, self.maxpool2,
            self.layer3, self.layer4, self.maxpool3,
            self.layer5, self.layer6, self.maxpool4,
            self.layer7, self.layer8, self.maxpool5,
        )
        for stage in feature_stages:
            x = stage(x)
        # Collapse everything except the batch dimension.
        flat = x.view(x.shape[0], -1)
        return self.classifier(flat)

### Training

In [40]:
model = VGG11().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Train for a fixed number of epochs, running a validation pass after each one.
num_epochs = 5
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images = images.to(device)
        # Float targets with the same (batch_size, 1) shape as the logits.
        labels = labels.to(device).float().unsqueeze(1)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so weight it by the batch size
        # to get a per-sample average over the epoch below.
        running_loss += loss.item() * images.shape[0]
        preds = torch.sigmoid(outputs).gt(0.5)
        correct += torch.eq(preds.float(), labels).sum().item()
        total += labels.shape[0]

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Accuracy: {epoch_acc:.4f}")

    # ---- validation pass (no gradients) ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).float().unsqueeze(1)
            outputs = model(images)
            preds = torch.sigmoid(outputs).gt(0.5)
            correct += torch.eq(preds, labels).sum().item()
            total += labels.shape[0]
    val_acc = correct / total
    print(f'Validation Acc: {val_acc:.4f}')

Epoch 1/5 - Loss: 0.5200 - Accuracy: 0.8158
Validation Acc: 0.9037
Epoch 2/5 - Loss: 0.3265 - Accuracy: 0.8983
Validation Acc: 0.8449
Epoch 3/5 - Loss: 0.2243 - Accuracy: 0.9158
Validation Acc: 0.9578
Epoch 4/5 - Loss: 0.1546 - Accuracy: 0.9570
Validation Acc: 0.6498
Epoch 5/5 - Loss: 0.1810 - Accuracy: 0.9344
Validation Acc: 0.9507


### Testing

In [41]:
def test_model(model, test_loader, device):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0

    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device).float().unsqueeze(1)
            outputs = model(images)
            preds = torch.sigmoid(outputs) > 0.5  # Threshold at 0.5

            correct += (preds == labels).sum().item()
            total += labels.size(0)

            all_preds.append(preds.cpu())
            all_labels.append(labels.cpu())

    accuracy = correct / total
    print(f'Test Accuracy: {accuracy:.4f}')

    # Concatenate all predictions and labels if needed for further analysis
    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)
    
    return accuracy, all_preds, all_labels

In [42]:
# Evaluate whatever is currently bound to `model` on the test split (the VGG11
# trained above, if the cells were run in order). The call is the cell's last
# expression, so the returned (accuracy, preds, labels) tuple is echoed below.
test_model(model, test_loader, device)

Test Accuracy: 0.9476


(0.9476351351351351,
 tensor([[ True],
         [ True],
         [False],
         ...,
         [False],
         [ True],
         [False]]),
 tensor([[1.],
         [1.],
         [0.],
         ...,
         [0.],
         [1.],
         [0.]]))

In [45]:
class VGG13(nn.Module):
    """VGG-13-style network producing a single logit per image.

    Ten conv+ReLU ``BasicBlock`` layers (two per stage) are arranged in five
    stages, each ending in a 2x2 max-pool, followed by a three-layer fully
    connected head. The head's first layer expects ``512 * 2 * 2`` features,
    i.e. a 2x2 map after the five halvings, which corresponds to 64x64 inputs.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 64 channels
        self.layer1 = BasicBlock(3, 64)
        self.layer11 = BasicBlock(64, 64)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        # Stage 2: 64 -> 128 channels
        self.layer2 = BasicBlock(64, 128)
        self.layer22 = BasicBlock(128, 128)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        # Stage 3: 128 -> 256 channels
        self.layer3 = BasicBlock(128, 256)
        self.layer4 = BasicBlock(256, 256)
        self.maxpool3 = nn.MaxPool2d(2, 2)
        # Stage 4: 256 -> 512 channels
        self.layer5 = BasicBlock(256, 512)
        self.layer6 = BasicBlock(512, 512)
        self.maxpool4 = nn.MaxPool2d(2, 2)
        # Stage 5: 512 channels throughout
        self.layer7 = BasicBlock(512, 512)
        self.layer8 = BasicBlock(512, 512)
        self.maxpool5 = nn.MaxPool2d(2, 2)
        # Fully connected head ending in one raw logit (no sigmoid here).
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 1),
        )

    def forward(self, x):
        """Run the feature stages in order, flatten, and apply the classifier head."""
        feature_stages = (
            self.layer1, self.layer11, self.maxpool1,
            self.layer2, self.layer22, self.maxpool2,
            self.layer3, self.layer4, self.maxpool3,
            self.layer5, self.layer6, self.maxpool4,
            self.layer7, self.layer8, self.maxpool5,
        )
        for stage in feature_stages:
            x = stage(x)
        # Collapse everything except the batch dimension.
        flat = x.view(x.shape[0], -1)
        return self.classifier(flat)

In [46]:
model = VGG13().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Train for a fixed number of epochs, running a validation pass after each one.
num_epochs = 5
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images = images.to(device)
        # Float targets with the same (batch_size, 1) shape as the logits.
        labels = labels.to(device).float().unsqueeze(1)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so weight it by the batch size
        # to get a per-sample average over the epoch below.
        running_loss += loss.item() * images.shape[0]
        preds = torch.sigmoid(outputs).gt(0.5)
        correct += torch.eq(preds.float(), labels).sum().item()
        total += labels.shape[0]

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Accuracy: {epoch_acc:.4f}")

    # ---- validation pass (no gradients) ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).float().unsqueeze(1)
            outputs = model(images)
            preds = torch.sigmoid(outputs).gt(0.5)
            correct += torch.eq(preds, labels).sum().item()
            total += labels.shape[0]
    val_acc = correct / total
    print(f'Validation Acc: {val_acc:.4f}')

Epoch 1/5 - Loss: 0.6067 - Accuracy: 0.6717
Validation Acc: 0.8260
Epoch 2/5 - Loss: 0.2952 - Accuracy: 0.8718
Validation Acc: 0.8843
Epoch 3/5 - Loss: 0.3351 - Accuracy: 0.8630
Validation Acc: 0.8865
Epoch 4/5 - Loss: 0.3055 - Accuracy: 0.8889
Validation Acc: 0.9096
Epoch 5/5 - Loss: 0.3179 - Accuracy: 0.8830
Validation Acc: 0.9060


In [47]:
# Evaluate whatever is currently bound to `model` on the test split (the VGG13
# trained in the previous cell, if the cells were run in order). The returned
# (accuracy, preds, labels) tuple is echoed as the cell output.
test_model(model, test_loader, device)

Test Accuracy: 0.9189


(0.918918918918919,
 tensor([[ True],
         [ True],
         [False],
         ...,
         [False],
         [ True],
         [False]]),
 tensor([[1.],
         [1.],
         [0.],
         ...,
         [0.],
         [1.],
         [0.]]))

In [48]:
class VGG16(nn.Module):
    """VGG-16-style network producing a single logit per image.

    Thirteen conv+ReLU ``BasicBlock`` layers (2, 2, 3, 3, 3 per stage) are
    arranged in five stages, each ending in a 2x2 max-pool, followed by a
    three-layer fully connected head. The head's first layer expects
    ``512 * 2 * 2`` features, i.e. a 2x2 map after the five halvings, which
    corresponds to 64x64 inputs.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 64 channels
        self.layer1 = BasicBlock(3, 64)
        self.layer11 = BasicBlock(64, 64)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        # Stage 2: 64 -> 128 channels
        self.layer2 = BasicBlock(64, 128)
        self.layer22 = BasicBlock(128, 128)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        # Stage 3: 128 -> 256 channels
        self.layer3 = BasicBlock(128, 256)
        self.layer4 = BasicBlock(256, 256)
        self.layer41 = BasicBlock(256, 256)
        self.maxpool3 = nn.MaxPool2d(2, 2)
        # Stage 4: 256 -> 512 channels
        self.layer5 = BasicBlock(256, 512)
        self.layer6 = BasicBlock(512, 512)
        self.layer61 = BasicBlock(512, 512)
        self.maxpool4 = nn.MaxPool2d(2, 2)
        # Stage 5: 512 channels throughout
        self.layer7 = BasicBlock(512, 512)
        self.layer8 = BasicBlock(512, 512)
        self.layer81 = BasicBlock(512, 512)
        self.maxpool5 = nn.MaxPool2d(2, 2)
        # Fully connected head ending in one raw logit (no sigmoid here).
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 1),
        )

    def forward(self, x):
        """Run the feature stages in order, flatten, and apply the classifier head."""
        feature_stages = (
            self.layer1, self.layer11, self.maxpool1,
            self.layer2, self.layer22, self.maxpool2,
            self.layer3, self.layer4, self.layer41, self.maxpool3,
            self.layer5, self.layer6, self.layer61, self.maxpool4,
            self.layer7, self.layer8, self.layer81, self.maxpool5,
        )
        for stage in feature_stages:
            x = stage(x)
        # Collapse everything except the batch dimension.
        flat = x.view(x.shape[0], -1)
        return self.classifier(flat)

In [49]:
model = VGG16().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Train for a fixed number of epochs, running a validation pass after each one.
num_epochs = 5
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images = images.to(device)
        # Float targets with the same (batch_size, 1) shape as the logits.
        labels = labels.to(device).float().unsqueeze(1)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so weight it by the batch size
        # to get a per-sample average over the epoch below.
        running_loss += loss.item() * images.shape[0]
        preds = torch.sigmoid(outputs).gt(0.5)
        correct += torch.eq(preds.float(), labels).sum().item()
        total += labels.shape[0]

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Accuracy: {epoch_acc:.4f}")

    # ---- validation pass (no gradients) ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).float().unsqueeze(1)
            outputs = model(images)
            preds = torch.sigmoid(outputs).gt(0.5)
            correct += torch.eq(preds, labels).sum().item()
            total += labels.shape[0]
    val_acc = correct / total
    print(f'Validation Acc: {val_acc:.4f}')

Epoch 1/5 - Loss: 0.6998 - Accuracy: 0.5087
Validation Acc: 0.4975
Epoch 2/5 - Loss: 0.3252 - Accuracy: 0.8624
Validation Acc: 0.9265
Epoch 3/5 - Loss: 0.2773 - Accuracy: 0.9125
Validation Acc: 0.9659
Epoch 4/5 - Loss: 0.2573 - Accuracy: 0.9258
Validation Acc: 0.9552
Epoch 5/5 - Loss: 0.6558 - Accuracy: 0.9156
Validation Acc: 0.8803


In [50]:
# Evaluate whatever is currently bound to `model` on the test split (the VGG16
# trained in the previous cell, if the cells were run in order). The returned
# (accuracy, preds, labels) tuple is echoed as the cell output.
test_model(model, test_loader, device)

Test Accuracy: 0.8778


(0.8778153153153153,
 tensor([[ True],
         [ True],
         [False],
         ...,
         [False],
         [ True],
         [False]]),
 tensor([[1.],
         [1.],
         [0.],
         ...,
         [0.],
         [1.],
         [0.]]))

In [51]:
class VGG19(nn.Module):
    """VGG-19-style network producing a single logit per image.

    Sixteen conv+ReLU ``BasicBlock`` layers (2, 2, 4, 4, 4 per stage) are
    arranged in five stages, each ending in a 2x2 max-pool, followed by a
    three-layer fully connected head. The head's first layer expects
    ``512 * 2 * 2`` features, i.e. a 2x2 map after the five halvings, which
    corresponds to 64x64 inputs.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 3 -> 64 channels
        self.layer1 = BasicBlock(3, 64)
        self.layer11 = BasicBlock(64, 64)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        # Stage 2: 64 -> 128 channels
        self.layer2 = BasicBlock(64, 128)
        self.layer22 = BasicBlock(128, 128)
        self.maxpool2 = nn.MaxPool2d(2, 2)
        # Stage 3: 128 -> 256 channels
        self.layer3 = BasicBlock(128, 256)
        self.layer4 = BasicBlock(256, 256)
        self.layer41 = BasicBlock(256, 256)
        self.layer42 = BasicBlock(256, 256)
        self.maxpool3 = nn.MaxPool2d(2, 2)
        # Stage 4: 256 -> 512 channels
        self.layer5 = BasicBlock(256, 512)
        self.layer6 = BasicBlock(512, 512)
        self.layer61 = BasicBlock(512, 512)
        self.layer62 = BasicBlock(512, 512)
        self.maxpool4 = nn.MaxPool2d(2, 2)
        # Stage 5: 512 channels throughout
        self.layer7 = BasicBlock(512, 512)
        self.layer8 = BasicBlock(512, 512)
        self.layer81 = BasicBlock(512, 512)
        self.layer82 = BasicBlock(512, 512)
        self.maxpool5 = nn.MaxPool2d(2, 2)
        # Fully connected head ending in one raw logit (no sigmoid here).
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, 1),
        )

    def forward(self, x):
        """Run the feature stages in order, flatten, and apply the classifier head."""
        feature_stages = (
            self.layer1, self.layer11, self.maxpool1,
            self.layer2, self.layer22, self.maxpool2,
            self.layer3, self.layer4, self.layer41, self.layer42, self.maxpool3,
            self.layer5, self.layer6, self.layer61, self.layer62, self.maxpool4,
            self.layer7, self.layer8, self.layer81, self.layer82, self.maxpool5,
        )
        for stage in feature_stages:
            x = stage(x)
        # Collapse everything except the batch dimension.
        flat = x.view(x.shape[0], -1)
        return self.classifier(flat)

In [52]:
model = VGG19().to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Train for a fixed number of epochs, running a validation pass after each one.
num_epochs = 5
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    running_loss, correct, total = 0.0, 0, 0

    for images, labels in train_loader:
        images = images.to(device)
        # Float targets with the same (batch_size, 1) shape as the logits.
        labels = labels.to(device).float().unsqueeze(1)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so weight it by the batch size
        # to get a per-sample average over the epoch below.
        running_loss += loss.item() * images.shape[0]
        preds = torch.sigmoid(outputs).gt(0.5)
        correct += torch.eq(preds.float(), labels).sum().item()
        total += labels.shape[0]

    epoch_loss = running_loss / total
    epoch_acc = correct / total

    print(f"Epoch {epoch+1}/{num_epochs} - Loss: {epoch_loss:.4f} - Accuracy: {epoch_acc:.4f}")

    # ---- validation pass (no gradients) ----
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images = images.to(device)
            labels = labels.to(device).float().unsqueeze(1)
            outputs = model(images)
            preds = torch.sigmoid(outputs).gt(0.5)
            correct += torch.eq(preds, labels).sum().item()
            total += labels.shape[0]
    val_acc = correct / total
    print(f'Validation Acc: {val_acc:.4f}')

Epoch 1/5 - Loss: 0.9064 - Accuracy: 0.5037
Validation Acc: 0.4975
Epoch 2/5 - Loss: 0.6933 - Accuracy: 0.5038
Validation Acc: 0.4975
Epoch 3/5 - Loss: 0.6932 - Accuracy: 0.4980
Validation Acc: 0.4975
Epoch 4/5 - Loss: 0.6931 - Accuracy: 0.5059
Validation Acc: 0.4975
Epoch 5/5 - Loss: 0.6932 - Accuracy: 0.5027
Validation Acc: 0.4975


In [53]:
# Evaluate whatever is currently bound to `model` on the test split (the VGG19
# trained in the previous cell, if the cells were run in order). The returned
# (accuracy, preds, labels) tuple is echoed as the cell output.
test_model(model, test_loader, device)

Test Accuracy: 0.5135


(0.5135135135135135,
 tensor([[False],
         [False],
         [False],
         ...,
         [False],
         [False],
         [False]]),
 tensor([[1.],
         [1.],
         [0.],
         ...,
         [0.],
         [1.],
         [0.]]))