In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, random_split

In [2]:
# Prefer the GPU whenever CUDA is usable; otherwise everything runs on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

cuda


### Preparing dataset for model training
Define a `Dataset` class that loads image–label pairs, and `DataLoader`s that feed them to training in batches.

In [7]:
from torch.utils.data import Dataset
class GTSRBImageDataset(Dataset):
    """Map-style dataset of images listed in a CSV label file.

    The CSV must provide a ``Path`` column (image file path, joined onto
    ``file_dir``) and a ``Labels`` column (the class label of that image).

    Args:
        label_file: Path of the CSV file listing the images and their labels.
        file_dir: Directory prepended to every entry of the ``Path`` column.
        transform: Optional callable applied to each loaded image.
    """

    def __init__(self, label_file, file_dir = "", transform = None):
        self.df = pd.read_csv(label_file)
        self.labels = self.df["Labels"]
        self.file_names = self.df["Path"]
        self.file_dir = file_dir
        self.transform = transform

    def __len__(self):
        """Return the number of labelled images."""
        return len(self.labels)

    def __getitem__(self, index):
        """Load the image and label stored at position ``index``.

        Returns:
            An ``(img, label)`` pair; ``img`` is whatever ``transform``
            returns, or the raw OpenCV array when no transform is set.

        Raises:
            FileNotFoundError: If OpenCV could not read the image file.
            IndexError: If ``index`` is out of range.
        """
        # Positional access stays correct even when the Series index is not
        # the default 0..n-1 range (e.g. after filtering the frame).
        file_path = os.path.join(self.file_dir, self.file_names.iloc[index])

        # cv2.imread reports failure by returning None instead of raising, so
        # fail loudly here rather than deep inside the transform or collate.
        # NOTE: OpenCV returns channels in BGR order; that is left unchanged.
        img = cv2.imread(file_path)
        if img is None:
            raise FileNotFoundError(f"Could not read image file: {file_path}")

        label = self.labels.iloc[index]
        if self.transform:
            img = self.transform(img)
        return img, label

In [8]:
from torchvision.transforms import ToTensor

# Every image is converted to a tensor by ToTensor as it is loaded.
transform = ToTensor()
dataset = GTSRBImageDataset(label_file="labels.csv", transform=transform)

In [9]:
from torch.utils.data import random_split

# Seed the split so the same 70/30 train/test partition is produced on every
# run; with an unseeded split the reported test metrics are not reproducible.
train, test = random_split(
    dataset, [0.7, 0.3], generator=torch.Generator().manual_seed(42)
)

training_loader = DataLoader(train, batch_size=32, shuffle=True)
# Evaluation does not depend on sample order, so the test set is not shuffled.
test_loader = DataLoader(test, batch_size=32, shuffle=False)

### Test Model
Setting up a simple CNN model to test training on the dataset

In [10]:
import torch.nn.functional as F


class SimpleCNN(nn.Module):
    """Small three-stage convolutional classifier for 3-channel images.

    Each stage is ``Conv2d(3x3, padding=1) -> BatchNorm2d -> ReLU ->
    MaxPool2d(2)``, so every stage halves the spatial size. The pooled
    features are flattened and passed through two fully connected layers.

    Args:
        num_classes: Number of output logits.
        input_size: Spatial size of the input images, either a single int
            for square images or a ``(height, width)`` pair. The default of
            64 reproduces the original ``128 * 8 * 8`` classifier input.

    Raises:
        ValueError: If the input is too small to survive three poolings.
    """

    def __init__(self, num_classes=10, input_size=64):
        super().__init__()

        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size

        # Three 2x2/stride-2 poolings with floor rounding shrink each side by
        # a factor of 8 overall.
        feat_h, feat_w = height // 8, width // 8
        if feat_h < 1 or feat_w < 1:
            raise ValueError(
                f"input_size {input_size!r} is too small: each side must be at least 8"
            )

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(128 * feat_h * feat_w, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for ``x``."""
        x = self.pool(F.relu(self.bn1(self.conv1(x))))
        x = self.pool(F.relu(self.bn2(self.conv2(x))))
        x = self.pool(F.relu(self.bn3(self.conv3(x))))

        # Keep the batch dimension, collapse channels and spatial dims.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)

        return x


### Helper functions
Preparing helper functions for training and validation

In [11]:
from tqdm import tqdm
class AverageMeter:
    """Tracks the latest value together with a running weighted mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear every tracked statistic back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as having been observed ``n`` times."""
        self.val = val
        self.count += n
        self.sum += val * n
        # Mean of everything seen since the last reset.
        self.avg = self.sum / self.count
from torchmetrics import Accuracy
def train_one_epoch(model, train_loader, loss_fn, optimizer, epoch=None, num_classes=10):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Network to optimise; moved to the module-level ``device``.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        loss_fn: Loss module called as ``loss_fn(outputs, targets)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        epoch: Optional epoch number, shown in the progress-bar description.
        num_classes: Number of classes used by the accuracy metric.

    Returns:
        ``(model, mean_loss, accuracy)`` where ``mean_loss`` is the
        sample-weighted average training loss and ``accuracy`` is the
        multiclass accuracy accumulated over the whole epoch.
    """
    model.to(device)
    loss_fn.to(device)
    model.train()

    loss_train = AverageMeter()
    acc_train = Accuracy(task="multiclass", num_classes=num_classes).to(device)

    with tqdm(train_loader, unit="batch") as tepoch:
        if epoch is not None:
            tepoch.set_description(f"Epoch {epoch}")

        for inputs, targets in tepoch:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

            loss.backward()
            optimizer.step()

            # Weight each batch by its size so a short final batch does not
            # skew the epoch average (assumes loss_fn returns the batch mean,
            # which is the default reduction -- confirm for custom losses).
            loss_train.update(loss.item(), n=targets.size(0))
            # The metric only needs values, not the autograd graph.
            acc_train.update(outputs.detach(), targets.int())

            tepoch.set_postfix(loss=loss_train.avg)

    final_accuracy = acc_train.compute().item()
    return model, loss_train.avg, final_accuracy

import sklearn.metrics as metrics
def validation(model, test_loader, loss_fn, num_classes=43):
    """Evaluate ``model`` on ``test_loader`` without updating any weights.

    Args:
        model: Network to evaluate; moved to the module-level ``device``.
        test_loader: Iterable yielding ``(inputs, targets)`` batches.
        loss_fn: Loss module called as ``loss_fn(outputs, targets)``.
        num_classes: Number of classes, used for the accuracy metric and to
            fix the confusion-matrix shape. Defaults to 43, the value that
            was previously hard-coded here.

    Returns:
        ``(mean_loss, accuracy, confusion)`` where ``mean_loss`` is the
        sample-weighted average loss and ``confusion`` is a
        ``(num_classes, num_classes)`` array with true labels on the rows
        and predicted labels on the columns.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    # Mirror train_one_epoch so the function also works on a model that has
    # not been moved to the device yet.
    model.to(device)
    loss_fn.to(device)
    model.eval()

    loss_valid = AverageMeter()
    acc_valid = Accuracy(task="multiclass", num_classes=num_classes).to(device)
    predictions, labels = [], []

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            outputs = model(inputs)
            loss = loss_fn(outputs, targets)

            # Weight by batch size so a short final batch does not skew the
            # average (assumes loss_fn returns the batch mean -- confirm for
            # custom losses).
            loss_valid.update(loss.item(), n=targets.size(0))
            acc_valid.update(outputs, targets.int())
            predictions.append(outputs.argmax(dim=1).cpu().numpy())
            labels.append(targets.cpu().numpy())

    if not labels:
        raise ValueError("test_loader yielded no batches")

    predictions = np.concatenate(predictions)
    labels = np.concatenate(labels)
    # Passing the full label set keeps the matrix num_classes x num_classes
    # even when some class never appears in this split.
    confusion = metrics.confusion_matrix(
        labels, predictions, labels=np.arange(num_classes)
    )
    return loss_valid.avg, acc_valid.compute().item(), confusion

### Training the sample model

In [12]:
# Build the 43-class network and move it onto the selected device.
net = SimpleCNN(num_classes=43)
print(net)
net.to(device)

# Cross-entropy loss optimised with SGD plus momentum.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), momentum=0.9, lr=0.001)

# Train for 20 epochs; only the metrics of the last epoch are reported below.
for epoch in range(20):
    net, loss, acc = train_one_epoch(
        model=net,
        train_loader=training_loader,
        loss_fn=criterion,
        optimizer=optimizer,
        epoch=epoch,
        num_classes=43,
    )
print(f"Model accuracy {acc}, Loss {loss}")

SimpleCNN(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=8192, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=43, bias=True)
)


Epoch 0: 100%|██████████| 858/858 [00:14<00:00, 58.19batch/s, loss=1.09]
Epoch 1: 100%|██████████| 858/858 [00:09<00:00, 87.73batch/s, loss=0.158]
Epoch 2: 100%|██████████| 858/858 [00:09<00:00, 88.09batch/s, loss=0.0709]
Epoch 3: 100%|██████████| 858/858 [00:09<00:00, 87.84batch/s, loss=0.0416]
Epoch 4: 100%|██████████| 858/858 [00:09<00:00, 87.40batch/s, loss=0.0283]
Epoch 5: 100%|██████████| 858/858 [00:09<00:00, 88.00batch/s, loss=0.0202]
Epoch 6: 100%|██████████| 858/858 [00:09<00:00, 88.35batch/s, loss=0.0146]
Epoch 7: 100%|██████████| 858/858 [00:09<00:00, 87.74batch/s, loss=0.0115]
Epoch 8: 100%|██████████| 858/858 [00:09<00:00, 87.88batch/s, loss=0.00939]
Epoch 9: 100%|██████████| 858/858 [00:09<00:00, 88.00batch/s, loss=0.00735]
Epoch 10: 100%|██████████| 858/858 [00:09<00:00, 87.30batch/s, loss=0.00629]
Epoch 11: 100%|██████████| 858/858 [00:09<00:00, 87.53batch/s, loss=0.00583]
Epoch 12: 100%|██████████| 858/858 [00:09<00:00, 87.55batch/s, loss=0.00485]
Epoch 13: 100%|█████

Model accuracy 0.9999635815620422, Loss 0.0024589568231598636





In [13]:
# Score the trained network on the held-out split.
test_loss, test_acc, confusion = validation(
    model=net, test_loader=test_loader, loss_fn=criterion
)
print(f"Test accuracy {test_acc}, test loss {test_loss}")
def print_array(array):
    """Print ``array`` in full, without NumPy's ``...`` summarisation.

    The print threshold is lifted only for the duration of the call; the
    caller's own print options are restored afterwards, even if printing
    raises, instead of being overwritten with a hard-coded value.

    Args:
        array: The NumPy array (or array-like) to print.
    """
    with np.printoptions(threshold=np.inf):
        print(array)
# Show the complete confusion matrix returned by validation, untruncated.
print_array(confusion)

Test accuracy 0.9961740970611572, test loss 0.016993765021771138
[[ 73   0   1   0   0   0   0   0   1   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0]
 [  1 671   1   0   0   0   0   1   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0]
 [  0   2 684   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0]
 [  0   1   0 437   0   2   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0]
 [  0   0   1   0 606   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0]
 [  0   0   0   0   0 547   0   1   1   

In [14]:
# Count every scalar entry across all of the network's parameter tensors.
total_params = sum(param.numel() for param in net.parameters())
print(total_params)

2202155
