In [50]:
import csv
import os
from typing import Tuple, Optional, Callable

import PIL
import torch
import torch.nn as nn
from torch.optim import Adam, lr_scheduler
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision.transforms.v2 import ToTensor, Resize, Compose, ColorJitter, RandomRotation, AugMix, GaussianBlur, \
    RandomEqualize, RandomHorizontalFlip, RandomVerticalFlip

In [51]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [52]:
class GTSRB(Dataset):
    def __init__(self,
                 root: str,
                 split: str,
                 transform: Optional[Callable] = None):
        self.base_folder = root
        self.csv_file = os.path.join(self.base_folder, 'Train.csv' if split == 'train' else 'Test.csv')

        with open(self.csv_file) as csvfile:
            samples = [(os.path.join(self.base_folder, row['Path']), int(row['ClassId']))
                       for row in csv.DictReader(csvfile, delimiter=',', skipinitialspace=True)
                       ]

        self.samples = samples
        self.split = split
        self.transform = transform

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index: int) -> Tuple:
        path, classId = self.samples[index]
        sample = PIL.Image.open(path).convert('RGB')
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, classId

In [53]:
train_transforms = Compose([
    ColorJitter(brightness=1.0, contrast=0.5, saturation=1, hue=0.1),
    RandomEqualize(0.4),
    AugMix(),
    RandomHorizontalFlip(0.3),
    RandomVerticalFlip(0.3),
    GaussianBlur((3, 3)),
    RandomRotation(30),

    Resize([50, 50]),
    ToTensor(),

])
validation_transforms = Compose([
    Resize([50, 50]),
    ToTensor(),

])

In [54]:
def train_test_split(dataset, train_size):
    train_size = int(train_size * len(dataset))
    test_size = int(len(dataset) - train_size)
    return random_split(dataset, [train_size, test_size])

In [55]:
dataset = GTSRB(root='../data/gtsrb', split="train")
train_set, validation_set = train_test_split(dataset, train_size=0.8)
print(f'training size : {len(train_set)}, Validation size : {len(validation_set)}')

training size : 31367, Validation size : 7842


In [56]:
train_set.dataset.transform = train_transforms
validation_set.dataset.transform = validation_transforms

In [57]:
BATCH_SIZE = 64
train_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
validation_loader = DataLoader(dataset=validation_set, batch_size=BATCH_SIZE)

In [58]:
from tqdm import tqdm


class CNN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(CNN, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim

        self.metrics = {}

        self.flatten = nn.Flatten()

        self.dropout2 = nn.Dropout(0.2)
        self.dropout3 = nn.Dropout(0.3)

        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(2)

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(64)

        self.conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.batchnorm2 = nn.BatchNorm2d(256)

        self.conv5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3)
        self.conv6 = nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=3)
        self.batchnorm3 = nn.BatchNorm2d(1024)

        self.l1 = nn.Linear(1024 * 4 * 4, 512)
        self.l2 = nn.Linear(512, 128)
        self.batchnorm4 = nn.LayerNorm(128)
        self.l3 = nn.Linear(128, output_dim)

    def forward(self, input):

        conv = self.conv1(input)
        conv = self.conv2(conv)
        batchnorm = self.relu(self.batchnorm1(conv))
        maxpool = self.maxpool(batchnorm)

        conv = self.conv3(maxpool)
        conv = self.conv4(conv)
        batchnorm = self.relu(self.batchnorm2(conv))
        maxpool = self.maxpool(batchnorm)

        conv = self.conv5(maxpool)
        conv = self.conv6(conv)
        batchnorm = self.relu(self.batchnorm3(conv))
        maxpool = self.maxpool(batchnorm)

        flatten = self.flatten(maxpool)

        dense_l1 = self.l1(flatten)
        dropout = self.dropout3(dense_l1)
        dense_l2 = self.l2(dropout)
        batchnorm = self.batchnorm4(dense_l2)
        dropout = self.dropout2(batchnorm)
        output = self.l3(dropout)

        return output

    def training_metrics(self, positives, data_size, loss):
        acc = positives / data_size
        return loss, acc

    def validation_metrics(self, validation_data, loss_function):
        data_size = len(validation_data)
        correct_predictions = 0
        total_samples = 0
        val_loss = 0

        model = self.eval()
        with torch.no_grad():
            for step, (input, label) in enumerate(validation_data):
                input, label = input.to(device), label.to(device)
                prediction = model.forward(input)
                loss = loss_function(prediction, label)
                val_loss = loss.item()
                _, predicted = torch.max(prediction, 1)
                correct_predictions += (predicted == label).sum().item()
                total_samples += label.size(0)

        val_acc = correct_predictions / total_samples

        return val_loss, val_acc

    def history(self):
        return self.metrics

    def compile(self, train_data, validation_data, epochs, loss_function, optimizer, learning_rate_scheduler):
        val_acc_list = []
        val_loss_list = []

        train_acc_list = []
        train_loss_list = []

        learning_rate_list = []

        print('training started ...')
        STEPS = len(train_data)
        for epoch in range(epochs):
            lr = optimizer.param_groups[0]["lr"]
            learning_rate_list.append(lr)
            correct_predictions = 0
            total_examples = 0
            loss = 0
            with tqdm.trange(STEPS) as progress:

                for step, (input, label) in enumerate(train_loader):
                    input, label = input.to(device), label.to(device)
                    prediction = self.forward(input)

                    _, predicted = torch.max(prediction, 1)
                    correct_predictions += (predicted == label).sum().item()
                    total_examples += label.size(0)
                    l = loss_function(prediction, label)
                    loss = l.item()
                    l.backward()
                    optimizer.step()
                    optimizer.zero_grad()

                    progress.colour = 'green'
                    progress.desc = f'Epoch [{epoch}/{EPOCHS}], Step [{step}/{STEPS}], Learning Rate [{lr}], Loss [{"{:.4f}".format(l)}], Accuracy [{"{:.4f}".format(correct_predictions / total_examples)}]'
                    progress.update(1)

            training_loss, training_acc = self.training_metrics(correct_predictions, total_examples, loss)
            train_acc_list.append(training_acc)
            train_loss_list.append(training_loss)

            val_loss, val_acc = self.validation_metrics(validation_data, loss_function)
            val_acc_list.append(val_acc)
            val_loss_list.append(val_loss)

            print(f'val_accuracy [{val_acc}], val_loss [{val_loss}]')

            learning_rate_scheduler.step()

        metrics_dict = {
            'train_acc': train_acc_list,
            'train_loss': train_loss_list,
            'val_acc': val_acc_list,
            'val_loss': val_loss_list,
            'learning_rate': optimizer.param_groups[0]["lr"]
        }
        self.metrics = metrics_dict
        print('training complete !')


In [61]:
import tqdm

EPOCHS = 5
LEARNING_RATE = 0.001
INPUT_DIM = 3 * 50 * 50
OUTPUT_DIM = 43
model = CNN(INPUT_DIM, OUTPUT_DIM).to(device)

optimizer = Adam(params=model.parameters(), lr=LEARNING_RATE)
lr_s = lr_scheduler.LinearLR(optimizer, start_factor=1.0, end_factor=0.5, total_iters=10)
loss = nn.CrossEntropyLoss()

In [62]:
model.compile(train_data=train_loader, validation_data=validation_loader, epochs=EPOCHS, loss_function=loss,
              optimizer=optimizer, learning_rate_scheduler=lr_s)

training started ...


Epoch [0/5], Step [490/491], Learning Rate [0.001], Loss [0.0263], Accuracy [0.5776]: 100%|[32m██████████[0m| 491/491 [02:47<00:00,  2.93it/s]


val_accuracy [0.9517980107115531], val_loss [0.2991262972354889]


Epoch [1/5], Step [490/491], Learning Rate [0.00095], Loss [0.0150], Accuracy [0.9807]: 100%|[32m██████████[0m| 491/491 [02:49<00:00,  2.89it/s]


val_accuracy [0.9908186687069626], val_loss [0.07485243678092957]


Epoch [2/5], Step [490/491], Learning Rate [0.0009000000000000001], Loss [0.0154], Accuracy [0.9953]: 100%|[32m██████████[0m| 491/491 [02:47<00:00,  2.94it/s]


val_accuracy [0.9914562611578679], val_loss [0.08432421088218689]


Epoch [3/5], Step [490/491], Learning Rate [0.0008500000000000001], Loss [0.0009], Accuracy [0.9989]: 100%|[32m██████████[0m| 491/491 [02:25<00:00,  3.37it/s]


val_accuracy [0.9978321856669217], val_loss [0.0047309850342571735]


Epoch [4/5], Step [490/491], Learning Rate [0.0008], Loss [0.0004], Accuracy [0.9999]: 100%|[32m██████████[0m| 491/491 [02:41<00:00,  3.03it/s]


val_accuracy [0.9980872226472839], val_loss [0.0041684540919959545]
training complete !


In [64]:
torch.save(model.state_dict(), '../models/best_CNN.pt')

In [65]:
model2 = torch.load('../models/best_CNN.pt')

In [72]:
import requests
from PIL import Image
import io

import torchvision.transforms as transforms

# define a transform to convert the image to tensor and normalize it
transform = transforms.Compose([
    transforms.Resize((50, 50)),  # resize the image to the same size as your trained model input size
    transforms.ToTensor(),  # convert the image to a PyTorch Tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # normalize the image (optional)
])

url = 'https://i.ytimg.com/vi/D-rXSzu_fLk/maxresdefault.jpg'
r = requests.get(url, stream=True)
img = Image.open(io.BytesIO(r.content))

input = transform(img)
input = input.unsqueeze(0).to(device)
output = model(input)

probabilities = torch.nn.functional.softmax(output[0], dim=0)

print(probabilities)

tensor(0.0233, device='cuda:0', grad_fn=<MeanBackward0>)


In [74]:
output.max()

tensor(9.2053, device='cuda:0', grad_fn=<MaxBackward1>)

In [76]:
_, top_class = probabilities.max(dim=0)

In [77]:
top_class

tensor(12, device='cuda:0')

In [78]:
top_prob, top_classes = probabilities.topk(k=3, dim=0)

In [79]:
top_prob

tensor([0.7102, 0.2098, 0.0251], device='cuda:0', grad_fn=<TopkBackward0>)

In [80]:
top_classes

tensor([12, 17, 25], device='cuda:0')