In [1]:
import os
import cv2 as cv
import numpy as np
import pandas as pd
from tqdm import tqdm
import random
import matplotlib.pyplot as plt
import torch
from torch import nn
from torch.optim import Adam
from torch.nn import functional as F
from torch.utils.data import Dataset,TensorDataset, DataLoader
from pytorch_lightning import metrics

# Введение
В данной работе предлагается построить классификатор состояний светофоров на основе свёрточной нейронной сети (CNN)

In [3]:
train_val_dir = '/kaggle/input/sirius-traffic-lights-competition/train_val'
train_val_keys = pd.read_csv(os.path.join(train_val_dir, 'keys.csv'))

In [4]:
FIXED_IMG_HEIGHT = 64
FIXED_IMG_WIDTH = 64

def load_img(subset_dir, pic_id):
    img = cv.imread(os.path.join(subset_dir, 'pic', pic_id + '.jpg'))
    return cv.cvtColor(img, cv.COLOR_BGR2RGB)  # BGR -> RGB convertion

def resize_img(img, res_shape=(FIXED_IMG_HEIGHT, FIXED_IMG_WIDTH)):
    height, width = img.shape[:2]
    resized_width = int(res_shape[0] * (float(width) / height))
    img_resized = cv.resize(img, (resized_width, int(res_shape[0])))
    width_to_copy = min(resized_width, res_shape[1])
    img_resized_filled_with_zero = np.zeros([res_shape[0], res_shape[1], 3], dtype=img.dtype)
    img_resized_filled_with_zero[:, :width_to_copy, :] = img_resized[:, :width_to_copy, :]
    return img_resized_filled_with_zero

def norm_img(img):
    return (img.astype(np.float32) - 128.) / 255.

Загрузка train\validation датасета

In [5]:
images_train_val = np.zeros([len(train_val_keys['id']), FIXED_IMG_HEIGHT, FIXED_IMG_WIDTH, 3], dtype=np.float32)
for idx, pic_id in tqdm(enumerate(train_val_keys['id'])):
    images_train_val[idx] = norm_img(resize_img(load_img(train_val_dir, pic_id)))

45633it [07:54, 96.07it/s] 


In [6]:
tl_states = np.unique(train_val_keys['category'])
print(tl_states)

['disabled' 'green' 'red' 'red_yellow' 'yellow']


In [7]:
labels_train_val = np.zeros(len(train_val_keys), dtype=np.int)
for idx, state in enumerate(tl_states):
    labels_train_val[train_val_keys['category'] == state] = idx


Разобъем датасет на train/validation в соотношении 4/1

In [8]:
def to_categorical(y, num_classes):
    return np.eye(num_classes, dtype='uint8')[y]

In [9]:
num_classes = len(tl_states)
val_samples_num = int(len(images_train_val) * 0.2)
images_val = images_train_val[:val_samples_num]
labels_val = labels_train_val[:val_samples_num]
y_val = to_categorical(labels_val, num_classes)

images_train = images_train_val[val_samples_num:]
labels_train = labels_train_val[val_samples_num:]
y_train = to_categorical(labels_train, num_classes)

In [10]:
print(np.sum(y_val, axis=0))
print(np.sum(y_train, axis=0))
print(y_val.shape, images_val.shape)
print(y_train.shape, images_train.shape)
print(f'size of train dataset: {len(y_train)}, size of validation dataset: {len(y_val)}')

[ 974 3162 4089  419  482]
[ 3614 12898 16561  1622  1812]
(9126, 5) (9126, 64, 64, 3)
(36507, 5) (36507, 64, 64, 3)
size of train dataset: 36507, size of validation dataset: 9126


# Задание 1
Неоходимо создать модель CNN (Convolutional Neural Network), состоящей из нескольких последовательных сверточных слоев. Между сверточными слоями целесообразно добавить несколько max-pooling слев для увеличения receptive field. В самом конце необходимо добавить полносвязный слой (dense), размер выхода которого совпадает с количеством классов (в нашем случае 5).

В данной работе используется библиотеки  tensorflow + keras для формирования и обучения модели, однако такой выбор не является обязательным.

Ниже приведен пример шаблона для такой нейронной сети. Целевой функцией потерь была выбрана бинарная кросс-энтропия, что является класическим выбором при решении задачи классификации. Стоит обратить внимание на параметр from_logits=True, указывающий на то что в качестве выхода нейронная сеть будет выдавать "сырые" значения в диапазоне  $(-\inf; +\inf)$ а не вероятности (в противном случае последним слоем нейронной сети должен быть soft-max).

In [11]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

images_train = torch.tensor(images_train).to(device)
images_val = torch.tensor(images_val).to(device)
y_train = torch.tensor(y_train).to(device)
y_val = torch.tensor(y_val).to(device)

batch_size = 128

data_train = TensorDataset(images_train, y_train)
data_val = TensorDataset(images_val, y_val)

data_loader_train = DataLoader(data_train, shuffle=True, batch_size=batch_size)
data_loader_val = DataLoader(data_train, shuffle=True, batch_size=batch_size)

In [17]:
class Model(nn.Module):    
    
    def __init__(self, channels=8, latent_size=64, samplings=4, out_channels=5):       
        super().__init__() 
        modules = [nn.Conv2d(latent_size, channels, kernel_size=1, stride=1, padding=0)]
        for i in range(samplings):
            channels *= 2
            modules += [nn.Conv2d(channels//2, channels, kernel_size=3, stride=2, padding=1),
                           nn.BatchNorm2d(channels),
                           nn.ReLU()]
        modules += [nn.Conv2d(channels, channels*2, kernel_size=3, stride=2, padding=1),
                   nn.Dropout(p=0.2),
                   nn.Flatten(),
                   nn.Linear(channels*4, out_channels)]
        self.layers = nn.Sequential(*modules)

        
    def forward(self, x: torch.Tensor) -> torch.Tensor:   
        return self.layers(x)

# Задание 2

Обучите модель и оцените её точность как на train, так и на validation подмножествах.

Попробуйте варьировать кол-во слоёв, их размер, а также параметры обучения. Необходимо получить точность > 90% на валидационном подмножестве.

In [18]:
def calculate_loss(X: torch.Tensor, y: torch.Tensor, model: Model):
    predictions = model.forward(X)
    loss = nn.BCEWithLogitsLoss()
    return loss(predictions, y.type_as(predictions))

def train(model: Model, max_epochs=101):
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    train_accuracy = metrics.Accuracy().to(device)
    valid_accuracy = metrics.Accuracy(compute_on_step=False).to(device)
    for epoch in range(max_epochs):       
        # Train
        for X, y in data_loader_train:
            y_hat = model(X)
            batch_acc = train_accuracy(y_hat, y)
            loss = calculate_loss(X, y, model)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step() 
        # Val  
        with torch.set_grad_enabled(False):         
            for X, y in data_loader_val:
                y_hat = model(X)
                valid_accuracy(y_hat, y)
                loss = calculate_loss(X, y, model)
        total_train_accuracy = train_accuracy.compute()
        total_valid_accuracy = valid_accuracy.compute()
        if epoch % 10 == 0:
            print(f"Epoch: {epoch}, | Train accuracy: {total_train_accuracy}, Validation accuracy, {total_valid_accuracy}")                 
    return 
                  
model = Model()
model.to(device)
train(model)

Epoch: 0, | Train accuracy: 0.8711151480674744, Validation accuracy, 0.8976853489875793
Epoch: 10, | Train accuracy: 0.9592516422271729, Validation accuracy, 0.9609718918800354
Epoch: 20, | Train accuracy: 0.9713588953018188, Validation accuracy, 0.973626971244812
Epoch: 30, | Train accuracy: 0.975506067276001, Validation accuracy, 0.9789684414863586
Epoch: 40, | Train accuracy: 0.9806831479072571, Validation accuracy, 0.9795053005218506
Epoch: 50, | Train accuracy: 0.9829950332641602, Validation accuracy, 0.9846440553665161
Epoch: 60, | Train accuracy: 0.9850330352783203, Validation accuracy, 0.9871860146522522
Epoch: 70, | Train accuracy: 0.9863149523735046, Validation accuracy, 0.9876845479011536
Epoch: 80, | Train accuracy: 0.9876571893692017, Validation accuracy, 0.9881502389907837
Epoch: 90, | Train accuracy: 0.9886049032211304, Validation accuracy, 0.9912619590759277
Epoch: 100, | Train accuracy: 0.9895581603050232, Validation accuracy, 0.991311252117157
