# Подготовка тестируемого видео для детекции на нем светофоров

In [72]:
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import torch
import os
import pandas as pd
from torchvision import transforms
import torchvision

In [2]:
cap = cv.VideoCapture("phase_1/video_0.MP4") # Вывод с видео файла
length = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
width  = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
print(length)
print(width)
print(height)

1203
1280
720


## Создание генератора датасета

### Создание класса датасета

In [129]:
class LISADataset(object):
    def __init__(self, transforms):
        self.transforms = transforms
        # упорядоченный список названий кадров из одной папки (пока что) для дальнейшей 
        self.imgs = list(sorted(os.listdir(os.path.join("LISA\dayTrain\dayTrain\dayClip1", "frames"))))
        # загрузка датасета аннотаций для bounding box'ов
        self.df = pd.read_csv("LISA\Annotations\Annotations\dayTrain\dayClip1\\frameAnnotationsBOX.csv", sep = ';')

    def __getitem__(self, idx):
        # load images ad masks
        img_path = os.path.join("LISA\dayTrain\dayTrain\dayClip1", "frames", self.imgs[idx])
        img = cv.imread(img_path)
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)

        # get bounding box coordinates for each mask
        num_objs = len(self.df[self.df["Origin frame number"]==idx])
        boxes = []
        for i in range(num_objs):
            print(i,idx)
            x_left = list(self.df[self.df["Origin frame number"]==idx]["Upper left corner X"])[i]
            x_right = list(self.df[self.df["Origin frame number"]==idx]["Lower right corner X"])[i]
            y_left = list(self.df[self.df["Origin frame number"]==idx]["Upper left corner Y"])[i]
            y_right = list(self.df[self.df["Origin frame number"]==idx]["Lower right corner Y"])[i]
            boxes.append([x_left, y_left, x_right, y_right])

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        
        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.imgs)

In [130]:
def get_transform(train):
    transform = []
    transform.append(transforms.ToTensor())
    return transforms.Compose(transform)

In [139]:
dataset = LISADataset(get_transform(train=True))
data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True, num_workers=0)

In [141]:
images,targets = next(iter(data_loader))

0 1684
1 1684
2 1684
3 1684
4 1684
0 266
1 266
2 266


RuntimeError: stack expects each tensor to be equal size, but got [5, 4] at entry 0 and [3, 4] at entry 1

In [137]:
images = list(image for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

In [135]:
print(targets)

[{'boxes': tensor([[[ 838.,  180.,  871.,  238.],
         [ 987.,  215., 1014.,  260.],
         [1158.,  295., 1185.,  335.]],

        [[ 838.,  180.,  871.,  238.],
         [ 987.,  215., 1014.,  260.],
         [1158.,  295., 1185.,  335.]]]), 'labels': tensor([[1, 1, 1],
        [1, 1, 1]]), 'image_id': tensor([[1080],
        [1123]]), 'area': tensor([[1914., 1215., 1080.],
        [1914., 1215., 1080.]])}]


In [136]:
output = model(images,targets)

ValueError: Expected target boxes to be a tensorof shape [N, 4], got torch.Size([2, 3, 4]).

In [75]:
device = torch.device('cuda:0')

In [74]:
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

In [76]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

N_CLASS = 4 

INP_FEATURES = model.roi_heads.box_predictor.cls_score.in_features

model.roi_heads.box_predictor = FastRCNNPredictor(INP_FEATURES, N_CLASS)

model.to(device)

params = [p for p in model.parameters() if p.requires_grad]

optimizer = torch.optim.Adam(params)

lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)

In [None]:
for epoch in range(2):
    
    start_time = time()
    model.train()
    lossHist.reset()
    
    for images, targets, image_ids in tqdm(trainDataLoader):
        
        images = torch.stack(images).to(device)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
        
        bs = images.shape[0]
        
        loss_dict = model(images, targets)
        
        totalLoss = sum(loss for loss in loss_dict.values())
        lossValue = totalLoss.item()
        
        lossHist.update(lossValue,bs)

        optimizer.zero_grad()
        totalLoss.backward()
        optimizer.step()
    
    if lr_scheduler is not None:
        lr_scheduler.step(totalLoss)

    print(f"[{str(datetime.timedelta(seconds = time() - start_time))[2:7]}]")
    print(f"Epoch {epoch}/{EPOCHS}")
    print(f"Train loss: {lossHist.avg}")

# Примеры

In [None]:
ret, frame = cap.read()

In [8]:
frame.shape

(720, 1280, 3)

In [5]:
cv.imshow("frame", frame)
cv.waitKey()
cv.destroyAllWindows()