# Подготовка тестируемого видео для детекции на нем светофоров

In [1]:
import cv2 as cv
import matplotlib.pyplot as plt
import numpy as np
import torch
import os
import pandas as pd
from torchvision import transforms
import torchvision

In [2]:
cap = cv.VideoCapture("phase_1/video_0.MP4") # Вывод с видео файла
length = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
width  = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
print(length)
print(width)
print(height)

1203
1280
720


## Создание генератора датасета

### Создание класса датасета

In [3]:
class LISADataset(object):
    def __init__(self):
        # загрузка датасета аннотаций для bounding box'ов
        self.df = pd.read_csv("LISA\Annotations\Annotations\dayTrain\dayClip1\\frameAnnotationsBOX.csv", sep = ';')
        # упорядоченный список названий кадров из одной папки (пока что)
        imgs_temp = list(sorted(os.listdir(os.path.join("LISA\dayTrain\dayTrain\dayClip1", "frames"))))
        self.imgs = [imgs_temp[i] for i in self.df["Origin frame number"].unique()]

    def __getitem__(self, idx):
        # load images
        img_path = os.path.join("LISA\dayTrain\dayTrain\dayClip1", "frames", self.imgs[idx])
        img = cv.imread(img_path)
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)

        # get bounding box coordinates for each mask
        num_objs = len(self.df[self.df["Origin frame number"]==idx])
        boxes = []
        for i in range(num_objs):
            x_left = list(self.df[self.df["Origin frame number"]==idx]["Upper left corner X"])[i]
            x_right = list(self.df[self.df["Origin frame number"]==idx]["Lower right corner X"])[i]
            y_left = list(self.df[self.df["Origin frame number"]==idx]["Upper left corner Y"])[i]
            y_right = list(self.df[self.df["Origin frame number"]==idx]["Lower right corner Y"])[i]
            boxes.append([x_left, y_left, x_right, y_right])

        # convert everything into a torch.Tensor
        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        # there is only one class
        labels = torch.ones((num_objs,), dtype=torch.int64)

        image_id = torch.tensor([idx])
        if num_objs == 0:
            print(idx)
            boxes = torch.as_tensor([[0,0,0,0]], dtype=torch.float32)
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        
        img = torch.tensor(img)
        return img, target

    def __len__(self):
        return len(self.imgs)

In [4]:
def collate_fn(batch):
    return tuple(zip(*batch))

# Test

In [5]:
device = torch.device('cuda:0')

In [6]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
dataset = LISADataset()
data_loader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True, num_workers=0, collate_fn=collate_fn)
# For Training
images,targets = next(iter(data_loader))
images = list(torch.reshape(image, (3, image.shape[0], image.shape[1])).to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
model.to(device)
output = model(images,targets)   # Returns losses and detections

# For inference
images,targets = next(iter(data_loader))
images = list(torch.reshape(image, (3, image.shape[0], image.shape[1])).to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
model.eval()
with torch.no_grad():
    predictions = model(images)           # Returns predictions

RuntimeError: cannot reshape tensor of 0 elements into shape [0, -1] because the unspecified dimension size -1 can be any value and is ambiguous

# Препроцессинг видео

In [None]:
ret, frame = cap.read()

In [8]:
frame.shape

(720, 1280, 3)

In [5]:
cv.imshow("frame", frame)
cv.waitKey()
cv.destroyAllWindows()