In [None]:
import os 
import sys
import cv2
import yaml
import random
from torch import nn
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import albumentations as albu
import torch

from models.model import UnetClipped
from metrics.utils import masks_to_bboxes

# Render matplotlib figures at 200 DPI for the whole notebook
plt.rcParams.update({'figure.dpi': 200})

In [None]:
import os
# GPU selection: order devices by PCI bus id and make only device "1" visible to CUDA
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "1",
})

In [None]:
# Read the training configuration; later cells pull model and data parameters from it.
# NOTE(review): yaml.FullLoader can construct more than plain data types — switch to
# yaml.safe_load if this config could ever come from an untrusted source.
config_path = 'configs/train_config.yaml'
with open(config_path, 'r') as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

In [None]:
# Pull the values needed for inference out of the training config
data_params = config["train_data_params"]

image_size = data_params["size"]
stride = data_params["output_stride"]
# total number of classes is the sum over the per-group "subclasses" counts
num_classes = sum(data_params["subclasses"])
# input size integer-divided by the output stride
out_img_size = image_size // stride

In [None]:
# Init model, load state dict

# Inference only: switch autograd off globally for the rest of the notebook
torch.set_grad_enabled(False)

model = UnetClipped(**config["model"])

ckpt_path = "checkpoints/epoch=024-val_loss=0.912-val_map=0.723.ckpt"
# Load onto the CPU first so the checkpoint can be read regardless of which GPU
# index it was saved from; the model is moved to the GPU explicitly below.
state_dict = torch.load(ckpt_path, map_location="cpu")['state_dict']

# The checkpoint keys carry a leading 'net.' prefix that the bare model does not have.
# Strip it only at the start of the key: a plain str.replace would also mangle keys
# that merely contain the substring (e.g. 'resnet.' or 'unet.').
ckpt_prefix = 'net.'
fixed_state_dict = {
    (key[len(ckpt_prefix):] if key.startswith(ckpt_prefix) else key): value
    for key, value in state_dict.items()
}

model.load_state_dict(fixed_state_dict)
model = model.cuda().eval()

In [None]:
# Folder the input videos are read from, and folder the annotated videos are written to
source_folder, dist_folder = "test_videos/video", "test_videos/video_predict"

In [None]:
# One random 3-channel color (each channel in 0..255) per class, used when drawing boxes
colors = []
for _ in range(num_classes):
    colors.append([random.randint(0, 255) for _ in range(3)])

# define augmentations
# geometric part: resize so the smallest side equals `image_size`, then pad both
# dimensions up to a multiple of 32
smallest_side_resize = albu.SmallestMaxSize(max_size=image_size, always_apply=True)
pad_to_divisor = albu.PadIfNeeded(
    min_height=None,
    min_width=None,
    pad_height_divisor=32,
    pad_width_divisor=32,
    border_mode=0,
)
resize_aug = [smallest_side_resize, pad_to_divisor]

# photometric part: per-channel normalization with identical mean/std on all three channels
norm_aug = [albu.Normalize(mean=[0.449, 0.449, 0.449], std=[0.226, 0.226, 0.226])]

# `resize_pipeline` yields the frame that gets drawn on; `preproc_pipeline` applies the
# same geometry plus normalization to build the model input
resize_pipeline = albu.Compose(resize_aug, p=1)
preproc_pipeline = albu.Compose([*resize_aug, *norm_aug], p=1)

In [None]:
# Run the detector on every file in `source_folder` and write an annotated copy
# ("<name>_predict.mp4") to `dist_folder`.

# cv2.VideoWriter does not create missing directories, so make sure the target exists
os.makedirs(dist_folder, exist_ok=True)

for video_name in os.listdir(source_folder):

    print(f"{video_name} is being processed")

    video_path = os.path.join(source_folder, video_name)
    dist_path = os.path.join(dist_folder, os.path.splitext(video_name)[0] + "_predict.mp4")
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    fourcc = cv2.VideoWriter_fourcc(*'FMP4')
    # The writer is created lazily: the output frame size is only known once the
    # first frame has gone through the resize pipeline.
    out = None

    try:
        while cap.isOpened():

            ret, img = cap.read()
            # end of stream (or a frame that could not be decoded)
            if not ret or img is None:
                break

            # OpenCV decodes frames as BGR; the pipelines below are fed RGB
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

            # prepare model input: HWC -> CHW, float, with a leading batch dimension
            model_input = preproc_pipeline(image=img)["image"]
            model_input = torch.from_numpy(model_input.transpose((2, 0, 1))).float().unsqueeze(0)
            output = model(model_input.cuda())

            # resize image (same geometry as the model input, but not normalized)
            img_result = resize_pipeline(image=img)["image"]

            # draw detected boxes
            bboxes = masks_to_bboxes(output, num_classes, max_bbox_per_img=5, threshold=0.4, out_size=out_img_size, is_predict=True)
            for box in bboxes[0]:
                # NOTE(review): the factor 2 presumably maps box coordinates onto the
                # resized frame — confirm against masks_to_bboxes.
                # Plain Python ints keep cv2.rectangle happy across OpenCV versions.
                x1, y1, x2, y2 = (int(coord) for coord in 2 * box[:4].astype(int))
                class_id = int(box[4])
                # class 2 is deliberately not drawn
                if class_id != 2:
                    cv2.rectangle(img_result, (x1, y1), (x2, y2), colors[class_id], 2)

            # add frame to the final video (VideoWriter expects BGR)
            img_result = cv2.cvtColor(img_result, cv2.COLOR_RGB2BGR)
            if out is None:
                # pass fps through unrounded so fractional rates (e.g. 29.97) keep the
                # original playback speed
                out = cv2.VideoWriter(dist_path, fourcc, fps, (img_result.shape[1], img_result.shape[0]), True)
            out.write(np.uint8(img_result))
    finally:
        # always free the capture/writer handles, even if inference raised
        cap.release()
        if out is not None:
            out.release()

    if out is None:
        # not a video, or no frame could be decoded: nothing was written
        print(f"{video_name} has no readable frames, skipped")
        continue

    print(f"{video_name} is finished")