In [1]:
import os
import sys
sys.path.append('/home/ubuntu/project')
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import warnings
warnings.simplefilter("ignore", UserWarning)
import cv2, mmcv
import torch
import numpy as np
import time
import math

from PIL import Image, ImageDraw
from utils.get_utube import get_youtube
from utils.display import video_display
from detectors import DSFD

In [9]:

# Labels of the clips to process. The tracking cell iterates over `res`,
# reads the entry of `videos` at the same position, and uses the label in
# its progress output and output file names.
res = ['480p']

# Fetch the clip through the project helper with its cache disabled.
# NOTE(review): presumably returns one decoded frame sequence per entry in
# `res` (the tracking cell indexes it that way) — confirm in utils.get_utube.
videos = get_youtube("https://www.youtube.com/watch?v=eoligIhaKkw", res, use_cache=False)


In [3]:
def get_videos(paths):
    """Decode every video in `paths` into a list of RGB frames.

    Args:
        paths: iterable of video file paths, each opened with
            `mmcv.VideoReader`.

    Returns:
        A list with one entry per path, in the same order; each entry is the
        list of that video's frames after a BGR -> RGB conversion. All frames
        are held in memory at once.
    """
    return [
        [cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB) for bgr_frame in mmcv.VideoReader(src)]
        for src in paths
    ]

In [4]:
# Alternative input: local clips instead of the YouTube download.
# `res` holds one label per path, in the same order; the tracking cell uses
# these labels in its progress output and output file names.
# NOTE: this rebinds `res` and `videos`, replacing the values set by the
# YouTube cell if both cells are run.
paths = ['../tmp/MOT16-01-raw.webm', '../tmp/MOT16-09-raw.webm']
res = ['MOT16-01', 'MOT16-09']

videos = get_videos(paths)

In [3]:
def xyxy_to_xywh(boxes, width, height):
    """Convert corner-format boxes to pixel-space centre/size boxes.

    Args:
        boxes: iterable of boxes whose first four items are (x1, y1, x2, y2).
            The coordinates are multiplied by `width` / `height`, so they are
            expected as fractions of the image size.
        width: image width in pixels, used to scale the x coordinates.
        height: image height in pixels, used to scale the y coordinates.

    Returns:
        A float `np.ndarray` of shape (N, 4) with rows
        [center_x, center_y, w, h] in pixels. An empty `boxes` yields an
        array of shape (0, 4) so callers can still slice columns.
    """
    xywh_boxes = []

    for box in boxes:
        # Scale to pixels; int() truncates toward zero.
        x1 = int(box[0] * width)
        y1 = int(box[1] * height)
        x2 = int(box[2] * width)
        y2 = int(box[3] * height)

        # abs() keeps the size positive even if the corners arrive swapped.
        w = abs(x2 - x1)
        h = abs(y2 - y1)

        # Use the midpoint of the two corners. The previous `x1 + w/2` form
        # assumed (x1, y1) was the top-left corner and placed the centre
        # outside the box whenever the corners were swapped.
        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2

        xywh_boxes.append([cx, cy, w, h])

    # reshape(-1, 4) is a no-op for non-empty input and gives (0, 4) for none.
    return np.array(xywh_boxes, dtype=float).reshape(-1, 4)

In [None]:
from yolo.models import Yolov4
from yolo.tool.utils import plot_boxes_cv2
from yolo.tool.torch_utils import do_detect
from deepsort.deep_sort import DeepSort

# Output directory for the rendered videos, and locations of the pretrained
# detector / tracker weights (all relative to the notebook's directory).
temp_folder = '../tmp/'
weight_file = '../yolo/weight/yolov4.pth'
sort_weight = '../deepsort/deep/checkpoint/ckpt.t7'
class_names = ['person']  # not referenced by any other visible cell

# Build the detector, load its weights directly onto the GPU, then move the
# model itself to the GPU.
model = Yolov4(inference=True)
pretrained_dict = torch.load(weight_file, map_location=torch.device('cuda'))
model.load_state_dict(pretrained_dict)

model.cuda()

# Tracker fed with the detector's person boxes in the tracking loop.
# NOTE(review): nms_max_overlap=0.5 is presumably the tracker-side NMS
# overlap threshold — confirm against deepsort.deep_sort.
deepsort = DeepSort(model_path=sort_weight, nms_max_overlap=0.5, use_cuda=True)

# Size every frame is resized to before detection and tracking.
width = 416
height = 416

# Six anchor colours (components in [0, 1]) that get_color interpolates between.
colors = np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1], [0, 1, 0], [1, 1, 0], [1, 0, 0]], dtype=np.float32)

def get_color(c, x, max_val):
    """Return one 0-255 colour component by interpolating the `colors` table.

    Args:
        c: column of the module-level `colors` table to read.
        x: position to map onto the table; `x / max_val` is stretched over
            the table's five intervals.
        max_val: value of `x` that maps to the last table row.

    Returns:
        The linearly interpolated component scaled by 255 and truncated to int.
    """
    position = float(x) / max_val * 5
    lower = int(math.floor(position))
    upper = int(math.ceil(position))
    # Fractional distance from the lower anchor row towards the upper one.
    blend = position - lower
    component = (1 - blend) * colors[lower][c] + blend * colors[upper][c]
    return int(component * 255)

def _write_video(path, frames_rgb, codec, fps=20.0):
    """Encode same-sized RGB frames to `path` with the given FourCC codec.

    Args:
        path: output file path; the extension selects the container.
        frames_rgb: non-empty list of HxWx3 RGB frames of identical size.
        codec: four-character codec code, e.g. 'VP90'.
        fps: frame rate written into the file.
    """
    frame_h, frame_w = frames_rgb[0].shape[:2]
    writer = cv2.VideoWriter(path, cv2.VideoWriter_fourcc(*codec), fps, (frame_w, frame_h))
    try:
        # An unopened writer drops every frame silently, so report it instead.
        if not writer.isOpened():
            print(f'Could not open video writer for {path} (codec {codec})')
            return
        for frame_rgb in frames_rgb:
            # OpenCV writers expect BGR channel order.
            writer.write(cv2.cvtColor(frame_rgb, cv2.COLOR_RGB2BGR))
    finally:
        # Always release so the file is finalised even if encoding fails.
        writer.release()


# Track persons in every clip and save the annotated result as WebM and MP4.
for j, r in enumerate(res):
    frames = videos[j]

    frames_tracked = []

    # Start each clip with a fresh tracker: reusing one instance would carry
    # track state and identity numbering over from the previous clip (or from
    # a previous run of this cell) into an unrelated video.
    deepsort = DeepSort(model_path=sort_weight, nms_max_overlap=0.5, use_cuda=True)

    start = time.monotonic()

    for i, frame in enumerate(frames):
        print('Tracking frame: {} res : {}'.format(i + 1, r), end='\r')

        H, W, _ = frame.shape

        # Detection, tracking and drawing all happen on the fixed-size copy.
        sized = cv2.resize(frame, (width, height))

        # Detect objects.
        # NOTE(review): 0.4 / 0.6 are presumably the confidence and NMS
        # thresholds, and True enables CUDA — confirm in yolo.tool.torch_utils.
        boxes = np.array(do_detect(model, sized, 0.4, 0.6, True)[0])

        outputs = []

        if len(boxes) > 0:
            # Columns 0-3 are the box corners, 5 the class confidence and
            # 6 the class id, as consumed below.
            bbox, cls_conf, cls_ids = boxes[:, 0:4], boxes[:, 5], boxes[:, 6]

            # Keep only class 0 (person).
            mask = cls_ids == 0

            # Only update the tracker when at least one person was detected.
            if np.any(mask):
                bbox = xyxy_to_xywh(bbox[mask], width, height)
                outputs = deepsort.update(bbox, cls_conf[mask], sized)

        # Draw the tracked persons.
        frame_draw = sized.copy()
        for box in outputs:
            x1 = int(box[0])
            y1 = int(box[1])
            x2 = int(box[2])
            y2 = int(box[3])

            identity = box[-1]

            # Spread identities over the colour table so each track keeps a
            # stable, distinct colour.
            offset = identity * 123457 % 80
            red = get_color(2, offset, 80)
            green = get_color(1, offset, 80)
            blue = get_color(0, offset, 80)

            rgb = (red, green, blue)

            frame_draw = cv2.putText(frame_draw, f'person_{identity}', (x1 - 10, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, rgb, 2)
            frame_draw = cv2.rectangle(frame_draw, (x1, y1), (x2, y2), rgb, 1)

        # Scale the annotated frame back to the source size and keep it.
        frames_tracked.append(cv2.resize(frame_draw, (W, H)))

    print(f'{r} tracked in {time.monotonic() - start}')

    # Nothing to encode for an empty clip (and no first frame to size from).
    if not frames_tracked:
        print(f'{r} has no frames, skipping video export')
        continue

    # Use vp9 codec and webm format to show video in notebook
    # TODO : Why opencv doesn't support h264? -> how can we show video in format mp4
    _write_video(temp_folder + f'video_tracked_{r}.webm', frames_tracked, 'VP90')

    # Second copy of the same frames as an XVID-encoded .mp4 file.
    _write_video(temp_folder + f'video_tracked_{r}.mp4', frames_tracked, 'XVID')

480p tracked in 70.2793398060021


In [None]:
# Show the WebM written by the tracking cell. The '480p' label is hard-coded
# here, so the file only exists when `res` contained '480p' during that run.
video_display(temp_folder + 'video_tracked_480p.webm')