<a href="https://colab.research.google.com/github/brianegge/yolov3/blob/master/yolov3_video.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Prepare YoloV3

In [0]:
# Work from the Colab content root and create the folders used for input videos and compressed output.
%cd /content/
!mkdir -p input_video
!mkdir -p output_compressed
# Extra Python packages (NOTE(review): presumably required by the SORT tracker code — confirm).
!pip install filterpy numba scikit-image lap
# Fetch the YOLOv3 code, then add the SORT tracker repository inside it as a git submodule.
!git clone https://github.com/brianegge/yolov3
%cd yolov3
!git submodule add https://github.com/brianegge/sort

In [0]:
# Switch into the cloned repository (presumably so the project-local imports below resolve) and pull the latest commits.
%cd /content/yolov3
!git pull
import time
import glob
import torch
import os

import argparse
from sys import platform

from models import *
from utils.datasets import *
from utils.utils import *
from sort.sort import *

from IPython.display import HTML
from base64 import b64encode
from pprint import pprint

# Options are declared argparse-style but parsed from an empty argument list,
# so inside the notebook every option takes its default value.
parser = argparse.ArgumentParser()
parser.add_argument('--cfg', type=str, default='cfg/yolov3-spp.cfg', help='*.cfg path')
parser.add_argument('--names', type=str, default='data/coco.names', help='*.names path')
parser.add_argument('--weights', type=str, default='weights/yolov3-spp-ultralytics.pt', help='weights path')

parser.add_argument('--img-size', type=int, default=160, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.3, help='object confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.6, help='IOU threshold for NMS')

parser.add_argument('--device', default='', help='device id (i.e. 0 or 0,1) or cpu')

parser.add_argument('--classes', nargs='+', type=int, help='filter by class')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
opt = parser.parse_args(args=[])

# Inference only: switch autograd off for the rest of the session.
# A bare `torch.no_grad()` call only constructs a context manager and discards it,
# which leaves gradient tracking enabled; `set_grad_enabled(False)` takes effect immediately.
torch.set_grad_enabled(False)

weights = opt.weights
img_size = opt.img_size

# Initialize the compute device (forced to CPU when ONNX export is enabled)
device = torch_utils.select_device(device='cpu' if ONNX_EXPORT else opt.device)

# Initialize model
model = Darknet(opt.cfg, img_size)

# Load weights, downloading them first if they are not present locally
attempt_download(weights)
if weights.endswith('.pt'):  # pytorch format
    model.load_state_dict(torch.load(weights, map_location=device)['model'])
else:  # darknet format
    load_darknet_weights(model, weights)

model.to(device).eval()

# Get class names and one random BGR/RGB triple per class for drawing
names = load_classes(opt.names)
colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(names))]


In [0]:
def predict_one_video(path_video, output_dir = 'output', skip_frames=40,
                      crop_origin=(1920, 180), out_size=(1080, 540)):
    """Detect, track and annotate objects in a cropped region of a video.

    Each frame is cropped, run through the module-level ``model``, filtered with
    non-max suppression and passed to a SORT tracker. Tracks whose class is one
    of car/bicycle/motorcycle/truck/bus/person are drawn on the cropped frame
    with a speed label, and the annotated crop is appended to the output video.
    For every ``"<class>-<track id>"`` key, the cropped frame on which that
    track had its highest ``trk.speed`` is saved as ``<key>.jpg`` in
    ``output_dir`` once the video has been processed.

    Relies on the notebook globals ``model``, ``device``, ``opt``, ``names``
    and ``colors``.

    Args:
        path_video: Path of the video to read.
        output_dir: Directory for the annotated video and the JPEG snapshots;
            created if missing.
        skip_frames: Number of frames discarded after the first one before
            processing starts.
        crop_origin: ``(x, y)`` pixel position of the top-left corner of the crop.
        out_size: ``(width, height)`` of the crop and of the written video.

    Returns:
        Path of the annotated video (``output_dir`` joined with the base name
        of ``path_video``).

    Raises:
        Exception: If a snapshot image cannot be written.
    """
    tracked_classes = ('car', 'bicycle', 'motorcycle', 'truck', 'bus', 'person')
    crop_x, crop_y = crop_origin
    w_out, h_out = out_size

    mot_tracker = Sort(max_age=3, min_hits=2)
    os.makedirs(output_dir, exist_ok=True)

    save_path = os.path.join(output_dir, os.path.split(path_video)[-1])
    if os.path.exists(save_path):
        os.remove(save_path)

    # key -> [tracker, highest speed seen so far, cropped frame at that moment]
    speeders = {}
    cap = cv2.VideoCapture(path_video)
    vid_writer = None
    try:
        _, img0 = cap.read()
        # Discard the leading frames; img0 ends up as the first frame to process
        # (or None when the video is shorter than that).
        for _skipped in range(skip_frames):
            _, img0 = cap.read()

        fps = cap.get(cv2.CAP_PROP_FPS)
        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*'MP4V'), fps, (w_out, h_out))

        img_count = 0
        while img0 is not None:
            img_count += 1
            # Crop the region of interest; this is a view into img0, so boxes drawn
            # on it below end up in the frame that is written to the video.
            img_cropped = img0[crop_y:crop_y + h_out, crop_x:crop_x + w_out]
            # Padded resize to the inference size
            img = letterbox(img_cropped, new_shape=opt.img_size)[0]

            # BGR to RGB, HxWx3 to 3xHxW
            img = img[:, :, ::-1].transpose(2, 0, 1)
            img = np.ascontiguousarray(img)

            img = torch.from_numpy(img).to(device)
            img = img.float()
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            pred = model(img)[0]
            pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres,
                                       classes=opt.classes, agnostic=opt.agnostic_nms)

            for det in pred:  # detections per image
                if det is None:
                    # Still advance the tracker so unmatched tracks keep ageing.
                    mot_tracker.update()
                    continue

                track_bbs_ids = torch.from_numpy(mot_tracker.update(det.cpu().detach().numpy()))
                if not len(track_bbs_ids):
                    continue

                # Rescale boxes from the inference size back to the cropped frame size
                track_bbs_ids[:, :4] = scale_coords(img.shape[2:], track_bbs_ids[:, :4], img_cropped.shape).round()

                for *xyxy, track_id, cls in track_bbs_ids:
                    name = names[int(cls)]
                    if name not in tracked_classes:
                        continue
                    # Look the tracker up only for classes we report on, and skip the
                    # row instead of raising StopIteration if it is no longer alive.
                    trk = next((t for t in mot_tracker.trackers if t.id == track_id), None)
                    if trk is None:
                        continue
                    # One colour per class (the previous per-frame counter never advanced,
                    # so every box was drawn with colors[0]).
                    color = colors[int(cls)]
                    # Scale the tracker speed by the crop/inference height ratio;
                    # 1.532 is a calibration divisor (original note: "25 mph") — TODO confirm.
                    real_speed = img_cropped.shape[0] / img.shape[2:][0] * trk.speed / 1.532

                    print("Frame %i, %s %i, speed %.2fmph, age %i" %
                          (img_count, name, track_id, real_speed, trk.age)
                    )
                    label = '%s %i %.1f mph' % (name, track_id, real_speed)
                    plot_one_box(xyxy, img_cropped, label=label, color=color)

                    key = '%s-%i' % (name,track_id)
                    if key not in speeders or speeders[key][1] < trk.speed:
                        speeders[key] = [trk,trk.speed,img_cropped]

            vid_writer.write(img_cropped)
            _, img0 = cap.read()
    finally:
        # Always free the capture and writer handles, even if a frame fails mid-way.
        cap.release()
        if vid_writer is not None:
            vid_writer.release()

    # Save, per tracked object, the frame on which it was fastest.
    for key in speeders:
        trk,speed,img_cropped = speeders[key]
        o = os.path.join(output_dir, "%s.jpg" % key)
        if not cv2.imwrite(o, img_cropped):
            raise Exception("Could not write image %s" % o)

    return save_path


# Process Video

In [0]:
%cd /content/
input_file = os.listdir("input_video")[0]
path_video = os.path.join("input_video",input_file)
print("Processing %s" % path_video)
for filename in os.listdir('output'):
  f = os.path.join('output', filename)
  if os.path.isfile(f):
    os.unlink(f)
save_path = predict_one_video(path_video)

# compress video
compressed_path = os.path.join("output_compressed", os.path.split(save_path)[-1])
print("Compressing %s to %s" % (save_path,compressed_path))
if os.path.exists(compressed_path):
  os.remove(compressed_path)
os.system(f"ffmpeg -i {save_path} -vcodec libx264 {compressed_path}")

# Show video
mp4 = open(compressed_path,'rb').read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
HTML("""
<video width=400 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

To process your own video, upload it to the `input_video` folder.

# Git clone to get short sample videos (needed in `input_video` before running "Process Video")

In [0]:
# Clone the sample repository and copy its bundled videos into ./input_video/
# (both paths are relative to the current working directory).
!git clone https://github.com/vindruid/yolov3-in-colab.git
!cp -r "yolov3-in-colab"/input_video/* ./input_video/