# Human detection - YOLOv5* Models
**Warning - This notebook was run on the Google Colab (colab.research) platform**

Also, don't forget to activate the GPU runtime.

# Prerequisites - download & install

## 1. **human-detection** package

In [None]:
# Clone the human-detection repository and install it by running its setup.py.
!git clone https://github.com/artzet-s/human-detection
%cd human-detection
%run setup.py install
# WARNING - You must restart the environment after this install
%cd ..

Cloning into 'human-detection'...
remote: Enumerating objects: 92, done.[K
remote: Counting objects: 100% (92/92), done.[K
remote: Compressing objects: 100% (64/64), done.[K
remote: Total 92 (delta 44), reused 76 (delta 28), pack-reused 0[K
Unpacking objects: 100% (92/92), done.
/content/human-detection
running install
running bdist_egg
running egg_info
creating human_detection.egg-info
writing human_detection.egg-info/PKG-INFO
writing dependency_links to human_detection.egg-info/dependency_links.txt
writing top-level names to human_detection.egg-info/top_level.txt
writing manifest file 'human_detection.egg-info/SOURCES.txt'
writing manifest file 'human_detection.egg-info/SOURCES.txt'
installing library code to build/bdist.linux-x86_64/egg
running install_lib
running build_py
creating build
creating build/lib
creating build/lib/human_detection
copying human_detection/json_tool.py -> build/lib/human_detection
copying human_detection/video_frames.py -> build/lib/human_detection
copyi

zip_safe flag not set; analyzing archive contents...
human_detection.__pycache__.__init__.cpython-36: module references __path__


creating dist
creating 'dist/human_detection-0.0.1-py3.6.egg' and adding 'build/bdist.linux-x86_64/egg' to it
removing 'build/bdist.linux-x86_64/egg' (and everything under it)
Processing human_detection-0.0.1-py3.6.egg
creating /usr/local/lib/python3.6/dist-packages/human_detection-0.0.1-py3.6.egg
Extracting human_detection-0.0.1-py3.6.egg to /usr/local/lib/python3.6/dist-packages
Adding human-detection 0.0.1 to easy-install.pth file

Installed /usr/local/lib/python3.6/dist-packages/human_detection-0.0.1-py3.6.egg
Processing dependencies for human-detection==0.0.1
Finished processing dependencies for human-detection==0.0.1
/content


# **Restart your environment so that the human_detection package can be imported** (CTRL + M)

## 2. Ultralytics YOLOv5 GitHub repository

In [None]:
import torch
from IPython.display import clear_output

!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5
%pip install -qr requirements.txt  # install dependencies

clear_output()
print('Setup complete. Using torch %s %s' % (
    torch.__version__, 
    torch.cuda.get_device_properties(0) 
    if torch.cuda.is_available() else 'CPU'))

Setup complete. Using torch 1.7.0+cu101 _CudaDeviceProperties(name='Tesla T4', major=7, minor=5, total_memory=15079MB, multi_processor_count=40)


# Prepare the data

## 1. Download a YouTube video from its URL with youtube-dl

In [None]:
# URL of the video to download and the local path it is saved to.
video_url = "https://youtu.be/h4s0llOpKrU"
video_filename = "/content/input_video.mp4"

!pip install --upgrade youtube-dl
# -F only lists the formats available for this video (see the cell output).
!youtube-dl -F $video_url
# Download format code 137 into video_filename.
# NOTE(review): 137 is hard-coded — confirm it is one of the codes listed by -F for this video.
!youtube-dl -f 137 $video_url  -o $video_filename 


Collecting youtube-dl
[?25l  Downloading https://files.pythonhosted.org/packages/46/9c/69f5ede4f4b3e01390a9e9b355cb3bbe4e7550439bd0c33daa0faf87c1ba/youtube_dl-2020.12.14-py2.py3-none-any.whl (1.8MB)
[K     |████████████████████████████████| 1.9MB 16.3MB/s 
[?25hInstalling collected packages: youtube-dl
Successfully installed youtube-dl-2020.12.14
[youtube] h4s0llOpKrU: Downloading webpage
[info] Available formats for h4s0llOpKrU:
format code  extension  resolution note
249          webm       audio only tiny   53k , opus @ 50k (48000Hz), 282.96KiB
250          webm       audio only tiny   71k , opus @ 70k (48000Hz), 374.94KiB
140          m4a        audio only tiny  130k , m4a_dash container, mp4a.40.2@128k (44100Hz), 713.10KiB
251          webm       audio only tiny  140k , opus @160k (48000Hz), 738.09KiB
394          mp4        256x144    144p   71k , av01.0.00M.08, 25fps, video only, 363.07KiB
278          webm       256x144    144p   92k , webm container, vp9, 25fps, video only,

## 2. Extract video frames

In [None]:
import human_detection
from human_detection.video_frames import video_to_frames, \
                                         write_txt_list_of_filename

# Directory and text-file paths handed to the human_detection helpers below.
frames_dir = "/content/input_frames"
frames_txt = "/content/input_frames.txt"

# NOTE(review): the meaning of the third positional argument (False) is not
# visible from this notebook — confirm against video_to_frames.
video_to_frames(video_filename, frames_dir, False)
# Presumably writes the list of frame filenames found in frames_dir to frames_txt — verify.
write_txt_list_of_filename(frames_dir, frames_txt)

# Make predictions with several YOLOv5 models


In [None]:
import os
import os.path
from human_detection.video_frames import frames_to_video
from human_detection.draw_box import draw_person_box_on_frames
from human_detection.json_tool import labels_dir_to_json

# For each YOLOv5 checkpoint: run detect.py on the extracted frames, convert the
# saved labels to a JSON file, draw the boxes on the frames and build a video.
# Relies on `frames_dir` and `clear_output` defined in earlier cells, and on the
# working directory being the yolov5 clone (detect.py is invoked by relative path).
models = ["yolov5l", "yolov5m", "yolov5s", "yolov5x"]
project_dir = "/content/output"

for model in models:

  # Weights file name derived from the model name.
  weights = "{}.pt".format(model)

  # Per-model output locations.
  project_name = model
  # NOTE(review): assumes detect.py writes its --save-txt labels under
  # <project>/<name>/labels — confirm, in particular when the run directory already exists.
  labels_dir = os.path.join(project_dir, project_name, "labels")
  json_result = "/content/{}_result.json".format(model)
  pred_frames_dir = "/content/{}_frames".format(model)
  pred_video = "/content/{}_video.mp4".format(model)

  # Detection restricted to class 0 (presumably "person" — confirm), with labels
  # and confidences saved as text files.
  !python detect.py \
    --weights $weights \
    --img 640 \
    --conf 0.25 \
    --project $project_dir \
    --name $project_name \
    --save-txt \
    --save-conf \
    --classes 0 \
    --source $frames_dir

  # Discard the detect.py log before printing the progress messages below.
  clear_output()

  print("Convert labels_dir result to json file")
  labels_dir_to_json(frames_dir, 
                    labels_dir, 
                    json_result)

  print("Draw person box on frames")
  # Same 0.25 threshold as the --conf value passed to detect.py above.
  draw_person_box_on_frames(json_result, 
                            pred_frames_dir,
                            confidence=0.25)

  print("Convert frames to video")
  frames_to_video(pred_frames_dir, 
                  video_filename=pred_video, 
                  fps=25)

Convert labels_dir result to json file
Draw person box on frames


KeyboardInterrupt: ignored

# Appendix

## The same pipeline in pure Python code

In [None]:
import argparse
import time
from pathlib import Path

import cv2
import torch
import torch.backends.cudnn as cudnn
from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, \
                          non_max_suppression, \
                          apply_classifier, \
                          scale_coords, \
                          xyxy2xywh, \
                          strip_optimizer, \
                          set_logging, \
                          increment_path
import torch
from IPython.display import clear_output
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized


def detect(weights='yolov5s.pt',
           source='data/images',
           img_size=640,
           conf_thres=0.25,
           iou_thres=0.45,
           device='',
           agnostic_nms=False,
           augment=False,
           update=False):
    """Run a YOLOv5 model over every image of ``source`` and collect the detections.

    Args:
        weights: checkpoint handed to ``attempt_load``.
        source: image source handed to ``LoadImages``.
        img_size: requested inference size; validated with ``check_img_size``
            against the model's maximum stride.
        conf_thres: confidence threshold forwarded to ``non_max_suppression``.
        iou_thres: IoU threshold forwarded to ``non_max_suppression``.
        device: device specifier forwarded to ``select_device``.
        agnostic_nms: forwarded to ``non_max_suppression`` as ``agnostic``.
        augment: forwarded to the model call.
        update: unused; kept so the signature stays backward-compatible.

    Returns:
        A list with exactly one dict per image yielded by the loader::

            {"filename": <path>,
             "objects": [{"name": <class name>,
                          "confidence": <float>,
                          "relative_coordinates": {"center_x", "center_y",
                                                   "width", "height"}}, ...]}

        Coordinates are xywh values divided by the original image
        width/height.
    """
    # Initialize
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    imgsz = check_img_size(img_size, s=model.stride.max())  # check img_size
    if half:
        model.half()  # to FP16

    # Set Dataloader
    dataset = LoadImages(source, img_size=imgsz)

    # Class-index -> class-name lookup
    names = model.module.names if hasattr(model, 'module') else model.names

    data = list()

    # Inference only: disable autograd so no graph is kept for each frame.
    with torch.no_grad():
        # Run once on a blank image (skipped on CPU, as in the original code).
        if device.type != 'cpu':
            warmup = torch.zeros((1, 3, imgsz, imgsz), device=device)
            model(warmup.half() if half else warmup)

        for path, img, im0s, vid_cap in dataset:
            # Normalize image
            img = torch.from_numpy(img).to(device)
            img = img.half() if half else img.float()  # uint8 to fp16/32
            img /= 255.0  # 0 - 255 to 0.0 - 1.0
            if img.ndimension() == 3:
                img = img.unsqueeze(0)

            # Inference
            pred = model(img, augment=augment)[0]

            # Apply NMS and keep the detections of the single image in the batch
            detections = non_max_suppression(pred,
                                             conf_thres,
                                             iou_thres,
                                             classes=None,
                                             agnostic=agnostic_nms)[0]

            frame_result = {"filename": path, "objects": list()}

            if detections is not None and len(detections):
                gn = torch.tensor(im0s.shape)[[1, 0, 1, 0]]  # normalization gain whwh
                # Rescale boxes from img_size to the original image size
                detections[:, :4] = scale_coords(img.shape[2:],
                                                 detections[:, :4],
                                                 im0s.shape).round()

                for *xyxy, conf, cls in reversed(detections):
                    # normalized xywh
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()

                    frame_result["objects"].append({
                        "name": names[int(cls)],
                        "confidence": float(conf),
                        "relative_coordinates": {
                            "center_x": xywh[0],
                            "center_y": xywh[1],
                            "width": xywh[2],
                            "height": xywh[3]}})

            # Ends the current output line; presumably the loader prints a
            # per-image prefix without a newline (see the cell output) — verify.
            print()
            # Append once per frame (the original appended the same dict twice,
            # duplicating every frame in the result).
            data.append(frame_result)

    return data

In [None]:
import json

models = ["yolov5l", "yolov5m", "yolov5s", "yolov5x"]

# For every checkpoint: run detect() on the extracted frames, dump the result
# to JSON, draw the boxes on the frames and assemble the annotated video.
for model in models:
    weights = "{}.pt".format(model)

    # Per-model output locations, all derived from the model name.
    json_result, pred_frames_dir, pred_video = (
        template.format(model)
        for template in ("/content/{}_result.json",
                         "/content/{}_frames",
                         "/content/{}_video.mp4"))

    data = detect(weights=weights, source=frames_dir, img_size=640,
                  conf_thres=0.25, iou_thres=0.45)

    with open(json_result, 'w') as outfile:
        json.dump(data, outfile, indent=4)

    print("Draw person box on frames")
    draw_person_box_on_frames(json_result, pred_frames_dir, confidence=0.25)

    print("Convert frames to video")
    frames_to_video(pred_frames_dir, video_filename=pred_video, fps=25)

Fusing layers... 
image 1/1125 /content/input_frames/0.jpg: 
image 2/1125 /content/input_frames/1.jpg: 
image 3/1125 /content/input_frames/10.jpg: 
image 4/1125 /content/input_frames/100.jpg: 
image 5/1125 /content/input_frames/1000.jpg: 
image 6/1125 /content/input_frames/1001.jpg: 
image 7/1125 /content/input_frames/1002.jpg: 
image 8/1125 /content/input_frames/1003.jpg: 
image 9/1125 /content/input_frames/1004.jpg: 
image 10/1125 /content/input_frames/1005.jpg: 
image 11/1125 /content/input_frames/1006.jpg: 
image 12/1125 /content/input_frames/1007.jpg: 
image 13/1125 /content/input_frames/1008.jpg: 
image 14/1125 /content/input_frames/1009.jpg: 
image 15/1125 /content/input_frames/101.jpg: 
image 16/1125 /content/input_frames/1010.jpg: 
image 17/1125 /content/input_frames/1011.jpg: 
image 18/1125 /content/input_frames/1012.jpg: 
image 19/1125 /content/input_frames/1013.jpg: 
image 20/1125 /content/input_frames/1014.jpg: 
image 21/1125 /content/input_frames/1015.jpg: 
image 22/1125 

KeyboardInterrupt: ignored