In [None]:
# Clone the YOLOX source tree into ./YOLOX; a later cell installs the package from this directory.
!git clone https://github.com/Megvii-BaseDetection/YOLOX.git

Cloning into 'YOLOX'...
remote: Enumerating objects: 1723, done.[K
remote: Counting objects: 100% (1/1), done.[K
remote: Total 1723 (delta 0), reused 1 (delta 0), pack-reused 1722[K
Receiving objects: 100% (1723/1723), 6.83 MiB | 22.14 MiB/s, done.
Resolving deltas: 100% (1020/1020), done.


In [None]:
# Download the yolox_m.pth checkpoint (release tag 0.1.1rc0) into the working directory;
# the model-construction cell loads it by the relative path "yolox_m.pth".
!wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth

--2022-08-31 12:28:02--  https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth
Resolving github.com (github.com)... 20.205.243.166
Connecting to github.com (github.com)|20.205.243.166|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/386811486/a0b0f1ca-0e3c-43e4-829d-d9177f6be5f7?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20220831%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20220831T122802Z&X-Amz-Expires=300&X-Amz-Signature=99f11799fa3d506613e4831186fc2bcdedc9e850a1d2425670ab6c863ef31a25&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=386811486&response-content-disposition=attachment%3B%20filename%3Dyolox_m.pth&response-content-type=application%2Foctet-stream [following]
--2022-08-31 12:28:02--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/386811486/a0b0f1ca-0e3c-43e4-829d-d9177f6be5f7?

In [None]:
# Install the cloned YOLOX package (and its dependencies) into the kernel's environment.
# The explicit %pip magic is used instead of a bare `pip ...` line: the bare form relies on
# IPython automagic, which is only applied to single-line cells, whereas %pip works in any
# cell and always targets the interpreter the kernel is running.
%pip install YOLOX/

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Processing ./YOLOX
[33m  DEPRECATION: A future pip version will change local packages to be built in-place without first copying to a temporary directory. We recommend you use --use-feature=in-tree-build to test your packages with this new behavior before it becomes the default.
   pip 21.3 will remove support for this functionality. You can find discussion regarding this at https://github.com/pypa/pip/issues/7555.[0m
Collecting loguru
  Downloading loguru-0.6.0-py3-none-any.whl (58 kB)
[K     |████████████████████████████████| 58 kB 6.9 MB/s 
Collecting thop
  Downloading thop-0.1.1.post2207130030-py3-none-any.whl (15 kB)
Collecting ninja
  Downloading ninja-1.10.2.3-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl (108 kB)
[K     |████████████████████████████████| 108 kB 65.0 MB/s 
Collecting onnx==1.8.1
  Downloading onnx-1.8.1-cp37-cp37m-manylinux2010_x86_64.whl (14.5 MB)


In [None]:
import argparse
import os
import time
from unittest import result
from loguru import logger
import cv2
from google.colab.patches import cv2_imshow as im
from threading import Thread

import torch

from yolox.data.data_augment import ValTransform
from yolox.data.datasets import COCO_CLASSES
from yolox.exp import get_exp
from yolox.utils import fuse_model, get_model_info, postprocess, vis

In [None]:
# Lower-case image file extensions (with leading dot).
# NOTE(review): not referenced by any code visible in this notebook — presumably meant for
# filtering image inputs; confirm it is still needed.
IMAGE_EXT = [".jpg", ".jpeg", ".webp", ".bmp", ".png"]

class Yolox(object):
    """Inference wrapper that builds a YOLOX model and runs detection on single images.

    Args:
        exp_name: Experiment name passed to ``get_exp(None, exp_name)``.
        ckpt: Path to a checkpoint file; its ``"model"`` entry is loaded as the
            model state dict.
        conf: Confidence threshold; when not ``None`` it overrides ``exp.test_conf``.
        nms: NMS threshold; when not ``None`` it overrides ``exp.nmsthre``.
        img_size: When not ``None``, ``exp.test_size`` becomes ``(img_size, img_size)``.
        cls_names: Sequence of class names, stored on the instance as ``cls_names``.
        decoder: Optional callable applied to the raw model output before
            postprocessing.
        device: ``"gpu"`` moves the model and every input to CUDA; any other
            value skips the CUDA transfer.
        fp16: Run in half precision. Only takes effect when ``device == "gpu"``.
        legacy: Forwarded to ``ValTransform``.
    """

    def __init__(
        self,
        exp_name,
        ckpt,
        conf=0.25,
        nms=0.45,
        img_size=None,
        cls_names=COCO_CLASSES,
        decoder=None,
        device="cpu",
        fp16=False,
        legacy=False,
    ):
        # get model config, letting explicit arguments override the experiment defaults
        exp = get_exp(None, exp_name)
        if conf is not None:
            exp.test_conf = conf
        if nms is not None:
            exp.nmsthre = nms
        if img_size is not None:
            exp.test_size = (img_size, img_size)

        # build yolox model
        model = exp.get_model()
        logger.info("Model Summary: {}".format(get_model_info(model, exp.test_size)))
        if device == "gpu":
            model.cuda()
            if fp16:
                model.half()  # to FP16
        model.eval()

        # load model weights/checkpoint
        # NOTE(review): torch.load unpickles the file, so only checkpoints from a
        # trusted source should be loaded here.
        logger.info("loading checkpoint")
        ckpt = torch.load(ckpt, map_location="cpu")
        model.load_state_dict(ckpt["model"])
        logger.info("loaded checkpoint done.")

        self.model = model
        self.cls_names = cls_names
        self.decoder = decoder
        self.num_classes = exp.num_classes
        self.confthre = exp.test_conf
        self.nmsthre = exp.nmsthre
        self.test_size = exp.test_size
        self.device = device
        self.fp16 = fp16
        self.preproc = ValTransform(legacy=legacy)

    def predict(self, img):
        """Run detection on one image.

        Args:
            img: Either a path string (read with ``cv2.imread``) or an already
                loaded image array whose ``shape`` starts with (height, width).

        Returns:
            ``[]`` when postprocessing yields no detections. Otherwise the list
            ``[bboxes, cls, scores, img_info]`` where ``bboxes`` is a list of
            4-int lists rescaled by ``1 / ratio``, ``cls`` is a list of int
            class ids, ``scores`` is a list of floats (product of output
            columns 4 and 5) and ``img_info`` is a dict with the keys ``id``,
            ``file_name``, ``height``, ``width``, ``raw_img`` and ``ratio``.

        Raises:
            ValueError: If ``img`` is ``None`` or the path cannot be read as an
                image.
        """
        img_info = {"id": 0}
        if isinstance(img, str):
            img_path = img
            img_info["file_name"] = os.path.basename(img_path)
            img = cv2.imread(img_path)
            # cv2.imread signals failure by returning None instead of raising.
            if img is None:
                raise ValueError(f"could not read image: {img_path}")
        else:
            img_info["file_name"] = None
            if img is None:
                raise ValueError("img must be a file path or an image array, got None")

        height, width = img.shape[:2]
        img_info["height"] = height
        img_info["width"] = width
        img_info["raw_img"] = img

        # scale factor between the original image and the test size; predicted
        # boxes are divided by it below to express them in original-image pixels
        # (assumes self.preproc resizes by this same factor — TODO confirm)
        ratio = min(self.test_size[0] / img.shape[0], self.test_size[1] / img.shape[1])
        img_info["ratio"] = ratio

        # preprocess img according to transform used when training
        img, _ = self.preproc(img, None, self.test_size)
        img = torch.from_numpy(img).unsqueeze(0)
        img = img.float()
        if self.device == "gpu":
            img = img.cuda()
            if self.fp16:
                img = img.half()  # to FP16

        # predict
        with torch.no_grad():
            outputs = self.model(img)
            if self.decoder is not None:
                outputs = self.decoder(outputs, dtype=outputs.type())
            outputs = postprocess(
                outputs, self.num_classes, self.confthre,
                self.nmsthre, class_agnostic=True
            )

        # a batch of one image was submitted, so only the first entry is relevant
        if outputs[0] is None:
            return []

        output = outputs[0].cpu()
        bboxes = (output[:, 0:4] / ratio).type(torch.int).tolist()
        cls = output[:, 6].type(torch.int).tolist()
        scores = (output[:, 4] * output[:, 5]).tolist()
        return [bboxes, cls, scores, img_info]


In [None]:
def draw_bbox(img, bbox, classname):
    """Draw a labelled bounding box onto ``img`` and return the same image.

    A filled label background is drawn from the box's first corner, the class
    name is written on top of it, and finally the box outline itself is drawn.

    Args:
        img: Image passed straight through to the OpenCV drawing calls.
        bbox: Indexable with four integer coordinates; ``bbox[0], bbox[1]`` is
            one corner and ``bbox[2], bbox[3]`` the opposite corner.
        classname: Text of the label; its length determines the label width.

    Returns:
        The ``img`` object that was passed in.
    """
    box_color = (225, 225, 0)
    text_color = (255, 255, 255)

    left, top = bbox[0], bbox[1]
    # label background grows 7 px per character on top of 10 px of padding
    label_width = 10 + (7 * (len(classname)))
    label_height = 17

    # filled rectangle (thickness -1) acting as the label background
    cv2.rectangle(
        img, (left, top), (left + label_width, top + label_height), box_color, -1
    )
    cv2.putText(
        img, classname, (left + 5, top + 12),
        cv2.FONT_HERSHEY_SIMPLEX, 0.4, text_color, 1
    )
    # outline of the detection itself
    cv2.rectangle(img, (left, top), (bbox[2], bbox[3]), box_color, 2)

    return img


In [None]:
# def process_video(cap, duration):
#     ret_val, img = cap.read()
#     row, col, ch = img.shape
#     results = model.predict(img)
#     if len(results) != 0:
#         bboxes, cls_ids, scores, img_info = results

#     cls_names = [COCO_CLASSES[idx] for idx in cls_ids]
#     visualize_list = ["car", "truck", "bus", "motorcycle"]

#     count = {cls_name:cls_names.count(cls_name) for cls_name in visualize_list}
#     count["total"] = sum(list(count.values()))
#     # print(count["total"])

#     duration[0] = count["total"]
#     # duration[0] = 0
#     # if count["total"] <= 15:
#     #     duration[0] = 15
#     # elif count["total"] <= 30:
#     #     duration[0] = 30
#     # else:
#     #     duration[0] = 45

In [None]:
def process_video(video_path, model):
    """Run detection on every frame of a video and write an annotated copy.

    For each frame the detections whose class is in the vehicle list are drawn,
    the vehicles are counted, a duration (15 / 30 / 45) is derived from the
    count, and both numbers are written onto the frame. The result is saved
    next to the input as ``<name>_result<ext>``.

    Args:
        video_path: Path of the input video.
        model: Object with a ``predict(img)`` method that returns either ``[]``
            or ``[bboxes, cls_ids, scores, img_info]``.

    Returns:
        None.

    NOTE(review): the output keeps the input extension while the writer uses the
    "mp4v" fourcc — confirm the target container accepts that codec.
    """
    save_result = True
    filename, ext = os.path.splitext(video_path)
    save_path = filename + "_result" + ext

    # classes that are drawn and counted
    visualize_list = ["car", "truck", "bus", "motorcycle"]

    # instantiate videocapture
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # nothing can be read; bail out before creating an empty output file
        logger.error(f"could not open video {video_path}")
        cap.release()
        return

    width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
    fps = cap.get(cv2.CAP_PROP_FPS)

    # instantiate videowriter
    vid_writer = None
    if save_result:
        logger.info(f"video save_path is {save_path}")
        vid_writer = cv2.VideoWriter(
            save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))
        )

    try:
        # loop through all frames
        while True:
            ret_val, img = cap.read()
            if not ret_val:
                break

            row = img.shape[0]

            # predict objects; an empty result means "no detections in this
            # frame", so reset the lists instead of reusing the previous frame's
            results = model.predict(img)
            if len(results) != 0:
                bboxes, cls_ids, scores, img_info = results
            else:
                bboxes, cls_ids, scores = [], [], []

            # convert obj_class_ids to obj_class_names
            cls_names = [COCO_CLASSES[idx] for idx in cls_ids]

            # draw only the classes of interest
            for bbox, cls_name, score in zip(bboxes, cls_names, scores):
                if cls_name in visualize_list:
                    img = draw_bbox(img, bbox, cls_name)

            # counting
            count = {cls_name: cls_names.count(cls_name) for cls_name in visualize_list}
            count["total"] = sum(count.values())

            # duration logic
            if count["total"] <= 15:
                duration = 15
            elif count["total"] <= 30:
                duration = 30
            else:
                duration = 45

            # put text in frame, anchored to the bottom-left corner
            cv2.putText(
                img,
                f"total kendaraan: {count['total']}",
                (10, row - 10),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                (0, 255, 255),
                1
            )
            cv2.putText(
                img,
                f"durasi: {duration} detik",
                (10, row - 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                (0, 255, 255),
                1
            )

            # save video result to save_path, else visualize directly
            if save_result:
                vid_writer.write(img)
            else:
                cv2.namedWindow("yolox", cv2.WINDOW_NORMAL)
                cv2.imshow("yolox", img)

            # allow aborting with ESC / q / Q
            key = cv2.waitKey(1)
            if key == 27 or key == ord("q") or key == ord("Q"):
                break
    finally:
        # release the handles so the output file is finalised even on error
        cap.release()
        if vid_writer is not None:
            vid_writer.release()

In [None]:
if __name__ == "__main__":
    # instantiate obj det model (a single instance is shared by all worker threads)
    model = Yolox(
        exp_name="yolox-m",
        ckpt="yolox_m.pth",
        conf=0.2,
        nms=0.45,
        img_size=960,
        device="gpu",
    )

    # input videos; each one is processed in its own thread and the annotated
    # copy is written next to the input by process_video
    video_paths = [
        "sample_data/Capture_01.wmv",
        "sample_data/Capture_02.wmv",
        "sample_data/Capture_03.wmv",
        "sample_data/Capture_04.wmv",
    ]

    threads = [
        Thread(target=process_video, args=(video_path, model))
        for video_path in video_paths
    ]

    # start every worker first, then wait for all of them to finish
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # save_result = True
    # filename, ext = os.path.splitext(video_path)
    # save_path = filename + "_result" + ext

    # # instantiate videocapture
    # cap = cv2.VideoCapture(video_path)
    # width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
    # height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
    # fps = cap.get(cv2.CAP_PROP_FPS)
    # # instantiate videowriter
    # if save_result:
    #     logger.info(f"video save_path is {save_path}")
    #     vid_writer = cv2.VideoWriter(
    #         save_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (int(width), int(height))
    #     )
    # # loop through all frames
    # while True:
    #     # read frame per frame
    #     ret_val, img = cap.read()
    #     # if frame is not none
    #     if ret_val:
    #         row, col, ch = img.shape
    #         # predict object
    #         results = model.predict(img)
    #         if len(results) != 0:
    #             bboxes, cls_ids, scores, img_info = results

    #         # convert obj_class_ids to obj_class_names
    #         cls_names = [COCO_CLASSES[idx] for idx in cls_ids]
            
    #         # filter out unused class from visualization
    #         visualize_list = ["car", "truck", "bus", "motorcycle"]
    #         for bbox, cls_name, score in zip(bboxes, cls_names, scores):
    #             if cls_name in visualize_list:
    #                 img = draw_bbox(img, bbox, cls_name)

    #         # counting
    #         count = {cls_name:cls_names.count(cls_name) for cls_name in visualize_list}
    #         count["total"] = sum(list(count.values()))
    #         # print(count)

    #         # duration logic
    #         duration = 0
    #         if count["total"] <= 15:
    #             duration = 15
    #         elif count["total"] <= 30:
    #             duration = 30
    #         else:
    #             duration = 45

    #         # put text in frame
    #         cv2.putText(
    #             img,
    #             f"total kendaraan: {count['total']}",
    #             (10,row-10),
    #             cv2.FONT_HERSHEY_SIMPLEX,0.5,
    #             (0,255,255),
    #             1
    #         )
    #         cv2.putText(
    #             img,
    #             f"durasi: {duration} detik",
    #             (10,row-25),
    #             cv2.FONT_HERSHEY_SIMPLEX,0.5,
    #             (0,255,255),
    #             1
    #         )

    #         # save video result to save_path
    #         if save_result:
    #             vid_writer.write(img)
    #         # else visualize directly
    #         else:
    #             cv2.namedWindow("yolox", cv2.WINDOW_NORMAL)
    #             cv2.imshow("yolox", img)
    #         ch = cv2.waitKey(1)
    #         if ch == 27 or ch == ord("q") or ch == ord("Q"):
    #             break
    #     else:
    #         break

2022-08-31 12:32:34.895 | INFO     | __main__:__init__:28 - Model Summary: Params: 25.33M, Gflops: 166.46
2022-08-31 12:32:38.915 | INFO     | __main__:__init__:36 - loading checkpoint
2022-08-31 12:32:39.118 | INFO     | __main__:__init__:39 - loaded checkpoint done.
2022-08-31 12:32:39.217 | INFO     | __main__:process_video:13 - video save_path is sample_data/Capture_02_result.wmv
2022-08-31 12:32:39.220 | INFO     | __main__:process_video:13 - video save_path is sample_data/Capture_04_result.wmv
2022-08-31 12:32:39.222 | INFO     | __main__:process_video:13 - video save_path is sample_data/Capture_01_result.wmv
2022-08-31 12:32:39.223 | INFO     | __main__:process_video:13 - video save_path is sample_data/Capture_03_result.wmv


In [None]:
# buffer0 = [0]
# buffer1 = [0]
# prev_buffer0 = 0
# prev_buffer1 = 0

# model = Yolox(
#     exp_name="yolox-m",
#     ckpt="yolox_m.pth",
#     conf=0.2,
#     nms=0.45,
#     img_size=960,
#     device="gpu",
# )

# video_path = "sample_data/Capture_01.wmv"
# cap0 = cv2.VideoCapture(video_path)
# cap1 = cv2.VideoCapture(video_path)
# cap2 = cv2.VideoCapture(video_path)
# cap3 = cv2.VideoCapture(video_path)

# cap = cv2.VideoCapture('sample_data/Capture_01.wmv')
# while(cap.isOpened()):
#   ret, frame = cap.read()
#   # gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#   process_video(cap0, buffer0)
#   if cv2.waitKey(1) & 0xFF == ord('q'):
#     break

# cap.release()
# cv2.destroyAllWindows()


# # for i in range(150):
# #     thread0 = Thread(target=process_video, args=(cap0, buffer0))
# #     thread1 = Thread(target=process_video, args=(cap1, buffer1))
# #     thread2 = Thread(target=process_video, args=(cap1, buffer1))
# #     thread3 = Thread(target=process_video, args=(cap1, buffer1))
# #     thread0.start()
# #     thread1.start()
# #     thread2.start()
# #     thread3.start()
# #     thread0.join()
# #     thread1.join()
# #     thread2.join()
# #     thread3.join()

# #     print(buffer0, buffer1)

2022-08-31 07:19:47.662 | INFO     | __main__:__init__:28 - Model Summary: Params: 25.33M, Gflops: 166.46
2022-08-31 07:19:47.717 | INFO     | __main__:__init__:36 - loading checkpoint
2022-08-31 07:19:47.927 | INFO     | __main__:__init__:39 - loaded checkpoint done.
