# SETUP

In [1]:
# Record the CUDA compiler version and the GPU/driver state this notebook ran against.
!nvcc --version
!nvidia-smi

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Tue_Mar__8_18:18:20_PST_2022
Cuda compilation tools, release 11.6, V11.6.124
Build cuda_11.6.r11.6/compiler.31057947_0
Sun Feb 12 09:46:44 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.65.01    Driver Version: 515.65.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  On   | 00000000:90:00.0 Off |                    0 |
| N/A   37C    P0    61W / 400W |      0MiB / 81920MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+------

In [None]:
!python -m pip install paddlepaddle-gpu==2.4.1.post117 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html
!pip install decord

In [None]:
import sys

In [None]:
%%bash
# Clone PaddleDetection and install it, together with its requirements, into the current environment.
git clone https://github.com/PaddlePaddle/PaddleDetection.git
cd PaddleDetection
pip install -r requirements.txt
python setup.py install
cd ../
# A Python statement, sys.path.append("PaddleDetection"), used to end this cell. This cell is
# executed by bash, where that line is a syntax error and makes the whole cell exit non-zero.
# If the path entry is needed, run that statement from a Python cell.

In [None]:
# decord is already installed by an earlier cell of this notebook, so this repeat is
# redundant on a clean top-to-bottom run.
!pip install decord

In [None]:
!git clone https://github.com/ifzhang/ByteTrack.git
!cd ByteTrack && pip3 install -q -r requirements.txt
!cd ByteTrack && python3 setup.py -q develop
sys.path.append(f"{HOME}/ByteTrack")

In [None]:
!git clone https://github.com/Wiqzard/hack-sordi-tracking.git
sys.path.append(f"{HOME}/tracking-tools")

In [None]:
# Pin numpy to 1.22.2.
# NOTE(review): presumably required by one of the packages installed above — confirm which one.
!python3 -m pip install numpy==1.22.2

### First convert the dataset from the BMW format to the YOLOv5 format, then from YOLOv5 to COCO.

# Eval

In [None]:
%%bash
# Evaluate the model_final.pdparams checkpoint with the ppyoloe_plus_crn_m_80e_coco config on GPU 0.
cd PaddleDetection

PD_ROOT=/home/5qx9nf8a/team_workspace/PaddleDetection
CONFIGS="${PD_ROOT}/configs/ppyoloe/ppyoloe_plus_crn_m_80e_coco.yml"
# Alternative config, currently disabled:
#configs=${PD_ROOT}/configs/mot/bytetrack/detector/ppyoloe_plus_m_bytetrack.yml
WEIGHTS="${PD_ROOT}/tracking/model_final.pdparams"

export CUDA_VISIBLE_DEVICES=0
python tools/eval.py -c "${CONFIGS}" -o weights="${WEIGHTS}" --amp


# Export

In [None]:
%%bash
# Export the checkpoint with tools/export_model.py, passing trt=True.
cd PaddleDetection

PD_ROOT=/home/5qx9nf8a/team_workspace/PaddleDetection
WEIGHTS="${PD_ROOT}/tracking/model_final.pdparams"
CONFIGS="${PD_ROOT}/configs/ppyoloe/ppyoloe_plus_crn_m_80e_coco.yml"

python tools/export_model.py -c "${CONFIGS}" -o weights="${WEIGHTS}" trt=True

# Test TRT Inference

In [None]:
%%bash
# Benchmark the exported model on a single evaluation image in trt_fp16 mode on GPU 0.
cd PaddleDetection

WORKSPACE=/home/5qx9nf8a/team_workspace
EVAL_SET="${WORKSPACE}/data/Hackathon_Stage2/Evaluation_set"

MODEL_DIR="${WORKSPACE}/PaddleDetection/output_inference/ppyoloe_plus_crn_m_80e_coco"
IMAGE_PATH="${EVAL_SET}/dataset/images/124.jpg"
# Not passed to the command below; use --video_file=${VIDEO_PATH} in place of --image_file to run on the clip.
VIDEO_PATH="${EVAL_SET}/video/eval_video_1.mp4"

export CUDA_VISIBLE_DEVICES=0
python deploy/python/infer.py \
    --model_dir="${MODEL_DIR}" \
    --image_file="${IMAGE_PATH}" \
    --run_mode=trt_fp16 \
    --device=gpu \
    --run_benchmark=True

# Video Processing (Detection + Tracking + Scanning + Annotating)

In [4]:
from decord import VideoReader, cpu, gpu
import numpy as np
from typing import Generator
import itertools
import sys
from video_tools.source import generate_shifted_frames


def get_video_frames_batch_generator_v2(
    video_path: str, batch_size: int = 1, stride: int = 8, reduction_factor: int = 1
) -> Generator:
    """
    Yield the frames of a video in batches, followed by shifted copies of its last frame.

    Every ``reduction_factor``-th frame is read and yielded in chunks of
    ``batch_size`` (the final chunk of real frames may be shorter). Afterwards
    batches produced by ``generate_shifted_frames`` from the last video frame
    are yielded; an incomplete batch of those synthetic frames is dropped.

    :param video_path: str : The path of the video file.
    :param batch_size: int : The size of the batch.
    :param stride: int : Step passed to ``generate_shifted_frames``; also used to
        estimate how many synthetic frames follow the video.
    :param reduction_factor: int : Keep only every ``reduction_factor``-th frame.
    :return: Generator : Generator that yields the frames of the video in batches.
    """
    vr = VideoReader(video_path, ctx=cpu(0))
    frames_list = list(range(0, len(vr), reduction_factor))
    num_sampled = len(frames_list)

    last_frame = vr[-1].asnumpy()
    hor_size = last_frame.shape[1]
    # The tail length used to be computed from a hard-coded width of 1280 while the shift
    # range below already used the real frame width; both now follow the actual frame.
    extra_frames = int(0.85 * hor_size / stride)
    shifted_frames_generator = generate_shifted_frames(last_frame, int(0.85 * hor_size), stride)

    announced = False
    for idx in range(0, num_sampled + extra_frames, batch_size):
        if idx < num_sampled:
            yield vr.get_batch(frames_list[idx:idx + batch_size]).asnumpy()
            continue

        if not announced:
            print("This is the last frame.")
            announced = True
        frames = np.array(list(itertools.islice(shifted_frames_generator, batch_size)))
        # Only complete batches of synthetic frames are passed on; a short or empty
        # remainder is skipped.
        if frames.shape[0] == batch_size:
            yield frames

In [13]:
from dataclasses import dataclass, field
from __future__ import annotations


@dataclass(frozen=True)
class BYTETrackerArgs:
    """Immutable settings object; an instance is passed to ``BYTETracker`` when the tracker is built."""
    track_thresh: float = 0.2  # alternative value noted by the author: 0.4
    track_buffer: int = 30
    match_thresh: float = 0.7  # alternative value noted by the author: 0.7
    aspect_ratio_thresh: float = 3.0
    min_box_area: float = 1.0
    mot20: bool = False

@dataclass(frozen=True)
class Args:
    """Immutable run configuration shared by the detector, tracker, scanner and video I/O."""
    # Disabled alternative that would store a tracker-settings *instance* as a dataclass field:
    #BYTE_TRACKER_ARGS: BYTETrackerArgs = field(default_factory=lambda:BYTETrackerArgs())
    # No annotation, so this is a plain class attribute (not a dataclass field) holding the
    # BYTETrackerArgs class itself; VideoProcessor._build_tracker calls it to get an instance.
    BYTE_TRACKER_ARGS = BYTETrackerArgs
    # Passed as `stride` to the frame generator.
    STRIDE: int = 8
    # Passed as `reduction_factor` to the frame generator and as second argument to VideoSink.
    REDUCTION_FACTOR: int = 3
    SOURCE_VIDEO_PATH: str = (
        "data/Hackathon_Stage2/Evaluation_set/video/eval_video_1.mp4"
    )
    TARGET_VIDEO_PATH: str = "/temp/eval_video_1.mp4"
    # Directory of the exported inference model handed to the Detector.
    MODEL_DIR: str = "/home/5qx9nf8a/team_workspace/PaddleDetection/output_inference/ppyoloe_plus_crn_m_80e_coco"
    BATCH_SIZE: int = 32
    RUN_MODE: str = "trt_fp16"  # other modes noted by the author: "paddle", "trt_int8"
    CPU_THREADS: int = 1
    # Stride used by VideoProcessor to slice the detector's box array per frame.
    MAX_DETECTIONS: int = 300
    # Maps the detector's integer class id to the label drawn on the frame.
    CLASS_NAMES_DICT: dict = field(
        default_factory=lambda:{
            0: "klt_box_empty",
            1: "klt_box_full",
            2: "rack_1",
            3: "rack_2",
            4: "rack_3",
            5: "rack_4",
            6: "placeholder",
        }
    )

    # Coordinates of the Point the RackScanner is built with.
    SCANNER_X: int = 300
    SCANNER_Y: int = 50
    # Minimum confidence for any detection to be kept.
    BOX_THRESHOLD: float = 0.35
    # Stricter minimum confidence applied to the rack classes (ids 2-5).
    RACK_THRESHOLD: float = 0.79

# Module-level configuration instance used by the cells below.
args = Args()

In [6]:
import logging

# Notebook-wide logger: the root logger, opened up to DEBUG so that the
# logger.info(...) progress messages of the cells below are emitted.
logger = logging.getLogger(name=None)
logger.setLevel(level=logging.DEBUG)

INFO:root:test


In [None]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

from typing import Generator, Tuple, Any, Union
import os
import cv2
import numpy as np
from tqdm import tqdm
from pathlib import Path
import logging
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from functools import partial
#import pickle
#import concurrent.futures
import multiprocessing as mp
import paddle
paddle.utils.run_check()

from PaddleDetection.deploy.python.infer import Detector
from ByteTrack.yolox.tracker.byte_tracker import BYTETracker

from video_tools.video_info import VideoInfo
from video_tools.sink import VideoSink
from tracking.rack_counter_new import RackScanner, ScannerCounterAnnotator
from tracking.tracking_utils import detections2boxes, match_detections_with_tracks
from tracking.tracking_counter import create_submission_dict, write_submission
from detection.detection_tools import BoxAnnotator, Detections, process_placeholders
from draw.color import ColorPalette, Color
from geometry.geometry import Point


Frame = np.ndarray
Path = Union[str, Path]

class VideoProcessor:
    """
    Detect and track objects in a video, feed them to the rack scanner and
    write an annotated copy of the video.

    Attributes
    ----------
    args : Args
        run configuration (batch size, thresholds, scanner position, ...)
    source_video_path : str
        path of the ``.mp4`` video that is read
    target_dir : str
        directory the submission file is written to
    target_video_path : str
        path of the annotated video (``processed_eval_video.mp4`` in the target directory)
    video_info : VideoInfo
        metadata read from the source video
    video_sink : VideoSink
        writer for the processed frames
    tracker, scanner, box_annotator, scanner_annotator
        helpers built once in ``__init__``

    Methods
    -------
    process_video(detector, with_scanner=True, with_placeholders=True, with_annotate_scanner=True):
        Runs detection, tracking, scanning and annotation over the whole video.
    create_submission(mAP, fps, save=False):
        Builds (and optionally saves) the submission dictionary from the scanned racks.
    """

    # Only detections whose vertical box centre is smaller than this pixel value are kept.
    _MAX_CENTER_Y = 600

    def __init__(
        self,
        args: "Args",
        source_video_path: Union[str, Path],
        target_dir: Union[str, Path]="/temp/"
    ) -> None:
        """
        Validate the paths and build every helper except the detector.

        :param args: run configuration
        :param source_video_path: existing ``.mp4`` file to process
        :param target_dir: output directory (it is not created here)
        :raises ValueError: if the source path does not exist or is not an ``.mp4``
        """
        self.args = args

        # os.fspath lets both str and os.PathLike values through the string checks below.
        source_video_path = os.fspath(source_video_path)
        if not os.path.exists(source_video_path) or not source_video_path.endswith(".mp4"):
            raise ValueError("Invalid source video path")
        self.source_video_path = source_video_path

        target_dir = os.fspath(target_dir)
        # normpath merely strips a trailing separator. os.path.dirname, used here before,
        # returned the *parent* directory whenever target_dir had no trailing slash.
        self.target_dir = os.path.normpath(target_dir)
        #os.makedirs(target_dir, exist_ok=True)

        self.target_video_path = os.path.join(target_dir, "processed_eval_video.mp4")

        logger.info("<---------- BUILD VIDEOPROCESSOR ---------->")
        self.video_info: "VideoInfo" = VideoInfo.from_video_path(self.source_video_path)
        self._frame_shape: Tuple[int, int] = self.video_info.shape
        self.video_sink: "VideoSink" = VideoSink(self.target_video_path, self.args.REDUCTION_FACTOR, self.video_info)

        # The detector is not built here; process_video() receives it from the caller.
        #self.detector: Detector = self._build_detector()
        self.tracker: "BYTETracker" = self._build_tracker()
        self.scanner: "RackScanner" = self._build_scanner()
        self.box_annotator: "BoxAnnotator" = self._build_box_annotator()
        self.scanner_annotator: "ScannerCounterAnnotator" = self._build_scanner_annotator()
        logger.info("<--------- INITILIAZATION COMPLETE ---------> \n")

    def _build_detector(self) -> "Detector":
        """Build a GPU ``Detector`` from ``args`` (not called by ``__init__``)."""
        logger.info("*** BUILD DETECTOR ***")
        return Detector(model_dir=self.args.MODEL_DIR,
                 device='GPU',
                 run_mode=self.args.RUN_MODE,
                 batch_size=self.args.BATCH_SIZE,
                 trt_min_shape=1,
                 trt_max_shape=1280,
                 trt_opt_shape=640,
                 trt_calib_mode=False,
                 cpu_threads=self.args.CPU_THREADS,
                 enable_mkldnn=False,
                 enable_mkldnn_bfloat16=False,
                 output_dir='output_paddle',
                 threshold=0.5,
                 delete_shuffle_pass=False)

    def _build_tracker(self) -> "BYTETracker":
        """Build the tracker from a fresh instance of ``args.BYTE_TRACKER_ARGS``."""
        logger.info("*** BUILD TRACKER ***")
        return BYTETracker(self.args.BYTE_TRACKER_ARGS())

    def _build_scanner(self) -> "RackScanner":
        """Build the rack scanner at ``(args.SCANNER_X, args.SCANNER_Y)``."""
        logger.info("*** BUILD SCANNER ***")
        return RackScanner(Point(x=self.args.SCANNER_X, y=self.args.SCANNER_Y), 620)

    def _build_box_annotator(self) -> "BoxAnnotator":
        """Build the annotator that draws detection boxes and their labels."""
        logger.info("*** BUILD BOX ANNOTATOR ***")
        return BoxAnnotator(
                color=ColorPalette(),
                thickness=2,
                text_thickness=1,
                text_scale=0.3,
                text_padding=2,
            )

    def _build_scanner_annotator(self) -> "ScannerCounterAnnotator":
        """Build the annotator that draws the scanner and its counters."""
        logger.info("*** BUILD SCNANER ANNOTATOR ***")
        return ScannerCounterAnnotator(
                thickness=2,
                color=Color.white(),
                text_thickness=2,
                text_color=Color.red(),
                text_scale=0.6,
                text_offset=1.5,
                text_padding=10,
            )

    def _build_generator(self) -> Generator:
        """Create the batched frame generator for the source video."""
        return get_video_frames_batch_generator_v2(
            self.source_video_path, batch_size=self.args.BATCH_SIZE, stride=self.args.STRIDE, reduction_factor=self.args.REDUCTION_FACTOR
        )

    def _infer_batch(self, detector, batch: "List[Frame]") -> dict:
        """Run the detector on one batch and return the dictionary built by ``postprocess``."""
        # The return value of preprocess() is not needed here.
        # NOTE(review): presumably it stages the batch on the predictor's inputs — confirm.
        detector.preprocess(batch)
        detector.predictor.run()
        return self.postprocess(detector)

    def create_submission(self, mAP: float, fps: float, save: bool=False) -> dict:
        """
        Build the submission dictionary from the racks recorded by the scanner.

        :param mAP: mAP in percent points
        :param fps: measured frames per second
        :param save: also write ``AcademicWeapons.json`` into the target directory
        :raises ValueError: if ``fps`` is below 25 or ``mAP`` is below 10
        """
        if fps < 25 or mAP < 10:
            raise ValueError("fps or mAP in wrong format")
        submission_dict = create_submission_dict(scanned_racks=self.scanner.rack_tracks, mAP=mAP, fps=fps)
        if save:
            submission_path = os.path.join(self.target_dir, "AcademicWeapons.json")
            write_submission(submission_dict=submission_dict, submission_path=submission_path)
        return submission_dict

    def postprocess(self, detector):
        """
        Copy the predictor's first two outputs to the CPU.

        :return: ``{'boxes': ..., 'boxes_num': ...}``; ``boxes`` is an empty ``(0, 6)``
            array when ``boxes_num`` sums to zero
        :raises ValueError: if the second output is not a numpy array
        """
        predictor = detector.predictor
        output_names = predictor.get_output_names()
        np_boxes = predictor.get_output_handle(output_names[0]).copy_to_cpu()
        np_boxes_num = predictor.get_output_handle(output_names[1]).copy_to_cpu()

        if not isinstance(np_boxes_num, np.ndarray):
            raise ValueError("np_boxes_num` should be a `numpy.ndarray`")

        if np_boxes_num.sum() <= 0:
            logger.warning('[WARNNING] No object detected.')
            return {'boxes': np.zeros([0, 6]), 'boxes_num': np_boxes_num}
        return {'boxes': np_boxes, 'boxes_num': np_boxes_num}

    def process_video(self, detector, with_scanner: bool=True, with_placeholders: bool=True, with_annotate_scanner: bool=True) -> None:
        """
        Run detection and tracking over the whole video and write every frame to the sink.

        :param detector: object providing ``preprocess`` and ``predictor`` (see ``_infer_batch``)
        :param with_scanner: update the rack scanner with the tracked detections
        :param with_placeholders: draw placeholders (only effective together with the two
            scanner flags, see the note in the body)
        :param with_annotate_scanner: draw the scanner onto the frames
        """
        self.with_scanner, self.with_placeholders = with_scanner, with_placeholders

        generator = self._build_generator()
        expected_batches = int(self.video_info.total_frames / self.args.BATCH_SIZE / self.args.REDUCTION_FACTOR)
        # One thread pool for the whole run instead of creating and tearing one down per batch.
        with self.video_sink as sink, ThreadPoolExecutor() as executor:
            for batch in tqdm(generator, total=expected_batches):
                if batch is None:
                    continue
                results = self._infer_batch(detector, batch)
                batch_indices = range(len(batch))

                detections_dict = dict(
                    self._initial_results_to_detections(results, i) for i in batch_indices
                )
                if len(detections_dict) == 0:
                    # Only an empty batch ends up here. The frames are passed through
                    # untouched (this loop used to iterate over the int len(batch)).
                    for frame in batch:
                        sink.write_frame(frame)
                    continue

                detections_dict = {i: self._get_tracks(detections_dict[i]) for i in batch_indices}

                frames_gen = enumerate(batch)
                if with_scanner and with_annotate_scanner:
                    frames_gen = self._scan_and_annotate(batch, detections_dict)
                    # NOTE(review): placeholder and box annotation have always been nested in
                    # this branch, so they are skipped whenever the scanner is off or not drawn.
                    # That looks like an indentation accident — confirm before moving them out.
                    if with_placeholders:
                        frames_gen = executor.map(
                            self._annotate_placeholders,
                            ((i, frame, detections_dict[i]) for i, frame in frames_gen),
                        )
                    frames_gen = executor.map(
                        self._annotate_detections,
                        ((i, frame, detections_dict[i]) for i, frame in frames_gen),
                    )
                elif with_scanner:
                    for i in batch_indices:
                        self._update_scanner(detections_dict[i])

                # Restore frame order before writing.
                for _, frame in sorted(frames_gen, key=lambda item: item[0]):
                    sink.write_frame(frame)

    def _scan_and_annotate(self, batch, detections_dict):
        """Lazily update the scanner with each frame's detections, then draw it on that frame."""
        for i in range(len(batch)):
            self._update_scanner(detections_dict[i])
            yield self._annotate_scanner(batch[i], i)

    def sort_indexed_tuple(self, tup: Tuple[Tuple[int, Any]]):
        """
        Sort ``(index, value)`` pairs by index.

        A list is sorted in place and returned (the previous behaviour); any other
        sequence, e.g. the tuple named in the annotation, has no ``.sort`` and is
        returned as a new sorted tuple.
        """
        if isinstance(tup, list):
            tup.sort(key=lambda x: x[0])
            return tup
        return tuple(sorted(tup, key=lambda x: x[0]))

    def _frame_boxes(self, results, idx: int) -> np.ndarray:
        """
        Return the rows of ``results['boxes']`` that belong to frame ``idx`` of the batch.

        The per-frame counts in ``results['boxes_num']`` are used when they cover ``idx``;
        otherwise the fixed ``args.MAX_DETECTIONS`` stride is used. Both give the same
        slice when every frame holds exactly ``MAX_DETECTIONS`` boxes.
        """
        boxes = results['boxes']
        boxes_num = results.get('boxes_num')
        if boxes_num is not None:
            counts = np.asarray(boxes_num).astype(int).ravel()
            if idx < counts.size:
                end = int(counts[:idx + 1].sum())
                return boxes[end - int(counts[idx]):end, :]
        start = idx * self.args.MAX_DETECTIONS
        return boxes[start:start + self.args.MAX_DETECTIONS, :]

    def _initial_results_to_detections(self, results, idx: int) -> "Tuple[int, Detections]":
        """
        Turn the raw detector output for frame ``idx`` into filtered ``Detections``.

        Box rows are read as ``[class_id, confidence, x1, y1, x2, y2]``.
        """
        boxes = self._frame_boxes(results, idx)
        boxes = boxes[boxes[:,1] > self.args.BOX_THRESHOLD]
        detections = Detections(
                xyxy=boxes[:,2:],
                confidence=boxes[:,1],
                class_id=boxes[:,0].astype(int)
            )
        # keep only boxes whose vertical centre lies above the cut-off row
        position_mask = (detections.xyxy[:,1] + (detections.xyxy[:, 3] - detections.xyxy[:, 1])/2) < self._MAX_CENTER_Y
        # NOTE(review): an area filter for KLT boxes (area < 2500) used to be computed here but
        # was never part of the final mask; it was removed as dead code — confirm it is not wanted.
        mask_conf_klt = np.isin(detections.class_id, [0, 1])
        mask_conf_rack = np.logical_and(detections.confidence > self.args.RACK_THRESHOLD, np.isin(detections.class_id, [2, 3, 4, 5]))
        mask = np.logical_and(np.logical_or(mask_conf_klt, mask_conf_rack), position_mask)

        detections.filter(mask=mask, inplace=True)
        return (idx, detections)

    def _get_tracks(self, detections: "Detections") -> "Detections":
        """Update the tracker with ``detections`` and drop detections without a tracker id."""
        tracks = self.tracker.update(
                output_results=detections2boxes(detections=detections),
                img_info=self.video_info.shape,
                img_size=self.video_info.shape
            )
        if len(detections) == 0 and len(tracks) == 0:
            return detections

        tracker_id = match_detections_with_tracks(detections=detections, tracks=tracks)
        detections.tracker_id = np.array(tracker_id)

        # filtering out detections without trackers
        mask = np.array([tracker_id is not None for tracker_id in detections.tracker_id], dtype=bool)
        detections.filter(mask=mask, inplace=True)
        return detections

    def _annotate_placeholders(self, frame_detections) -> "Tuple[int, Frame]":
        """Draw the placeholders derived from the detections, if any, onto the frame."""
        idx, frame, detections = frame_detections
        placeholders, placeholder_labels = process_placeholders(detections, self.scanner.scanner.x)
        if placeholders and placeholder_labels:
            frame = self.box_annotator.annotate(
                    frame=frame, detections=placeholders, labels=placeholder_labels
                )
        return idx, frame

    def _annotate_detections(self, frame_detections: "Tuple[int, Frame, Detections]") -> "Tuple[int, Frame]":
        """Draw every detection box with a ``#<tracker id> <class name> <confidence>`` label."""
        idx, frame, detections = frame_detections
        labels = [
            f"#{tracker_id} {self.args.CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
            for _, confidence, class_id, tracker_id
            in detections]
        # annotate detection boxes
        frame = self.box_annotator.annotate(
            frame=frame, detections=detections, labels=labels
        )
        return idx, frame

    def _update_scanner(self, detections: "Detections") -> None:
        """Feed one frame's detections to the rack scanner."""
        self.scanner.update(detections)

    def _annotate_scanner(self, frame: "Frame", idx: int) -> "Tuple[int, Frame]":
        """Draw the scanner onto the frame and return it together with its batch index."""
        return idx, self.scanner_annotator.annotate(frame=frame, rack_scanner=self.scanner)

In [10]:
if __name__ == "__main__":
    # Alternative input: "data/Hackathon_Stage2/Evaluation_set/video/eval_video_1.mp4"
    SOURCE_VIDEO_PATH = "data/live_demo/Evaluation_set/demo_eval_video/full_eval_demo_video.mp4"
    # Despite its name this is the output directory passed as `target_dir` below.
    TARGET_VIDEO_PATH = "/home/5qx9nf8a/team_workspace/temp/"

    # The detector is built here and handed to process_video().
    detector = Detector(
        # model and runtime
        model_dir=args.MODEL_DIR,
        device='GPU',
        run_mode=args.RUN_MODE,
        batch_size=args.BATCH_SIZE,
        cpu_threads=args.CPU_THREADS,
        # TensorRT settings
        trt_min_shape=1,
        trt_max_shape=1280,
        trt_opt_shape=640,
        trt_calib_mode=False,
        # remaining options
        enable_mkldnn=False,
        enable_mkldnn_bfloat16=False,
        output_dir='output_paddle',
        threshold=0.3,
        delete_shuffle_pass=False,
    )

    video_processor = VideoProcessor(
        args=args,
        source_video_path=SOURCE_VIDEO_PATH,
        target_dir=TARGET_VIDEO_PATH,
    )
    video_processor.process_video(
        detector,
        with_scanner=True,
        with_placeholders=True,
        with_annotate_scanner=True,
    )

-----------  Model Configuration -----------
Model Arch: YOLO
Transform Order: 
--transform op: Resize
--transform op: NormalizeImage
--transform op: Permute
--------------------------------------------


INFO:root:<---------- BUILD VIDEOPROCESSOR ---------->
INFO:root:*** BUILD TRACKER ***
INFO:root:*** BUILD SCANNER ***
INFO:root:*** BUILD BOX ANNOTATOR ***
INFO:root:*** BUILD SCNANER ANNOTATOR ***
INFO:root:<--------- INITILIAZATION COMPLETE ---------> 



<---------- Processing video ---------->


100it [01:30,  1.35it/s]                       

This is the last frame.


104it [01:32,  1.12it/s]

Elapsed time: 92.49 seconds for 9589 frames
Average FPS: 103.68 frames per second





In [12]:
# Build the submission from the racks recorded by the scanner; save=True also writes
# AcademicWeapons.json into the processor's target directory.
video_processor.create_submission(mAP=93.3, fps=103.68, save=True)

{'eval_video': [{'rack_name': 'rack_1',
   'rack_conf': 0.983929,
   'N_full_KLT': 14,
   'N_empty_KLT': 11,
   'N_Pholders': 23,
   'shelf_N_Pholders': {'shelf_1': 6,
    'shelf_2': 8,
    'shelf_3': 1,
    'shelf_4': 8}},
  {'rack_name': 'rack_2',
   'rack_conf': 0.961904,
   'N_full_KLT': 8,
   'N_empty_KLT': 7,
   'N_Pholders': 24,
   'shelf_N_Pholders': {'shelf_1': 6,
    'shelf_2': 4,
    'shelf_3': 9,
    'shelf_4': 5}},
  {'rack_name': 'rack_4',
   'rack_conf': 0.984042,
   'N_full_KLT': 6,
   'N_empty_KLT': 7,
   'N_Pholders': 3,
   'shelf_N_Pholders': {'shelf_1': 1, 'shelf_3': 1, 'shelf_4': 1}},
  {'rack_name': 'rack_3',
   'rack_conf': 0.984296,
   'N_full_KLT': 2,
   'N_empty_KLT': 4,
   'N_Pholders': 0,
   'shelf_N_Pholders': {}},
  {'rack_name': 'rack_3',
   'rack_conf': 0.984311,
   'N_full_KLT': 2,
   'N_empty_KLT': 2,
   'N_Pholders': 2,
   'shelf_N_Pholders': {'shelf_2': 1, 'shelf_3': 1}},
  {'rack_name': 'rack_4',
   'rack_conf': 0.985188,
   'N_full_KLT': 5,
   'N_e

{'eval_video': [{'rack_name': 'rack_1',
   'rack_conf': 0.976074,
   'N_full_KLT': 13,
   'N_empty_KLT': 9,
   'N_Pholders': 26,
   'shelf_N_Pholders': {'shelf_1': 8,
    'shelf_2': 8,
    'shelf_3': 2,
    'shelf_4': 8}},
  {'rack_name': 'rack_2',
   'rack_conf': 0.955078,
   'N_full_KLT': 5,
   'N_empty_KLT': 4,
   'N_Pholders': 30,
   'shelf_N_Pholders': {'shelf_1': 7,
    'shelf_2': 7,
    'shelf_3': 9,
    'shelf_4': 7}},
  {'rack_name': 'rack_4',
   'rack_conf': 0.973633,
   'N_full_KLT': 6,
   'N_empty_KLT': 7,
   'N_Pholders': 3,
   'shelf_N_Pholders': {'shelf_1': 1, 'shelf_3': 1, 'shelf_4': 1}},
  {'rack_name': 'rack_3',
   'rack_conf': 0.971191,
   'N_full_KLT': 2,
   'N_empty_KLT': 4,
   'N_Pholders': 0,
   'shelf_N_Pholders': {}},
  {'rack_name': 'rack_3',
   'rack_conf': 0.969238,
   'N_full_KLT': 2,
   'N_empty_KLT': 2,
   'N_Pholders': 2,
   'shelf_N_Pholders': {'shelf_2': 1, 'shelf_3': 1}},
  {'rack_name': 'rack_4',
   'rack_conf': 0.966309,
   'N_full_KLT': 5,
   'N_empty_KLT': 5,
   'N_Pholders': 6,
   'shelf_N_Pholders': {'shelf_1': 1,
    'shelf_2': 1,
    'shelf_3': 2,
    'shelf_4': 1,
    'shelf_5': 1}},
  {'rack_name': 'rack_1',
   'rack_conf': 0.964355,
   'N_full_KLT': 15,
   'N_empty_KLT': 10,
   'N_Pholders': 23,
   'shelf_N_Pholders': {'shelf_1': 5,
    'shelf_2': 6,
    'shelf_3': 6,
    'shelf_4': 6}},
  {'rack_name': 'rack_2',
   'rack_conf': 0.978027,
   'N_full_KLT': 12,
   'N_empty_KLT': 5,
   'N_Pholders': 22,
   'shelf_N_Pholders': {'shelf_1': 6,
    'shelf_2': 3,
    'shelf_3': 9,
    'shelf_4': 4}}],
 'mAP': 96.3,
 'FPS': 0}

In [None]:
%%bash
# PaddleYOLO workflow cell: only step 4 (export) is active; every other step is kept
# as a commented-out command for reference.
cd PaddleDetection/PaddleYOLO
pwd

model_name=ppyoloe 
job_name=ppyoloe_plus_crn_m_80e_coco 

# Absolute config path; the trailing comment shows the job_name-based relative form.
config=/home/5qx9nf8a/team_workspace/PaddleDetection/PaddleYOLO/configs/custom/ppyoloe_plus_crn_m_80e_coco.yml #/configs/custom/${job_name}.yml
log_dir=log_dir/${job_name}
weights=output/${job_name}/model_final.pdparams
video_path=/home/5qx9nf8a/team_workspace/data/Hackathon_Stage2/Evaluation_set/video/eval_video_1.mp4
img_folder=/home/5qx9nf8a/team_workspace/data/trans_data/val/images/
# 1.training (single GPU / multi GPU)
# CUDA_VISIBLE_DEVICES=0 python tools/train.py -c ${config} --eval --amp

# 2.eval
#CUDA_VISIBLE_DEVICES=0 python tools/eval.py -c ${config} -o weights=${weights} --classwise

# 3.infer
#CUDA_VISIBLE_DEVICES=0 python tools/infer.py -c ${config} -o weights=${weights} --infer_dir=${img_folder} --draw_threshold=0.5


# 4.export
CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=${weights} # trt=True

# CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=${weights} exclude_post_process=True # trt=True

# CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c ${config} -o weights=${weights} exclude_nms=True # trt=True

# 5.deploy infer
#CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/${job_name} --image_file=demo/000000014439_640x640.jpg --device=GPU

# 6.deploy speed, add '--run_mode=trt_fp16' to test in TensorRT FP16 mode
#CUDA_VISIBLE_DEVICES=0 python deploy/python/infer.py --model_dir=output_inference/${job_name} --image_file=demo/000000014439_640x640.jpg --device=GPU --run_benchmark=True # --run_mode=trt_fp16

# 7.export onnx
#paddle2onnx --model_dir output_inference/${job_name} --model_filename model.pdmodel --params_filename model.pdiparams --opset_version 12 --save_file ${job_name}.onnx

# 8.onnx speed
#/usr/local/TensorRT-8.0.3.4/bin/trtexec --onnx=${job_name}.onnx --workspace=4096 --avgRuns=10 --shapes=input:1x3x640x640 --fp16
#/usr/local/TensorRT-8.0.3.4/bin/trtexec --onnx=${job_name}.onnx --workspace=4096 --avgRuns=10 --shapes=input:1x3x640x640 --fp32



In [None]:
# OLD APPROACH (earlier experiments, kept for reference)

## Export: To TRT and ONNX

In [10]:
#!CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c configs/mot/bytetrack/detector/ppyoloe_crn_l_36e_640x640_mot17half.yml -o weights=https://paddledet.bj.bcebos.com/models/mot/ppyoloe_crn_l_36e_640x640_mot17half.pdparams
%cd PaddleDetection
!CUDA_VISIBLE_DEVICES=0 python tools/export_model.py -c /home/5qx9nf8a/team_workspace/PaddleDetection/configs/mot/bytetrack/detector/ppyoloe_plus_l_bytetrack.yml -o weights=/home/5qx9nf8a/team_workspace/PaddleDetection//tracking/model_final.pdparams
    

[Errno 2] No such file or directory: 'PaddleDetection'
/teams/hack_team_09/workspace/PaddleDetection
NOTE! Installing ujson may make loading annotations faster.
[02/09 05:14:34] ppdet.utils.checkpoint INFO: The shape [72] in pretrained weight backbone.stages.0.attn.fc.bias is unmatched with the shape [96] in model backbone.stages.0.attn.fc.bias. And the weight backbone.stages.0.attn.fc.bias will not be loaded
[02/09 05:14:34] ppdet.utils.checkpoint INFO: The shape [72, 72, 1, 1] in pretrained weight backbone.stages.0.attn.fc.weight is unmatched with the shape [96, 96, 1, 1] in model backbone.stages.0.attn.fc.weight. And the weight backbone.stages.0.attn.fc.weight will not be loaded
[02/09 05:14:34] ppdet.utils.checkpoint INFO: The shape [36] in pretrained weight backbone.stages.0.blocks.0.conv1.bn._mean is unmatched with the shape [48] in model backbone.stages.0.blocks.0.conv1.bn._mean. And the weight backbone.stages.0.blocks.0.conv1.bn._mean will not be loaded
[02/09 05:14:34] ppdet.u

In [21]:
# NOTE(review): `model` is not defined in any visible cell of this notebook; the recorded
# output suggests a YOLOv8m model — confirm where it was created before re-running.
model.info()

YOLOv8m summary: 218 layers, 25843234 parameters, 0 gradients, 78.7 GFLOPs


In [36]:
# NOTE(review): relies on the same `model` object, which is not defined in any visible cell.
model.val()

Ultralytics YOLOv8.0.11 🚀 Python-3.8.13 torch-1.13.1+cu117 CUDA:0 (NVIDIA A100-SXM4-80GB, 81112MiB)
[34m[1mval: [0mScanning /teams/hack_team_09/workspace/data/trans_data/val/labels.cache... 1000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1000/1000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:35<00:00,  1.78it/s]
                   all       1000      39332      0.944      0.896      0.963      0.931
         klt_box_empty       1000      16453      0.999      0.782      0.926      0.883
          klt_box_full       1000      19518      0.995      0.762      0.942      0.882
                rack_1       1000        715      0.678      0.933      0.947      0.902
                rack_2       1000        581          1      0.977      0.987      0.963
                rack_3       1000       1063          1      0.971      0.991      0.987
                rack_4       1000       1002      0.

In [20]:
from tracking.tracking_counter import create_submission_dict, write_submission

# Earlier variant that read the racks straight from the scanner (the trailing 99.71 is a stray note):
#submission_dict = create_submission_dict(scanned_racks=scanner.rack_tracks, mAP=96.3, fps=19.47)99.71
# NOTE(review): `saved_racks` is not defined in any visible cell of this notebook — confirm its origin.
submission_dict = create_submission_dict(scanned_racks=saved_racks, mAP=96.3, fps=0) 

# Written relative to the kernel's working directory.
write_submission(submission_dict=submission_dict, submission_path="temp/AcademicWeapons.json")


In [19]:
import cv2
from ultralytics.yolo.v8.detect.predict import DetectionPredictor

# Inputs used by the YOLOv8 experiments.
video = "data/Hackathon_Stage2/Evaluation_set/video/eval_video_1.mp4"
img = "data/Hackathon_Stage2/Evaluation_set/dataset/images/109.jpg"
engine = "runs/detect/train2/weights/best.engine"
data = "data/trans_data/val/images"

# Read one image and add a leading batch axis.
im = cv2.imread(img)[None]
print(im.shape)

# Predictor backed by the exported engine file, configured for prediction at size 960.
predictor = DetectionPredictor()  # alternative tried: CustomPredictor()
predictor.setup_model(model=engine)
predictor.imgsz = (3, 960, 960)
for option, value in (("mode", "predict"), ("imgsz", 960), ("save", False), ("cache", True)):
    setattr(predictor.args, option, value)



Ultralytics YOLOv8.0.11 🚀 Python-3.8.13 torch-1.13.1+cu117 CUDA:0 (NVIDIA A100-SXM4-80GB, 81112MiB)
Loading runs/detect/train2/weights/best.engine for TensorRT inference...


(1, 720, 1280, 3)
[02/02/2023-00:12:52] [TRT] [I] The logger passed into createInferRuntime differs from one already provided for an existing builder, runtime, or refitter. Uses of the global logger, returned by nvinfer1::getLogger(), will return the existing value.

[02/02/2023-00:12:52] [TRT] [I] Loaded engine size: 51 MiB
[02/02/2023-00:12:52] [TRT] [I] [MemUsageChange] Init cuDNN: CPU +0, GPU +35, now: CPU 2647, GPU 3498 (MiB)
[02/02/2023-00:12:52] [TRT] [W] TensorRT was linked against cuDNN 8.6.0 but loaded cuDNN 8.5.0
[02/02/2023-00:12:52] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +50, now: CPU 3, GPU 1624 (MiB)
[02/02/2023-00:12:52] [TRT] [I] [MemUsageChange] Init cuDNN: CPU +0, GPU +32, now: CPU 2595, GPU 3498 (MiB)
[02/02/2023-00:12:52] [TRT] [W] TensorRT was linked against cuDNN 8.6.0 but loaded cuDNN 8.5.0
[02/02/2023-00:12:52] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU

In [69]:
# Per-stage timing of the old single-frame pipeline (detect -> track -> match -> scan -> write).
# NOTE(review): TARGET_VIDEO_PATH, video_info, generator, predictor, byte_tracker,
# detection_list, CLASS_NAMES_DICT and scanner must already exist in the kernel; several of
# them are not defined in any visible cell of this notebook.
from time import time

with VideoSink(TARGET_VIDEO_PATH, 1, video_info) as sink:
    # Accumulated wall-clock seconds per stage.
    detect_time, track_up_time, track_matcher, box_anno, scanner_up, scanner_anno, writer = 0,0,0,0,0,0,0
    
    for idx, frame in tqdm(enumerate(generator), total=video_info.total_frames):
        
        # stage 1: detection
        start = time()
        results = predictor(source=frame)
        end = time()
        detect_time += end - start
        
        detections = Detections(
            xyxy=results[0].boxes.xyxy.cpu().numpy(),
            confidence=results[0].boxes.conf.cpu().numpy(),
            class_id=results[0].boxes.cls.cpu().numpy().astype(int)
        )
        # Tracking, scanning and annotation are skipped for frames without detections.
        if detections.xyxy.shape[0] != 0:
            # stage 2: tracking detections
            start = time()
            tracks = byte_tracker.update(
                output_results=detections2boxes(detections=detections),
                img_info=frame.shape,
                img_size=frame.shape
            )
            end = time()
            track_up_time += end - start
            
            # stage 3: assign tracker ids to the detections
            start = time()
            tracker_id = match_detections_with_tracks(detections=detections, tracks=tracks)
            end = time()
            track_matcher += end - start
            
            detections.tracker_id = np.array(tracker_id)

            # filtering out detections without trackers
            mask = np.array([tracker_id is not None for tracker_id in detections.tracker_id], dtype=bool)
            detections.filter(mask=mask, inplace=True)
            detection_list.append(detections)
            
            # format custom labels (unused while the box annotation below is commented out)
            labels = [
                f"#{tracker_id} {CLASS_NAMES_DICT[class_id]} {confidence:0.2f}"
                for _, confidence, class_id, tracker_id
                in detections
            ]
            
            # annotate detection boxes — the call is commented out, so box_anno only
            # measures the overhead of the two time() calls
            start = time()
            #frame = box_annotator.annotate(
            #    frame=frame, detections=detections, labels=labels
            #)
            end = time()
            box_anno += end - start
            
            # update the scanner
            start = time()
            scanner.update(detections=detections)
            end = time()
            scanner_up += end - start
            # draw the scanner — also commented out, so scanner_anno measures nothing either
            start = time()
            #scanner_annotator.annotate(frame=frame, rack_scanner=scanner)
            end = time()
            scanner_anno += end - start
        # add the frame to the video (written for every frame, detections or not)
        start = time()
        sink.write_frame(frame)
        end = time()
        writer += end - start
# Totals per stage, converted from seconds to milliseconds.
print(round(detect_time, 3)*  1000)
print(round(track_up_time, 3)* 1000)
print(round(track_matcher, 3)* 1000)
print(round(box_anno, 3)* 1000)
print(round(scanner_up, 3)* 1000)
print(round(scanner_anno, 3)* 1000)
print(round(writer, 3)* 1000)

3253it [01:57, 29.32it/s]                          

This is the last frame


3361it [02:00, 27.91it/s]

Elapsed time: 120.44 seconds for 3252 frames
Average FPS: 27.00 frames per second
40580.0
26731.0
3037.0
1.0
5884.0
1.0
37239.0



