In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Install dependencies

Note: After installation, restart the runtime if you are prompted to do so.

In [None]:
!pip install ultralytics
!pip install av
!pip install filterpy

Collecting ultralytics
  Downloading ultralytics-8.0.161-py3-none-any.whl (609 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m609.5/609.5 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: ultralytics
Successfully installed ultralytics-8.0.161
Collecting av
  Downloading av-10.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (31.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.0/31.0 MB[0m [31m41.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: av
Successfully installed av-10.0.0
Collecting filterpy
  Downloading filterpy-1.4.5.zip (177 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m178.0/178.0 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: filterpy
  Building wheel for filterpy (setup.py) ... [?25l[?25hdone
  Created wheel for filterpy: filename=filterpy-1.4.5-py

Player & ball tracking with Norfair

In [None]:
import sys
# Project directory on the mounted Drive.
root_path = '/content/drive/MyDrive/basketball-broadcaster'
# Add the project directory to the module search path; presumably this is what
# makes the `pb_detector` / `pb_tracker` imports below resolvable -- confirm.
sys.path.append(root_path)

import os
# Work from the project directory so the relative paths used later in this
# notebook ("videos/test1_clip.mp4", "output.mp4") resolve against it.
os.chdir(root_path)

import av
import time
from PIL import Image
import numpy as np

from pb_detector import PBDetector
from pb_tracker import PBTracker

# Decode the input clip frame by frame, run player/ball detection and tracking,
# draw the results, and re-encode the annotated frames to "output.mp4".
# Audio packets (if the clip has an audio stream) are copied without re-encoding.
# Per-stage wall-clock timings are accumulated for the statistics cell below.

# Build the detector and tracker before opening any container so that a failure
# here cannot leave a half-written output file handle open.
# NOTE(review): court_region is passed straight to PBDetector; presumably the
# corner points of the court in pixel coordinates -- confirm against PBDetector.
court_region = [[690, 330], [1900, 400], [1900, 1000], [0, 800]]
detector = PBDetector(court_region)
tracker = PBTracker()

frame_idx = 0   # number of video frames processed so far
det_cycle = 2   # the detector runs on every `det_cycle`-th frame only

decoding_time = 0
inference_time = 0
mot_time = 0
encoder_time = 0

# The context managers close both containers even when processing raises.
with av.open("videos/test1_clip.mp4") as input_, av.open("output.mp4", "w") as output:
    # Make an output video stream using the input video stream as a template.
    # The frame rate is passed through unchanged (it may be fractional, e.g.
    # 30000/1001); truncating it to an int would change the video duration and
    # desynchronise it from the copied audio.
    in_video_stream = input_.streams.video[0]
    out_video_stream = output.add_stream(codec_name="h264",
                                          width=in_video_stream.width,
                                          height=in_video_stream.height,
                                          rate=in_video_stream.average_rate,
                                          bit_rate=in_video_stream.bit_rate,
                                          pix_fmt=in_video_stream.pix_fmt)

    # Make an output audio stream using the input audio stream as a template.
    # A clip without audio is processed as video-only instead of failing.
    in_audio_stream = input_.streams.audio[0] if input_.streams.audio else None
    out_audio_stream = None
    if in_audio_stream is not None:
        out_audio_stream = output.add_stream(template=in_audio_stream)

    st = time.time()
    prec_time = st

    for packet in input_.demux():
        if packet.stream == in_video_stream:
            # Video packets are always decoded, including the "flushing" packet
            # (dts is None) that `demux` generates at the end: decoding it
            # drains the frames still buffered inside the decoder.
            for frame in packet.decode():
                # To get an input frame
                image = frame.to_image() # PIL image
                image = np.array(image)
                current_time = time.time()
                decoding_time += (current_time - prec_time)
                prec_time = current_time

                ##############  To process by AI engine  ############

                # To process the input frame; on frames where the detector is
                # skipped the tracker receives empty detection lists.
                player_detections = []
                ball_detections = []
                if frame_idx % det_cycle == 0:
                    player_detections, ball_detections = detector.detect(image)
                    current_time = time.time()
                    inference_time += (current_time - prec_time)
                    prec_time = current_time

                player_detections, ball_detections = tracker.run(image, player_detections, ball_detections, period=det_cycle)
                frame_idx += 1

                # To draw the result
                image = Image.fromarray(image)
                image = tracker.draw_detections(image, player_detections, ball_detections)
                current_time = time.time()
                mot_time += (current_time - prec_time)
                prec_time = current_time

                ####################################################

                output_frame = av.VideoFrame.from_image(image)
                output_frame.pts = None  # no pts set here; the encoder is left to assign one
                for output_packet in out_video_stream.encode(output_frame):
                    # Write the packets to the container
                    output_packet.stream = out_video_stream
                    output.mux(output_packet)
                # Account for video encoding/muxing; otherwise this time would
                # be attributed to the decoding of the next frame.
                current_time = time.time()
                encoder_time += (current_time - prec_time)
                prec_time = current_time
        elif in_audio_stream is not None and packet.stream == in_audio_stream:
            # Flushing packets carry no data and must not be muxed.
            if packet.dts is None:
                continue
            packet.stream = out_audio_stream
            output.mux(packet)
            current_time = time.time()
            encoder_time += (current_time - prec_time)
            prec_time = current_time

    # Flush the encoder: calling encode() with no frame returns the packets it
    # is still buffering. Without this the last frames never reach the file.
    for output_packet in out_video_stream.encode():
        output_packet.stream = out_video_stream
        output.mux(output_packet)
    encoder_time += (time.time() - prec_time)

    elapsed = time.time() - st


0: 384x640 12 persons, 127.6ms
Speed: 21.3ms preprocess, 127.6ms inference, 41.2ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 13 persons, 7.5ms
Speed: 3.8ms preprocess, 7.5ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 12 persons, 7.4ms
Speed: 3.6ms preprocess, 7.4ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 10 persons, 7.4ms
Speed: 3.6ms preprocess, 7.4ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)


OSError: ignored

In [None]:
# To print the FPS and the accumulated per-stage timings (whole seconds).
# The frame count comes from `frame_idx`, i.e. the frames that were actually
# processed by the loop above. `in_video_stream.frames` is not used because the
# input container has already been closed at this point, and that value is
# container metadata which may be 0 when the frame count is unknown.
print('Processing FPS: ', int(frame_idx / elapsed))
print('Decoding time: ', int(decoding_time))
print('Inference time: ', int(inference_time))
print('MOT time: ', int(mot_time))
print('Encoder time: ', int(encoder_time))