In [None]:
!pip install numpy
!pip install opencv-python
!pip install torch~=2.5.0
!pip install torchvision
!pip install onnx
!pip install matplotlib
!pip install PyYAM
!pip install scipy
!pip install tqdm
!pip install tensorboard
!pip install pillow
!pip install thop
!pip install scikit-image
!pip install pycocotools~=2.0.1
!pip install pathlib~=1.0.1
!pip install setuptools

In [None]:
!wget https://www.agentspace.org/download/watch.zip
!unzip watch.zip
!rm watch.zip

In [None]:
# Model: print the network definition file; the same path is later passed to
# export.py through its --cfg option.
!cat ./model_configs/custom/yolov3_tiny.cfg

In [None]:
# Hyperparameters: print the YAML config that the training cell hands to
# train.py as its only command-line argument.
!cat ./configs/custom/yolov3_tiny.yaml

In [None]:
import sys

In [None]:
# Run training inside the notebook process. main() is called with no
# arguments, so the script name and config path are staged in sys.argv first
# (presumably main() parses them from there — confirm in tools/train.py).
sys.argv = [
    'train.py',
    './configs/custom/yolov3_tiny.yaml',
]
from tools.train import main as training
training()

In [None]:
# Export the most recent training checkpoint as a loadable PyTorch model.
import glob
import os

# The run directory name appears to end in a timestamp
# (e.g. yolov3-tiny-custom-20240707_120114), so it presumably differs on every
# training run — TODO confirm against tools/train.py. Rather than hard-coding
# one run, pick the newest matching checkpoint by modification time.
checkpoints = glob.glob('./results/train/yolov3-tiny-custom-*/last.pth.tar')
if not checkpoints:
    raise FileNotFoundError(
        "No checkpoint matching './results/train/yolov3-tiny-custom-*/last.pth.tar' "
        "was found; run the training cell first."
    )
latest_checkpoint = max(checkpoints, key=os.path.getmtime)
print(f'Exporting checkpoint: {latest_checkpoint}')

# export() is called with no arguments, so its options are staged in sys.argv.
sys.argv = [
    'export.py',
    '--img-size', '416',
    '--cfg', './model_configs/custom/yolov3_tiny.cfg',
    '--weights', latest_checkpoint,
    '--export-mode', 'torch',
    '--export-dir', './results/export'
]
from tools.export import main as export
export()


In [None]:
# List the export directory to check what the export step produced; the
# following cells expect a file named last.pth in it.
!ls ./results/export/

In [None]:
# Download the exported model file through the Colab `files` helper.
# The google.colab package is only importable inside Google Colab, so this
# cell is expected to fail in any other Jupyter environment.
from google.colab import files
files.download('./results/export/last.pth')

In [None]:
import torch
from yolov3_pytorch.utils import scale_coords, xyxy2xywh, non_max_suppression, plot_one_box
from yolov3_pytorch.data.data_augment import letterbox
import numpy as np
import cv2

# Run the exported detector over a test video and write an annotated copy.

# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# still runs on CPU-only runtimes.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model_path = './results/export/last.pth'
# NOTE(review): this file holds a whole pickled model, and unpickling can
# execute arbitrary code — only load files produced by the export step of this
# notebook. weights_only=False is spelled out because a full model object
# (not just a state dict) is being loaded.
# map_location + .to(device) guarantee the model ends up on the same device as
# the input tensors, regardless of where it was exported.
model = torch.load(model_path, map_location=device, weights_only=False)
model = model.to(device)
print(f"Loaded {model_path} successfully.")
model.eval()
img_size = 416
conf_thresh = 0.08
iou_thresh = 0.45

# One class name per line. strip() removes the newline and padding without
# chopping the last character of a final line that lacks a trailing newline.
with open('./data/custom/custom.names','r') as f:
    names = [ name.strip() for name in f ]

video = cv2.VideoCapture('./test_data/watch.avi')
out = cv2.VideoWriter()
try:
    fps = video.get(cv2.CAP_PROP_FPS)
    hasFrame, frame = video.read()
    if not hasFrame:
        # Without a first frame the output resolution is unknown.
        raise RuntimeError("Could not read any frame from './test_data/watch.avi'")

    # The output video keeps the frame rate and resolution of the input.
    out.open('./results/watch-labelled.avi',cv2.VideoWriter_fourcc('M','J','P','G'),fps,(frame.shape[1],frame.shape[0]))
    if not out.isOpened():
        raise RuntimeError("Could not open './results/watch-labelled.avi' for writing")

    while hasFrame:

        # preprocess: letterbox to the network size, BGR -> RGB, scale to [0, 1]
        img, _, _ = letterbox(frame,new_shape=img_size)
        img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        blob = cv2.dnn.blobFromImage(img,1.0/255)
        blob = torch.tensor(blob)
        blob = blob.to(device)

        # Inference
        with torch.no_grad():
            output, _ = model(blob, False)

        # postprocess
        output = non_max_suppression(output, conf_thresh, iou_thresh)
        detect_result  = output[0]
        # Guard against both "no result" conventions: None and an empty tensor
        # (argmax on an empty tensor would raise).
        if detect_result is not None and len(detect_result) > 0:
            # Map the boxes from the letterboxed input back to the original frame.
            detect_result[:, :4] = scale_coords(blob.shape[2:], detect_result[:, :4], frame.shape).round()
            # Only the single most confident detection is reported and drawn.
            best = detect_result[:,4].argmax()
            *xyxy, confidence, classes  = detect_result[best]
            print(f'{confidence.item():.2f},{int(xyxy[0].item())},{int(xyxy[1].item())},{int(xyxy[2].item())},{int(xyxy[3].item())}')
            # Label with the predicted class instead of always using names[0];
            # fall back to the raw index if the names file does not cover it.
            class_index = int(classes.item())
            label = names[class_index] if 0 <= class_index < len(names) else str(class_index)
            plot_one_box(xyxy, frame, label=label, color=(0,255,0))

        out.write(frame)

        hasFrame, frame = video.read()
finally:
    # Release both handles even on error so the output file is finalized and
    # the capture is not leaked in the long-lived kernel.
    video.release()
    out.release()

In [None]:
# Download the annotated video written by the inference cell.
# The import is repeated here so this cell can be run on its own; it only
# works inside Google Colab.
from google.colab import files
files.download('./results/watch-labelled.avi')