In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import numpy as np
import cv2
import matplotlib.pyplot as plt

In [None]:
# select device: use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

In [None]:
# fake wandb (used by YOLOv3 implementation but not needed now)
# The echo lines below write a stub module ./wandb.py that defines three
# functions with empty bodies: log(data, step), init(config, project, name)
# and finish(). The first echo uses '>' to (re)create the file, the others append.
!echo "def log(data=None, step=None):" > wandb.py
!echo "    pass" >> wandb.py
!echo "def init(config=None, project=None, name=None):" >> wandb.py
!echo "    pass" >> wandb.py
!echo "def finish():" >> wandb.py
!echo "    pass" >> wandb.py
# presumably resolves to the local stub written above because the working
# directory is on the import path - TODO confirm no real wandb is installed
import wandb

In [None]:
# clone YOLOv3 implementation
!git clone https://github.com/Lornatang/YOLOv3-PyTorch.git

In [None]:
!ls YOLOv3-PyTorch

In [None]:
# install YOLOv3
!ln -sf YOLOv3-PyTorch/yolov3_pytorch yolov3_pytorch
!ln -sf YOLOv3-PyTorch/tools tools
!ln -sf YOLOv3-PyTorch/configs configs
!ln -sf YOLOv3-PyTorch/model_configs model_configs

In [None]:
# patching source codes
!mv tools/export.py tools/export.py.org
!mv yolov3_pytorch/engine/trainer.py yolov3_pytorch/engine/trainer.py.org
!wget http://www.agentspace.org/download/yolov3_pytorch_patch.zip
!unzip -o yolov3_pytorch_patch.zip
!rm yolov3_pytorch_patch.zip

In [None]:
!pip install thop

In [None]:
# take images, e.g. using https://imageonline.io/take-photo/

In [None]:
# annotate images, using https://www.makesense.ai/ add label / select ROI + label / action / export / yolo

In [None]:
# download train and test images with annotations
!wget http://www.agentspace.org/download/watch-annotated.zip
!unzip -o watch-annotated.zip
!rm watch-annotated.zip

In [None]:
!more data/custom/labels/train/000010.txt

In [None]:
# show train images

def load(list_file):
    """Load the images listed in ``list_file`` together with their YOLO labels.

    ``list_file`` is a text file with one image path per line. For every image
    the label file is looked up by replacing ``images`` with ``labels`` in the
    path and swapping the file extension for ``.txt``.

    Args:
        list_file: path to the text file listing the image paths.

    Returns:
        A list of ``(image, boxes)`` tuples. ``image`` is whatever
        ``cv2.imread`` returned and ``boxes`` is a list of float lists, one per
        non-empty line of the label file (the numbers on that line).
        Blank lines in ``list_file`` and images that cannot be read are
        skipped. An image without a label file is returned with no boxes.

    Raises:
        OSError: if ``list_file`` cannot be opened.
        ValueError: if a label line contains a non-numeric field.
    """
    import os  # local import so this cell does not depend on a later cell

    samples = []
    with open(list_file, "r") as f:
        for line in f:
            img_path = line.strip()
            if not img_path:
                continue
            img = cv2.imread(img_path)
            if img is None:
                # cv2.imread signals an unreadable/missing image with None
                continue
            # swap whatever extension the image has (not only ".jpg") for ".txt"
            stem = os.path.splitext(img_path)[0]
            annotation_path = stem.replace('images', 'labels') + ".txt"
            boxes = []
            if os.path.isfile(annotation_path):
                with open(annotation_path, "r") as g:
                    for record in g:
                        # split() ignores the line terminator, so the last line
                        # is parsed correctly even without a trailing newline
                        fields = record.split()
                        if not fields:
                            continue  # blank line: no box to record
                        boxes.append([float(x) for x in fields])
            samples.append((img, boxes))
    return samples

def show(samples):
    """Display samples in a 4-column grid with their boxes drawn on top.

    Args:
        samples: list of ``(image, boxes)`` tuples as returned by ``load``.
            Each box is ``(label, cx, cy, cw, ch)`` where the centre and size
            are multiplied by the image width/height before drawing, i.e. they
            are treated as fractions of the image size.

    The input images are not modified; drawing happens on copies. Nothing is
    displayed when ``samples`` is empty.
    """
    if not samples:
        return  # an empty grid would need a zero-height figure
    columns = 4
    rows = (len(samples) + columns - 1) // columns  # ceil(len / columns)
    # 4 inches of height per grid row
    plt.figure(figsize=(20, 4 * rows))
    for i, (image, boxes) in enumerate(samples):
        disp = np.copy(image)
        H, W = disp.shape[:2]
        for label, cx, cy, cw, ch in boxes:
            # centre/size fractions -> top-left corner and size in pixels
            x, y, w, h = (cx - cw/2)*W, (cy - ch/2)*H, cw*W, ch*H
            cv2.rectangle(disp, (int(x), int(y)), (int(x+w), int(y+h)), (0, 255, 0), 2)
            cv2.putText(disp, str(int(label)), (int(x)+3, int(y)+15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        plt.subplot(rows, columns, i+1)
        # the images are converted from BGR to RGB for display
        plt.imshow(cv2.cvtColor(disp, cv2.COLOR_BGR2RGB))
        plt.axis('off')
    plt.show()

In [None]:
train_samples = load("data/custom/train.txt")
show(train_samples)

In [None]:
test_samples = load("data/custom/test.txt")
show(test_samples)

In [None]:
!ls -lR model_configs/

In [None]:
!cp -v model_configs/COCO-Detection/yolov3_tiny.cfg yolov3_tiny.cfg

In [None]:
# edit yolov3_tiny.cfg and set the class number to 1
# change classes in each YOLO layer
# change filters to (5 + classes) * num_masked_anchors in the convolutional layer before the YOLO layer
# (e.g. the layer lists 6 anchors but its mask 0,1,2 selects only 3 of them,
# so with one class: filters = (5 + 1) * 3 = 18)
!sed -i -e 's/^\s*filters\s*=\s*255/filters=18/' -e 's/^\s*classes\s*=\s*80/classes=1/' yolov3_tiny.cfg

In [None]:
# download pretrained model
!wget -O YOLOv3_Tiny-VOC-20231107.pth.tar http://agentspace.org/download/YOLOv3_Tiny-VOC-20231107.pth.tar

In [None]:
!ls -l YOLOv3_Tiny-VOC-20231107.pth.tar

In [None]:
!ls -lR configs/

In [None]:
!cp -v configs/COCO-Detection/yolov3.yaml yolov3_tiny.yaml

In [None]:
# edit yolov3_tiny.yaml
# change mainly CONFIG_PATH, NUM_CLASSES, CLASS_NAMES, dataset ROOT and pretrained model WEIGHTS_PATH

import yaml

# Read the copied config into a nested dict
with open("yolov3_tiny.yaml", "r") as f:
    cfg = yaml.safe_load(f)

# Project / experiment identity
cfg.update({
    "PROJECT_NAME": "watch_detector_yolov3_tiny",
    "EXP_NAME": "watch_detector_yolov3_tiny",
})

# Model: the cfg edited earlier, a single class, network input size
cfg["MODEL"].update({
    "CONFIG_PATH": "./yolov3_tiny.cfg",
    "NUM_CLASSES": 1,
    "IMG_SIZE": 416,  # 512
})
cfg["CLASS_NAMES"] = ["watch"]

# Training: dataset list, pretrained weights and hyper-parameters
train_cfg = cfg["TRAIN"]
train_cfg["DATASET"].update({
    "ROOT": "./data/custom/train.txt",
    "CACHE": True,
})
train_cfg["WEIGHTS_PATH"] = "./YOLOv3_Tiny-VOC-20231107.pth.tar"
train_cfg["HYP"].update({
    "EPOCHS": 20000,
    "IMG_PER_BATCH": 5,
    "ACCUMULATE_BATCH_SIZE": 2,
})
train_cfg["SAVE_EVERY_EPOCH"] = 100

# Validation: dataset list
cfg["VAL"]["DATASET"].update({
    "ROOT": "./data/custom/test.txt",
    "CACHE": True,
})

# Write the config back, keeping the key order as loaded/inserted
with open("yolov3_tiny.yaml", "w") as f:
    yaml.safe_dump(cfg, f, sort_keys=False)

In [None]:
!rm -rfd results/train

In [None]:
# train
# tools.train.main presumably parses sys.argv the way the command-line script
# would (TODO confirm), so emulate `train.py ./yolov3_tiny.yaml` before calling it
import sys
sys.argv = [ 'train.py', './yolov3_tiny.yaml' ]
import warnings
# hide FutureWarning messages for the rest of the session
warnings.filterwarnings("ignore", category=FutureWarning)
from tools.train import main as train
train()

In [None]:
# download checkpoint after 20000 iterations instead of training
#!mkdir -p results/train/watch_detector_yolov3_tiny
#!rm -f results/train/watch_detector_yolov3_tiny/last.pth.tar
#!wget -O results/train/watch_detector_yolov3_tiny/last.pth.tar http://agentspace.org/download/watch_checkpoint_020000.pth.tar

In [None]:
!ls -lR results/train

In [None]:
# Find the most recently modified last.pth.tar checkpoint; the export cell
# further down reads its path from `newest`
import glob
import os
# named `checkpoints` (not `files`) so the google.colab `files` module used by
# the download/upload cells is never shadowed when cells are re-run
checkpoints = glob.glob("./results/train/watch_detector_yolov3_tiny*/last.pth.tar")
if not checkpoints:
    # stop here with the real cause instead of a NameError on `newest` later
    raise FileNotFoundError(
        "No last.pth.tar files found under ./results/train "
        "(run the training cell or download the checkpoint first)"
    )
newest = max(checkpoints, key=os.path.getmtime)
print("found", newest)

In [None]:
!head -n 10 yolov3_tiny.cfg

In [None]:
# change batch size
!sed -i 's/#batch = 1/batch=1/' yolov3_tiny.cfg
!sed -i 's/#subdivisions = 1/subdivisions = 1/' yolov3_tiny.cfg
!sed -i 's/batch = 64/#batch = 64/' yolov3_tiny.cfg
!sed -i 's/subdivisions = 8/#subdivisions = 8/' yolov3_tiny.cfg

In [None]:
!head -n 10 yolov3_tiny.cfg

In [None]:
# export model
# tools.export.main presumably parses sys.argv like the command-line script
# (TODO confirm); the arguments below pass the checkpoint path held in `newest`
# as --weights and request a 'torch' export into ./results/export
import sys
sys.argv = [
    'export.py',
    '--img-size', '416',
    '--cfg', './yolov3_tiny.cfg',
    '--weights', newest,
    '--export-mode', 'torch',
    '--export-dir', './results/export'
]
from tools.export import main as export
export()


In [None]:
!ls -l results/export

In [None]:
!mv -fv results/export/last.pth watch_detector_yolov3_tiny.pth
!ls -l watch_detector_yolov3_tiny.pth

In [None]:
from google.colab import files
files.download('watch_detector_yolov3_tiny.pth')

In [None]:
# download trained and exported model (instead of the training and export)
#!rm -f watch_detector_yolov3_tiny.pth
#!wget http://www.agentspace.org/download/watch_detector_yolov3_tiny.pth

In [None]:
# load the model
import yolov3_pytorch
from yolov3_pytorch.utils import scale_coords, xyxy2xywh, non_max_suppression, plot_one_box
from yolov3_pytorch.data.data_augment import letterbox
model_path = 'watch_detector_yolov3_tiny.pth'
# NOTE(security): weights_only=False unpickles arbitrary Python objects, which
# can execute code - only load model files that come from a trusted source.
# map_location remaps the stored tensors to the selected device, so a file
# saved on a GPU machine also loads on a CPU-only runtime.
model = torch.load(model_path, map_location=device, weights_only=False).to(device)
# switch to inference mode (e.g. fixes batch-norm statistics, disables dropout)
model.eval()

In [None]:
names = ['watch']

In [None]:
# load image
frame = cv2.imread("data/custom/images/test/000011.jpg")
frame.shape

In [None]:
# preprocessing
# same size as MODEL.IMG_SIZE in yolov3_tiny.yaml and --img-size in the export cell
img_size = 416
# presumably resizes/pads the frame to img_size keeping the aspect ratio - TODO confirm
img, _, _ = letterbox(frame,new_shape=img_size)
# frame was read with cv2.imread (BGR channel order); convert to RGB
img = cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
# scale pixel values by 1/255 and pack the image into a 4-D blob
blob = cv2.dnn.blobFromImage(img,1.0/255)
blob = torch.tensor(blob)
blob = blob.to(device)
blob.shape

In [None]:
# inference
with torch.no_grad():
    output, _ = model(blob, False)

output.shape

In [None]:
# postprocessing - non-maximum suppression
conf_thresh = 0.08
iou_thresh = 0.45
# [0]: take the result for the first (only) image in the batch
detections = non_max_suppression(output, conf_thresh, iou_thresh)[0]
if detections is None:
    # process_image() treats None as "nothing detected"; use an empty tensor
    # with 6 columns (4 box coordinates, confidence, class) here so that the
    # rescaling and drawing cells still run
    detections = torch.zeros((0, 6), device=output.device)
detections.shape

In [None]:
# postprocessing - rescaling
detections[:, :4] = scale_coords(blob.shape[2:], detections[:, :4], frame.shape).round()

In [None]:
# visualize result
# draw on a copy so that `frame` itself stays unmodified
disp = np.copy(frame)
for detection in detections:
    # each row unpacks into four box values, a confidence and a class index
    *xyxy, confidence, classid  = detection
    # the class index selects the label text from `names`
    plot_one_box(xyxy, disp, label=names[classid.int().item()], color=(0,0,255))

# frame came from cv2.imread (BGR); convert to RGB for matplotlib
plt.imshow(cv2.cvtColor(disp,cv2.COLOR_BGR2RGB))
plt.axis('off')
plt.show()

In [None]:
# process image (all in one)
def process_image(frame, conf_thresh=0.1, iou_thresh=0.45):
    """Detect objects in one frame and return the frame with boxes drawn.

    The frame goes through ``letterbox`` (target size 416), BGR->RGB
    conversion and 1/255 scaling, then through the global ``model``. The
    predictions are filtered with ``non_max_suppression`` and the surviving
    boxes are mapped back to the frame with ``scale_coords``.

    Args:
        frame: image array in BGR channel order.
        conf_thresh: confidence threshold passed to ``non_max_suppression``.
        iou_thresh: IoU threshold passed to ``non_max_suppression``.

    Returns:
        The original ``frame`` object when the suppression step yields
        ``None``; otherwise a copy of ``frame`` with one labelled box per
        detection.
    """
    img_size = 416

    # frame -> network input tensor on the selected device
    padded, _, _ = letterbox(frame, new_shape=img_size)
    rgb = cv2.cvtColor(padded, cv2.COLOR_BGR2RGB)
    blob = torch.tensor(cv2.dnn.blobFromImage(rgb, 1.0/255)).to(device)

    # forward pass without gradient tracking
    with torch.no_grad():
        predictions, _ = model(blob, False)

    # keep the result for the first (only) image of the batch
    detections = non_max_suppression(predictions, conf_thresh, iou_thresh)[0]
    if detections is None:
        return frame

    # map box coordinates from the network input size back to the frame size
    network_shape = blob.shape[2:]
    detections[:, :4] = scale_coords(network_shape, detections[:, :4], frame.shape).round()

    annotated = np.copy(frame)
    for *xyxy, confidence, classid in detections:
        plot_one_box(xyxy, annotated, label=names[classid.int().item()], color=(0,0,255))
    return annotated

In [None]:
# upload video
#!wget http://www.agentspace.org/download/watch.avi
#videofile = 'watch.avi'

In [None]:
# upload video
from google.colab import files
print('upload video')
uploaded = files.upload()
videofile = list(uploaded.keys())[0]
print(videofile)

In [None]:
# process video
# Reads `videofile` frame by frame, runs process_image() on each frame and
# writes the annotated frames to `resultfile` as Motion-JPEG with the source
# frame rate and frame size.
resultfile = 'result.avi'
video = cv2.VideoCapture(videofile)
fps = video.get(cv2.CAP_PROP_FPS)
hasFrame, frame = video.read()
if not hasFrame:
    # unreadable/empty video: frame is None, so frame.shape below would fail
    video.release()
    raise RuntimeError(f"cannot read any frame from {videofile!r}")
out = cv2.VideoWriter()
# frame.shape is (height, width, ...) while the writer expects (width, height)
opened = out.open(resultfile,cv2.VideoWriter_fourcc('M','J','P','G'),fps,(frame.shape[1],frame.shape[0]))
try:
    if not opened:
        raise RuntimeError(f"cannot open {resultfile!r} for writing")
    while hasFrame:
        result = process_image(frame, conf_thresh=0.08, iou_thresh=0.45)
        out.write(result)
        hasFrame, frame = video.read()
finally:
    # release both handles even when processing a frame fails
    out.release()
    video.release()

In [None]:
# download video
files.download(resultfile)