In [None]:
import torch
import torchvision
import numpy as np
import cv2
import matplotlib.pyplot as plt

In [None]:
# pick the compute device: the GPU when CUDA is usable, otherwise the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)

In [None]:
# clone the YOLOv3 implementation
!git clone https://github.com/Lornatang/YOLOv3-PyTorch.git

In [None]:
!ls YOLOv3-PyTorch

In [None]:
# install YOLOv3
!ln -sf YOLOv3-PyTorch/yolov3_pytorch yolov3_pytorch
!ln -sf YOLOv3-PyTorch/tools tools
!ln -sf YOLOv3-PyTorch/configs configs
!ln -sf YOLOv3-PyTorch/model_configs model_configs

In [None]:
!pip install thop

In [None]:
# take some images (e.g. using https://imageonline.io/take-photo/) and also record a video

In [None]:
# download test images
!wget http://www.agentspace.org/download/watch-annotated.zip
!unzip watch-annotated.zip
!rm watch-annotated.zip

In [None]:
# download trained model
!wget http://www.agentspace.org/download/watch_detector_yolov3_tiny.pth
#from google.colab import files
#print('upload','watch_detector_yolov3_tiny.pth')
#uploaded = files.upload()

In [None]:
!ls -l *.pth

In [None]:
#!wget http://www.agentspace.org/download/watch_detector_yolov3_tiny.pth

In [None]:
from yolov3_pytorch.utils import scale_coords, xyxy2xywh, non_max_suppression, plot_one_box
from yolov3_pytorch.data.data_augment import letterbox

In [None]:
# load the model
# NOTE(review): the checkpoint appears to be a whole pickled model object, so the
# yolov3_pytorch package presumably has to be importable while unpickling - confirm
import yolov3_pytorch
model_path = 'watch_detector_yolov3_tiny.pth'
# SECURITY: weights_only=False unpickles arbitrary Python objects, and this file was
# fetched over plain HTTP - only load checkpoints from a source you trust.
# map_location remaps the stored tensors to the selected device, so a checkpoint saved
# on a GPU can also be loaded on a CPU-only runtime.
model = torch.load(model_path, map_location=device, weights_only=False).to(device)
# switch to inference mode; as the last expression this also displays the model
model.eval()

In [None]:
# class names, indexed by the class id of a detection
names = ['watch']

In [None]:
# read one test image (OpenCV loads it in BGR channel order)
image_path = "data/custom/images/test/000011.jpg"
frame = cv2.imread(image_path)
# cv2.imread does not raise on a missing/unreadable file, it silently returns None
if frame is None:
    raise FileNotFoundError(f'cannot read image: {image_path}')
frame.shape

In [None]:
# preprocessing: letterbox to the network input size, BGR -> RGB,
# then build a blob scaled by 1/255 and move it to the selected device
img_size = 416
img, _, _ = letterbox(frame, new_shape=img_size)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
blob = torch.tensor(cv2.dnn.blobFromImage(img, 1.0/255)).to(device)
blob.shape

In [None]:
# inference
# gradients are not needed for prediction, so the forward pass runs under no_grad
with torch.no_grad():
    # the model returns a pair; only its first element (the predictions) is used
    # NOTE(review): the meaning of the second positional argument (False) is defined
    # by the model's forward() in YOLOv3-PyTorch - confirm
    output, _ = model(blob, False)

output.shape

In [None]:
output[0,0] # x1, y1, x2, y2, objectness, probability of class 1

In [None]:
# postprocessing - non-maximum suppression
conf_thresh = 0.08
iou_thresh = 0.1
# the batch holds a single image, so take the first (only) per-image result
detections = non_max_suppression(output, conf_thresh, iou_thresh)[0]
if detections is None:
    # process_image() treats None as "nothing detected"; normalise that case to an
    # empty (0, 6) tensor so the following cells (shape, slicing, loop) keep working
    detections = output.new_zeros((0, 6))

In [None]:
detections.shape

In [None]:
detections[:10] # x1, y1, x2, y2, confidence, classid

In [None]:
# postprocessing - rescaling
# presumably maps the box corners from the network input size (blob.shape[2:]) back to
# the original frame size - confirm against scale_coords in YOLOv3-PyTorch
# NOTE: the first four columns of `detections` are overwritten in place, so running this
# cell twice rescales already rescaled boxes; re-run the NMS cell before repeating it
detections[:, :4] = scale_coords(blob.shape[2:], detections[:, :4], frame.shape).round()

In [None]:
detections[:10] # x1, y1, x2, y2, confidence, classid

In [None]:
# visualization: print every detection as "confidence,x1,y1,x2,y2,label"
# and draw its box directly onto `frame`
for detection in detections:
    *xyxy, confidence, classid = detection
    label = names[classid.int().item()]
    x1, y1, x2, y2 = (int(coord.item()) for coord in xyxy)
    print(f'{confidence.item():.2f},{x1},{y1},{x2},{y2},{label}')
    plot_one_box(xyxy, frame, label=label, color=(0,0,255))

In [None]:
# show the annotated frame (matplotlib expects RGB, OpenCV images are BGR)
fig, ax = plt.subplots()
ax.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
ax.axis('off')
plt.show()

In [None]:
# upload video
from google.colab import files
uploaded = files.upload()
# the result is a mapping keyed by file name; it is empty when the dialog was cancelled
if not uploaded:
    raise RuntimeError('no video file was uploaded')
# use the first uploaded file as the video to process
videofile = next(iter(uploaded))
print(videofile)

In [None]:
# process image
def process_image(frame, img_size=416, conf_thresh=0.08, iou_thresh=0.45):
    """Detect objects in one BGR frame and return an annotated copy of it.

    The frame is letterboxed to `img_size`, converted to RGB, scaled by 1/255 and
    passed through the global `model` on the global `device`. Detections that
    survive non-maximum suppression are rescaled to the frame and drawn with the
    class label taken from the global `names`.

    Args:
        frame: image as a NumPy array in OpenCV's BGR channel order.
        img_size: network input size passed to `letterbox`.
        conf_thresh: confidence threshold passed to `non_max_suppression`.
        iou_thresh: IoU threshold passed to `non_max_suppression`.

    Returns:
        A copy of `frame` with the detected boxes drawn on it. The input frame is
        never modified, and a copy is returned even when nothing was detected.
    """
    img, _, _ = letterbox(frame, new_shape=img_size)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    blob = cv2.dnn.blobFromImage(img, 1.0/255)
    blob = torch.tensor(blob).to(device)
    with torch.no_grad():
        output, _ = model(blob, False)
    # the batch holds a single image, so take the first (only) per-image result
    detections = non_max_suppression(output, conf_thresh, iou_thresh)[0]
    disp = np.copy(frame)
    if detections is None:
        # nothing detected: return the unannotated copy
        return disp
    # bring the box corners back to the coordinate system of the original frame
    detections[:, :4] = scale_coords(blob.shape[2:], detections[:, :4], frame.shape).round()
    for detection in detections:
        # each row is x1, y1, x2, y2, confidence, classid
        *xyxy, confidence, classid = detection
        plot_one_box(xyxy, disp, label=names[classid.int().item()], color=(0,0,255))
    return disp

In [None]:
# process video: run the detector on every frame and write the annotated frames
# to an MJPG-encoded AVI file
resultfile = 'result.avi'
video = cv2.VideoCapture(videofile)
if not video.isOpened():
    raise IOError(f'cannot open video file: {videofile}')
fps = video.get(cv2.CAP_PROP_FPS)
if fps <= 0:
    # get() returns 0 when the backend cannot report the frame rate;
    # fall back to a common default so the writer still gets a valid rate
    fps = 25.0
hasFrame, frame = video.read()
if not hasFrame:
    video.release()
    raise IOError(f'video contains no readable frames: {videofile}')
out = cv2.VideoWriter()
try:
    # the output size is (width, height) of the first frame
    opened = out.open(resultfile,cv2.VideoWriter_fourcc('M','J','P','G'),fps,(frame.shape[1],frame.shape[0]))
    if not opened:
        raise IOError(f'cannot open video writer for: {resultfile}')
    while hasFrame:
        result = process_image(frame)
        out.write(result)
        hasFrame, frame = video.read()
finally:
    # always release both handles, even when processing fails midway
    out.release()
    video.release()

In [None]:
# download video
files.download(resultfile)

In [None]:
import inspect

In [None]:
print(inspect.getsource(model.__class__))

In [None]:
model

In [None]:
model.module_list[0]

In [None]:
print(inspect.getsource(model.module_list[0].forward))