In [14]:
import numpy as np
import cv2
import pandas as pd
import matplotlib.pyplot as plt


# function for plotting the detected bounding boxes and class labels on a frame

# Palette used for the boxes and captions; indexed by class id (wrapping around).
COLOURS = [[255,0,0], [0,255,0], [0,0,255]]

def draw_classified_map(image, boxes, labels):
    """Draw a bounding box and, when the class is known, a caption per detection.

    Args:
        image: image passed to ``cv2.rectangle`` / ``cv2.putText``. It is drawn
            on in place.
        boxes: indexable collection holding one ``(x1, y1, x2, y2)`` entry per
            label. Coordinates are truncated to ``int`` before drawing.
        labels: class ids, each convertible with ``int()`` (ints or numeric
            strings). A label that cannot be converted is skipped entirely.

    Returns:
        The same ``image`` object that was passed in, now annotated.
    """
    names = ["Ball", "Player"]

    for i, label in enumerate(labels):
        x1, y1, x2, y2 = (int(v) for v in boxes[i])

        try:
            idx = int(label)
        except (TypeError, ValueError):
            # Not a usable class id: leave this detection undrawn instead of
            # hiding every possible error behind a bare ``except``.
            continue

        # One colour per class id, wrapping around the palette.
        color = COLOURS[idx % len(COLOURS)]

        # draw the bounding box around the object
        cv2.rectangle(image, (x1, y1), (x2, y2), color=color,
                      thickness=2)

        # The model may predict more classes than we have names for; those
        # detections keep their box but get no caption.
        if 0 <= idx < len(names):
            # put the label text above the object
            cv2.putText(image, names[idx], (x1, y1 - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, color=color,
                        thickness=2, lineType=cv2.LINE_AA)

    return image

In [11]:
import cv2
import torchvision
import torch
from torchvision.models.detection import ssdlite320_mobilenet_v3_large
from torchvision.models.detection.ssdlite import SSDLite320_MobileNet_V3_Large_Weights
import torchvision.transforms as transforms

        
# Build the pretrained SSDLite detector that is used for inference.
# (Replace the next statement with our own model once it is available.)
model = ssdlite320_mobilenet_v3_large(
    weights=SSDLite320_MobileNet_V3_Large_Weights.DEFAULT,
)

# Inference only: turn off gradient tracking on every parameter.
model.requires_grad_(False)

# Converter applied to each frame before it is handed to the model.
convert_tensor = transforms.ToTensor()

# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the weights to the chosen device and switch to evaluation mode.
model.to(device).eval()


In [41]:
# Run the detector over every frame of a video, show the annotated frames in
# an OpenCV window and (optionally) write them to a new video file.
videoPath = 'Videos/Arsenal_goal.mp4'
outputPath = 'Videos/output.mp4'

saveVideo = True

# minimum confidence score a detection needs in order to be drawn
threshold = 0.15

videoCap = cv2.VideoCapture(videoPath)

# Stays None when saveVideo is False so the loop and the cleanup below can
# test for it instead of hitting an undefined name.
outputVideo = None

if saveVideo:
    frameSize = (int(videoCap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(videoCap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    fps = videoCap.get(cv2.CAP_PROP_FPS)

    # NOTE(review): 'XVID' is usually paired with .avi containers; confirm that
    # this codec/container combination writes correctly on the target machine.
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    outputVideo = cv2.VideoWriter(outputPath, fourcc, fps, frameSize)
    output_frames = []

try:
    while videoCap.isOpened():
        ret, frame = videoCap.read()
        if not ret:
            # end of the video (or a read failure)
            break

        # OpenCV decodes frames as BGR, while the torchvision detector expects
        # RGB input; the original BGR frame is kept for drawing and saving.
        rgbFrame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        inputImg = [convert_tensor(rgbFrame).to(device)]

        with torch.no_grad():  # inference only, never build an autograd graph
            output = model(inputImg)[0]  # predict

        # Keep only the confident predictions. A boolean mask does not depend
        # on the detections being sorted by score.
        scores = output['scores']
        keep = scores > threshold
        boxes = output['boxes'][keep].tolist()
        labels = output['labels'][keep].tolist()

        # draw_classified_map returns the annotated image only
        outputImg = draw_classified_map(frame, boxes, labels)

        cv2.imshow("Frame", outputImg)
        if outputVideo is not None:
            outputVideo.write(outputImg)

        # to close the cv2 window, press the "q" key
        key = cv2.waitKey(5) & 0xFF
        if key == ord("q"):
            break
finally:
    # Release everything on every exit path: end of video, "q" or an error.
    videoCap.release()
    if outputVideo is not None:
        outputVideo.release()
    cv2.destroyAllWindows()