# Setup
- Import libraries
- Download required files
  - YOLO model (weights, classes, config)

In [None]:
import cv2
import numpy as np
import pandas as pd

from google.colab.patches import cv2_imshow

from matplotlib import pyplot as plt

In [None]:
!git clone https://github.com/arunponnusamy/object-detection-opencv.git
!wget https://pjreddie.com/media/files/yolov3.weights

# Run Model
- Run the pre-trained YOLO model on each frame of the sample video to detect humans
  - WIP: filtering out non-players (by location on court)
- Write the results to a new video




In [None]:
def setupModel(class_file, weights_file, config_file):
  """Load the class names, a random per-class colour table and the network.

  Args:
    class_file: Path to a text file holding one class name per line.
    weights_file: Path to the pre-trained weights, passed to cv2.dnn.readNet.
    config_file: Path to the network configuration, passed to cv2.dnn.readNet.

  Returns:
    A tuple (net, classes, COLORS): the loaded network, the list of class
    names with surrounding whitespace stripped, and an array of shape
    (len(classes), 3) holding one randomly drawn colour per class.
  """
  # one label per line of the class file
  with open(class_file, 'r') as handle:
    labels = [row.strip() for row in handle]

  # a distinct random colour (three values in [0, 255)) for each class
  palette = np.random.uniform(0, 255, size=(len(labels), 3))

  # pre-trained weights plus the matching config
  network = cv2.dnn.readNet(weights_file, config_file)

  return (network, labels, palette)

def processFrame(frame, net, classes, COLORS):
  """Run the detector on one frame and draw the surviving boxes onto it.

  The frame is modified in place; nothing is returned.

  Args:
    frame: Image array; frame.shape[0] is taken as the height and
      frame.shape[1] as the width.
    net: Network object as returned by setupModel.
    classes: Sequence of class names, indexed by class id.
    COLORS: Per-class colours, indexed by class id.
  """
  height, width = frame.shape[:2]
  scale = 0.00392  # approximately 1/255
  conf_threshold = 0.5
  nms_threshold = 0.4

  # create the input blob and hand it to the network
  blob = cv2.dnn.blobFromImage(frame, scale, (416, 416), (0, 0, 0), True, crop=False)
  net.setInput(blob)

  # Ask the network for its output layer names directly instead of indexing
  # getLayerNames() with getUnconnectedOutLayers(), whose return shape differs
  # between OpenCV releases.
  outs = net.forward(net.getUnconnectedOutLayersNames())

  class_ids = []
  confidences = []
  boxes = []

  for out in outs:
    for detection in out:
      # entries 0-3 are used as the relative box centre/size, 5+ as class scores
      scores = detection[5:]
      class_id = int(np.argmax(scores))
      confidence = float(scores[class_id])
      # NMSBoxes would drop these through its score threshold anyway;
      # skipping them here keeps the candidate lists small.
      if confidence <= conf_threshold:
        continue
      center_x = int(detection[0] * width)
      center_y = int(detection[1] * height)
      w = int(detection[2] * width)
      h = int(detection[3] * height)
      # convert centre-based coordinates to the top-left corner
      x = center_x - w / 2
      y = center_y - h / 2
      class_ids.append(class_id)
      confidences.append(confidence)
      boxes.append([x, y, w, h])

  if not boxes:
    return

  indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

  # Flatten so both the nested ([[i], ...]) and the flat ([i, ...]) return
  # layouts, as well as an empty result, are handled the same way.
  for i in np.asarray(indices, dtype=int).reshape(-1):
    x, y, w, h = boxes[i]
    # Look up the class of THIS box; reusing the loop variable left over from
    # the decoding loop would label every box with the last detection's class.
    kept_class_id = class_ids[i]
    label = str(classes[kept_class_id])
    color = COLORS[kept_class_id]
    cv2.rectangle(frame, (round(x), round(y)), (round(x + w), round(y + h)), color, 2)
    cv2.putText(frame, label, (round(x) - 10, round(y) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)



# Setup net (done first so a model-loading failure leaves no video handles open)
(net, classes, COLORS) = setupModel('/content/object-detection-opencv/yolov3.txt', '/content/yolov3.weights', '/content/object-detection-opencv/yolov3.cfg')

# Initialize video capture to process video
cap = cv2.VideoCapture('/content/sample_video.mp4')
if not cap.isOpened():
  # Without this check the loop below never runs and an empty output file is
  # written while 'done' is still printed.
  raise RuntimeError("Could not open input video '/content/sample_video.mp4'")

# Match the writer to the source video: frames whose size differs from the
# size given to VideoWriter are not written correctly. Fall back to the
# previous hard-coded values when the capture does not report a property.
fps = cap.get(cv2.CAP_PROP_FPS) or 30
frame_size = (
  int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) or 1280,
  int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) or 720,
)

# Initialize video writer to write output video
out = cv2.VideoWriter('output_video.avi', cv2.VideoWriter_fourcc(*'MJPG'), fps, frame_size)

# Iterate through frames of video
n = 0
try:
  while cap.isOpened():
    # Get the current frame
    ret, frame = cap.read()

    # If the frame is None (video has ended), exit
    if not ret or frame is None:
      break
    processFrame(frame, net, classes, COLORS)
    # cv2_imshow(frame)
    out.write(frame)
    n += 1
    # Debug statement
    # print(f"Processed frame #{n}")
finally:
  # Release the handles even if processing a frame raised, so the output
  # file is finalized and the input is closed.
  cap.release()
  out.release()
  cv2.destroyAllWindows()
print('done')