# Player Detection

Use YOLOv3 to **detect players**, then **detect and classify their shirt numbers**.

More details here: https://medium.com/analytics-vidhya/player-detection-using-deep-learning-492122c3bf9

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelBinarizer
import numpy as np
import pandas as pd
import cv2
import os
import pickle
from skimage.color import rgb2gray
from PIL import Image

In [None]:
# Mount Google Drive (Colab only) so the files under 'drive/MyDrive/...'
# referenced by the cells below are reachable from the notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Load the number detection and number classification models

In [None]:
# Root folder of the notebook on the mounted Google Drive; the other
# locations are sub-folders of it (all paths keep their trailing slash).
drive_path = 'drive/MyDrive/Colab Notebooks/'
data_path = drive_path + 'numbers_detection/'
darknet_path = drive_path + 'darknet/'

In [None]:
# Keras model that classifies the digit(s) in a cropped shirt-number image.
classifier_numbers = tf.keras.models.load_model(
    f'{data_path}numbers_classifier_aug.h5'
)
# Keras model that regresses the shirt-number bounding box inside a player crop.
classifier_bbox_numbers = tf.keras.models.load_model(
    f'{data_path}number_detection.h5'
)

In [None]:
# run number detection model on a given image
def detect_numbers(image):
    """Predict the shirt-number bounding box inside a single image crop.

    The crop is resized to 224x224, scaled by 1/255 and fed to
    ``classifier_bbox_numbers`` as a batch of one; the first prediction row
    is unpacked into four values.

    Args:
        image: Image array accepted by ``cv2.resize``.

    Returns:
        Tuple ``(startX, startY, endX, endY)`` exactly as produced by the
        model (callers multiply these by the crop width/height, so they are
        presumably normalised -- confirm against the training code).
    """
    resized = cv2.resize(image, dsize=(224, 224))
    batch = np.expand_dims(resized / 255.0, axis=0)

    startX, startY, endX, endY = classifier_bbox_numbers.predict(batch)[0]
    return (startX, startY, endX, endY)

# run number classification model on a given image
def identify_numbers(image, min_confidence=0.4):
    """Classify the shirt number shown in a cropped image.

    The crop is converted to grayscale, replicated into the first three
    channels, resized to 224x224, smoothed with a 5x5 Gaussian blur, scaled
    by 1/255 and passed to ``classifier_numbers`` as a batch of one.

    Args:
        image: Colour image array with at least three channels.
        min_confidence: The winning class score must be strictly greater
            than this value for the prediction to be accepted.

    Returns:
        Index of the highest-scoring class, or ``-1`` when its score does
        not exceed ``min_confidence``.
    """
    # NOTE(review): the caller passes crops of frames read through
    # cv2.VideoCapture, which are BGR, while this conversion assumes RGB.
    # Left unchanged because the classifier may have been trained on images
    # prepared the same way -- confirm against the training pipeline.
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Grayscale image with the same shape/dtype as the input: the gray plane
    # is copied into the first three channels.
    gray_image = np.zeros_like(image)
    gray_image[:, :, :3] = gray[:, :, np.newaxis]

    gray_image = cv2.GaussianBlur(
        cv2.resize(gray_image, dsize=(224, 224)), (5, 5), 0
    )

    batch = np.expand_dims(gray_image / 255.0, axis=0)
    preds = classifier_numbers.predict(batch)[0]

    best = np.argmax(preds)
    return best if preds[best] > min_confidence else -1

## Inference

In [None]:
# Name of the clip to process; the same name is used for the annotated output.
filename = 'example.mp4'
# data_path already ends with '/', so os.path.join is used to avoid the
# doubled separator that plain string formatting would produce.
input_video_path = os.path.join(data_path, 'video_samples', 'input', filename)
output_video_path = os.path.join(data_path, 'video_samples', 'output', filename)

In [None]:
# Minimum probability intended to eliminate weak predictions.
# NOTE(review): neither p_min nor thres is referenced by the detection loop
# below, which applies its own 0.1 thresholds -- wire them in or remove them.
p_min = 0.5

# Threshold intended for non-maxima suppression
thres = 0.

# 'VideoCapture' object for reading the video from a file
video = cv2.VideoCapture(input_video_path)

# Writer for the processed frames; created lazily once the first frame
# (and therefore the output size) is known.
writer = None

# Preparing variables for spatial dimensions of the frames
h, w = None, None

# Read the class labels, one per line
with open(drive_path+'coco.names') as f:
    labels = [line.strip() for line in f]

np.random.seed(30)
# Initialize colours for representing every detected object
colours = np.random.randint(0, 255, size=(len(labels), 3))
colours[0] = np.array([255, 0, 0])

# Loading trained YOLO v3 Objects Detector
# with the help of 'dnn' library from OpenCV
# Reads a network model stored in Darknet model files.
network = cv2.dnn.readNet(drive_path+'darknet/cfg/yolov3.weights', drive_path+'darknet/cfg/yolov3.cfg')


# Getting only output layer names that we need from YOLO.
# getUnconnectedOutLayers() returns 1-based layer ids, shaped (N, 1) on older
# OpenCV builds and (N,) on newer ones; np.ravel handles both layouts.
layer_names = network.getLayerNames()
ln = [layer_names[i - 1] for i in np.ravel(network.getUnconnectedOutLayers())]

# Thresholds applied by the loop below (the values it has always used).
min_confidence = 0.1  # minimum YOLO class score; also the NMS score threshold
nms_threshold = 0.1   # NMS overlap threshold

try:
    # Defining loop for catching frames
    while True:
        ret, frame = video.read()
        if not ret:
            break

        # Spatial dimensions of the current frame
        frame_h, frame_w = frame.shape[:2]

        # frame preprocessing for deep learning
        blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (416, 416),
                                     swapRB=True, crop=False)

        # perform a forward pass of the YOLO object detector, giving us our
        # bounding boxes and associated probabilities.
        network.setInput(blob)
        output_from_network = network.forward(ln)

        # Parallel lists: entry k of each list describes the same detection.
        numbers_bboxes = []
        confidences = []
        class_numbers = []
        preds_number = []
        boxes = []

        # Going through all output layers after feed forward pass
        for result in output_from_network:
            for detected_objects in result:
                scores = detected_objects[5:]
                class_current = np.argmax(scores)
                confidence_current = scores[class_current]

                if not confidence_current > min_confidence:
                    continue

                # YOLO reports centre/size relative to the frame dimensions.
                center_x = int(detected_objects[0] * frame_w)
                center_y = int(detected_objects[1] * frame_h)
                w = int(detected_objects[2] * frame_w)
                h = int(detected_objects[3] * frame_h)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)

                # Clamp the crop to the frame: a negative start index would
                # otherwise wrap around in NumPy slicing and yield a wrong
                # (or empty) crop.
                crop_x, crop_y = max(x, 0), max(y, 0)
                crop_x2, crop_y2 = min(x + w, frame_w), min(y + h, frame_h)
                if crop_x2 <= crop_x or crop_y2 <= crop_y:
                    continue

                player_image = frame[crop_y:crop_y2, crop_x:crop_x2]
                if player_image.size == 0:
                    continue
                crop_h, crop_w = player_image.shape[:2]

                # detect numbers bbox (values are scaled by the crop size
                # below, i.e. they are treated as fractions of the crop)
                (startX, startY, endX, endY) = detect_numbers(player_image)

                pad_h, pad_w = crop_h * 0.05, crop_w * 0.1

                # Padding widens the box on x; on y, startY is pushed down and
                # endY up, matching the [endY:startY] row slice used below.
                # Negative results are clamped to 0 to avoid slice wrap-around.
                startX = max(int(startX * crop_w - pad_w), 0)
                startY = max(int(startY * crop_h + pad_h), 0)
                endX = max(int(endX * crop_w + pad_w), 0)
                endY = max(int(endY * crop_h - pad_h), 0)

                n_bbox = player_image[endY:startY, startX:endX, :]
                if n_bbox.size == 0:
                    continue

                number_pred = identify_numbers(n_bbox)
                if number_pred == -1:
                    continue

                # Number box is stored in frame coordinates (crop origin added).
                numbers_bboxes.append([startX + crop_x, startY + crop_y,
                                       endX + crop_x, endY + crop_y])
                class_numbers.append(class_current)
                confidences.append(float(confidence_current))
                boxes.append([x, y, w, h])
                preds_number.append(number_pred)

        indices = cv2.dnn.NMSBoxes(boxes, confidences,
                                   min_confidence, nms_threshold)

        # NMSBoxes returns an (N, 1) array, an (N,) array or an empty tuple
        # depending on the OpenCV version; np.ravel normalises all of them.
        for i in np.ravel(indices):
            i = int(i)

            # check if is people detection (class id 0)
            if class_numbers[i] != 0:
                continue

            box = boxes[i]
            box_numbers = numbers_bboxes[i]

            cv2.rectangle(frame, (box_numbers[0], box_numbers[1]),
                          (box_numbers[2], box_numbers[3]), (255, 0, 0), 2)
            cv2.rectangle(frame, (box[0], box[1]),
                          (box[0] + box[2], box[1] + box[3]), (0, 0, 0), 2)
            cv2.putText(frame, f'Player {preds_number[i]}',
                        (box[0] - 10, box[1] - 10),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

        # Store processed frames into result video.
        # The writer is created on the first frame, once the size is known.
        if writer is None:
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            writer = cv2.VideoWriter(output_video_path, fourcc, 15,
                                     (frame.shape[1], frame.shape[0]), True)

        # Write processed current frame to the file
        writer.write(frame)
finally:
    # Releasing video reader and writer, even if processing failed part-way;
    # the writer does not exist when no frame could be read.
    video.release()
    if writer is not None:
        writer.release()