# How to Run Inference & Extract Data from a Pre-Trained YOLO Detection Model - Glove Framing Tracking
---
If you have any questions, please contact the authors of the repository.

## Pre-work

Let's make sure that we have access to a GPU. We can use the `nvidia-smi` command to do that. In case of any problems, navigate to `Edit` -> `Notebook settings` -> `Hardware accelerator`, set it to `GPU`, and then click `Save`.

In [None]:
# Shell escape: ask the NVIDIA driver which GPUs are visible to this runtime.
!nvidia-smi

## Clone BaseballCV Repo, set as Current Directory and Install Requirements

In [None]:
# Download the BaseballCV sources into the current working directory.
!git clone https://github.com/dylandru/BaseballCV.git
# Switch the notebook's working directory to the clone; the relative
# `requirements.txt` path below is resolved against it.
%cd BaseballCV
# Install the Python dependencies listed by the repository.
!pip install -r requirements.txt

## Import required libraries

In [None]:
from scripts.load_tools import load_model
from ultralytics import YOLO
import cv2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.backends.backend_agg import FigureCanvasAgg
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import torch
import moviepy.editor as mpy


## Define the glove movement tracking function

In [None]:
def track_glove_movement(model, video_path, output_path='glove_tracking.mp4',
                         glove_image_path='/content/BaseballCV/assets/baseball_glove.png',
                         glove_class_id=0, play_id=''):
    """Detect the glove in every frame of a video and render a tracking clip.

    Each frame is passed to ``model``; for every detection whose class equals
    ``glove_class_id`` the box centre is mapped through a fixed perspective
    transform onto a flat ``0.4 * height`` by ``height`` panel. The output
    video shows the annotated frame next to that panel with the accumulated
    glove path.

    Args:
        model: Callable invoked as ``model(frame, device=...)`` whose results
            expose ``.boxes`` with ``.cls`` and ``.xyxy`` (e.g. an Ultralytics
            YOLO model).
        video_path: Path of the input video, opened with ``cv2.VideoCapture``.
        output_path: Path of the rendered ``mp4v`` video.
        glove_image_path: Image drawn at the most recent glove position.
        glove_class_id: Class index treated as the glove.
        play_id: Identifier appended to the figure title.

    Returns:
        A ``(glove_positions, glove_boxes)`` tuple. ``glove_positions`` holds
        the perspective-transformed centre of every glove detection (one
        2-element array each); ``glove_boxes`` holds the matching
        ``(x1, y1, x2, y2)`` boxes in frame pixel coordinates.

    Raises:
        OSError: If the input video or the output writer cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise OSError(f'Could not open video: {video_path}')

    out = None
    fig = None
    glove_positions = []
    glove_boxes = []

    plt.ion()
    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the fractional frame rate (e.g. 29.97); truncating it to an int
        # changes the duration of the rendered clip.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not np.isfinite(fps) or fps <= 0:
            # Some containers report no frame rate; fall back to a sane value
            # so the writer can still be created.
            fps = 30.0

        # Perspective transformation points
        src_points = np.array([
            [frame_width * 0.3, frame_height * 0.8],
            [frame_width * 0.7, frame_height * 0.8],
            [frame_width * 0.7, frame_height * 0.2],
            [frame_width * 0.3, frame_height * 0.2]
        ], dtype=np.float32)

        dst_height = frame_height
        dst_width = int(dst_height * 0.4)
        dst_points = np.array([
            [0, dst_height - 1],
            [dst_width - 1, dst_height - 1],
            [dst_width - 1, 0],
            [0, 0]
        ], dtype=np.float32)

        M = cv2.getPerspectiveTransform(src_points, dst_points)

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            raise OSError(f'Could not open video writer for: {output_path}')

        glove_img = plt.imread(glove_image_path)

        # Set up plot figure: wide frame panel, narrow flat panel, two spacers.
        fig = plt.figure(figsize=(16, 6))
        gs = fig.add_gridspec(1, 4, width_ratios=[1, 0.01, 0.4, 0.020])

        ax1 = fig.add_subplot(gs[0])
        ax2 = fig.add_subplot(gs[2])
        fig.add_subplot(gs[3]).set_visible(False)
        fig.add_subplot(gs[1]).set_visible(False)

        fig.suptitle(
            f'Glove Movement throughout Play {play_id}',
            fontsize=16,
            fontweight='bold'
        )

        # Loop invariants: one off-screen canvas and one device choice.
        canvas = FigureCanvasAgg(fig)
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # YOLO inference
            results = model(frame, device=device)

            for r in results:
                for box in r.boxes:
                    if int(box.cls[0]) != glove_class_id:
                        continue

                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    glove_center = ((x1 + x2) / 2, (y1 + y2) / 2)

                    # perspectiveTransform expects an array of shape (1, N, 2).
                    flat_glove_point = cv2.perspectiveTransform(
                        np.array([[glove_center]], dtype=np.float64), M
                    )[0][0]

                    glove_positions.append(flat_glove_point)
                    glove_boxes.append((x1, y1, x2, y2))

                    centroid = (int(glove_center[0]), int(glove_center[1]))
                    cv2.circle(frame, centroid, 5, (0, 0, 255), -1)  # Red dot
                    cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)

            ax1.clear()
            ax1.imshow(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            ax1.axis('off')

            ax2.clear()
            ax2.set_facecolor('blue')
            ax2.add_patch(plt.Rectangle((0, 0), dst_width, dst_height, fill=True, color='blue'))
            if glove_positions:
                x = [pos[0] for pos in glove_positions]
                y = [pos[1] for pos in glove_positions]
                ax2.plot(x, y, 'r-')

                # Mark the latest position with the glove icon.
                im = OffsetImage(glove_img, zoom=0.04)
                ab = AnnotationBbox(im, (x[-1], y[-1]), xycoords='data', frameon=False)
                ax2.add_artist(ab)

            ax2.set_xlim(0, dst_width)
            ax2.set_ylim(dst_height, 0)  # Invert y-axis to match image coordinates
            ax2.set_xticks([])
            ax2.set_yticks([])

            fig.tight_layout()

            # Convert plot to image. `tostring_rgb()` was deprecated in
            # Matplotlib 3.8 and removed in 3.10; `buffer_rgba()` is the
            # supported way to read the rendered pixels.
            canvas.draw()
            plot_image = np.asarray(canvas.buffer_rgba())

            plot_image = cv2.resize(plot_image, (frame_width, frame_height))
            plot_image = cv2.cvtColor(plot_image, cv2.COLOR_RGBA2BGR)

            out.write(plot_image)
    finally:
        # Release native resources even when inference or rendering fails.
        cap.release()
        if out is not None:
            out.release()
        plt.ioff()
        if fig is not None:
            plt.close(fig)

    return glove_positions, glove_boxes

## Define the video to run inference on, the model to be used, and the variables that receive the glove positions and boxes

In [None]:
# Example broadcast clip shipped in the cloned repository's assets folder.
SOURCE_VIDEO_PATH = '/content/BaseballCV/assets/example_broadcast_video.mp4'

# Resolve the 'glove_tracking' weights, then wrap them in a YOLO model.
weights_path = load_model('glove_tracking')
model = YOLO(weights_path)

# Process the clip; this writes the default 'glove_tracking.mp4' output and
# hands back the per-detection positions and bounding boxes.
tracking_output = track_glove_movement(model, SOURCE_VIDEO_PATH)
glove_positions, glove_boxes = tracking_output

**NOTE:** If you want to run inference using your own file as input, simply upload the video to Google Colab and update `SOURCE_VIDEO_PATH` with the path to your file.

## Visualize the created video file

In [None]:
# Display the video using moviepy after processing
video = mpy.VideoFileClip("glove_tracking.mp4")
resized_video = video.resize((640, 360))  # Resize as needed
mpy.ipython_display(resized_video)

## Remember: the glove coordinates you obtained are now available for whatever use you need.

In [None]:
# Perspective-transformed glove centre points, one entry per glove detection.
print(glove_positions)

In [None]:
# Matching (x1, y1, x2, y2) bounding boxes in original frame pixel coordinates.
print(glove_boxes)