In [2]:
from tensorflow.keras.saving import load_model
from src.shot_classification.model import Attention, SequenceAttention, ShotClassifier
from src.shot_classification.neutral_model import NeutralIdentifier, Attention # subclassed models and layers
import numpy as np

In [3]:
# Alternative loading path (currently disabled): deserialize the full model
# from a .keras archive.
# NOTE(review): if this is re-enabled, `Attention` resolves to the class imported
# from src.shot_classification.neutral_model, because that import comes after
# (and shadows) the one from src.shot_classification.model — confirm that is
# the class the shot classifier expects.
# shot_classifier = load_model("serialized_models/shot_classifier_original.keras", custom_objects={
#     "ShotClassifier": ShotClassifier,
#     "Attention": Attention,
#     "SequenceAttention": SequenceAttention,
# })

# Active loading path: construct the classifier in code, build it for inputs
# of shape (batch, 33, 99), then load the saved weights into it.
shot_classifier = ShotClassifier()
shot_classifier.build((None, 33, 99))
shot_classifier.load_weights("serialized_models/shot_classifier.weights.h5")



In [4]:
# Display the summary of the shot classifier after its weights were loaded.
shot_classifier.summary()

In [5]:
# Deserialize the neutrality model from its .keras archive, registering the
# custom classes it refers to by name.
# `Attention` here is the class from src.shot_classification.neutral_model:
# that import is the later of the two `Attention` imports, so it wins.
neutral_identifier = load_model("serialized_models/neutrality.keras", custom_objects={
    "NeutralIdentifier": NeutralIdentifier,
    "Attention": Attention,
})

In [6]:
# Display the summary of the loaded neutrality model.
neutral_identifier.summary()

In [7]:
# Smoke test: push a single all-zero float32 batch of shape (1, 33, 99)
# through the shot classifier and keep the prediction for inspection.
dummy = np.zeros((1, 33, 99), dtype=np.float32)
output = shot_classifier.predict(dummy)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step


In [8]:
# Gather the shape of every trainable variable whose name mentions "kernel",
# then print the shapes one per line.
kernel_shapes = [
    variable.shape
    for variable in shot_classifier.trainable_variables
    if "kernel" in variable.name
]
for kernel_shape in kernel_shapes:
    print(kernel_shape)

(99, 2048)
(512, 2048)
(99, 2048)
(512, 2048)
(1024, 1024)
(256, 1024)
(1024, 1024)
(256, 1024)
(512, 256)
(256, 512)
(128, 512)
(256, 512)
(128, 512)
(256, 1)
(256, 512)
(512, 512)
(512, 4)


In [9]:
# Inspect the prediction for the all-zero input; the recorded result is
# uniform (0.25 for each of the 4 output values).
output

array([[0.25, 0.25, 0.25, 0.25]], dtype=float32)

In [10]:
# Scratch cell: a (33, 3) float32 zero array; the result is not assigned.
# NOTE(review): 33 * 3 = 99 matches the last input dimension used above —
# presumably one flattened feature row; confirm, or drop this cell.
np.zeros((33, 3), dtype=np.float32)

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]], dtype=float32)

In [11]:
# Display the shot classifier summary again, this time after predict() has run.
shot_classifier.summary()

In [12]:
# Scratch cell: small integer array used to check slicing below.
arr = np.array([1,2,3,4,5])

In [13]:
# Slicing from index 1 drops the first element.
arr[1:]

array([2, 3, 4, 5])

In [14]:
from ultralytics import YOLO

In [15]:
# Load the YOLO weights from "hugging_face_best.pt" and move the model to the CPU.
ball_tracker = YOLO("hugging_face_best.pt").to("cpu")

In [16]:
import cv2 as cv

In [17]:
# Open the demo video and grab its first frame for a single-image test.
# NOTE(review): `ret` is never checked, so `frame` is None if the file cannot
# be read, and `cap` is not released in the visible cells.
cap = cv.VideoCapture("assets/demo.mov")

ret, frame = cap.read()

In [18]:
# Shape of the decoded frame; the recorded output is (964, 1690, 3).
frame.shape

(964, 1690, 3)

In [19]:
%%time

# Time one prediction on the single frame read above, with a confidence
# threshold of 0.25 and verbose logging on. The recorded run reports
# "(no detections)" for this frame.
results = ball_tracker.predict(
    source=frame,
    stream=False,
    verbose=True,
    conf=0.25
)


0: 736x1280 (no detections), 139.0ms
Speed: 6.5ms preprocess, 139.0ms inference, 2.9ms postprocess per image at shape (1, 3, 736, 1280)
CPU times: user 131 ms, sys: 22.7 ms, total: 154 ms
Wall time: 171 ms


In [20]:
import torch
import matplotlib.pyplot as plt
# Teacher detector that video_frame_generator queries for ball boxes.
# It loads the same weights file ("hugging_face_best.pt") as `ball_tracker`.
teacher = YOLO("hugging_face_best.pt")

def make_gaussian_kernel(size=21, sigma=4):
    """Return a ``size`` x ``size`` Gaussian bump, peak-normalised to 1, as float16.

    The small kernel is intended to be stamped onto a heatmap so that a
    full-size Gaussian does not have to be built for every frame.

    Args:
        size: Side length of the square kernel.
        sigma: Standard deviation of the Gaussian, in the same units as ``size``.

    Returns:
        A ``(size, size)`` ``np.float16`` array whose largest entry is 1.
    """
    # integer offsets from the centre cell, e.g. [-10, ..., 10] for size=21
    offsets = np.arange(size) - size // 2

    # squared distance of every cell from the centre, via broadcasting
    col_sq = offsets[np.newaxis, :] ** 2
    row_sq = offsets[:, np.newaxis] ** 2
    squared_dist = col_sq + row_sq

    bump = np.exp(-squared_dist / (2 * sigma**2))
    peak = bump.max()
    return (bump / peak).astype(np.float16)


# Shared 21x21 kernel, built once at cell execution time.
GAUSSIAN_KERNEL = make_gaussian_kernel(size=21, sigma=4)
# Kernel half-width: number of cells between the centre cell and the edge
# (10 for a 21-wide kernel).
K = GAUSSIAN_KERNEL.shape[0] // 2
# All-zero (360, 640, 1) float16 heatmap.
# NOTE(review): not referenced anywhere in the visible cells — confirm it is
# still needed.
EMPTY_HEATMAP = np.zeros((360, 640, 1), dtype=np.float16)

def video_frame_generator(video_dir):
    """Step through a video and visualise teacher-derived ball heatmaps.

    Every frame is resized to 640x360 and passed to the module-level
    ``teacher`` model. When at least one box is returned, the centre of the
    highest-confidence box is stamped with ``GAUSSIAN_KERNEL`` onto an
    otherwise empty 360x640 float16 heatmap, which is then shown with
    matplotlib. When no box is returned, the frame index is printed instead.

    NOTE: despite its name this function contains no ``yield``. It processes
    the whole video eagerly for its plotting/printing side effects and
    returns ``None``; the normalised frame and the channel-last heatmap are
    prepared but not handed to any consumer yet.

    Args:
        video_dir: Path of the video file passed to ``cv.VideoCapture``.
    """
    H, W = 360, 640  # height / width of the resized frame and of the heatmap
    frame_idx = 0

    cap = cv.VideoCapture(video_dir)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:  # break if final frame
                break
            frame = cv.resize(frame, (W, H))
            frame_idx += 1

            # use teacher model to get candidates for labels
            with torch.no_grad():
                results = teacher.predict(
                    source=frame,
                    conf=0.25,
                    stream=False,
                    verbose=False,
                    device="cpu",
                )[0]

            # get box with highest confidence (on ties the later box wins)
            best_conf = 0
            best_box = None
            for box in results.boxes:
                if box.conf[0] >= best_conf:
                    best_box = box
                    best_conf = box.conf[0]

            # scale pixel values to [0, 1]; done after inference so the
            # teacher still receives the original resized frame
            frame = frame.astype(np.float16) / 255.0

            if best_box is None:
                # no detection on this frame: report its index and move on
                print(frame_idx)
                continue

            # centre of the winning box; the corners get their own names so
            # they are not overwritten by the stamping bounds below
            bx1, by1, bx2, by2 = map(int, best_box.xyxy[0])
            cx = int((bx1 + bx2) / 2)
            cy = int((by1 + by2) / 2)

            heatmap = np.zeros((H, W), dtype=np.float16)

            # heatmap window covered by the kernel, clipped to the image
            x_lo = max(0, cx - K)
            x_hi = min(W, cx + K + 1)
            y_lo = max(0, cy - K)
            y_hi = min(H, cy + K + 1)

            # matching window inside the kernel, so a ball near the border
            # only gets the part of the kernel that fits
            kx_lo = K - (cx - x_lo)
            kx_hi = K + (x_hi - cx)
            ky_lo = K - (cy - y_lo)
            ky_hi = K + (y_hi - cy)

            # stamp
            heatmap[y_lo:y_hi, x_lo:x_hi] = GAUSSIAN_KERNEL[ky_lo:ky_hi, kx_lo:kx_hi]

            heatmap = heatmap[..., None]  # add channel dim
            # 8-bit copy of the heatmap, for display only
            heatmap_vis = (heatmap[..., 0] * 255).astype(np.uint8)

            plt.imshow(heatmap_vis, cmap="gray")
            plt.title(f"Frame {frame_idx}")
            plt.axis("off")
            plt.show()
    finally:
        # always free the capture handle, even if inference or plotting fails
        cap.release()

# video_frame_generator("src/ball_tracking/ball_tracking_data/videoplayback8.mp4")

In [23]:
from src.ball_tracking.model import TrackNet

In [26]:
# Deserialize the TrackNet model from its .keras archive, registering the
# custom TrackNet class by name.
tracknet = load_model("serialized_models/tracknet_best.keras", custom_objects={"TrackNet": TrackNet})

In [27]:
# Display the summary of the loaded TrackNet model.
tracknet.summary()