In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
import pandas as pd
import numpy as np
from ultralytics import YOLO

In [None]:
import tensorflow as tf

# Ask TensorFlow which physical GPU devices it can see
gpu_devices = tf.config.list_physical_devices('GPU')

# Handle the "no GPU" case first; otherwise report the first detected device
if not gpu_devices:
    print("‚ùå GPU not found. TensorFlow is using the CPU.")
else:
    print(f"‚úÖ GPU is available and TensorFlow is using it: {gpu_devices[0]}")

In [None]:
# Person detector: YOLOv8 "nano" weights (the small, fast variant)
YOLO_WEIGHTS = 'yolov8n.pt'
yolo_model = YOLO(YOLO_WEIGHTS)

# Pose estimator: MoveNet single-pose "lightning", fetched from TF Hub
MOVENET_HANDLE = "https://tfhub.dev/google/movenet/singlepose/lightning/4"
movenet_model = hub.load(MOVENET_HANDLE)
movenet_input_size = 192  # side length of the square image passed to MoveNet

In [None]:
# Annotation CSV for the match video. The path is defined in this cell so that
# it runs on a fresh kernel (Restart & Run All) instead of depending on a
# CSV_PATH that is only assigned by a cell further down the notebook.
CSV_PATH = "annotations/Monica Greene unedited tennis match play.mp4.csv"
timestamps_df = pd.read_csv(CSV_PATH)


In [None]:

# Input locations: the timestamp annotations and the raw match footage
CSV_PATH = "annotations/Monica Greene unedited tennis match play.mp4.csv"
VIDEO_PATH = "raw_videos/Monica Greene unedited tennis match play.mp4"

# Read the annotated timestamps into a DataFrame
timestamps_df = pd.read_csv(filepath_or_buffer=CSV_PATH)

print(f"‚úÖ Models loaded. Processing video: {VIDEO_PATH}")


In [None]:
# Cell 3: Pose Extraction
# ==============================================================================
def run_movenet(input_image):
    """Runs MoveNet on a single image and returns keypoints.

    The image is letterboxed (aspect-preserving resize plus zero padding) to a
    square of ``movenet_input_size`` pixels per side before inference. MoveNet
    reports coordinates normalized to that padded square, so the padding is
    undone here: the returned ``y``/``x`` values are fractions of
    ``input_image``'s own height and width. For a square input the result is
    the model output unchanged.

    Args:
        input_image: Image indexed as (height, width, channels), with a
            statically known height and width.

    Returns:
        Float tensor with the same shape as the model's ``output_0``; the last
        axis holds ``(y, x, score)`` per keypoint.
    """
    image_height = int(input_image.shape[0])
    image_width = int(input_image.shape[1])

    # Resize and pad the image to the model's expected input size
    image_for_movenet = tf.image.resize_with_pad(
        tf.expand_dims(input_image, axis=0), movenet_input_size, movenet_input_size
    )

    # Run inference; the signature takes an int32 image and returns a dict
    infer = movenet_model.signatures['serving_default']
    outputs = infer(tf.cast(image_for_movenet, dtype=tf.int32))
    keypoints_with_scores = outputs['output_0']

    # Undo the letterbox. The longer image side fills the square; the shorter
    # side covers only `short / long` of it and is centred, so half of the
    # remainder is padding on each end. Scores (third column) pass through.
    longest_side = max(image_height, image_width)
    covered_height = image_height / longest_side
    covered_width = image_width / longest_side
    letterbox_offset = [(1.0 - covered_height) / 2.0, (1.0 - covered_width) / 2.0, 0.0]
    letterbox_extent = [covered_height, covered_width, 1.0]

    return (keypoints_with_scores - letterbox_offset) / letterbox_extent

# --- Main Loop ---
# Walks the video frame by frame, picks the person whose bounding box starts
# highest in the frame, runs MoveNet on that crop, and stores the keypoints in
# full-frame pixel coordinates. Produces `all_frame_data` (one dict per frame
# with a pose) and `frame_count` (total frames read).
video = cv2.VideoCapture(VIDEO_PATH)
if not video.isOpened():
    # Fail loudly: an unreadable path would otherwise yield zero frames and
    # only surface as a confusing error in a later cell.
    raise IOError(f"Could not open video: {VIDEO_PATH}")

all_frame_data = []
frame_count = 0

print("üèÉ‚Äç‚ôÇÔ∏è Starting pose extraction from video...")

try:
    while video.isOpened():
        success, frame = video.read()
        if not success:
            break

        # 1. Detect players with YOLO
        results = yolo_model(frame, classes=[0], verbose=False) # class 0 is 'person'

        # 2. Find the far-side player (highest bounding box in frame)
        far_side_player_box = None
        min_y = float('inf')

        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
                if y1 < min_y:
                    min_y = y1
                    far_side_player_box = (x1, y1, x2, y2)

        # 3. Get Pose if player is found
        if far_side_player_box is not None:
            x1, y1, x2, y2 = far_side_player_box
            player_crop = frame[y1:y2, x1:x2]

            # Ensure crop is not empty
            if player_crop.shape[0] > 0 and player_crop.shape[1] > 0:
                # OpenCV decodes frames as BGR, while MoveNet expects RGB
                player_crop_rgb = cv2.cvtColor(player_crop, cv2.COLOR_BGR2RGB)
                keypoints_relative = run_movenet(player_crop_rgb)

                # Convert keypoints to absolute coordinates.
                # Each row is (y, x, score); y and x are treated as fractions
                # of the crop, so scale by the crop size and shift by its origin.
                kps = keypoints_relative[0, 0].numpy()
                abs_kps = np.zeros_like(kps)
                abs_kps[:, 0] = kps[:, 0] * (y2 - y1) + y1 # y-coordinate
                abs_kps[:, 1] = kps[:, 1] * (x2 - x1) + x1 # x-coordinate
                abs_kps[:, 2] = kps[:, 2] # score

                all_frame_data.append({
                    "frame_id": frame_count,
                    "keypoints": abs_kps
                })

        # Counts every frame read, whether or not a pose was stored for it
        frame_count += 1
finally:
    # Release the capture even if detection or pose inference raised
    video.release()

print(f"‚úÖ Pose extraction complete. Processed {frame_count} frames.")

In [None]:
# Cell 4: Feature Engineering
# ==============================================================================
print("Engineering features from pose data...")

# Index the per-frame keypoints by frame id so a frame's predecessor is a dict lookup away
pose_lookup = {}
for item in all_frame_data:
    pose_lookup[item['frame_id']] = item['keypoints']

all_feature_data = []

# Visit every frame id in order; features need a pose on this frame AND the previous one
for frame_id in range(frame_count):
    if frame_id not in pose_lookup or (frame_id - 1) not in pose_lookup:
        continue

    prev_kps = pose_lookup[frame_id - 1]
    current_kps = pose_lookup[frame_id]

    # First two columns are the coordinates (stored as y, x); the score column is dropped
    positions = current_kps[:, :2]

    # Velocity = frame-to-frame change in position
    velocity = positions - prev_kps[:, :2]

    # One flat vector per frame: all keypoint positions first, then all keypoint velocities
    feature_vector = np.concatenate([positions.flatten(), velocity.flatten()])

    all_feature_data.append({"frame_id": frame_id, "features": feature_vector})

print(f"‚úÖ Feature engineering complete. Processed {len(all_feature_data)} frames with features.")

In [None]:
# Cell 5: Create Labeled Sequences
# ==============================================================================
SEQUENCE_LENGTH = 60  # frames per training sequence (2 seconds at 30fps)
STEP = 15             # frames between sequence starts (0.5 seconds at 30fps)

X = []
y = []

# Convert timestamps to frame numbers using the frame rate the video file
# reports, so labels do not drift when the footage is not exactly 30fps.
DEFAULT_FPS = 30  # used only when OpenCV cannot report a frame rate
fps_probe = cv2.VideoCapture(VIDEO_PATH)
try:
    reported_fps = fps_probe.get(cv2.CAP_PROP_FPS)
finally:
    fps_probe.release()

# OpenCV returns 0 when the property is unavailable; NaN also fails this test
fps = reported_fps if reported_fps > 0 else DEFAULT_FPS
print(f"Using {fps} fps to convert timestamps to frame numbers.")

timestamps_df['start_frame'] = timestamps_df['start_time'] * fps
timestamps_df['end_frame'] = timestamps_df['end_time'] * fps

def get_label_for_frame(frame_id, df):
    """Checks if a frame_id falls within an active point.

    Args:
        frame_id: Frame number to classify.
        df: DataFrame with 'start_frame' and 'end_frame' columns; each row is
            one interval, inclusive at both ends.

    Returns:
        1 (active) if frame_id lies inside at least one interval, else
        0 (inactive). An empty df yields 0.
    """
    # Vectorized interval test over all rows at once instead of a Python-level
    # iterrows loop; this function is called once per training sequence.
    inside_interval = (df['start_frame'] <= frame_id) & (df['end_frame'] >= frame_id)
    return int(inside_interval.any())

print("Creating training sequences...")

# Create a lookup dictionary for features
feature_lookup = {item['frame_id']: item['features'] for item in all_feature_data}

# default=-1 keeps this cell from crashing when no features were produced:
# the window loop below then simply does not run.
max_frame = max(feature_lookup, default=-1)

# A window starting at i covers frames i .. i + SEQUENCE_LENGTH - 1, so the
# last start that still fits is max_frame - SEQUENCE_LENGTH + 1 (inclusive).
last_start = max_frame - SEQUENCE_LENGTH + 1

sequences = []
labels = []

for i in range(0, last_start + 1, STEP):
    window = range(i, i + SEQUENCE_LENGTH)

    # If any frame in the window is missing features, skip the whole sequence
    if not all(j in feature_lookup for j in window):
        continue

    sequences.append([feature_lookup[j] for j in window])

    # The sequence's label is the label of its middle frame
    middle_frame_id = i + (SEQUENCE_LENGTH // 2)
    labels.append(get_label_for_frame(middle_frame_id, timestamps_df))

X = np.array(sequences)
y = np.array(labels)

if len(sequences) == 0:
    print("Warning: no complete sequences were found; X and y are empty.")

print(f"‚úÖ Created training data. Shape of X: {X.shape}, Shape of y: {y.shape}")

In [None]:
# Cell 6: Save Processed Data
# ==============================================================================
# Define a filename for your processed data
# Output file that will hold the processed arrays
DATA_FILENAME = "processed_match_1.npz"

print(f"üíæ Saving processed data to {DATA_FILENAME}...")

# Write both arrays into one compressed .npz archive, stored under the keys "X" and "y"
arrays_to_save = {"X": X, "y": y}
np.savez_compressed(DATA_FILENAME, **arrays_to_save)

print("‚úÖ Data saved successfully. You can now restart the notebook and load this file.")