# **NeuralTennis**

In [21]:
# 1. Install dependencies
!pip install --quiet torch torchvision opencv-python-headless matplotlib plotly albumentations scikit-learn ultralytics

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m1.0/1.0 MB[0m [31m41.3 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# 2. Mount Google Drive
from google.colab import drive

# Mount point under which the Drive contents become visible to this runtime.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [4]:
# 3. Create folders for NeuralTennis project
import os

# Root of the project on the mounted Drive; other cells build paths from it.
BASE_DIR = "/content/drive/MyDrive/NeuralTennis"
PROJECT_SUBDIRS = ("input", "models", "output")

# exist_ok=True keeps this cell re-runnable when the folders already exist.
for sub in PROJECT_SUBDIRS:
    os.makedirs(f"{BASE_DIR}/{sub}", exist_ok=True)

In [7]:
# 4. Load & check video file
# Sanity check: open the input video, decode its first frame and report the
# frame size. Raises IOError if the file cannot be opened or decoded.
# NOTE(review): this cell checks 4.mp4 while main() processes 3.mp4 — confirm
# which file is intended.
import cv2

video_path = f"{BASE_DIR}/input/4.mp4"
cap = cv2.VideoCapture(video_path)
try:
    if not cap.isOpened():
        raise IOError(f"Cannot open {video_path}")
    ret, frame = cap.read()
finally:
    # Release the capture handle on every path, including the error ones.
    cap.release()
if not ret:
    raise IOError("Cannot read the first frame")
# frame.shape is indexed as [1] × [0] so the printed order is width × height.
print("Video loaded – resolution:", frame.shape[1], "×", frame.shape[0])

Video loaded – resolution: 1920 × 1080


In [13]:
import cv2

def read_video(video_path):
    """Decode every frame of a video file into memory.

    Args:
        video_path: Path of the video handed to ``cv2.VideoCapture``.

    Returns:
        list: The frames returned by successive ``cap.read()`` calls, in
        reading order. The whole video is held in memory at once.

    Raises:
        IOError: If the video cannot be opened. Previously this case returned
            an empty list, which only surfaced later as an unrelated error.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Cannot open {video_path}")

    frames = []
    try:
        while True:
            ret, frame = cap.read()
            # ret turns falsy once no further frame can be read.
            if not ret:
                break
            frames.append(frame)
    finally:
        # Always free the capture handle, even if decoding raised.
        cap.release()
    return frames

def save_video(output_video_frames, output_video_path, fps=30):
    """Encode a sequence of frames into an 'mp4v' video file.

    Args:
        output_video_frames: Non-empty sequence of frames. The output size is
            taken from the first frame's ``shape`` (``shape[1]`` as width,
            ``shape[0]`` as height).
        output_video_path: Destination path passed to ``cv2.VideoWriter``.
        fps: Frame rate written to the file. Defaults to 30, the value that
            was previously hard-coded.

    Raises:
        ValueError: If ``output_video_frames`` is empty.
        IOError: If the video writer cannot be opened for ``output_video_path``.
    """
    if len(output_video_frames) == 0:
        raise ValueError("No frames to save")

    first_frame = output_video_frames[0]
    frame_size = (first_frame.shape[1], first_frame.shape[0])

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, frame_size)
    # Without this check a writer that failed to open would still be followed
    # by the "Video saved" message below.
    if not out.isOpened():
        out.release()
        raise IOError(f"Cannot open {output_video_path} for writing")

    try:
        for frame in output_video_frames:
            out.write(frame)
    finally:
        # Release the writer even if a write raised.
        out.release()
    print(f"Video saved to {output_video_path}")


In [28]:
#!pip install ultralytics
from ultralytics import YOLO
import cv2

class PlayerTracker:
    """Tracks "person" detections with an Ultralytics YOLO model."""

    def __init__(self, model_path):
        """Load the YOLO weights found at ``model_path``."""
        self.model = YOLO(model_path)

    def detect_frame(self, frame):
        """Run tracking on one frame and return the tracked persons.

        Args:
            frame: A single video frame, passed to ``self.model.track``.

        Returns:
            dict: ``{track_id: [x1, y1, x2, y2]}`` for every box whose class
            name is "person" and that carries a track ID.
        """
        # persist=True is passed on every call; per the Ultralytics docs this
        # keeps tracks alive between successive calls (confirm for the
        # installed version).
        results = self.model.track(frame, persist=True)[0]
        id_name_dict = results.names

        player_dict = {}
        for box in results.boxes:
            # box.id is None when the tracker has not assigned an ID to this
            # box; calling .tolist() on it would raise AttributeError.
            if box.id is None:
                continue
            track_id = int(box.id.tolist()[0])
            result = box.xyxy.tolist()[0]
            # The class id comes back as a float; make it an int for lookup.
            object_cls_id = int(box.cls.tolist()[0])
            object_cls_name = id_name_dict[object_cls_id]
            if object_cls_name == "person":
                player_dict[track_id] = result

        return player_dict

    def draw_bboxes(self, video_frames, player_detections):
        """Draw a labelled rectangle for every tracked player.

        The frames are drawn on in place; the returned list holds the same
        frame objects that were passed in.

        Args:
            video_frames: Sequence of frames.
            player_detections: One ``{track_id: [x1, y1, x2, y2]}`` dict per
                frame, in the same order as ``video_frames``.

        Returns:
            list: The annotated frames. Pairing uses ``zip``, so any surplus
            entries in the longer input are dropped.
        """
        output_video_frames = []
        for frame, player_dict in zip(video_frames, player_detections):
            # Draw Bounding Boxes
            for track_id, bbox in player_dict.items():
                x1, y1, x2, y2 = bbox
                # The label is placed 10 px above the box's top-left corner.
                cv2.putText(frame, f"Player ID: {track_id}", (int(x1), int(y1 - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (197, 197, 197), 2)
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (197, 197, 197), 2)
            output_video_frames.append(frame)

        return output_video_frames


class BallTracker:
    """Detects the ball in single frames with an Ultralytics YOLO model."""

    # Class name treated as the ball when the loaded model knows it. Models
    # without this class are assumed to be ball-only detectors, so all of
    # their boxes are accepted.
    BALL_CLASS_NAME = "sports ball"

    def __init__(self, model_path):
        """Load the YOLO weights found at ``model_path``."""
        self.model = YOLO(model_path)

    def detect_frame(self, frame):
        """Detect the ball in one frame.

        Previously every box was accepted regardless of class and the last
        one won, so a multi-class model could report a person or a clock as
        the ball. Now boxes are filtered to ``BALL_CLASS_NAME`` when the model
        has that class, and the highest-confidence remaining box is kept.

        Args:
            frame: A single video frame, passed to ``self.model.predict``.

        Returns:
            dict: ``{1: [x1, y1, x2, y2]}`` for the chosen box, or ``{}`` when
            nothing qualifies. The key is always 1.
        """
        results = self.model.predict(frame, conf=0.15)[0]
        names = results.names
        known_names = names.values() if isinstance(names, dict) else names
        filter_by_class = self.BALL_CLASS_NAME in known_names

        ball_dict = {}
        best_conf = None
        for box in results.boxes:
            if filter_by_class:
                cls_id = int(box.cls.tolist()[0])
                if names[cls_id] != self.BALL_CLASS_NAME:
                    continue
            conf = box.conf.tolist()[0]
            if best_conf is None or conf > best_conf:
                best_conf = conf
                ball_dict[1] = box.xyxy.tolist()[0]

        return ball_dict

    def draw_bboxes(self, video_frames, player_detections):
        """Draw a labelled rectangle for the ball on every frame.

        The frames are drawn on in place; the returned list holds the same
        frame objects that were passed in.

        Args:
            video_frames: Sequence of frames.
            player_detections: Despite the name, one ball dict per frame as
                returned by ``detect_frame``. The parameter name is kept so
                existing keyword callers keep working.

        Returns:
            list: The annotated frames. Pairing uses ``zip``, so any surplus
            entries in the longer input are dropped.
        """
        output_video_frames = []
        for frame, ball_dict in zip(video_frames, player_detections):
            # Draw Bounding Boxes
            for track_id, bbox in ball_dict.items():
                x1, y1, x2, y2 = bbox
                # The label is placed 10 px above the box's top-left corner.
                cv2.putText(frame, f"Ball ID: {track_id}", (int(x1), int(y1 - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (153, 255, 102), 2)
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (153, 255, 102), 2)
            output_video_frames.append(frame)

        return output_video_frames

In [29]:
def main(
    input_video_path="/content/drive/MyDrive/NeuralTennis/input/3.mp4",
    model_path="/content/drive/MyDrive/NeuralTennis/models/yolov8x.pt",
    output_video_path="/content/drive/MyDrive/NeuralTennis/output/3_o.mp4",
):
    """Annotate a video with player and ball bounding boxes and save it.

    Both detection passes run on the frames as read, before any drawing.
    Previously the ball detector was fed frames that already had the player
    boxes and labels drawn onto them.

    Args:
        input_video_path: Video to process.
        model_path: YOLO weights used for both the player and ball trackers.
        output_video_path: Where the annotated video is written.
    """
    video_frames = read_video(input_video_path)

    player_tracker = PlayerTracker(model_path)
    player_detections = [player_tracker.detect_frame(frame) for frame in video_frames]

    ball_tracker = BallTracker(model_path)
    ball_detections = [ball_tracker.detect_frame(frame) for frame in video_frames]

    # Draw only after every detection pass is done.
    video_frames = player_tracker.draw_bboxes(video_frames, player_detections)
    video_frames = ball_tracker.draw_bboxes(video_frames, ball_detections)

    save_video(video_frames, output_video_path)

if __name__ == "__main__":
    main()


0: 384x640 14 persons, 1 bench, 2 sports balls, 1 clock, 61.8ms
Speed: 2.8ms preprocess, 61.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 persons, 1 bench, 1 sports ball, 1 clock, 60.3ms
Speed: 2.8ms preprocess, 60.3ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 persons, 1 bench, 1 sports ball, 1 clock, 36.6ms
Speed: 3.0ms preprocess, 36.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 persons, 1 bench, 1 sports ball, 1 clock, 36.6ms
Speed: 2.9ms preprocess, 36.6ms inference, 2.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 persons, 1 bench, 1 sports ball, 1 clock, 36.6ms
Speed: 2.8ms preprocess, 36.6ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 persons, 1 bench, 1 sports ball, 1 clock, 32.1ms
Speed: 2.8ms preprocess, 32.1ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 14 persons, 1 bench, 