## 1. Import required libraries and the CSV logger


In [4]:
from ultralytics import YOLO
import cv2
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.optimize import linear_sum_assignment

In [5]:
import csv

class csv_gen:
    """Write per-frame player positions to a CSV file.

    The target file is opened in write mode (truncating any existing file)
    when the object is created, and the header row
    ``frame, player_id, x_center, y_center`` is written immediately.

    Instances can be used as context managers so the file is closed even
    when the surrounding code raises::

        with csv_gen('positions.csv') as log:
            log.log_player(0, 1, 100, 200)
    """

    def __init__(self, csv_path):
        """Open *csv_path* for writing and emit the header row."""
        # newline='' lets the csv module control line endings itself.
        self.csv_file = open(csv_path, mode='w', newline='')
        try:
            self.csv_writer = csv.writer(self.csv_file)
            self.csv_writer.writerow(['frame', 'player_id', 'x_center', 'y_center'])
        except Exception:
            # Do not leak the handle if the header cannot be written.
            self.csv_file.close()
            raise

    def __enter__(self):
        """Return the logger itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the file on leaving the ``with`` block; never swallow errors."""
        self.close()
        return False

    def log_player(self, frame_idx, player_id, x_center, y_center):
        """Append one row: frame index, player ID and the box center."""
        self.csv_writer.writerow([frame_idx, player_id, x_center, y_center])

    def close(self):
        """Close the underlying file; calling it more than once is harmless."""
        if not self.csv_file.closed:
            self.csv_file.close()

## 2. Initialize model, video, output writer, and tracking parameters


In [6]:
# Paths to the detector weights and the input clip.
model_path = 'best.pt'
video_path = '15sec_input_720p.mp4'

model = YOLO(model_path)
cap = cv2.VideoCapture(video_path)
# VideoCapture does not raise when the file is missing or unreadable; it just
# yields no frames. Fail loudly here instead of silently writing empty output.
if not cap.isOpened():
    raise OSError(f'Could not open video: {video_path}')

# Mirror the input's geometry and frame rate in the annotated output video.
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # Some sources report 0 (or NaN) for the frame rate, which would give the
    # writer an invalid rate. NOTE(review): 30 is an assumed fallback - adjust
    # if the real capture rate is known.
    fps = 30.0
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
out = cv2.VideoWriter('output_tracking.mp4', fourcc, fps, (frame_width, frame_height))

# Tracking state shared with the main loop.
player_db = {}              # player_id -> {'features', 'center', 'last_seen', 'status'}
next_player_id = 0          # next ID handed to an unmatched player detection
appearance_weight = 0.7     # weight of color-histogram similarity in the match score
position_weight = 0.3       # weight of center-distance similarity in the match score
similarity_threshold = 0.65  # minimum combined score to accept a match
frame_idx = 0

logger = csv_gen('player_positions.csv')

## 3. Define helper functions for feature extraction and similarity computation


In [7]:
def extract_color_histogram(image, bins=(8, 8, 8)):
    """Return a flattened, normalized 3-D HSV histogram of *image*.

    The image is converted with ``cv2.COLOR_BGR2HSV``, so it is expected to
    be in BGR channel order. *bins* gives the number of bins per H, S and V
    channel. An empty image (``image.size == 0``) yields an all-zero vector
    with ``prod(bins)`` entries instead of calling OpenCV.
    """
    if image.size != 0:
        hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        channels = [0, 1, 2]
        # Hue spans 0-180 in OpenCV's 8-bit HSV; saturation and value 0-256.
        value_ranges = [0, 180, 0, 256, 0, 256]
        histogram = cv2.calcHist([hsv_image], channels, None, bins, value_ranges)
        # In-place normalization using OpenCV's default settings.
        cv2.normalize(histogram, histogram)
        return histogram.flatten()
    return np.zeros(np.prod(bins))

def compute_similarity(feat1, feat2):
    """Return the cosine similarity between two feature vectors.

    Either argument may be ``None``, in which case the similarity is 0.
    Otherwise both vectors are reshaped to single-row matrices and compared
    with scikit-learn's ``cosine_similarity``.
    """
    if any(vec is None for vec in (feat1, feat2)):
        return 0
    row_a, row_b = (vec.reshape(1, -1) for vec in (feat1, feat2))
    # cosine_similarity returns a 1x1 matrix here; unwrap the single score.
    similarity_matrix = cosine_similarity(row_a, row_b)
    return similarity_matrix[0][0]

def compute_position_similarity(center1, center2, frame_shape):
    """Return a positional similarity score for two points.

    The Euclidean distance between *center1* and *center2* is divided by the
    diagonal computed from the first two entries of *frame_shape*, and the
    result is subtracted from 1. Identical points therefore score 1, and
    points a full diagonal apart score 0.
    """
    first_dim, second_dim = frame_shape[0], frame_shape[1]
    diagonal = np.sqrt(first_dim**2 + second_dim**2)
    offset = np.array(center1) - np.array(center2)
    separation = np.linalg.norm(offset)
    return 1 - (separation / diagonal)


## 4. Main tracking loop: detect, track, assign IDs, visualize, and log to CSV


In [8]:
# Main tracking loop. For every frame: run the detector, match player
# detections against the currently active players, give unmatched detections
# new IDs, age out players that were not seen, then draw and log the result.
# The try/finally guarantees that the capture, the video writer, the CSV file
# and the preview window are released even if processing a frame raises.
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # No frame could be read (end of the stream): stop.
            break

        results = model(frame)
        if frame_idx == 0:
            print("Class mapping:", results[0].names)

        # Keep only 'player' and 'ball' detections above 0.4 confidence;
        # every other label is ignored.
        detections = []
        for det in results[0].boxes:
            x1, y1, x2, y2 = map(int, det.xyxy[0])
            conf = float(det.conf[0])
            cls = int(det.cls[0])
            label = results[0].names[cls]
            cx, cy = int((x1 + x2) / 2), int((y1 + y2) / 2)
            if label == 'player' and conf > 0.4:
                crop = frame[y1:y2, x1:x2]
                features = extract_color_histogram(crop)
                detections.append({
                    'bbox': (x1, y1, x2, y2),
                    'features': features,
                    'center': (cx, cy),
                    'type': 'player',
                    'assigned': False
                })
            elif label == 'ball' and conf > 0.4:
                detections.append({'bbox': (x1, y1, x2, y2), 'type': 'ball'})

        # Known players eligible for matching: only those still 'active'.
        player_candidates = []
        candidate_ids = []
        candidate_centers = []
        for pid, pdata in player_db.items():
            if pdata['status'] == 'active':
                player_candidates.append(pdata['features'])
                candidate_ids.append(pid)
                candidate_centers.append(pdata['center'])

        # Parallel lists for the player detections; det_indices maps a cost
        # matrix column back to its position in `detections`.
        det_features = [det['features'] for det in detections if det['type'] == 'player']
        det_centers = [det['center'] for det in detections if det['type'] == 'player']
        det_indices = [i for i, det in enumerate(detections) if det['type'] == 'player']

        if player_candidates and det_features:
            # cost = 1 - weighted (appearance + position) similarity;
            # rows are known players, columns are detections.
            cost_matrix = np.ones((len(player_candidates), len(det_features)))
            for i, (feat_db, center_db) in enumerate(zip(player_candidates, candidate_centers)):
                for j, (feat_det, center_det) in enumerate(zip(det_features, det_centers)):
                    app_sim = compute_similarity(feat_db, feat_det)
                    pos_sim = compute_position_similarity(center_db, center_det, frame.shape)
                    sim = appearance_weight * app_sim + position_weight * pos_sim
                    cost_matrix[i, j] = 1 - sim
            # One-to-one assignment that minimizes the total cost.
            row_ind, col_ind = linear_sum_assignment(cost_matrix)
            for r, c in zip(row_ind, col_ind):
                sim = 1 - cost_matrix[r, c]
                # Accept a pairing only if it is similar enough; rejected
                # detections fall through and receive a new ID below.
                if sim >= similarity_threshold:
                    pid = candidate_ids[r]
                    det_idx = det_indices[c]
                    detections[det_idx]['id'] = pid
                    detections[det_idx]['assigned'] = True
                    player_db[pid]['features'] = detections[det_idx]['features']
                    player_db[pid]['center'] = detections[det_idx]['center']
                    player_db[pid]['last_seen'] = frame_idx
                    player_db[pid]['status'] = 'active'

        # Any player detection still unassigned becomes a new tracked player.
        for i, det in enumerate(detections):
            if det['type'] == 'player' and not det.get('assigned', False):
                pid = next_player_id
                detections[i]['id'] = pid
                detections[i]['assigned'] = True
                player_db[pid] = {
                    'features': det['features'],
                    'center': det['center'],
                    'last_seen': frame_idx,
                    'status': 'active'
                }
                next_player_id += 1

        # Age out players: an active player not seen this frame becomes
        # 'lost'; a player that was already 'lost' is dropped.
        # NOTE(review): 'lost' players are never matching candidates above,
        # so an ID that misses a single frame is never reused - confirm this
        # is intended.
        to_remove = []
        for pid, pdata in player_db.items():
            if pdata['status'] == 'active' and pdata['last_seen'] < frame_idx:
                pdata['status'] = 'lost'
            elif pdata['status'] == 'lost':
                to_remove.append(pid)
        for pid in to_remove:
            del player_db[pid]

        # Annotate the frame and log each player's box center to the CSV.
        for det in detections:
            x1, y1, x2, y2 = det['bbox']
            if det['type'] == 'player':
                pid = det['id']
                color = (0, 255, 0) if player_db.get(pid, {}).get('status', '') == 'active' else (0, 165, 255)
                cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
                cv2.putText(frame, f'Player ID: {pid}', (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
                logger.log_player(frame_idx, pid, int((x1 + x2) / 2), int((y1 + y2) / 2))
            elif det['type'] == 'ball':
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
                cv2.putText(frame, 'Ball', (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

        out.write(frame)
        cv2.imshow('Tracking', frame)
        # Pressing 'q' in the preview window stops processing early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        frame_idx += 1
finally:
    cap.release()
    out.release()
    logger.close()
    cv2.destroyAllWindows()



0: 384x640 1 ball, 16 players, 2 referees, 1225.5ms
Speed: 9.9ms preprocess, 1225.5ms inference, 15.9ms postprocess per image at shape (1, 3, 384, 640)
Class mapping: {0: 'ball', 1: 'goalkeeper', 2: 'player', 3: 'referee'}

0: 384x640 18 players, 2 referees, 756.1ms
Speed: 5.7ms preprocess, 756.1ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 649.6ms
Speed: 1.8ms preprocess, 649.6ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 654.5ms
Speed: 2.4ms preprocess, 654.5ms inference, 0.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 14 players, 2 referees, 694.5ms
Speed: 2.2ms preprocess, 694.5ms inference, 1.0ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 1 ball, 16 players, 2 referees, 945.9ms
Speed: 6.1ms preprocess, 945.9ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 15 players, 2 referees