### **Phase 2: Ball Tracking and Player Statistics with DEEPSORT** 🎥⚽


1. **Player & Ball Tracking** ⚽

In [None]:
# test with cli command
!yolo track model="/Users/alyazouzou/Desktop/CV_Football/FootCVision/phase1/runs/detect/train/weights/best.pt" source="/Users/alyazouzou/Desktop/CV_Football/vids/far.mov" conf=0.3 iou=0.5 show

In [1]:
import cv2
from track import PlayerTracker

In [1]:
import cv2
import numpy as np
import pandas as pd
from collections import defaultdict
from ultralytics import YOLO
from scipy.interpolate import interp1d

video_path = "/Users/alyazouzou/Desktop/CV_Football/vids/mcchelsea.mov"  # Replace with your actual video path
path = "/Users/alyazouzou/Desktop/CV_Football/FootCVision/phase1/runs/detect/train/weights/best.pt"

# Load the YOLO model
model = YOLO(path)
cap = cv2.VideoCapture(video_path)

# To store tracking information
track_history = defaultdict(lambda: [])
ball_tracking_data = []  # List to store tracking data for 'ball'
player_tracking_data = []  # List to store tracking data for 'player'

frame_number = 0  # Initialize frame number counter

# Class labels for the specific task
class_labels = {0: "ball", 1: "goalkeeper", 2: "player", 3: "referee"}

while cap.isOpened():
    success, frame = cap.read()
    if success:
        # Get the dimensions of the frame (image width and height)
        image_height, image_width, _ = frame.shape
        
        # Increment frame number
        frame_number += 1

        # Track objects
        results = model.track(frame, persist=True, tracker="bytetrack.yaml")
        boxes = results[0].boxes.xywh.cpu()
        track_ids = results[0].boxes.id.int().cpu().tolist()
        classes = results[0].boxes.cls.int().cpu().tolist()  # Get class labels for each box

        for box, track_id, class_id in zip(boxes, track_ids, classes):
            if class_id == 0:  # "ball" class
                x, y, w, h = box
                # Convert xywh to normalized bounding box coordinates (x1, y1, x2, y2)
                x1, y1, x2, y2 = x / image_width, y / image_height, (x + w) / image_width, (y + h) / image_height
                
                # Track history for each object
                track = track_history[track_id]
                track.append((float(x1), float(y1), float(x2), float(y2)))

                # Only keep the last 30 points for tracking
                if len(track) > 30:
                    track.pop(0)

                # Store the tracking data for each object in the ball DataFrame
                for point in track:
                    ball_tracking_data.append({
                        "frame": frame_number,
                        "track_id": track_id,
                        "class": "ball",
                        "x1": point[0],
                        "y1": point[1],
                        "x2": point[2],
                        "y2": point[3]
                    })
            
            elif class_id == 2:  # "player" class
                x, y, w, h = box
                # Convert xywh to normalized bounding box coordinates (x1, y1, x2, y2)
                x1, y1, x2, y2 = x / image_width, y / image_height, (x + w) / image_width, (y + h) / image_height
                
                # Track history for each object
                track = track_history[track_id]
                track.append((float(x1), float(y1), float(x2), float(y2)))

                # Only keep the last 30 points for tracking
                if len(track) > 30:
                    track.pop(0)

                # Store the tracking data for each object in the player DataFrame
                for point in track:
                    player_tracking_data.append({
                        "frame": frame_number,
                        "track_id": track_id,
                        "class": "player",
                        "x1": point[0],
                        "y1": point[1],
                        "x2": point[2],
                        "y2": point[3]
                    })

    else:
        break

# Release the video capture
cap.release()



0: 352x640 17 players, 1 referee, 71.8ms
Speed: 4.0ms preprocess, 71.8ms inference, 5.5ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 53.2ms
Speed: 1.6ms preprocess, 53.2ms inference, 0.3ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 50.0ms
Speed: 1.1ms preprocess, 50.0ms inference, 0.4ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 47.1ms
Speed: 1.0ms preprocess, 47.1ms inference, 0.5ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 52.1ms
Speed: 1.0ms preprocess, 52.1ms inference, 0.4ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 46.7ms
Speed: 1.0ms preprocess, 46.7ms inference, 0.4ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 48.3ms
Speed: 1.1ms preprocess, 48.3ms inference, 0.3ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 r

In [2]:
# Create DataFrame from the tracking data for both classes
ball_df = pd.DataFrame(ball_tracking_data)
player_df = pd.DataFrame(player_tracking_data)

In [3]:
print(player_df)

         frame  track_id   class        x1        y1        x2        y2
0            1         1  player  0.931469  0.672399  0.963082  0.822953
1            1         2  player  0.928506  0.293010  0.957184  0.392205
2            1         3  player  0.502046  0.119594  0.534881  0.216116
3            1         4  player  0.952773  0.169959  0.972086  0.285260
4            1         5  player  0.847456  0.426474  0.867665  0.549759
...        ...       ...     ...       ...       ...       ...       ...
1203139   2831      1208  player  0.565757  0.372785  0.592805  0.469328
1203140   2831      1208  player  0.564187  0.372739  0.590518  0.467789
1203141   2831      1208  player  0.563716  0.372615  0.589615  0.467065
1203142   2831      1208  player  0.561182  0.373650  0.587589  0.472546
1203143   2831      1208  player  0.560275  0.373063  0.585998  0.471540

[1203144 rows x 7 columns]


In [4]:
# Find the range of frames
min_frame = ball_df['frame'].min()
max_frame = ball_df['frame'].max()

# Create a DataFrame with all frames in the range
all_frames = pd.DataFrame({'frame': range(min_frame, max_frame + 1)})

# Merge with the original DataFrame to include all frames
ball_df_full = pd.merge(all_frames, ball_df, on='frame', how='left')

# Interpolate the missing values for x1, y1, x2, y2
for col in ['x1', 'y1', 'x2', 'y2']:
    missing = ball_df_full[col].isna()
    df_training = ball_df_full[~missing]
    df_missing = ball_df_full[missing].reset_index(drop=True)
    
    if not df_training.empty:
        f = interp1d(df_training['frame'], df_training[col], fill_value="extrapolate")
        ball_df_full.loc[missing, col] = f(df_missing['frame'])

# Fill the 'class' and 'track_id' columns with the previous values
ball_df_full['class'] = ball_df_full['class'].ffill()
missing_track_id = ball_df_full['track_id'].isna()

# Fill NaN track_id with the previous track_id + 1
for idx in ball_df_full[missing_track_id].index:
    ball_df_full.loc[idx, 'track_id'] = ball_df_full.loc[idx - 1, 'track_id'] + 1
print(ball_df_full)

      frame  track_id class        x1        y1        x2        y2
0        89      21.0  ball  0.522335  0.441476  0.531478  0.468149
1        90      21.0  ball  0.522335  0.441476  0.531478  0.468149
2        90      21.0  ball  0.522342  0.441466  0.531499  0.468179
3        91      22.0  ball  0.518771  0.444930  0.528666  0.472744
4        92      23.0  ball  0.515200  0.448393  0.525833  0.477309
...     ...       ...   ...       ...       ...       ...       ...
6245   2827    1280.0  ball  0.470076  0.377854  0.480236  0.402590
6246   2828    1281.0  ball  0.468518  0.377628  0.478690  0.402372
6247   2829    1282.0  ball  0.466959  0.377403  0.477143  0.402154
6248   2830    1283.0  ball  0.465401  0.377178  0.475596  0.401937
6249   2831    1228.0  ball  0.463842  0.376953  0.474050  0.401719

[6250 rows x 7 columns]


In [5]:
# Find the range of frames
min_frame = player_df['frame'].min()
max_frame = player_df['frame'].max()

# Create a DataFrame with all frames in the range
all_frames = pd.DataFrame({'frame': range(min_frame, max_frame + 1)})

# Merge with the original DataFrame to include all frames
player_df_full = pd.merge(all_frames, player_df, on='frame', how='left')

# Interpolate the missing values for x1, y1, x2, y2
for col in ['x1', 'y1', 'x2', 'y2']:
    missing = player_df_full[col].isna()
    df_training = player_df_full[~missing]
    df_missing = player_df_full[missing].reset_index(drop=True)
    
    if not df_training.empty:
        f = interp1d(df_training['frame'], df_training[col], fill_value="extrapolate")
        player_df_full.loc[missing, col] = f(df_missing['frame'])

# Fill the 'class' and 'track_id' columns with the previous values
player_df_full['class'] = player_df_full['class'].ffill()
missing_track_id = player_df_full['track_id'].isna()

for idx in player_df_full[missing_track_id].index:
    if idx == 0:
        # If it's the first index, initialize track_id to 1 (or any other appropriate value)
        player_df_full.loc[idx, 'track_id'] = 1
    else:
        # For other rows, fill track_id with the previous track_id + 1
        player_df_full.loc[idx, 'track_id'] = player_df_full.loc[idx - 1, 'track_id'] + 1
print(player_df_full)

         frame  track_id   class        x1        y1        x2        y2
0            1         1  player  0.931469  0.672399  0.963082  0.822953
1            1         2  player  0.928506  0.293010  0.957184  0.392205
2            1         3  player  0.502046  0.119594  0.534881  0.216116
3            1         4  player  0.952773  0.169959  0.972086  0.285260
4            1         5  player  0.847456  0.426474  0.867665  0.549759
...        ...       ...     ...       ...       ...       ...       ...
1203139   2831      1208  player  0.565757  0.372785  0.592805  0.469328
1203140   2831      1208  player  0.564187  0.372739  0.590518  0.467789
1203141   2831      1208  player  0.563716  0.372615  0.589615  0.467065
1203142   2831      1208  player  0.561182  0.373650  0.587589  0.472546
1203143   2831      1208  player  0.560275  0.373063  0.585998  0.471540

[1203144 rows x 7 columns]


In [6]:
# Merge both DataFrames
final_df = pd.concat([ball_df_full, player_df_full])

# Sort the final DataFrame by frame number and track_id
final_df = final_df.sort_values(by=['frame', 'track_id']).reset_index(drop=True)
final_df['track_id'] = final_df['track_id'].astype(int)

In [7]:
print(final_df.head())

   frame  track_id   class        x1        y1        x2        y2
0      1         1  player  0.931469  0.672399  0.963082  0.822953
1      1         2  player  0.928506  0.293010  0.957184  0.392205
2      1         3  player  0.502046  0.119594  0.534881  0.216116
3      1         4  player  0.952773  0.169959  0.972086  0.285260
4      1         5  player  0.847456  0.426474  0.867665  0.549759


In [8]:
import cv2
import pandas as pd

# Load the video
video_path = "/Users/alyazouzou/Desktop/CV_Football/vids/mcchelsea.mov"  # Replace with your actual video path
cap = cv2.VideoCapture(video_path)

# Get video properties
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

frame_number = 0

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    
    frame_number += 1
    
    # Get the tracking data for the current frame
    frame_data = final_df[final_df['frame'] == frame_number]
    
    for _, row in frame_data.iterrows():
        x1, y1, x2, y2 = row['x1'], row['y1'], row['x2'], row['y2']
        class_name = row['class']
        
        # Convert normalized coordinates to pixel coordinates
        x1, y1, x2, y2 = int(x1 * width), int(y1 * height), int(x2 * width), int(y2 * height)
        
        # Choose a color for the bounding box
        color = (0, 255, 0) if class_name == "ball" else (255, 0, 0)
        
        # Draw the bounding box
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        cv2.putText(frame, class_name, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)
    
    # Write the frame to the output video
    out.write(frame)

# Release everything
cap.release()
out.release()
cv2.destroyAllWindows()

**Tracking on a frame**

In [None]:
import cv2
import numpy as np
import pandas as pd
from collections import defaultdict
from ultralytics import YOLO

video_path = "/Users/alyazouzou/Desktop/CV_Football/vids/mcchelsea.mov"  # Replace with your actual video path
path = "/Users/alyazouzou/Desktop/CV_Football/FootCVision/phase1/runs/detect/train/weights/best.pt"

# Load the YOLO model
model = YOLO(path)
cap = cv2.VideoCapture(video_path)

# To store tracking information
track_history = defaultdict(lambda: [])
tracking_data = []  # List to store all tracking data for DataFrame

frame_number = 0  # Initialize frame number counter

# Class labels for the specific task
class_labels = {0: "ball", 1: "goalkeeper", 2: "player", 3: "referee"}

while cap.isOpened():
    success, frame = cap.read()
    if success:
        # Get the dimensions of the frame (image width and height)
        image_height, image_width, _ = frame.shape
        
        # Increment frame number
        frame_number += 1

        # Track objects
        results = model.track(frame, persist=True, tracker="bytetrack.yaml")
        boxes = results[0].boxes.xywh.cpu()
        track_ids = results[0].boxes.id.int().cpu().tolist()
        classes = results[0].boxes.cls.int().cpu().tolist()  # Get class labels for each box

        for box, track_id, class_id in zip(boxes, track_ids, classes):
            if class_id == 0:  # Only process if the class is "ball"
                x, y, w, h = box
                # Convert xywh to normalized bounding box coordinates (x1, y1, x2, y2)
                x1, y1, x2, y2 = x / image_width, y / image_height, (x + w) / image_width, (y + h) / image_height
                
                # Track history for each object
                track = track_history[track_id]
                track.append((float(x1), float(y1), float(x2), float(y2)))

                # Only keep the last 30 points for tracking
                if len(track) > 30:
                    track.pop(0)

                # Store the tracking data for each object in the DataFrame
                for point in track:
                    tracking_data.append({
                        "frame": frame_number,
                        "track_id": track_id,
                        "class": "ball",
                        "x1": point[0],
                        "y1": point[1],
                        "x2": point[2],
                        "y2": point[3]
                    })

    else:
        break

# Release the video capture
cap.release()

# Create DataFrame from the tracking data
df = pd.DataFrame(tracking_data)

from scipy.interpolate import interp1d

# Find the range of frames
min_frame = df['frame'].min()
max_frame = df['frame'].max()

# Create a DataFrame with all frames in the range
all_frames = pd.DataFrame({'frame': range(min_frame, max_frame + 1)})

# Merge with the original DataFrame to include all frames
df_full = pd.merge(all_frames, df, on='frame', how='left')

# Interpolate the missing values for x1, y1, x2, y2
for col in ['x1', 'y1', 'x2', 'y2']:
    missing = df_full[col].isna()
    df_training = df_full[~missing]
    df_missing = df_full[missing].reset_index(drop=True)
    
    if not df_training.empty:
        f = interp1d(df_training['frame'], df_training[col], fill_value="extrapolate")
        df_full.loc[missing, col] = f(df_missing['frame'])

# Fill the 'class' and 'track_id' columns with the previous values
df_full['class'] = df_full['class'].ffill()
missing_track_id = df_full['track_id'].isna()

# Fill NaN track_id with the previous track_id + 1
for idx in df_full[missing_track_id].index:
    df_full.loc[idx, 'track_id'] = df_full.loc[idx - 1, 'track_id'] + 1


print(df_full.head(100))



0: 352x640 17 players, 1 referee, 63.6ms
Speed: 1.1ms preprocess, 63.6ms inference, 0.5ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 55.5ms
Speed: 1.6ms preprocess, 55.5ms inference, 0.3ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 52.1ms
Speed: 1.1ms preprocess, 52.1ms inference, 0.8ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 62.2ms
Speed: 1.2ms preprocess, 62.2ms inference, 0.3ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 51.1ms
Speed: 1.0ms preprocess, 51.1ms inference, 0.5ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 50.6ms
Speed: 1.0ms preprocess, 50.6ms inference, 0.4ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 referee, 52.2ms
Speed: 1.0ms preprocess, 52.2ms inference, 0.4ms postprocess per image at shape (1, 3, 352, 640)

0: 352x640 17 players, 1 r

In [3]:
print(df.head())

   frame  track_id class        x1        y1        x2        y2
0     89        21  ball  0.522335  0.441476  0.531478  0.468149
1     90        21  ball  0.522335  0.441476  0.531478  0.468149
2     90        21  ball  0.522342  0.441466  0.531499  0.468179
3     92        23  ball  0.515200  0.448393  0.525833  0.477309
4     93        23  ball  0.515200  0.448393  0.525833  0.477309


In [10]:
from scipy.interpolate import interp1d

# Find the range of frames
min_frame = df['frame'].min()
max_frame = df['frame'].max()

# Create a DataFrame with all frames in the range
all_frames = pd.DataFrame({'frame': range(min_frame, max_frame + 1)})

# Merge with the original DataFrame to include all frames
df_full = pd.merge(all_frames, df, on='frame', how='left')

# Interpolate the missing values for x1, y1, x2, y2
for col in ['x1', 'y1', 'x2', 'y2']:
    missing = df_full[col].isna()
    df_training = df_full[~missing]
    df_missing = df_full[missing].reset_index(drop=True)
    
    if not df_training.empty:
        f = interp1d(df_training['frame'], df_training[col], fill_value="extrapolate")
        df_full.loc[missing, col] = f(df_missing['frame'])

# Fill the 'class' and 'track_id' columns with the previous values
df_full['class'] = df_full['class'].ffill()
missing_track_id = df_full['track_id'].isna()

# Fill NaN track_id with the previous track_id + 1
for idx in df_full[missing_track_id].index:
    df_full.loc[idx, 'track_id'] = df_full.loc[idx - 1, 'track_id'] + 1


print(df_full.head(100))

    frame  track_id class        x1        y1        x2        y2
0      89      21.0  ball  0.522335  0.441476  0.531478  0.468149
1      90      21.0  ball  0.522335  0.441476  0.531478  0.468149
2      90      21.0  ball  0.522342  0.441466  0.531499  0.468179
3      91      22.0  ball  0.518771  0.444930  0.528666  0.472744
4      92      23.0  ball  0.515200  0.448393  0.525833  0.477309
..    ...       ...   ...       ...       ...       ...       ...
95    171      62.0  ball  0.323142  0.567511  0.334041  0.598223
96    171      62.0  ball  0.323237  0.567404  0.334190  0.598319
97    171      62.0  ball  0.323499  0.567035  0.334267  0.597428
98    171      62.0  ball  0.323578  0.566916  0.334281  0.597121
99    172      62.0  ball  0.322945  0.568068  0.333540  0.597885

[100 rows x 7 columns]


In [None]:
# Calculate previous x and y coordinates for each track_id
df['prev_x'] = df.groupby('track_id')['x'].shift(1).reset_index(drop=True)
df['prev_y'] = df.groupby('track_id')['y'].shift(1).reset_index(drop=True)

# Calculate Euclidean distance between consecutive points
df['distance'] = np.sqrt((df['x'] - df['prev_x'])**2 + (df['y'] - df['prev_y'])**2)

# Fill NaN values in distance (first frame for each track_id)
df['distance'] = df['distance'].fillna(0)

# Calculate total distance traveled and average speed for each player
player_stats = df[df['class'] == 'player'].groupby('track_id').agg(
    total_distance=('distance', 'sum'),
    average_speed=('distance', 'mean')
).reset_index()

# Determine ball possession
ball_df = df[df['class'] == 'ball']
player_df = df[df['class'] == 'player']

# Merge ball and player data on frame
merged_df = pd.merge(ball_df, player_df, on='frame', suffixes=('_ball', '_player'))

# Calculate distance between ball and each player
merged_df['distance_to_ball'] = np.sqrt(
    (merged_df['x_ball'] - merged_df['x_player'])**2 + 
    (merged_df['y_ball'] - merged_df['y_player'])**2
)

# Find the closest player to the ball at each frame
closest_player = merged_df.loc[merged_df.groupby('frame')['distance_to_ball'].idxmin()]
closest_player = closest_player[['frame', 'track_id_player']].rename(columns={'track_id_player': 'track_id'})

# Merge closest player info back to the original DataFrame
df = pd.merge(df, closest_player, on='frame', how='left')
df['ball_possession'] = df['track_id'] == df['track_id_y']

# Clean up the DataFrame
df = df.drop(columns=['prev_x', 'prev_y', 'track_id_y'])

# Save the updated DataFrame to a CSV file
df.to_csv('tracking_data_with_stats_and_possession.csv', index=False)

# Print player stats
print(player_stats)

2. **Statistics** 📈
   - Extract metrics for each player, such as:
     - Distance covered.
     - Ball possession time.
     - Speed and acceleration.