### **Phase 1: Player Detection and Initial Analysis** 🏃‍♂️⚽️

1. **YOLOv11 Fine-Tuning for Player Detection** 🎯
   - Adapt YOLOv11 for detecting football players on the field.
   - **Objective**: Ensure robust detection of players, even under challenging scenarios like crowded scenes or varying lighting conditions.

In [None]:
import torch
# Report which accelerator backends this PyTorch build can use, one per line:
# first MPS, then CUDA.
print(
    torch.backends.mps.is_available(),
    torch.cuda.is_available(),
    sep="\n",
)

False
True


In [10]:
!yolo task=detect mode=train model="/content/drive/MyDrive/FootCVision/utils/yolo11s.pt" data="/content/drive/MyDrive/dataset/data.yaml" epochs=10 imgsz=640 device=0

Ultralytics 8.3.57 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=/content/drive/MyDrive/FootCVision/utils/yolo11s.pt, data=/content/drive/MyDrive/dataset/data.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=train2, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=Fa

1.2 **Inference on a YouTube Video**

In [8]:
!yolo task=detect mode=predict model="/Users/alyazouzou/Desktop/FootCVision/phase1/runs/detect/train/weights/best.pt" source="/Users/alyazouzou/Desktop/FootCVision/phase1/vids/demo_2.mov" device=mps

Ultralytics 8.3.57 🚀 Python-3.9.21 torch-2.5.1 MPS (Apple M3)
YOLO11s summary (fused): 238 layers, 9,414,348 parameters, 0 gradients, 21.3 GFLOPs

video 1/1 (frame 1/3094) /Users/alyazouzou/Desktop/FootCVision/phase1/vids/demo_2.mov: 384x640 2 goalkeepers, 16 players, 204.8ms
video 1/1 (frame 2/3094) /Users/alyazouzou/Desktop/FootCVision/phase1/vids/demo_2.mov: 384x640 2 goalkeepers, 16 players, 23.3ms
video 1/1 (frame 3/3094) /Users/alyazouzou/Desktop/FootCVision/phase1/vids/demo_2.mov: 384x640 2 goalkeepers, 16 players, 14.0ms
video 1/1 (frame 4/3094) /Users/alyazouzou/Desktop/FootCVision/phase1/vids/demo_2.mov: 384x640 2 goalkeepers, 16 players, 15.3ms
video 1/1 (frame 5/3094) /Users/alyazouzou/Desktop/FootCVision/phase1/vids/demo_2.mov: 384x640 2 goalkeepers, 16 players, 14.1ms
video 1/1 (frame 6/3094) /Users/alyazouzou/Desktop/FootCVision/phase1/vids/demo_2.mov: 384x640 2 goalkeepers, 17 players, 17.1ms
video 1/1 (frame 7/3094) /Users/alyazouzou/Desktop/FootCVision/phase1/vids/dem

2. **Two Approaches for Team Differentiation** 📊
   - **Color Histograms for Team Identification**:
     - Extract **color histograms** from bounding box regions of detected players.
     - Cluster players into teams based on dominant uniform colors.
     - **Objective**: Provide a visually interpretable method for distinguishing teams based on color features.

In [13]:
import cv2
import numpy as np
from sklearn.cluster import KMeans
from ultralytics import YOLO

# Function to extract color histograms from bounding boxes
def extract_color_histogram(frame, bbox):
    """Return a normalized Hue/Saturation histogram for one bounding box.

    Args:
        frame: Image array indexed as ``frame[y, x]``. It is converted with
            ``cv2.COLOR_BGR2HSV``, so a BGR image is expected.
        bbox: ``(x1, y1, x2, y2)`` corners (top-left, bottom-right) in pixels.

    Returns:
        A flat array of 64 values (8 hue bins x 8 saturation bins). If the box
        has no area inside the frame, an all-zero histogram is returned
        instead of failing inside OpenCV on an empty crop.
    """
    hue_bins, sat_bins = 8, 8
    frame_h, frame_w = frame.shape[:2]
    x1, y1, x2, y2 = bbox

    # Clamp to the image: a negative coordinate would otherwise be treated as
    # a from-the-end index by the slice below and select the wrong pixels.
    left = min(max(int(x1), 0), frame_w)
    right = min(max(int(x2), 0), frame_w)
    top = min(max(int(y1), 0), frame_h)
    bottom = min(max(int(y2), 0), frame_h)

    if right <= left or bottom <= top:
        # Degenerate or fully out-of-frame box: nothing to histogram.
        return np.zeros(hue_bins * sat_bins, dtype=np.float32)

    # Crop the player region and move to HSV for the colour comparison.
    player_roi = frame[top:bottom, left:right]
    player_roi_hsv = cv2.cvtColor(player_roi, cv2.COLOR_BGR2HSV)

    # Only channels 0 (hue) and 1 (saturation) are histogrammed; the value
    # channel is not part of the feature.
    hist = cv2.calcHist(
        [player_roi_hsv], [0, 1], None, [hue_bins, sat_bins], [0, 180, 0, 256]
    )
    cv2.normalize(hist, hist)  # in-place, so boxes of different size compare
    return hist.flatten()

# Function to detect teams using color histograms
def detect_teams_by_color(frame, results, class_ids=None):
    """Split the detections of one frame into two groups by jersey colour.

    Args:
        frame: The image the detections were computed on.
        results: Iterable of Ultralytics ``Results`` objects for ``frame``.
        class_ids: Optional iterable of class indices to keep. ``None`` (the
            default) keeps every detection, whatever its class. Pass the
            index of the player class to keep the ball and referee out of
            the clustering.

    Returns:
        ``(labels, bboxes)`` where ``bboxes`` is a list of
        ``[x1, y1, x2, y2]`` lists and ``labels[i]`` (0 or 1) is the cluster
        of ``bboxes[i]``. With fewer than two boxes no clustering is possible
        and every label is 0.
    """
    wanted = None if class_ids is None else {int(c) for c in class_ids}
    team_histograms = []
    bboxes = []

    for result in results:
        boxes = result.boxes
        if boxes is None:
            continue
        corners = boxes.xyxy.tolist()
        # Class indices are only read when a filter was requested.
        classes = boxes.cls.tolist() if wanted is not None else [None] * len(corners)
        for (x1, y1, x2, y2), cls in zip(corners, classes):
            if wanted is not None and int(cls) not in wanted:
                continue
            box = [x1, y1, x2, y2]
            team_histograms.append(extract_color_histogram(frame, box))
            bboxes.append(box)

    # KMeans raises when asked for more clusters than there are samples.
    if len(bboxes) < 2:
        return np.zeros(len(bboxes), dtype=int), bboxes

    kmeans = KMeans(n_clusters=2, random_state=0)
    labels = kmeans.fit_predict(np.asarray(team_histograms))
    return labels, bboxes

# Example for inference and team differentiation
def perform_inference_and_team_detection(video_path, model_path):
    """Play a video with every detection boxed and tagged "Team 1"/"Team 2".

    Each frame is run through the YOLO model, the detections are split into
    two colour clusters by ``detect_teams_by_color``, and the annotated frame
    is shown in an OpenCV window. Press ``q`` to stop early.

    Args:
        video_path: Path of the video file to read.
        model_path: Path of the Ultralytics YOLO weights file to load.
    """
    model = YOLO(model_path)
    cap = cv2.VideoCapture(video_path)

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            results = model.predict(source=frame)

            # NOTE: clustering is redone from scratch on every frame, so which
            # cluster gets label 0 is not tied to a particular team and the
            # colours may swap between frames.
            labels, bboxes = detect_teams_by_color(frame, results)

            for label, (x1, y1, x2, y2) in zip(labels, bboxes):
                # BGR colours: green for cluster 0, red for cluster 1.
                color = (0, 255, 0) if label == 0 else (0, 0, 255)
                top_left = (int(x1), int(y1))
                cv2.rectangle(frame, top_left, (int(x2), int(y2)), color, 2)
                cv2.putText(frame, f"Team {label + 1}", (top_left[0], top_left[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

            cv2.imshow('Frame', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()

# Example usage:
# Local demo clip and trained weights used for the colour-histogram demo.
video_path = '/Users/alyazouzou/Desktop/FootCVision/phase1/vids/demo_2.mov'
model_path = '/Users/alyazouzou/Desktop/FootCVision/phase1/runs/detect/train/weights/best.pt'
perform_inference_and_team_detection(
    video_path=video_path,
    model_path=model_path,
)



0: 384x640 2 goalkeepers, 16 players, 58.0ms
Speed: 1.2ms preprocess, 58.0ms inference, 0.4ms postprocess per image at shape (1, 3, 384, 640)
[ultralytics.engine.results.Results object with attributes:

boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: None
names: {0: 'ball', 1: 'goalkeeper', 2: 'player', 3: 'referee'}
obb: None
orig_img: array([[[ 23,  12,  25],
        [ 25,  15,  27],
        [ 22,  11,  24],
        ...,
        [ 47,  34,  53],
        [ 53,  40,  59],
        [ 44,  31,  49]],

       [[ 30,  19,  32],
        [ 33,  23,  35],
        [ 32,  22,  34],
        ...,
        [ 61,  48,  67],
        [ 69,  56,  75],
        [ 55,  42,  61]],

       [[ 28,  18,  31],
        [ 32,  22,  34],
        [ 33,  23,  35],
        ...,
        [ 67,  52,  70],
        [ 73,  58,  76],
        [ 54,  39,  58]],

       ...,

       [[ 42, 107,  79],
        [ 56, 120,  93],
        [ 49, 122,  93],
        ...,
        [ 44, 127,  95],
        [  5,  79

AttributeError: 'list' object has no attribute 'xywh'

   - **K-Means Clustering for Team Analysis**:
     - Use player positions (bounding box coordinates) and spatial distributions to cluster players into two teams.
     - **Objective**: Offer a complementary approach to visualizing teams, focusing on spatial and positional analysis.

In [None]:
import numpy as np
from sklearn.cluster import KMeans

# Function to extract player positions (bounding box coordinates)
def extract_player_positions(results):
    """Collect the (x, y) box centre of every detection.

    Args:
        results: What an Ultralytics model call returns, i.e. a list of
            ``Results`` objects (one per image). A single ``Results`` object
            is accepted as well.

    Returns:
        Float array of shape ``(n_detections, 2)``; shape ``(0, 2)`` when
        nothing was detected.
    """
    if not isinstance(results, (list, tuple)):
        results = [results]

    positions = []
    for result in results:
        boxes = result.boxes
        if boxes is None:
            continue
        # ``xywh`` rows are (center_x, center_y, width, height): the first two
        # values already are the centre. Adding w/2 and h/2 on top of them
        # would move every point to the box's bottom-right corner.
        for center_x, center_y, _w, _h in boxes.xywh.tolist():
            positions.append([center_x, center_y])
    return np.array(positions, dtype=float).reshape(-1, 2)

# Function to detect teams based on player positions
def detect_teams_by_position(frame, results):
    """Cluster detections into two groups by where they stand in the frame.

    Args:
        frame: Unused; kept so the signature matches the colour-based variant.
        results: Detection results, forwarded to ``extract_player_positions``.

    Returns:
        Array with one label (0 or 1) per detection, in detection order. With
        fewer than two detections no clustering is possible and every label
        is 0.
    """
    player_positions = extract_player_positions(results)

    # KMeans raises when asked for more clusters than there are samples.
    if len(player_positions) < 2:
        return np.zeros(len(player_positions), dtype=int)

    kmeans = KMeans(n_clusters=2, random_state=0)  # Assume two teams
    return kmeans.fit_predict(player_positions)

# Example of team detection based on player positions
def perform_inference_and_team_analysis(video_path, model_path):
    """Play a video with detections boxed and tagged by positional cluster.

    Each frame is run through the YOLO model, detections are clustered by box
    centre, and the annotated frame is shown in an OpenCV window. Press ``q``
    to stop early.

    Args:
        video_path: Path of the video file to read.
        model_path: Path of the Ultralytics YOLO weights file to load.
    """
    model = YOLO(model_path)
    cap = cv2.VideoCapture(video_path)

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Calling the model returns a list of Results, not an object with
            # an ``xywh`` attribute.
            results = model(frame)

            labels = detect_teams_by_position(frame, results)

            # Corner-format boxes, gathered in the same order the positions
            # were extracted in, so labels[i] belongs to bboxes[i].
            bboxes = [
                box
                for result in results
                if result.boxes is not None
                for box in result.boxes.xyxy.tolist()
            ]

            for label, (x1, y1, x2, y2) in zip(labels, bboxes):
                # BGR colours: green for cluster 0, red for cluster 1.
                color = (0, 255, 0) if label == 0 else (0, 0, 255)
                # OpenCV drawing calls need integer pixel coordinates.
                top_left = (int(x1), int(y1))
                cv2.rectangle(frame, top_left, (int(x2), int(y2)), color, 2)
                cv2.putText(frame, f"Team {label + 1}", (top_left[0], top_left[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

            cv2.imshow('Frame', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()

# Example usage:
# Placeholder paths: point these at a real video and weights file before
# running the positional-clustering demo.
video_path = '/path/to/video.mp4'
model_path = '/path/to/your/model.pt'
perform_inference_and_team_analysis(
    video_path=video_path,
    model_path=model_path,
)


3. **Conformal Object Detection with the puncc library** 📏

In [None]:
import numpy as np
import cv2
from ultralytics import YOLO
from puncc import ConformalPrediction

# Load the fine-tuned detector through the Ultralytics YOLO wrapper
# (placeholder path: replace with the real weights file).
model = YOLO(model='/path/to/your/model.pt')

# Function to run inference and apply conformal prediction
def conformal_object_detection(frame, model, alpha=0.1):
    """Run YOLO on one frame and annotate it with boxes and prediction sets.

    Args:
        frame: Image to run detection on; annotations are drawn onto it.
        model: Callable that takes the frame and returns Ultralytics-style
            results (a list whose first element has ``boxes`` and ``names``).
        alpha: Value forwarded to the conformal predictor. In conformal
            prediction this is the tolerated error (miscoverage) rate, so
            0.1 targets 90% coverage.

    Returns:
        The same ``frame`` object. It is returned unmodified when the model
        produced no detections.
    """
    results = model(frame)
    if not results:
        return frame

    # A model call returns a list of Results; one frame in, one Results out.
    result = results[0]
    boxes = result.boxes
    if boxes is None or len(boxes.xywh) == 0:
        return frame

    bboxes = np.asarray(boxes.xywh.tolist(), dtype=float)   # (cx, cy, w, h) rows
    confidences = np.asarray(boxes.conf.tolist(), dtype=float)
    class_ids = [int(c) for c in boxes.cls.tolist()]
    names = result.names  # maps class index -> class name

    # Placeholder: the detection confidence stands in for a residual. A real
    # setup would derive residuals from box regression errors.
    residuals = confidences

    # NOTE(review): verify this call against the puncc documentation. Its
    # documented API appears to live under ``deel.puncc`` (e.g. an object
    # detection wrapper that must be fitted on calibration data before
    # ``predict(X, alpha=...)``), so ``ConformalPrediction(alpha=...)`` and
    # ``predict(bboxes, residuals)`` may not exist in this form. TODO confirm.
    conformal_predictor = ConformalPrediction(alpha=alpha)
    prediction_sets = conformal_predictor.predict(bboxes, residuals)

    color = (0, 255, 0)  # Green (BGR) for every detection
    for i, (x, y, w, h) in enumerate(bboxes):
        # Look the name up by the detection's class id, not by its position
        # in the detection list.
        label = names[class_ids[i]]
        confidence = confidences[i]
        prediction_set = prediction_sets[i]

        # Convert the centre-format box to integer corner coordinates.
        left, top = int(x - w / 2), int(y - h / 2)
        right, bottom = int(x + w / 2), int(y + h / 2)

        cv2.rectangle(frame, (left, top), (right, bottom), color, 2)
        cv2.putText(frame, f"{label}: {confidence:.2f}", (left, top - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.9, color, 2)

        # Prediction set is written just below the box.
        prediction_text = f"Prediction Set: {prediction_set}"
        cv2.putText(frame, prediction_text, (left, bottom + 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2)

    return frame

# Example usage:
# Placeholder path: replace with a real video before running.
video_path = '/path/to/video.mp4'
cap = cv2.VideoCapture(video_path)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Apply conformal prediction for object detection
        frame_with_detections = conformal_object_detection(frame, model, alpha=0.1)

        # Display the frame with bounding boxes and prediction sets
        cv2.imshow('Conformal Object Detection', frame_with_detections)

        # Break loop on 'q' press
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the window even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()