<a href="https://colab.research.google.com/github/distil-comedy/real-time-football-analysis/blob/main/football_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Cell 1: Install Dependencies
!pip install ultralytics roboflow supervision opencv-python pyyaml

import torch
print(f"‚úÖ PyTorch: {torch.__version__}")
print(f"‚úÖ CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"‚úÖ GPU: {torch.cuda.get_device_name(0)}")

Collecting ultralytics
  Downloading ultralytics-8.3.226-py3-none-any.whl.metadata (37 kB)
Collecting roboflow
  Downloading roboflow-1.2.11-py3-none-any.whl.metadata (9.7 kB)
Collecting supervision
  Downloading supervision-0.26.1-py3-none-any.whl.metadata (13 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
Collecting idna==3.7 (from roboflow)
  Downloading idna-3.7-py3-none-any.whl.metadata (9.9 kB)
Collecting opencv-python-headless==4.10.0.84 (from roboflow)
  Downloading opencv_python_headless-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting pi-heif<2 (from roboflow)
  Downloading pi_heif-1.1.1-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (6.5 kB)
Collecting pillow-avif-plugin<2 (from roboflow)
  Downloading pillow_avif_plugin-1.5.2-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (2.1 kB)
Collecting filetype (from roboflow)
  Downlo

In [4]:
# Cell 1: Explore Your Google Drive Structure
from google.colab import drive
import os

drive.mount('/content/drive')

# Let's see what's actually in your Drive
def explore_drive_structure():
    base_path = "/content/drive/MyDrive"

    print("üîç Exploring your Google Drive structure...")
    print("=" * 50)

    # Check common folder names
    possible_folders = [
        "MYPROJECTS", "MyProjects", "myprojects",
        "football_analysis", "Football", "football"
    ]

    for folder in possible_folders:
        test_path = os.path.join(base_path, folder)
        if os.path.exists(test_path):
            print(f"‚úÖ Found: {test_path}")
            # List contents
            contents = os.listdir(test_path)
            print(f"   Contents: {contents}")

            # Go deeper if it's the main project folder
            for item in contents:
                item_path = os.path.join(test_path, item)
                if os.path.isdir(item_path):
                    print(f"   üìÅ {item}/ ‚Üí {os.listdir(item_path)[:5]}...")  # First 5 items
        else:
            print(f"‚ùå Not found: {test_path}")

explore_drive_structure()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
üîç Exploring your Google Drive structure...
‚ùå Not found: /content/drive/MyDrive/MYPROJECTS
‚úÖ Found: /content/drive/MyDrive/MyProjects
   Contents: ['extract_frames.py', 'labelimg_error.txt', 'football_analysis_dataset.zip', 'football_analysis_dataset', 'data', 'football_dataset']
   üìÅ football_analysis_dataset/ ‚Üí ['data.yaml', 'train.txt', 'labels']...
   üìÅ data/ ‚Üí ['videos', 'frames']...
   üìÅ football_dataset/ ‚Üí ['yolov8n.pt', 'football.yaml', 'verify_dataset.py', 'analyze_dataset.py', 'train_baseline.py']...
‚ùå Not found: /content/drive/MyDrive/myprojects
‚ùå Not found: /content/drive/MyDrive/football_analysis
‚ùå Not found: /content/drive/MyDrive/Football
‚ùå Not found: /content/drive/MyDrive/football


In [5]:
# Cell 2: Search for Football-related Files
def find_football_files():
    base_path = "/content/drive/MyDrive"

    print("\nüîé Searching for football-related files...")
    print("=" * 50)

    # Walk through entire Drive (with limit to avoid timeout)
    football_files = []
    for root, dirs, files in os.walk(base_path, topdown=True):
        # Check if this looks like your project
        if any(keyword in root.lower() for keyword in ['football', 'project', 'yolo', 'dataset']):
            print(f"üìÅ Potential project folder: {root}")
            print(f"   Files: {files[:10]}")  # First 10 files
            print(f"   Subdirs: {dirs[:10]}")  # First 10 subdirectories

        # Look for specific files
        for file in files:
            if any(keyword in file.lower() for keyword in ['football', 'yaml', 'yolo', 'train']):
                full_path = os.path.join(root, file)
                football_files.append(full_path)

        # Limit depth to avoid timeout
        if len(football_files) > 20:
            break

    print(f"\nüéØ Found {len(football_files)} football-related files:")
    for file in football_files[:10]:  # Show first 10
        print(f"   üìÑ {file}")

find_football_files()


üîé Searching for football-related files...
üìÅ Potential project folder: /content/drive/MyDrive/MyProjects
   Files: ['extract_frames.py', 'labelimg_error.txt', 'football_analysis_dataset.zip']
   Subdirs: ['football_analysis_dataset', 'data', 'football_dataset']
üìÅ Potential project folder: /content/drive/MyDrive/MyProjects/football_analysis_dataset
   Files: ['data.yaml', 'train.txt']
   Subdirs: ['labels']
üìÅ Potential project folder: /content/drive/MyDrive/MyProjects/football_analysis_dataset/labels
   Files: []
   Subdirs: ['train']
üìÅ Potential project folder: /content/drive/MyDrive/MyProjects/football_analysis_dataset/labels/train
   Files: ['frame_05300.txt', 'frame_06550.txt', 'frame_03500.txt', 'frame_12500.txt', 'frame_08000.txt', 'frame_00350.txt', 'frame_10250.txt', 'frame_10700.txt', 'frame_08350.txt', 'frame_11950.txt']
   Subdirs: []
üìÅ Potential project folder: /content/drive/MyDrive/MyProjects/data
   Files: []
   Subdirs: ['videos', 'frames']
üìÅ Potenti

In [6]:
# Cell 3: Copy Files Based on Actual Structure
def copy_correct_structure():
    """Copy files based on your actual Google Drive structure"""

    # UPDATE THESE PATHS based on what we found in previous cells
    actual_drive_path = "/content/drive/MyDrive/MyProjects"  # ‚ö†Ô∏è UPDATE THIS!

    if not os.path.exists(actual_drive_path):
        print(f"‚ùå Path not found: {actual_drive_path}")
        print("Please update the path above based on exploration results")
        return False

    print(f"üìÅ Found your project at: {actual_drive_path}")

    # Create Colab project structure
    colab_project = "/content/football_analysis"
    os.makedirs(colab_project, exist_ok=True)
    os.chdir(colab_project)

    # Map what to copy (these are common structures)
    copy_mappings = [
        # Source in Drive ‚Üí Destination in Colab
        (f"{actual_drive_path}/football_dataset", f"{colab_project}/football_dataset"),
        (f"{actual_drive_path}/data", f"{colab_project}/data"),
        (f"{actual_drive_path}/videos", f"{colab_project}/videos"),
        (f"{actual_drive_path}/frames", f"{colab_project}/frames"),
    ]

    # Also try to find files directly in MYPROJECTS
    direct_files = [
        'football.yaml', 'extract_frames.py', 'analyze_dataset.py',
        'verify_dataset.py', 'yolov8n.pt'
    ]

    successful_copies = 0

    # Copy folder structures
    for source, destination in copy_mappings:
        if os.path.exists(source):
            # Use shutil.copytree with dirs_exist_ok for Python 3.8+
            import shutil
            if os.path.isdir(source):
                shutil.copytree(source, destination, dirs_exist_ok=True)
                print(f"‚úÖ Copied folder: {source} ‚Üí {destination}")
                successful_copies += 1

    # Copy individual files
    for file in direct_files:
        source_file = os.path.join(actual_drive_path, file)
        if os.path.exists(source_file):
            shutil.copy2(source_file, colab_project)
            print(f"‚úÖ Copied file: {file}")
            successful_copies += 1

    # If nothing worked, try listing what's actually there
    if successful_copies == 0:
        print("\nü§î Let's see what's actually in your MYPROJECTS folder:")
        all_items = os.listdir(actual_drive_path)
        for item in all_items:
            item_path = os.path.join(actual_drive_path, item)
            item_type = "üìÅ Folder" if os.path.isdir(item_path) else "üìÑ File"
            print(f"   {item_type}: {item}")

    return successful_copies > 0

# Run this after you've identified the correct path
copy_correct_structure()

üìÅ Found your project at: /content/drive/MyDrive/MyProjects
‚úÖ Copied folder: /content/drive/MyDrive/MyProjects/football_dataset ‚Üí /content/football_analysis/football_dataset
‚úÖ Copied folder: /content/drive/MyDrive/MyProjects/data ‚Üí /content/football_analysis/data
‚úÖ Copied file: extract_frames.py


True

In [7]:
# Cell 4: Verify Dataset in Colab
import os
import yaml
from pathlib import Path

def verify_colab_dataset():
    print("üîç Verifying dataset structure in Colab...")
    print("=" * 50)

    colab_project = "/content/football_analysis"
    dataset_path = f"{colab_project}/football_dataset"

    # Check if dataset exists
    if not os.path.exists(dataset_path):
        print("‚ùå football_dataset not found in Colab!")
        return False

    print(f"‚úÖ football_dataset found at: {dataset_path}")

    # Check directory structure
    required_dirs = [
        "images/train",
        "images/val",
        "labels/train",
        "labels/val"
    ]

    all_good = True
    for dir_path in required_dirs:
        full_path = os.path.join(dataset_path, dir_path)
        if os.path.exists(full_path):
            file_count = len(os.listdir(full_path))
            print(f"‚úÖ {dir_path}: {file_count} files")
        else:
            print(f"‚ùå Missing: {dir_path}")
            all_good = False

    # Check and update YAML file for Colab
    yaml_path = os.path.join(dataset_path, "football.yaml")
    if os.path.exists(yaml_path):
        with open(yaml_path, 'r') as f:
            config = yaml.safe_load(f)

        print(f"\nüìä Original YAML config:")
        print(f"   Path: {config.get('path', 'Not set')}")
        print(f"   Train: {config.get('train', 'Not set')}")
        print(f"   Val: {config.get('val', 'Not set')}")
        print(f"   Classes: {config.get('nc', 'Not set')}")

        # Update paths for Colab
        config['path'] = dataset_path
        config['train'] = 'images/train'
        config['val'] = 'images/val'

        with open(yaml_path, 'w') as f:
            yaml.dump(config, f)

        print(f"\n‚úÖ Updated YAML config for Colab:")
        print(f"   Path: {config['path']}")
        print(f"   Train: {config['train']}")
        print(f"   Val: {config['val']}")

    else:
        print("‚ùå football.yaml not found!")
        all_good = False

    return all_good

# Run verification
if verify_colab_dataset():
    print("\nüéâ Dataset verification successful! Ready for training.")
else:
    print("\n‚ùå Dataset has issues that need to be fixed.")

üîç Verifying dataset structure in Colab...
‚úÖ football_dataset found at: /content/football_analysis/football_dataset
‚úÖ images/train: 236 files
‚úÖ images/val: 36 files
‚úÖ labels/train: 236 files
‚úÖ labels/val: 36 files

üìä Original YAML config:
   Path: E:/MyProjects/football_dataset
   Train: images/train
   Val: images/val
   Classes: 8

‚úÖ Updated YAML config for Colab:
   Path: /content/football_analysis/football_dataset
   Train: images/train
   Val: images/val

üéâ Dataset verification successful! Ready for training.


In [8]:
# Cell 5: Analyze Dataset
def analyze_colab_dataset():
    import yaml
    from collections import Counter

    dataset_path = "/content/football_analysis/football_dataset"
    yaml_path = os.path.join(dataset_path, "football.yaml")

    with open(yaml_path, 'r') as f:
        config = yaml.safe_load(f)

    class_names = config['names']

    print("üìä Analyzing dataset distribution...")
    print("=" * 50)

    # Analyze training labels
    labels_path = os.path.join(dataset_path, "labels/train")
    class_counts = Counter()
    total_objects = 0

    if os.path.exists(labels_path):
        label_files = [f for f in os.listdir(labels_path) if f.endswith('.txt')]
        print(f"Found {len(label_files)} label files")

        for label_file in label_files[:50]:  # Check first 50 files
            with open(os.path.join(labels_path, label_file), 'r') as f:
                for line in f:
                    if line.strip():
                        try:
                            class_id = int(line.split()[0])
                            class_counts[class_id] += 1
                            total_objects += 1
                        except ValueError:
                            continue

        print(f"\nüìà Class Distribution (from {total_objects} objects):")
        for class_id, count in class_counts.most_common():
            class_name = class_names.get(class_id, f"Unknown_{class_id}")
            percentage = (count / total_objects) * 100 if total_objects > 0 else 0
            print(f"   {class_name} (ID {class_id}): {count} objects ({percentage:.1f}%)")
    else:
        print("‚ùå Labels path not found")

analyze_colab_dataset()

üìä Analyzing dataset distribution...
Found 236 label files

üìà Class Distribution (from 1331 objects):
   player (ID 0): 963 objects (72.4%)
   penalty_spot (ID 6): 159 objects (11.9%)
   goalkeeper (ID 1): 94 objects (7.1%)
   referee (ID 2): 50 objects (3.8%)
   ball (ID 3): 24 objects (1.8%)
   center_circle (ID 7): 18 objects (1.4%)
   goal_post (ID 4): 17 objects (1.3%)
   corner_flag (ID 5): 6 objects (0.5%)


In [9]:
# Cell 6: Install Dependencies
!pip install ultralytics roboflow supervision opencv-python pyyaml

print("‚úÖ All dependencies installed!")

‚úÖ All dependencies installed!


In [10]:
# Cell 1: Diagnose Dataset Problems
import os
import cv2
import numpy as np
from pathlib import Path

def diagnose_dataset_issues():
    print("üîç Diagnosing dataset issues...")
    print("=" * 50)

    dataset_path = "/content/football_analysis/football_dataset"

    # Check image files
    train_images_path = os.path.join(dataset_path, "images/train")
    val_images_path = os.path.join(dataset_path, "images/val")

    corrupt_images = []
    valid_images = []

    # Check training images
    if os.path.exists(train_images_path):
        print(f"üìÅ Checking {len(os.listdir(train_images_path))} training images...")
        for img_file in os.listdir(train_images_path):
            if img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                img_path = os.path.join(train_images_path, img_file)
                try:
                    img = cv2.imread(img_path)
                    if img is not None and img.size > 0:
                        valid_images.append(img_path)
                    else:
                        corrupt_images.append(img_path)
                except Exception as e:
                    corrupt_images.append(img_path)

    print(f"‚úÖ Valid images: {len(valid_images)}")
    print(f"‚ùå Corrupt images: {len(corrupt_images)}")

    if corrupt_images:
        print("\nüö® Corrupt images found:")
        for corrupt in corrupt_images[:10]:  # Show first 10
            print(f"   {os.path.basename(corrupt)}")

    # Check label files
    train_labels_path = os.path.join(dataset_path, "labels/train")
    if os.path.exists(train_labels_path):
        print(f"\nüìù Checking {len(os.listdir(train_labels_path))} label files...")

        valid_labels = 0
        invalid_labels = 0

        for label_file in os.listdir(train_labels_path):
            if label_file.endswith('.txt'):
                label_path = os.path.join(train_labels_path, label_file)
                try:
                    with open(label_path, 'r') as f:
                        content = f.read().strip()
                        if content:  # Check if file has content
                            # Validate YOLO format
                            lines = content.split('\n')
                            valid_format = True
                            for line in lines:
                                parts = line.strip().split()
                                if len(parts) != 5:
                                    valid_format = False
                                    break
                                # Check if values are valid
                                try:
                                    class_id = int(parts[0])
                                    x_center = float(parts[1])
                                    y_center = float(parts[2])
                                    width = float(parts[3])
                                    height = float(parts[4])

                                    if not (0 <= x_center <= 1 and 0 <= y_center <= 1 and
                                            0 <= width <= 1 and 0 <= height <= 1):
                                        valid_format = False
                                        break
                                except ValueError:
                                    valid_format = False
                                    break

                            if valid_format:
                                valid_labels += 1
                            else:
                                invalid_labels += 1
                        else:
                            invalid_labels += 1
                except Exception as e:
                    invalid_labels += 1

        print(f"‚úÖ Valid label files: {valid_labels}")
        print(f"‚ùå Invalid label files: {invalid_labels}")

    return valid_images, corrupt_images

valid_imgs, corrupt_imgs = diagnose_dataset_issues()

üîç Diagnosing dataset issues...
üìÅ Checking 236 training images...
‚úÖ Valid images: 236
‚ùå Corrupt images: 0

üìù Checking 236 label files...
‚úÖ Valid label files: 0
‚ùå Invalid label files: 236


In [11]:
# Cell 2: Analyze Label Format Problems
def analyze_label_problems():
    print("üîç Analyzing label format problems...")
    print("=" * 50)

    dataset_path = "/content/football_analysis/football_dataset"
    train_labels_path = os.path.join(dataset_path, "labels/train")

    if not os.path.exists(train_labels_path):
        print("‚ùå Labels path not found!")
        return

    problem_types = {
        'empty_files': 0,
        'wrong_columns': 0,
        'invalid_numbers': 0,
        'out_of_bounds': 0,
        'other_issues': 0
    }

    print("üìã Sample of problematic labels:")
    print("-" * 50)

    sample_count = 0
    for label_file in os.listdir(train_labels_path)[:10]:  # Check first 10 files
        if label_file.endswith('.txt'):
            label_path = os.path.join(train_labels_path, label_file)

            try:
                with open(label_path, 'r') as f:
                    content = f.read().strip()

                if not content:
                    problem_types['empty_files'] += 1
                    if sample_count < 3:
                        print(f"üìÑ {label_file}: EMPTY FILE")
                        sample_count += 1
                    continue

                lines = content.split('\n')
                for i, line in enumerate(lines):
                    line = line.strip()
                    if not line:
                        continue

                    parts = line.split()

                    # Check number of columns
                    if len(parts) != 5:
                        problem_types['wrong_columns'] += 1
                        if sample_count < 3:
                            print(f"üìÑ {label_file} line {i+1}: WRONG COLUMNS ({len(parts)} instead of 5)")
                            print(f"   Content: {line}")
                            sample_count += 1
                        break

                    # Check if values are numbers
                    try:
                        class_id = int(parts[0])
                        x_center = float(parts[1])
                        y_center = float(parts[2])
                        width = float(parts[3])
                        height = float(parts[4])
                    except ValueError:
                        problem_types['invalid_numbers'] += 1
                        if sample_count < 3:
                            print(f"üìÑ {label_file} line {i+1}: INVALID NUMBERS")
                            print(f"   Content: {line}")
                            sample_count += 1
                        break

                    # Check if values are within bounds
                    if not (0 <= x_center <= 1 and 0 <= y_center <= 1 and 0 <= width <= 1 and 0 <= height <= 1):
                        problem_types['out_of_bounds'] += 1
                        if sample_count < 3:
                            print(f"üìÑ {label_file} line {i+1}: VALUES OUT OF BOUNDS (0-1)")
                            print(f"   Values: {x_center}, {y_center}, {width}, {height}")
                            sample_count += 1
                        break

            except Exception as e:
                problem_types['other_issues'] += 1
                if sample_count < 3:
                    print(f"üìÑ {label_file}: OTHER ERROR - {e}")
                    sample_count += 1

    print("\nüìä Problem Summary:")
    print("=" * 50)
    for problem, count in problem_types.items():
        print(f"   {problem}: {count} files")

    return problem_types

problem_types = analyze_label_problems()

üîç Analyzing label format problems...
üìã Sample of problematic labels:
--------------------------------------------------
üìÑ frame_09950.txt line 5: WRONG COLUMNS (6 instead of 5)
   Content: 2 0.671130 0.482329 0.010958 0.019046 0
üìÑ frame_02250.txt line 2: WRONG COLUMNS (6 instead of 5)
   Content: 2 0.462690 0.438116 0.008818 0.014935 0
üìÑ frame_09200.txt line 1: WRONG COLUMNS (6 instead of 5)
   Content: 2 0.564224 0.564343 0.013406 0.023352 0

üìä Problem Summary:
   empty_files: 0 files
   wrong_columns: 10 files
   invalid_numbers: 0 files
   out_of_bounds: 0 files
   other_issues: 0 files


In [12]:
# Cell 3 FIXED: Remove the 6th Column from Labels
def fix_extra_columns():
    print("üõ†Ô∏è Fixing extra columns in labels...")
    print("=" * 50)

    dataset_path = "/content/football_analysis/football_dataset"
    train_labels_path = os.path.join(dataset_path, "labels/train")
    val_labels_path = os.path.join(dataset_path, "labels/val")

    fixed_count = 0

    def fix_labels_in_folder(labels_folder, folder_name):
        nonlocal fixed_count
        if not os.path.exists(labels_folder):
            print(f"‚ùå {folder_name} labels path not found!")
            return

        for label_file in os.listdir(labels_folder):
            if not label_file.endswith('.txt'):
                continue

            label_path = os.path.join(labels_folder, label_file)

            try:
                with open(label_path, 'r') as f:
                    content = f.read().strip()

                if not content:
                    continue

                lines = content.split('\n')
                fixed_lines = []

                for line in lines:
                    line = line.strip()
                    if not line:
                        continue

                    parts = line.split()

                    # Fix: Remove the 6th column (keep first 5 columns)
                    if len(parts) == 6:
                        # Keep only class_id, x_center, y_center, width, height
                        fixed_parts = parts[:5]
                        fixed_line = ' '.join(fixed_parts)
                        fixed_lines.append(fixed_line)
                        print(f"‚úÖ Fixed {label_file}: removed 6th column '{parts[5]}'")
                    elif len(parts) == 5:
                        # Already correct format
                        fixed_lines.append(line)
                    else:
                        # Wrong number of columns, skip this line
                        print(f"‚ö†Ô∏è Skipping line in {label_file}: {len(parts)} columns")
                        continue

                # Write back fixed lines
                if fixed_lines:
                    with open(label_path, 'w') as f:
                        f.write('\n'.join(fixed_lines))
                    fixed_count += 1

            except Exception as e:
                print(f"‚ùå Error processing {label_file}: {e}")

    # Fix both train and validation labels
    fix_labels_in_folder(train_labels_path, "Training")
    fix_labels_in_folder(val_labels_path, "Validation")

    print(f"\n‚úÖ Fixed {fixed_count} label files")
    return fixed_count

fixed_count = fix_extra_columns()

üõ†Ô∏è Fixing extra columns in labels...
‚úÖ Fixed frame_09950.txt: removed 6th column '0'
‚úÖ Fixed frame_09950.txt: removed 6th column '1'
‚úÖ Fixed frame_09950.txt: removed 6th column '2'
‚úÖ Fixed frame_09950.txt: removed 6th column '3'
‚úÖ Fixed frame_09950.txt: removed 6th column '4'
‚úÖ Fixed frame_09950.txt: removed 6th column '5'
‚úÖ Fixed frame_09950.txt: removed 6th column '6'
‚úÖ Fixed frame_09950.txt: removed 6th column '7'
‚úÖ Fixed frame_09950.txt: removed 6th column '8'
‚úÖ Fixed frame_09950.txt: removed 6th column '9'
‚úÖ Fixed frame_09950.txt: removed 6th column '10'
‚úÖ Fixed frame_09950.txt: removed 6th column '11'
‚úÖ Fixed frame_09950.txt: removed 6th column '12'
‚úÖ Fixed frame_09950.txt: removed 6th column '13'
‚úÖ Fixed frame_09950.txt: removed 6th column '14'
‚úÖ Fixed frame_09950.txt: removed 6th column '15'
‚úÖ Fixed frame_09950.txt: removed 6th column '16'
‚úÖ Fixed frame_09950.txt: removed 6th column '17'
‚úÖ Fixed frame_09950.txt: removed 6th column '19'

In [13]:
# Cell 4: Verify Fixed Labels
def verify_fixed_labels():
    print("‚úÖ Verifying fixed labels...")
    print("=" * 50)

    dataset_path = "/content/football_analysis/football_dataset"
    train_labels_path = os.path.join(dataset_path, "labels/train")

    valid_count = 0
    invalid_count = 0
    column_stats = {}

    if os.path.exists(train_labels_path):
        for label_file in os.listdir(train_labels_path):
            if label_file.endswith('.txt'):
                label_path = os.path.join(train_labels_path, label_file)

                try:
                    with open(label_path, 'r') as f:
                        content = f.read().strip()

                    if not content:
                        invalid_count += 1
                        continue

                    lines = content.split('\n')
                    all_valid = True

                    for line_num, line in enumerate(lines):
                        parts = line.strip().split()
                        num_columns = len(parts)

                        # Track column statistics
                        if num_columns not in column_stats:
                            column_stats[num_columns] = 0
                        column_stats[num_columns] += 1

                        # Check if valid
                        if num_columns != 5:
                            all_valid = False
                            print(f"‚ùå {label_file} line {line_num+1}: {num_columns} columns (should be 5)")
                            break

                        try:
                            class_id = int(parts[0])
                            x_center = float(parts[1])
                            y_center = float(parts[2])
                            width = float(parts[3])
                            height = float(parts[4])

                            if not (0 <= class_id <= 7 and 0 <= x_center <= 1 and
                                    0 <= y_center <= 1 and 0 <= width <= 1 and 0 <= height <= 1):
                                all_valid = False
                                print(f"‚ùå {label_file} line {line_num+1}: values out of bounds")
                                break

                        except ValueError:
                            all_valid = False
                            print(f"‚ùå {label_file} line {line_num+1}: invalid numbers")
                            break

                    if all_valid:
                        valid_count += 1
                    else:
                        invalid_count += 1

                except Exception as e:
                    print(f"‚ùå Error reading {label_file}: {e}")
                    invalid_count += 1

    print(f"\nüìä Final Label Status:")
    print(f"   ‚úÖ Valid labels: {valid_count}")
    print(f"   ‚ùå Invalid labels: {invalid_count}")

    if column_stats:
        print(f"\nüìà Column Statistics:")
        for cols, count in sorted(column_stats.items()):
            print(f"   {cols} columns: {count} lines")

    success_rate = (valid_count/(valid_count+invalid_count))*100 if (valid_count+invalid_count) > 0 else 0
    print(f"   üìä Success rate: {success_rate:.1f}%")

    return valid_count > 0 and invalid_count == 0

is_ready = verify_fixed_labels()

‚úÖ Verifying fixed labels...

üìä Final Label Status:
   ‚úÖ Valid labels: 236
   ‚ùå Invalid labels: 0

üìà Column Statistics:
   5 columns: 6255 lines
   üìä Success rate: 100.0%


In [None]:
# Cell 5: Quick Training Test
def quick_training_test():
    if not is_ready:
        print("‚ùå Labels still need fixing. Please check the errors above.")
        return False

    print("üöÄ Starting quick training test...")
    print("=" * 50)

    from ultralytics import YOLO
    import torch

    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")

    try:
        # Load pre-trained model
        model = YOLO('yolov8n.pt')
        print("‚úÖ Model loaded successfully")

        # Quick test with minimal epochs
        print("Starting training test (3 epochs)...")
        results = model.train(
            data='/content/football_analysis/football_dataset/football.yaml',
            epochs=3,           # Very few epochs for testing
            imgsz=640,
            batch=4,            # Small batch size
            device='cpu',       # Use CPU to avoid GPU issues
            workers=1,          # Single worker for stability
            verbose=True,       # See detailed output
            save=False,         # Don't save for quick test
            exist_ok=True,      # Overwrite if exists
        )

        print("üéâ SUCCESS! Training completed with fixed labels!")
        print("You can now run full training with more epochs.")
        return True

    except Exception as e:
        print(f"‚ùå Training failed: {e}")
        print("\nüîß Debugging info:")

        # Check YAML file
        yaml_path = '/content/football_analysis/football_dataset/football.yaml'
        if os.path.exists(yaml_path):
            import yaml
            with open(yaml_path, 'r') as f:
                config = yaml.safe_load(f)
            print(f"YAML config: {config}")
        else:
            print("YAML file not found!")

        return False

# Run the test
success = quick_training_test()

üöÄ Starting quick training test...
Creating new Ultralytics Settings v0.0.6 file ‚úÖ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
PyTorch version: 2.8.0+cu126
CUDA available: False
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 6.2MB 107.1MB/s 0.1s
‚úÖ Model loaded successfully
Starting training test (3 epochs)...
Ultralytics 8.3.226 üöÄ Python-3.12.12 torch-2.8.0+cu126 CPU (Intel Xeon CPU @ 2.20GHz)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=4, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix

In [None]:
# Cell 6: Full Model Training
def full_training():
    print("üöÄ Starting FULL YOLOv8 training...")
    print("=" * 50)

    from ultralytics import YOLO
    import torch

    print(f"üíª Device: {'GPU' if torch.cuda.is_available() else 'CPU'}")

    # Load the model that already learned from our quick test
    model = YOLO('yolov8n.pt')

    # Full training with optimized parameters
    results = model.train(
        data='/content/football_analysis/football_dataset/football.yaml',
        epochs=100,          # Full training
        imgsz=640,
        batch=16 if torch.cuda.is_available() else 8,  # Larger batches if GPU available
        patience=25,         # Early stopping patience
        save=True,
        device='cpu',        # Using CPU (you can change to 0 if GPU available)
        workers=2,
        lr0=0.01,           # Learning rate
        weight_decay=0.0005,
        augment=True,        # Data augmentation
        degrees=10,          # Rotation augmentation
        translate=0.1,       # Translation augmentation
        scale=0.5,           # Scale augmentation
        fliplr=0.5,          # Horizontal flip
        mosaic=1.0,          # Mosaic augmentation
        mixup=0.1,           # Mixup augmentation
        copy_paste=0.1,      # Copy-paste augmentation
        erasing=0.4,         # Random erasing
        verbose=True,
        pretrained=True,
        cos_lr=True,         # Cosine learning rate scheduler
        close_mosaic=10,     # Disable mosaic last epochs
    )

    print("‚úÖ Full training completed!")
    return results

# Start full training
full_training()