# SoccerNet SynLoc: Setup and Data Exploration

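This notebook covers:
1. Environment setup and GPU detection
2. Package installation
3. SoccerNet data download
4. Data exploration and visualization
5. Bird's-eye-view (BEV) visualization of ground-truth positions
6. Smoke tests of the dataset class, the DataLoader, and a model forward pass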

## 1. Environment Setup

In [None]:
# Detect the runtime: Colab is recognised by `google.colab` being among the loaded modules
import sys
IN_COLAB = 'google.colab' in sys.modules

# Report which environment was detected
print("Running in Google Colab" if IN_COLAB else "Running locally")

In [None]:
# GPU Detection
import torch

def detect_gpu():
    """Report the first CUDA device and print suggested training settings.

    Prints the device name and its total memory (bytes / 1e9, shown as GB),
    followed by a recommendation block selected by substring match on the
    device name.

    Returns:
        The device name string, or None when CUDA is not available.
    """
    if not torch.cuda.is_available():
        print("No GPU available. Training will be slow.")
        return None

    gpu_name = torch.cuda.get_device_name(0)
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    gpu_memory = total_bytes / 1e9

    print(f"GPU: {gpu_name}")
    print(f"Memory: {gpu_memory:.1f} GB")

    # Recommendations keyed by a marker searched for in the device name.
    # Entries are tried top to bottom; only the first match is printed.
    recommendations = (
        ('T4', (
            "\nRecommended settings for T4:",
            "  - Batch size: 8-16",
            "  - Input size: 640x640",
            "  - Model: tiny or s",
        )),
        ('V100', (
            "\nRecommended settings for V100:",
            "  - Batch size: 16-32",
            "  - Input size: 640x640 or 960x960",
            "  - Model: s or m",
        )),
        ('A100', (
            "\nRecommended settings for A100:",
            "  - Batch size: 32-64",
            "  - Input size: 960x960 or 1280x1280",
            "  - Model: m or l",
        )),
    )
    for marker, lines in recommendations:
        if marker in gpu_name:
            for line in lines:
                print(line)
            break
    else:
        # No marker matched the device name
        print(f"\nUnknown GPU. Adjust batch size based on memory.")

    return gpu_name

# Run the detection once and keep the result for later cells
GPU_NAME = detect_gpu()

## 2. Install Dependencies

In [None]:
if IN_COLAB:
    # Clone repository if not already present
    import os
    if not os.path.exists('soccernet-synloc'):
        !git clone https://github.com/YOUR_USERNAME/soccernet-synloc.git
    
    # Install package
    %cd soccernet-synloc
    !pip install -e .[dev] -q
    
    # Install SoccerNet for data download
    !pip install SoccerNet -q

In [None]:
# Verify installation
# Importing the package and two of the classes used later in this notebook
# raises ImportError right here if synloc is not installed in the current environment.
import synloc
from synloc.models import YOLOXPose
from synloc.data import SynLocDataset

# Reached only when every import above succeeded
print(f"synloc package loaded successfully")

## 3. Download SoccerNet Data

In [None]:
# Choose where the dataset is stored
import os
from pathlib import Path

if not IN_COLAB:
    # Local runs: path relative to the current working directory
    DATA_ROOT = Path('./data/synloc')
else:
    # Colab runs: mount Google Drive and keep the data there for persistent storage
    from google.colab import drive
    drive.mount('/content/drive')

    DATA_ROOT = Path('/content/drive/MyDrive/SoccerNet/synloc')

# Create the directory (and any missing parents); no error if it already exists
DATA_ROOT.mkdir(parents=True, exist_ok=True)
print(f"Data root: {DATA_ROOT}")

In [None]:
# Download SoccerNet SynLoc dataset
from SoccerNet.Downloader import SoccerNetDownloader

# DATA_ROOT is handed over as a plain string; presumably downloads land beneath it.
downloader = SoccerNetDownloader(LocalDirectory=str(DATA_ROOT))

# Download synloc task data
# You'll need your SoccerNet credentials
# NOTE(review): no credentials are supplied anywhere in this cell — confirm whether
# this task needs a password set on the downloader before the call below.
# NOTE(review): confirm that "synloc" is a task name accepted by downloadDataTask.
# The request is issued every time this cell runs; there is no "already downloaded" guard here.
downloader.downloadDataTask(
    task="synloc",
    split=["train", "valid", "test", "challenge"]
)

In [None]:
# Report, for each split, whether its annotation file exists under DATA_ROOT
expected_files = [
    'train/annotations.json',
    'valid/annotations.json',
    'test/annotations.json',
    'challenge/annotations.json',
]

for rel_path in expected_files:
    if not (DATA_ROOT / rel_path).exists():
        print(f"Missing: {rel_path}")
        continue
    print(f"Found: {rel_path}")

## 4. Data Exploration

In [None]:
import json
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter

# Read the training annotation file and parse it as JSON
train_ann_path = DATA_ROOT / 'train/annotations.json'
train_ann = json.loads(train_ann_path.read_text())

# Top-level structure and sizes of the sections used in the rest of the notebook
print(f"Keys: {train_ann.keys()}")
n_images = len(train_ann['images'])
n_annotations = len(train_ann['annotations'])
print(f"Number of images: {n_images}")
print(f"Number of annotations: {n_annotations}")
print(f"Categories: {train_ann['categories']}")

In [None]:
# Show the fields of the first image record
img = train_ann['images'][0]
print("Sample image info:")
for key, value in img.items():
    # Short values are printed in full
    if not (isinstance(value, list) and len(value) > 5):
        print(f"  {key}: {value}")
        continue
    # Lists with more than 5 entries are summarised as type name and length
    print(f"  {key}: {type(value).__name__}[{len(value)}]")

In [None]:
# Show the fields of the first annotation record
ann = train_ann['annotations'][0]
print("Sample annotation info:")
for key, value in ann.items():
    # Short values are printed in full
    if not (isinstance(value, list) and len(value) > 5):
        print(f"  {key}: {value}")
        continue
    # Lists with more than 5 entries are summarised as type name and length
    print(f"  {key}: {type(value).__name__}[{len(value)}]")

In [None]:
# Tally how many annotations (athletes) reference each image id
athletes_per_img = Counter(record['image_id'] for record in train_ann['annotations'])

counts = list(athletes_per_img.values())
print(f"Athletes per image:")
print(f"  Min: {min(counts)}")
print(f"  Max: {max(counts)}")
print(f"  Mean: {np.mean(counts):.1f}")
print(f"  Median: {np.median(counts):.1f}")

# Histogram of the per-image athlete counts
fig, ax = plt.subplots(figsize=(10, 4))
ax.hist(counts, bins=30, edgecolor='black')
ax.set_xlabel('Number of Athletes')
ax.set_ylabel('Number of Images')
ax.set_title('Distribution of Athletes per Image')
plt.show()

In [None]:
# Analyze keypoints
# Names come from the first category; fall back to the two expected names when absent.
keypoint_names = train_ann['categories'][0].get('keypoints', ['pelvis', 'pelvis_ground'])
print(f"Keypoint names: {keypoint_names}")

# Check keypoint visibility
# Flag -> bucket: 0 = not labeled, 1 = occluded, any other value = visible.
visibility_counts = {name: {'visible': 0, 'occluded': 0, 'not_labeled': 0}
                     for name in keypoint_names}

for ann in train_ann['annotations']:
    # Flat [x, y, v, x, y, v, ...] list -> one (x, y, v) row per keypoint
    kps = np.array(ann['keypoints']).reshape(-1, 3)
    # zip stops at the shorter sequence, so an annotation carrying fewer keypoints
    # than there are names is tallied for the ones it has instead of raising IndexError.
    for name, kp_row in zip(keypoint_names, kps):
        v = int(kp_row[2])
        if v == 0:
            visibility_counts[name]['not_labeled'] += 1
        elif v == 1:
            visibility_counts[name]['occluded'] += 1
        else:
            visibility_counts[name]['visible'] += 1

print("\nKeypoint visibility:")
# Named `status_counts` so the `counts` list built in the athletes-per-image cell
# is not overwritten with a dict.
for name, status_counts in visibility_counts.items():
    total = sum(status_counts.values())
    print(f"  {name}:")
    for status, count in status_counts.items():
        # Guard the percentage against an empty tally (no annotations for this keypoint)
        pct = 100 * count / total if total else 0.0
        print(f"    {status}: {count} ({pct:.1f}%)")

## 5. Visualize Samples

In [None]:
from PIL import Image
import cv2

def visualize_sample(img_info, annotations, data_root, ax=None, split='train'):
    """Draw one image with its bounding boxes and keypoints.

    Args:
        img_info: Image record; its 'file_name' and 'id' entries are read.
        annotations: Annotation records for that image. Each one provides
            'bbox' as (x, y, w, h) and 'keypoints' as a flat list of
            (x, y, v) triplets.
        data_root: Dataset root as a pathlib-style path (must support `/`
            and `.exists()`).
        ax: Matplotlib axes to draw on. A new 12x8 figure is created when None.
        split: Sub-directory of `data_root` whose `images/` folder is searched
            first. Defaults to 'train', the previously hard-coded value.

    Returns:
        The axes that were drawn on.
    """
    if ax is None:
        fig, ax = plt.subplots(figsize=(12, 8))

    # Load image
    img_path = data_root / split / 'images' / img_info['file_name']
    if not img_path.exists():
        # Try alternative path
        img_path = data_root / img_info['file_name']

    # Context manager closes the underlying file handle once the image is drawn
    with Image.open(img_path) as img:
        ax.imshow(img)

    # Draw annotations
    colors = ['red', 'blue']  # pelvis, pelvis_ground

    for ann in annotations:
        # Draw bbox
        x, y, w, h = ann['bbox']
        rect = plt.Rectangle((x, y), w, h, fill=False,
                              edgecolor='yellow', linewidth=2)
        ax.add_patch(rect)

        # Draw keypoints
        kps = np.array(ann['keypoints']).reshape(-1, 3)
        for i, (kx, ky, v) in enumerate(kps):
            # Only keypoints with a non-zero visibility flag are drawn
            if v > 0:
                # Cycle through the palette so extra keypoints do not raise IndexError
                ax.scatter(kx, ky, c=colors[i % len(colors)], s=50, zorder=10)

    ax.set_title(f"Image {img_info['id']} - {len(annotations)} athletes")
    ax.axis('off')

    return ax

In [None]:
# Group annotations by image
img_to_anns = {}
for ann in train_ann['annotations']:
    img_to_anns.setdefault(ann['image_id'], []).append(ann)

# Create image id to info mapping
img_id_to_info = {img['id']: img for img in train_ann['images']}

# Visualize random samples
# Fixed seed so the same images are selected on every run
np.random.seed(42)
# Sampling without replacement raises if more items are requested than exist,
# so cap the request at the number of annotated images.
n_samples = min(4, len(img_to_anns))
sample_ids = np.random.choice(list(img_to_anns.keys()), size=n_samples, replace=False)

fig, axes = plt.subplots(2, 2, figsize=(16, 12))
# Hide grid cells that will not receive an image
for ax in axes.flatten()[n_samples:]:
    ax.axis('off')
for ax, img_id in zip(axes.flatten(), sample_ids):
    try:
        visualize_sample(img_id_to_info[img_id], img_to_anns[img_id], DATA_ROOT, ax)
    except Exception as e:
        # Best effort: one unreadable image should not abort the whole grid,
        # so the error is shown in that panel's title instead.
        ax.set_title(f"Error loading image {img_id}: {e}")
plt.tight_layout()
plt.show()

## 6. Visualize BEV Projections

In [None]:
from synloc.visualization import draw_pitch, visualize_bev_predictions

def visualize_bev_sample(img_info, annotations):
    """Plot the ground-truth pitch positions of one image on a pitch drawing.

    Annotations without a 'position_on_pitch' entry are skipped. When none
    has one, a message is printed and nothing is plotted.
    """
    # First two coordinates of every available pitch position
    xy_list = [
        record['position_on_pitch'][:2]
        for record in annotations
        if 'position_on_pitch' in record
    ]

    if not xy_list:
        print("No position_on_pitch data available")
        return

    xy = np.array(xy_list)
    xs, ys = xy[:, 0], xy[:, 1]

    _, pitch_ax = plt.subplots(figsize=(12, 8))
    pitch_ax = draw_pitch(ax=pitch_ax)
    pitch_ax.scatter(
        xs, ys,
        c='red', s=100, marker='o',
        edgecolors='white', linewidths=2,
        label='Athletes', zorder=10,
    )
    pitch_ax.set_title(f"Image {img_info['id']} - BEV View")
    pitch_ax.legend(loc='upper right')
    plt.show()

# Visualize BEV for the first of the randomly sampled images
sample_id = sample_ids[0]
visualize_bev_sample(img_id_to_info[sample_id], img_to_anns[sample_id])

## 7. Test Dataset Class

In [None]:
from synloc.data import SynLocDataset, get_train_transforms, get_val_transforms

# Dataset arguments collected in one place before construction
dataset_kwargs = {
    'ann_file': str(DATA_ROOT / 'train/annotations.json'),
    'img_dir': str(DATA_ROOT / 'train/images'),
    # Val transforms are used on the train split here for visualization
    'transforms': get_val_transforms(640),
    'input_size': (640, 640),
}

# Create dataset
train_dataset = SynLocDataset(**dataset_kwargs)

print(f"Dataset size: {len(train_dataset)}")

In [None]:
# Load a sample
# Index 0 of the dataset built in the previous cell (val transforms, 640x640 input).
sample = train_dataset[0]

# `sample` is read as a mapping below; the entries inspected are 'image',
# 'bboxes' and 'keypoints'.
print("Sample keys:", sample.keys())
print(f"Image shape: {sample['image'].shape}")
print(f"Number of bboxes: {len(sample['bboxes'])}")
print(f"Keypoints shape: {sample['keypoints'].shape}")

In [None]:
# Visualize preprocessed sample
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

# Image with annotations: move the channel axis last, as imshow expects
img = sample['image'].permute(1, 2, 0).numpy()
# Denormalize
# NOTE(review): assumes the dataset transforms normalized with exactly this mean/std — confirm
mean = np.array([0.485, 0.456, 0.406])
std = np.array([0.229, 0.224, 0.225])
img = img * std + mean
img = np.clip(img, 0, 1)

axes[0].imshow(img)
axes[0].set_title('Preprocessed Image')

# Draw keypoints on image
kpts = sample['keypoints'].numpy()
h, w = sample['image'].shape[1:]
# NOTE(review): assumes keypoints are indexed [athlete, keypoint, (x, y, ...)] with
# x and y normalized to [0, 1] — confirm against SynLocDataset
for kpt in kpts:
    # Scale x by the width and y by the height. The previous code multiplied the
    # whole (x, y) pair by w and again by h, which plotted (x*w, x*h) and
    # (y*w, y*h) instead of (x*w, y*h).
    for (kx, ky), color in zip(kpt[:2, :2], ('red', 'blue')):  # pelvis, pelvis ground
        axes[0].scatter(kx * w, ky * h, c=color, s=50)

# BEV projection
if 'position_on_pitch' in sample:
    positions = sample['position_on_pitch'].numpy()
    axes[1] = draw_pitch(ax=axes[1])
    axes[1].scatter(positions[:, 0], positions[:, 1],
                   c='red', s=100, marker='o',
                   edgecolors='white', linewidths=2)
    axes[1].set_title('BEV Positions')
else:
    # Nothing to draw: hide the empty panel rather than show blank axes
    axes[1].axis('off')

plt.tight_layout()
plt.show()

## 8. Test DataLoader

In [None]:
from torch.utils.data import DataLoader

# Loader settings for a small smoke test; batching is delegated to the dataset's own collate_fn
loader_options = dict(
    batch_size=4,
    shuffle=True,
    num_workers=2,
    collate_fn=SynLocDataset.collate_fn,
)
train_loader = DataLoader(train_dataset, **loader_options)

# Test batch loading: pull a single batch
batch = next(iter(train_loader))

# Summarise each entry: tensors by shape, lists by length, anything else by type
print("Batch contents:")
for key, value in batch.items():
    if isinstance(value, torch.Tensor):
        print(f"  {key}: {value.shape}")
        continue
    if isinstance(value, list):
        print(f"  {key}: list[{len(value)}]")
        continue
    print(f"  {key}: {type(value)}")

## 9. Test Model Forward Pass

In [None]:
from synloc.models import YOLOXPose

# Create model
model_config = dict(variant='tiny', num_keypoints=2, input_size=(640, 640))
model = YOLOXPose(**model_config)

# Count parameters: collect (element count, requires_grad) for every parameter once
param_sizes = [(p.numel(), p.requires_grad) for p in model.parameters()]
total_params = sum(size for size, _ in param_sizes)
trainable_params = sum(size for size, needs_grad in param_sizes if needs_grad)
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")

In [None]:
# Smoke-test the model on the batch loaded above, in eval mode and without gradient tracking
model.eval()
with torch.no_grad():
    images = batch['image']
    print(f"Input shape: {images.shape}")

    # Get features: backbone output is fed through the neck
    backbone_out = model.backbone(images)
    feats = model.neck(backbone_out)
    print(f"\nFeature shapes:")
    for level, feat in enumerate(feats):
        print(f"  Level {level}: {feat.shape}")

    # Get predictions
    results = model.predict(images, input_size=(640, 640), score_thr=0.01)

    print(f"\nPredictions for batch:")
    for idx, res in enumerate(results):
        print(f"  Image {idx}: {len(res['bboxes'])} detections")

## Summary

Setup complete! Key findings:

1. **Dataset**: SoccerNet SynLoc with synthetic images
2. **Annotations**: COCO format with 2 keypoints (pelvis, pelvis_ground)
3. **Task**: Detect athletes and project to Bird's Eye View

Next steps:
- Proceed to `02_training.ipynb` for model training