In [1]:
import cv2
import torch
import json
import numpy as np
import torch.nn as nn
from pathlib import Path
import albumentations as A
import matplotlib.pyplot as plt
import torchvision.models.video as video_models
from torch.utils.data import Dataset, DataLoader
import os
import random
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns
from tqdm import tqdm
import pandas as pd
import shutil
from datetime import datetime

  from .autonotebook import tqdm as notebook_tqdm


# Generating test data

In [None]:

SOURCE_DIR = '../data/9-classes'
TARGET_DIR = '../data/test9-classes'
NUM_VIDEOS_PER_CLASS = 20
RANDOM_SEED = 42

# Setting random seed for reproducibility
random.seed(RANDOM_SEED)

# Creating target directory if it doesn't exist
os.makedirs(TARGET_DIR, exist_ok=True)

# Getting all class folders
class_folders = [f for f in os.listdir(SOURCE_DIR) 
                 if os.path.isdir(os.path.join(SOURCE_DIR, f))]

print(f"Found {len(class_folders)} class folders.")

# Processing each class folder
for class_folder in tqdm(class_folders, desc="Processing classes"):
    source_class_path = os.path.join(SOURCE_DIR, class_folder)
    target_class_path = os.path.join(TARGET_DIR, class_folder)
    
    # Creating class folder in target directory
    os.makedirs(target_class_path, exist_ok=True)
    
    # Getting all video files
    video_files = [f for f in os.listdir(source_class_path) 
                  if f.endswith(('.mp4', '.avi', '.mov', '.mkv'))]
    
    # Checking if we have enough videos
    if len(video_files) <= NUM_VIDEOS_PER_CLASS:
        print(f"Warning: Class {class_folder} has only {len(video_files)} videos, "
              f"using all of them.")
        selected_videos = video_files
    else:
        selected_videos = random.sample(video_files, NUM_VIDEOS_PER_CLASS)
    
    for video in selected_videos:
        source_file = os.path.join(source_class_path, video)
        target_file = os.path.join(target_class_path, video)
        shutil.copy2(source_file, target_file)
    
    print(f"  - {class_folder}: Copied {len(selected_videos)} videos")

total_videos = sum(len(os.listdir(os.path.join(TARGET_DIR, class_folder))) 
                   for class_folder in class_folders)

print(f"\nTest set creation complete!")
print(f"Created test set with {total_videos} videos across {len(class_folders)} classes.")
print(f"Test set location: {TARGET_DIR}")

Found 9 class folders.


Processing classes:   0%|          | 0/9 [00:00<?, ?it/s]

  - Corner: Copied 20 videos

Processing classes:  22%|██▏       | 2/9 [00:00<00:01,  5.50it/s]


  - Foul: Copied 20 videos


Processing classes:  33%|███▎      | 3/9 [00:00<00:01,  3.58it/s]

  - Goal: Copied 20 videos


Processing classes:  44%|████▍     | 4/9 [00:00<00:01,  3.96it/s]

  - Kick-off: Copied 20 videos
  - Penalty: Copied 2 videos
  - Red card: Copied 4 videos


Processing classes:  78%|███████▊  | 7/9 [00:01<00:00,  5.30it/s]

  - Shots off target: Copied 20 videos


Processing classes:  89%|████████▉ | 8/9 [00:01<00:00,  4.66it/s]

  - Shots on target: Copied 20 videos


Processing classes: 100%|██████████| 9/9 [00:02<00:00,  4.47it/s]

  - Yellow card: Copied 20 videos

Test set creation complete!
Created test set with 146 videos across 9 classes.
Test set location: ./test





# Getting the video paths along with labels

In [None]:
def load_video_paths(data_root):
    video_paths = []
    labels = []
    class_names = sorted(os.listdir(data_root))
    label_to_idx = {name: i for i, name in enumerate(class_names)}

    for class_name in class_names:
        class_dir = Path(data_root) / class_name
        for video_file in class_dir.glob("*.mp4"):
            video_paths.append(str(video_file))
            labels.append(label_to_idx[class_name])
    return video_paths, labels, label_to_idx

video_paths, labels, label_to_idx = load_video_paths("../data/test9-classes")
idx_to_label = {v: k for k, v in label_to_idx.items()}

In [5]:
class SoccerDataset(Dataset):
    def __init__(self, video_paths, labels, config):
        self.video_paths = video_paths
        self.labels = labels
        self.config = config
        self.transform = A.Compose([
            A.Resize(height=config['frame_height'], width=config['frame_width']),
            A.Normalize(mean=[0.43216, 0.394666, 0.37645], std=[0.22803, 0.22145, 0.216989])
        ])

    def __len__(self):
        return len(self.video_paths)

    def __getitem__(self, idx):
        path = self.video_paths[idx]
        label = self.labels[idx]
        frames = self._load_video(path)
        return frames, label

    def _load_video(self, path):
        cap = cv2.VideoCapture(path)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        indices = np.linspace(0, frame_count - 1, self.config['num_frames'], dtype=int)
        frames = []

        for i in indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                continue
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = self.transform(image=frame)['image']
            frames.append(frame)

        cap.release()

        frames = np.array(frames)
        frames = np.transpose(frames, (3, 0, 1, 2))
        return torch.from_numpy(frames).float()


# Model architecture

In [6]:
class R3DClassifier(nn.Module):
    def __init__(self, num_classes, config):
        super(R3DClassifier, self).__init__()

        model_type = config['model_type'].lower()
        pretrained = config['pretrained']
        dropout = config['dropout']

        if model_type == 'r3d':
            self.model = video_models.r3d_18(pretrained=pretrained)
        elif model_type == 'mc3':
            self.model = video_models.mc3_18(pretrained=pretrained)
        elif model_type == 'r2plus1d':
            self.model = video_models.r2plus1d_18(pretrained=pretrained)
        else:
            raise ValueError(f"Unsupported model_type '{model_type}' in CONFIG")

        in_features = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(in_features, num_classes)
        )

    def forward(self, x):
        return self.model(x)


# Evaluating the model

In [None]:
def evaluate_model(model_path, config, video_paths, labels, idx_to_label):
    # Creating test dataset and dataloader
    test_loader = DataLoader(SoccerDataset(video_paths, labels, config), batch_size=config['batch_size'], shuffle=True)
    
    # Initializing and loading model we need to evaluate
    model = R3DClassifier(num_classes=len(idx_to_label), config=config).to(config['device'])
    model.load_state_dict(torch.load(model_path, map_location=config['device']))
    model.eval()
    
    # To store model results
    all_preds = []
    all_targets = []
    all_probs = []
    
    with torch.no_grad():
        for videos, batch_labels in tqdm(test_loader, desc="Evaluating"):
            videos, batch_labels = videos.to(config['device']), batch_labels.to(config['device'])
            outputs = model(videos)
            probs = torch.softmax(outputs, dim=1)
            
            all_probs.extend(probs.cpu().numpy())
            all_preds.extend(torch.argmax(outputs, 1).cpu().numpy())
            all_targets.extend(batch_labels.cpu().numpy())
    
    # Calculate metrics
    accuracy = accuracy_score(all_targets, all_preds)
    conf_matrix = confusion_matrix(all_targets, all_preds)
    class_report = classification_report(all_targets, all_preds, 
                                         target_names=[idx_to_label[i] for i in range(len(idx_to_label))], 
                                         output_dict=True)
    
    # Return results
    return {
        'accuracy': accuracy,
        'confusion_matrix': conf_matrix,
        'classification_report': class_report,
        'predictions': all_preds,
        'targets': all_targets,
        'probabilities': all_probs
    }

# Visualizing the results

In [8]:
def visualize_results(results, idx_to_label, config_name, save_dir="results"):
    os.makedirs(save_dir, exist_ok=True)
    
    # Confusion Matrix
    plt.figure(figsize=(12, 10))
    labels = [idx_to_label[i] for i in range(len(idx_to_label))]
    conf_matrix = results['confusion_matrix']
    
    # Normalize confusion matrix
    conf_matrix_norm = conf_matrix.astype('float') / conf_matrix.sum(axis=1)[:, np.newaxis]
    
    sns.heatmap(conf_matrix_norm, annot=True, fmt='.2f', cmap='Blues', 
                xticklabels=labels, yticklabels=labels)
    plt.title(f'Normalized Confusion Matrix - {config_name}')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, f'{config_name}_confusion_matrix.png'), dpi=300)
    plt.close()
    
    # Class-wise performance
    class_report = results['classification_report']
    print(class_report)
    df = pd.DataFrame(class_report).transpose()
    
    # Filter out the avg/total rows
    df = df[~df.index.isin(['accuracy', 'macro avg', 'weighted avg'])]
    
    plt.figure(figsize=(14, 8))
    sns.barplot(x=df.index, y=df['f1-score'])
    plt.title(f'F1-Score by Class - {config_name}')
    plt.ylabel('F1-Score')
    plt.xlabel('Class')
    plt.xticks(rotation=45, ha='right')
    plt.ylim(0, 1.0)
    plt.tight_layout()
    plt.savefig(os.path.join(save_dir, f'{config_name}_f1_scores.png'), dpi=300)
    plt.close()
    
    # Save full results as JSON
    clean_results = {
        'accuracy': float(results['accuracy'])
    }

    # Safely process classification report
    if isinstance(class_report, dict):
        clean_class_report = {}
        for k, v in class_report.items():
            if isinstance(v, dict):
                clean_class_report[k] = {
                    kk: float(vv) for kk, vv in v.items() 
                    if isinstance(vv, (int, float))
                }
            elif isinstance(v, (int, float)):
                clean_class_report[k] = float(v)
        clean_results['classification_report'] = clean_class_report
    
    with open(os.path.join(save_dir, f'{config_name}_results.json'), 'w') as f:
        json.dump(clean_results, f, indent=4)
    
    return


# Ablation study for models trained on different configurations

In [9]:
def run_ablation_study(configs, model_paths, test_folder, video_paths, labels, idx_to_label, output_dir="ablation_results"):
    # Create output directory with timestamp
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_dir = f"{output_dir}_{timestamp}"
    os.makedirs(output_dir, exist_ok=True)
    
    # Change working directory to output directory
    original_dir = os.getcwd()
    os.chdir(output_dir)
    
    # Create results directory
    os.makedirs('results', exist_ok=True)
    
    # Load test data (only need to do this once)
    
    # Results container
    comparative_results = {}
    
    # Evaluate each model
    for i, (config, model_path) in enumerate(zip(configs, model_paths)):
        config_name = f"config_{i+1}"
        print(f"\n{'='*50}\nEvaluating {config_name}: {model_path}\n{'='*50}")
        
        # Evaluate model
        results = evaluate_model(model_path, config, video_paths, labels, idx_to_label)
        
        # Visualize results
        visualize_results(results, idx_to_label, config_name)
        
        # Save to comparative results
        comparative_results[config_name] = {
            'accuracy': float(results['accuracy']),
            'config': {k: str(v) if isinstance(v, torch.device) else v 
                      for k, v in config.items() if k not in ['device']},
            'model_path': model_path
        }
    
    # Create comparative visualization
    configs_df = pd.DataFrame({
        'Configuration': [f"Config {i+1}" for i in range(len(configs))],
        'Accuracy': [comparative_results[f"config_{i+1}"]['accuracy'] for i in range(len(configs))]
    })
    
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Configuration', y='Accuracy', data=configs_df)
    plt.title('Accuracy Comparison Across Configurations')
    plt.ylim(0, 1.0)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.savefig('results/comparative_accuracy.png', dpi=300)
    plt.close()
    
    # Save comparative results
    with open('results/ablation_study_results.json', 'w') as f:
        json.dump(comparative_results, f, indent=4)
    
    print(f"\nResults saved to {output_dir}")
    os.chdir(original_dir)
    
    return comparative_results


In [None]:
TEST_FOLDER = "../tests"

# Define your configurations as Python dictionaries
CONFIG1 = {
    'frame_height': 224,
    'frame_width': 224,
    'num_frames': 32,
    'batch_size': 4,
    'epochs': 15,
    'learning_rate': 1e-4,
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    'fold' : 3,
    'optimizer': 'adam',
    'scheduler': 'cosine',
    'dropout': 0.5,
    'model_type': 'r3d',
    'pretrained': True,
}

# Set seed
torch.manual_seed(42)
random.seed(42)
np.random.seed(42)

CONFIG2 = {
    'frame_height': 224,
    'frame_width': 224,
    'num_frames': 32,
    'batch_size': 8,
    'epochs': 16,
    'learning_rate': 3e-4,
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    'fold' : 3,
    'optimizer': 'adam',
    'scheduler': 'cosine',
    'dropout': 0.7,
    'model_type': 'r3d',
    'pretrained': True,
}

CONFIG3 = {
    'frame_height': 232,
    'frame_width': 232,
    'num_frames': 32,
    'batch_size': 4,
    'epochs': 8,
    'learning_rate': 1e-3,
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    'fold' : 3,
    'optimizer': 'sgd',
    'scheduler': 'cosine',
    'dropout': 0.3,
    'model_type': 'r3d',
    'pretrained': False,
}

CONFIG4 = {
    'frame_height': 224,
    'frame_width': 224,
    'num_frames': 32,
    'batch_size': 8,
    'epochs': 16,
    'learning_rate': 3e-4,
    'device': torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    'fold' : 3,
    'optimizer': 'adam',
    'scheduler': 'cosine',
    'dropout': 0.7,
    'model_type': 'r3d',
    'pretrained': False,
}

# Paths to your saved models (change these to your model paths)
MODEL_PATH1 = "models/best_model_config1.pth"
MODEL_PATH2 = "models/best_model_config2.pth"
MODEL_PATH3 = "models/best_model_config3.pth"
MODEL_PATH4 = "models/best_model_config4.pth"

# Run ablation study
results = run_ablation_study(
    configs=[CONFIG1, CONFIG2, CONFIG3, CONFIG4],
    model_paths=[MODEL_PATH1, MODEL_PATH2, MODEL_PATH3, MODEL_PATH4],
    test_folder=TEST_FOLDER,
    video_paths=video_paths,
    labels=labels,
    idx_to_label=idx_to_label,
    output_dir="ablation_results"
)

# Print summary
print("\n==== ABLATION STUDY RESULTS ====")
for config_name, result in results.items():
    print(f"{config_name}: Accuracy = {result['accuracy']:.4f}")
    print(f"Key Configuration Parameters:")
    for param in ['batch_size', 'learning_rate', 'optimizer', 'dropout', 'pretrained']:
        print(f"  - {param}: {result['config'][param]}")
    print("-" * 40)


Evaluating config_1: C:\Users\vidit\Documents\ML\Automatic-Highlight-Generation\models\best_model_config1.pth


Evaluating: 100%|██████████| 37/37 [08:04<00:00, 13.10s/it]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'Corner': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 20.0}, 'Foul': {'precision': 1.0, 'recall': 0.9, 'f1-score': 0.9473684210526315, 'support': 20.0}, 'Goal': {'precision': 1.0, 'recall': 0.9, 'f1-score': 0.9473684210526315, 'support': 20.0}, 'Kick-off': {'precision': 0.9047619047619048, 'recall': 0.95, 'f1-score': 0.926829268292683, 'support': 20.0}, 'Penalty': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 2.0}, 'Red card': {'precision': 1.0, 'recall': 0.5, 'f1-score': 0.6666666666666666, 'support': 4.0}, 'Shots off target': {'precision': 0.72, 'recall': 0.9, 'f1-score': 0.8, 'support': 20.0}, 'Shots on target': {'precision': 0.8, 'recall': 0.8, 'f1-score': 0.8, 'support': 20.0}, 'Yellow card': {'precision': 0.8636363636363636, 'recall': 0.95, 'f1-score': 0.9047619047619048, 'support': 20.0}, 'accuracy': 0.8904109589041096, 'macro avg': {'precision': 0.8098220298220298, 'recall': 0.7666666666666667, 'f1-score': 0.7769994090918353, 'support': 146.0}

Evaluating: 100%|██████████| 19/19 [09:21<00:00, 29.53s/it]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'Corner': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 20.0}, 'Foul': {'precision': 0.9473684210526315, 'recall': 0.9, 'f1-score': 0.9230769230769231, 'support': 20.0}, 'Goal': {'precision': 0.9473684210526315, 'recall': 0.9, 'f1-score': 0.9230769230769231, 'support': 20.0}, 'Kick-off': {'precision': 0.9523809523809523, 'recall': 1.0, 'f1-score': 0.975609756097561, 'support': 20.0}, 'Penalty': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 2.0}, 'Red card': {'precision': 1.0, 'recall': 0.75, 'f1-score': 0.8571428571428571, 'support': 4.0}, 'Shots off target': {'precision': 1.0, 'recall': 0.7, 'f1-score': 0.8235294117647058, 'support': 20.0}, 'Shots on target': {'precision': 0.6896551724137931, 'recall': 1.0, 'f1-score': 0.8163265306122449, 'support': 20.0}, 'Yellow card': {'precision': 0.9523809523809523, 'recall': 1.0, 'f1-score': 0.975609756097561, 'support': 20.0}, 'accuracy': 0.910958904109589, 'macro avg': {'precision': 0.8321282132534401, 'recall'

Evaluating: 100%|██████████| 37/37 [10:02<00:00, 16.28s/it]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'Corner': {'precision': 0.76, 'recall': 0.95, 'f1-score': 0.8444444444444444, 'support': 20.0}, 'Foul': {'precision': 0.5333333333333333, 'recall': 0.8, 'f1-score': 0.64, 'support': 20.0}, 'Goal': {'precision': 0.8571428571428571, 'recall': 0.3, 'f1-score': 0.4444444444444444, 'support': 20.0}, 'Kick-off': {'precision': 0.7727272727272727, 'recall': 0.85, 'f1-score': 0.8095238095238095, 'support': 20.0}, 'Penalty': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 2.0}, 'Red card': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4.0}, 'Shots off target': {'precision': 0.45714285714285713, 'recall': 0.8, 'f1-score': 0.5818181818181818, 'support': 20.0}, 'Shots on target': {'precision': 1.0, 'recall': 0.05, 'f1-score': 0.09523809523809523, 'support': 20.0}, 'Yellow card': {'precision': 0.6923076923076923, 'recall': 0.9, 'f1-score': 0.782608695652174, 'support': 20.0}, 'accuracy': 0.636986301369863, 'macro avg': {'precision': 0.5636282236282236, 'recall': 0.5166

Evaluating: 100%|██████████| 19/19 [08:30<00:00, 26.86s/it]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'Corner': {'precision': 0.6896551724137931, 'recall': 1.0, 'f1-score': 0.8163265306122449, 'support': 20.0}, 'Foul': {'precision': 0.6071428571428571, 'recall': 0.85, 'f1-score': 0.7083333333333334, 'support': 20.0}, 'Goal': {'precision': 0.6842105263157895, 'recall': 0.65, 'f1-score': 0.6666666666666666, 'support': 20.0}, 'Kick-off': {'precision': 0.8888888888888888, 'recall': 0.8, 'f1-score': 0.8421052631578947, 'support': 20.0}, 'Penalty': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 2.0}, 'Red card': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4.0}, 'Shots off target': {'precision': 0.6, 'recall': 0.6, 'f1-score': 0.6, 'support': 20.0}, 'Shots on target': {'precision': 0.7777777777777778, 'recall': 0.35, 'f1-score': 0.4827586206896552, 'support': 20.0}, 'Yellow card': {'precision': 0.782608695652174, 'recall': 0.9, 'f1-score': 0.8372093023255814, 'support': 20.0}, 'accuracy': 0.7054794520547946, 'macro avg': {'precision': 0.5589204353545867, 'rec