In [None]:
import os
import cv2
import torch
import random
import json
import numpy as np
import torch.nn as nn
from pathlib import Path
from tqdm import tqdm
import albumentations as A
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import torchvision.models.video as video_models
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import StratifiedKFold

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# One seed shared by every random number generator seeded in this cell.
SEED = 42

# Run-wide settings read by the dataset, model and training cells below.
CONFIG = dict(
    # --- data location and clip sampling ---
    data_root='9-classes',      # directory scanned for one sub-folder per class
    frame_height=224,           # frames are resized to frame_height x frame_width
    frame_width=224,
    num_frames=16,              # frames sampled per video

    # --- optimisation ---
    batch_size=4,
    epochs=3,
    learning_rate=1e-3,
    device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'),
    fold=2,                     # number of stratified cross-validation folds
    optimizer='adam',           # 'adam' or 'sgd'
    scheduler='cosine',         # NOTE(review): not read by any code visible in this notebook
    dropout=0.3,                # dropout probability in front of the classifier head

    # --- backbone ---
    model_type='r3d',           # 'r3d', 'mc3' or 'r2plus1d'
    pretrained=False,
)

# Seed torch, the stdlib RNG and NumPy (in that order) for reproducibility.
for seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    seed_rng(SEED)


In [None]:
def load_video_paths(data_root):
    """Collect every ``*.mp4`` under ``data_root`` together with its class index.

    Each visible sub-directory of ``data_root`` is one class. Class indices
    follow the alphabetical order of the directory names.

    Stray files (e.g. a README) and hidden directories (e.g.
    ``.ipynb_checkpoints``) are ignored so they cannot become empty classes and
    inflate the number of classes. Videos inside each class are sorted so the
    sample order -- and therefore the seeded fold split -- does not depend on
    filesystem enumeration order.

    Args:
        data_root: Path (str or ``Path``) of the dataset root directory.

    Returns:
        A tuple ``(video_paths, labels, label_to_idx)``:
        ``video_paths`` is a list of path strings, ``labels`` the parallel list
        of integer class indices, and ``label_to_idx`` maps class name -> index.
    """
    root = Path(data_root)
    class_names = sorted(
        entry.name
        for entry in root.iterdir()
        if entry.is_dir() and not entry.name.startswith('.')
    )
    label_to_idx = {name: i for i, name in enumerate(class_names)}

    video_paths = []
    labels = []
    for class_name in class_names:
        class_idx = label_to_idx[class_name]
        for video_file in sorted((root / class_name).glob("*.mp4")):
            video_paths.append(str(video_file))
            labels.append(class_idx)
    return video_paths, labels, label_to_idx

# Scan the dataset once, then build the reverse (index -> class name) lookup.
video_paths, labels, label_to_idx = load_video_paths(CONFIG['data_root'])
idx_to_label = {index: name for name, index in label_to_idx.items()}

In [None]:
class SoccerDataset(Dataset):
    """Video-clip dataset that samples a fixed number of frames per video.

    Each item is ``(clip, label)`` where ``clip`` is a float tensor laid out as
    (channels, num_frames, height, width) and ``label`` is the class index
    stored for that video.
    """

    def __init__(self, video_paths, labels, config):
        """Store the samples and build the per-frame preprocessing pipeline.

        Args:
            video_paths: Sequence of video file paths.
            labels: Sequence of class indices parallel to ``video_paths``.
            config: Mapping providing ``frame_height``, ``frame_width`` and
                ``num_frames``.
        """
        self.video_paths = video_paths
        self.labels = labels
        self.config = config
        # NOTE(review): the mean/std values look like the Kinetics-400 statistics
        # used with torchvision's video models -- confirm against the backbone.
        self.transform = A.Compose([
            A.Resize(height=config['frame_height'], width=config['frame_width']),
            A.Normalize(mean=[0.43216, 0.394666, 0.37645], std=[0.22803, 0.22145, 0.216989])
        ])

    def __len__(self):
        """Return the number of videos in the dataset."""
        return len(self.video_paths)

    def __getitem__(self, idx):
        """Return ``(clip_tensor, label)`` for the video at position ``idx``."""
        path = self.video_paths[idx]
        label = self.labels[idx]
        frames = self._load_video(path)
        return frames, label

    def _load_video(self, path):
        """Decode ``num_frames`` evenly spaced frames from the video at ``path``.

        A frame that cannot be decoded is replaced by the nearest earlier
        decoded frame (or by the first decodable frame when the failure is at
        the start), so the returned clip always holds exactly ``num_frames``
        frames and batches can be collated.

        Returns:
            Float tensor of shape (channels, num_frames, height, width).

        Raises:
            RuntimeError: If not a single frame could be decoded.
        """
        num_frames = self.config['num_frames']
        cap = cv2.VideoCapture(path)
        sampled = []          # one entry per requested index; None until filled
        last_good = None
        try:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            # Some containers report a frame count of 0 or less; clamp so we
            # still try to read frame 0 instead of seeking to negative indices.
            last_index = max(frame_count - 1, 0)
            indices = np.linspace(0, last_index, num_frames, dtype=int)

            for i in indices:
                cap.set(cv2.CAP_PROP_POS_FRAMES, int(i))
                ret, frame = cap.read()
                if ret:
                    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    last_good = self.transform(image=frame)['image']
                # On a failed read this forward-fills with the previous frame.
                sampled.append(last_good)
        finally:
            # Always release the decoder handle, even if decoding raised.
            cap.release()

        first_good = next((f for f in sampled if f is not None), None)
        if first_good is None:
            raise RuntimeError(f"Could not decode any frame from video: {path}")

        # Back-fill leading failures with the first frame that did decode.
        frames = np.stack([first_good if f is None else f for f in sampled])
        # (T, H, W, C) -> (C, T, H, W)
        frames = np.transpose(frames, (3, 0, 1, 2))
        return torch.from_numpy(frames).float()


In [5]:
class R3DClassifier(nn.Module):
    """Video classifier: a torchvision 18-layer video backbone with a new head.

    The backbone is chosen by ``config['model_type']`` (case-insensitive) and
    its final ``fc`` layer is replaced by dropout followed by a linear layer
    with ``num_classes`` outputs.
    """

    def __init__(self, num_classes, config):
        """Build the backbone and swap in the classification head.

        Args:
            num_classes: Number of output classes.
            config: Mapping providing ``model_type`` ('r3d', 'mc3' or
                'r2plus1d'), ``pretrained`` and ``dropout``.

        Raises:
            ValueError: If ``model_type`` is not one of the supported names.
        """
        super().__init__()

        model_type = config['model_type'].lower()
        pretrained = config['pretrained']
        dropout = config['dropout']

        # model_type -> name of the constructor inside torchvision.models.video.
        # Resolved lazily so only the selected constructor is looked up.
        factory_names = {
            'r3d': 'r3d_18',
            'mc3': 'mc3_18',
            'r2plus1d': 'r2plus1d_18',
        }
        if model_type not in factory_names:
            raise ValueError(f"Unsupported model_type '{model_type}' in CONFIG")

        build_backbone = getattr(video_models, factory_names[model_type])
        self.model = build_backbone(pretrained=pretrained)

        # Replace the stock classifier with dropout + a task-sized linear layer.
        head_inputs = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(head_inputs, num_classes),
        )

    def forward(self, x):
        """Run the wrapped backbone (including the new head) on ``x``."""
        return self.model(x)


In [None]:
# Stratified k-fold cross-validation: for every fold a fresh model is trained
# for CONFIG['epochs'] epochs, and the weights with the best validation
# accuracy are written to models/best_model_fold<k>.pth.
k_folds = CONFIG['fold']
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

# torch.save() does not create missing parent directories, so make sure the
# checkpoint folder exists before the first "best model" is written.
os.makedirs("models", exist_ok=True)

all_fold_accuracies = []   # best validation accuracy of each fold
fold_tracking = []         # per-fold, per-epoch accuracy / error history

for fold, (train_idx, val_idx) in enumerate(skf.split(video_paths, labels)):
    print(f"\n{'='*30}\n▶️ Fold {fold+1}/{k_folds}\n{'='*30}")

    train_paths_fold = [video_paths[i] for i in train_idx]
    val_paths_fold = [video_paths[i] for i in val_idx]
    train_labels_fold = [labels[i] for i in train_idx]
    val_labels_fold = [labels[i] for i in val_idx]

    train_loader = DataLoader(SoccerDataset(train_paths_fold, train_labels_fold, CONFIG), batch_size=CONFIG['batch_size'], shuffle=True)
    val_loader = DataLoader(SoccerDataset(val_paths_fold, val_labels_fold, CONFIG), batch_size=CONFIG['batch_size'], shuffle=False)

    # A new model per fold so no weights leak between folds.
    model = R3DClassifier(num_classes=len(label_to_idx), config=CONFIG).to(CONFIG['device'])

    criterion = nn.CrossEntropyLoss()

    if CONFIG['optimizer'] == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG['learning_rate'])
    elif CONFIG['optimizer'] == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(), lr=CONFIG['learning_rate'], momentum=0.9)
    else:
        raise ValueError("Unsupported optimizer in CONFIG.")

    # NOTE(review): CONFIG['scheduler'] is set to 'cosine' but no learning-rate
    # scheduler is created here, so the learning rate stays constant. Either
    # wire a scheduler in or drop the key from CONFIG.

    best_val_acc = 0.0
    epoch_train_accs, epoch_val_accs = [], []
    epoch_train_errors, epoch_val_errors = [], []
    for epoch in range(CONFIG['epochs']):
        # ---- training pass ----
        model.train()
        train_preds, train_targets = [], []

        for videos, labels_batch in tqdm(train_loader, desc=f"Fold {fold+1} - Epoch {epoch+1}"):
            videos, labels_batch = videos.to(CONFIG['device']), labels_batch.to(CONFIG['device'])
            outputs = model(videos)
            loss = criterion(outputs, labels_batch)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_preds.extend(torch.argmax(outputs, 1).cpu().numpy())
            train_targets.extend(labels_batch.cpu().numpy())

        train_acc = accuracy_score(train_targets, train_preds)
        train_errors = sum(p != t for p, t in zip(train_preds, train_targets))

        # ---- validation pass (no gradients, eval-mode dropout/batch-norm) ----
        model.eval()
        val_preds, val_targets = [], []

        with torch.no_grad():
            for videos, labels_batch in val_loader:
                videos, labels_batch = videos.to(CONFIG['device']), labels_batch.to(CONFIG['device'])
                outputs = model(videos)
                val_preds.extend(torch.argmax(outputs, 1).cpu().numpy())
                val_targets.extend(labels_batch.cpu().numpy())

        val_acc = accuracy_score(val_targets, val_preds)
        val_errors = sum(p != t for p, t in zip(val_preds, val_targets))
        print(f"✅ Epoch {epoch+1}: Train Acc = {train_acc:.4f}, Val Acc = {val_acc:.4f}")

        epoch_train_accs.append(train_acc)
        epoch_val_accs.append(val_acc)
        epoch_train_errors.append(train_errors)
        epoch_val_errors.append(val_errors)

        # Checkpoint only when validation accuracy strictly improves.
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            model_path = f"models/best_model_fold{fold+1}.pth"
            torch.save(model.state_dict(), model_path)
            print(f"💾 Saved best model for Fold {fold+1}: {model_path}")

    fold_tracking.append({
        'fold': fold + 1,
        'train_accuracies': epoch_train_accs,
        'val_accuracies': epoch_val_accs,
        'train_errors': epoch_train_errors,
        'val_errors': epoch_val_errors,
        'best_val_acc': best_val_acc
    })

    all_fold_accuracies.append(best_val_acc)
    

# Hyper-parameters recorded with the results, cast to plain Python types so
# they serialise to JSON.
config_snapshot = {
    'learning_rate': float(CONFIG['learning_rate']),
    'batch_size': int(CONFIG['batch_size']),
    'dropout': float(CONFIG['dropout']),
    'num_frames': int(CONFIG['num_frames']),
    'optimizer': CONFIG['optimizer'],
    'k_folds': int(CONFIG['fold']),
}

# Best validation accuracy of every fold, and their mean.
fold_scores = [float(score) for score in all_fold_accuracies]
mean_score = float(np.mean(all_fold_accuracies))

run_summary = {
    'config': config_snapshot,
    'fold_accuracies': fold_scores,
    'avg_accuracy': mean_score,
    # Still the raw per-epoch history here; it is sanitised further down.
    'fold_tracking': fold_tracking,
}


def sanitize(obj):
    """Recursively convert NumPy values into JSON-serialisable Python objects.

    Conversions:
        * ``np.bool_``                 -> ``bool``
        * any ``np.integer``           -> ``int``
        * any ``np.floating``          -> ``float``
        * ``np.ndarray``               -> (nested) ``list``
        * ``list`` / ``tuple``         -> ``list`` with every element sanitised
        * ``dict``                     -> ``dict`` with keys and values sanitised

    Anything else is returned unchanged.

    Args:
        obj: Arbitrary (possibly nested) object.

    Returns:
        A structure equivalent to ``obj`` with NumPy scalars and arrays
        replaced by plain Python values.
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch.
    if isinstance(obj, np.bool_):
        return bool(obj)
    if isinstance(obj, np.integer):
        return int(obj)
    if isinstance(obj, np.floating):
        return float(obj)
    if isinstance(obj, np.ndarray):
        # tolist() yields Python scalars for numeric dtypes; recurse to also
        # cover object arrays that may still hold NumPy scalars.
        return sanitize(obj.tolist())
    if isinstance(obj, (list, tuple)):
        return [sanitize(x) for x in obj]
    if isinstance(obj, dict):
        # json.dump rejects NumPy scalars as keys too, so convert them as well.
        return {sanitize(k) if isinstance(k, np.generic) else k: sanitize(v)
                for k, v in obj.items()}
    return obj

# Swap the raw history for a copy whose NumPy scalars are plain Python numbers,
# so the summary can be written with json.dump.
run_summary['fold_tracking'] = sanitize(fold_tracking)


▶️ Fold 1/2


Fold 1 - Epoch 1:   0%|          | 0/89 [00:09<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Persist the run summary; the file name encodes the learning rate used.
summary_path = f"run_result_lr{CONFIG['learning_rate']}.json"
with open(summary_path, "w") as summary_file:
    json.dump(run_summary, summary_file, indent=2)

In [None]:
import json
import matplotlib.pyplot as plt

# Reload the summary written for the current learning rate.
results_path = f"run_result_lr{CONFIG['learning_rate']}.json"
with open(results_path) as results_file:
    results = json.load(results_file)

In [None]:
# One figure per fold: accuracy curves on the left, error counts on the right.
for fold in results['fold_tracking']:
    fold_id = fold['fold']
    epoch_numbers = list(range(1, len(fold['train_accuracies']) + 1))
    fig, (acc_ax, err_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: train vs. validation accuracy per epoch.
    acc_ax.plot(epoch_numbers, fold['train_accuracies'], label='Train Acc')
    acc_ax.plot(epoch_numbers, fold['val_accuracies'], label='Val Acc')
    acc_ax.set_title(f"Fold {fold_id} - Accuracy")
    acc_ax.set_xlabel("Epoch")
    acc_ax.set_ylabel("Accuracy")
    acc_ax.legend()
    acc_ax.grid(True)

    # Right panel: number of misclassified samples per epoch.
    err_ax.plot(epoch_numbers, fold['train_errors'], label='Train Errors')
    err_ax.plot(epoch_numbers, fold['val_errors'], label='Val Errors')
    err_ax.set_title(f"Fold {fold_id} - Errors")
    err_ax.set_xlabel("Epoch")
    err_ax.set_ylabel("# Errors")
    err_ax.legend()
    err_ax.grid(True)

    fig.tight_layout()
    plt.show()


In [None]:
# One summary line per fold: best validation accuracy and last-epoch train accuracy.
for fold in results['fold_tracking']:
    best_val = fold['best_val_acc']
    final_train = fold['train_accuracies'][-1]
    print(f"Fold {fold['fold']} | Best Val Accuracy: {best_val:.4f} | Final Train Accuracy: {final_train:.4f}")


In [None]:
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_fscore_support
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Rebuild an unshuffled loader over the validation split left behind by the
# last cross-validation fold.
# NOTE(review): `model` is whatever the training cell left in memory, i.e. the
# weights after the final epoch of the last fold -- not necessarily the best
# checkpoint that was saved to disk. Confirm this is the model to analyse.
val_dataset = SoccerDataset(val_paths_fold, val_labels_fold, CONFIG)
val_loader = DataLoader(val_dataset, batch_size=CONFIG['batch_size'], shuffle=False)

model.eval()
all_preds, all_true, all_probs, all_videos = [], [], [], []
eval_batch_size = CONFIG['batch_size']

with torch.no_grad():
    for i, (videos, labels_batch) in enumerate(tqdm(val_loader, desc="Evaluating error analysis")):
        videos = videos.to(CONFIG['device'])
        outputs = model(videos)
        probs = torch.softmax(outputs, dim=1)
        preds = probs.argmax(dim=1).cpu().numpy()

        all_preds.extend(preds)
        all_true.extend(labels_batch.numpy())
        all_probs.extend(probs.cpu().numpy())

        # The loader is not shuffled, so batch i covers this slice of the paths.
        batch_start = i * eval_batch_size
        all_videos.extend(val_paths_fold[batch_start:batch_start + eval_batch_size])


In [None]:
# One row per validation video: path, true/predicted class names, whether the
# prediction was right, and the probability assigned to the predicted class.
true_names = [idx_to_label[i] for i in all_true]
predicted_names = [idx_to_label[i] for i in all_preds]
is_correct = np.array(all_true) == np.array(all_preds)
predicted_confidence = [row[p] for row, p in zip(all_probs, all_preds)]

df = pd.DataFrame({
    "video_path": all_videos,
    "true_label": true_names,
    "predicted_label": predicted_names,
    "correct": is_correct,
    "confidence": predicted_confidence,
})

# Keep a copy on disk for later inspection.
df.to_csv("fold_predictions.csv", index=False)


In [None]:
# ---- Overall error count ----
total = len(df)
errors = df.loc[~df['correct']]
n_errors = len(errors)
print(f"\nTotal errors: {n_errors} out of {total} ({100*n_errors/total:.2f}%)")

# ---- Most frequent (true, predicted) confusions ----
misclass_df = (
    errors.groupby(['true_label', 'predicted_label'])
    .size()
    .reset_index(name='Count')
    .sort_values('Count', ascending=False)
)
print("\nMost common misclassifications:")
print(misclass_df.head(10))

# ---- Error rate per true class ----
def _count_misses(correct_flags):
    """Number of False entries in a boolean Series."""
    return (~correct_flags).sum()

per_class = df.groupby('true_label')['correct'].agg(['count', _count_misses])
per_class.columns = ['Total', 'Errors']
per_class['Error Rate'] = per_class['Errors'] / per_class['Total']
class_errors = per_class.sort_values('Error Rate', ascending=False)
print("\nError rates by class:")
print(class_errors)

# ---- Wrong predictions the model was most confident about ----
report_columns = ['video_path', 'true_label', 'predicted_label', 'confidence']
most_confident_errors = errors.sort_values('confidence', ascending=False).head(10)
print("\nHighest confidence errors:")
print(most_confident_errors[report_columns])


In [None]:
# Bar chart of the per-class error rate (classes ordered as in class_errors).
fig, ax = plt.subplots(figsize=(8, 4))
sns.barplot(x=class_errors.index, y=class_errors['Error Rate'], ax=ax)
ax.set_title("Class-wise Error Rate")
plt.xticks(rotation=45)
ax.grid(True)
plt.show()

# Confusion matrix over class names, rows/columns ordered as in label_to_idx.
ordered_classes = list(label_to_idx.keys())
cm = confusion_matrix(df['true_label'], df['predicted_label'], labels=ordered_classes)
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=ordered_classes,
    yticklabels=ordered_classes,
    ax=ax,
)
ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()
