# Split the big video into event clips and store them in their corresponding folders on the device

In [None]:
import os
import json
import cv2

# Rank assigned to each event label.
# process_match() gives a label its own output folder only when its rank is
# <= 10; every other label (and any label missing from this table, which is
# treated as rank 15) is filed under the "nothing" folder.
RANKINGS = {
    "Kick-off": 1, "Goal": 2, "Shots on target": 3, "Red card": 4, "Corner": 5,
    "Yellow card": 6, "Shots off target": 7, "Foul": 8, "Direct free-kick": 9,
    "Offside": 10, "Clearance": 11, "Indirect free-kick": 12, "Throw-in": 13,
    "Ball out of play": 14, "Substitution": 15
}

def extract_events_with_context(data, context_time=10000):
    """Build a clip window around every annotated event.

    Args:
        data: Parsed label file. Must contain an ``"annotations"`` list whose
            items carry ``"label"`` and ``"gameTime"`` and normally a
            ``"position"``.
        context_time: Amount subtracted before and added after the event
            position to form the window. It uses the same unit as
            ``position``; ``extract_frames`` later seeks with these values as
            milliseconds.

    Returns:
        A list of dicts with the keys ``label``, ``event_position``,
        ``start_time`` (clamped at 0), ``end_time``, ``gameTime`` and
        ``index`` (position of the annotation in the input list).
        Annotations whose position is missing, ``None`` or not convertible to
        ``int`` are skipped with a warning instead of aborting the run.
    """
    events = []

    for idx, annotation in enumerate(data["annotations"]):
        raw_position = annotation.get("position")
        try:
            event_position = int(raw_position)
        except (TypeError, ValueError):
            # TypeError covers None / non-scalar values, ValueError covers
            # strings that are not integers.
            print(f"Warning: Invalid position value {raw_position} at index {idx}. Skipping this event.")
            continue

        events.append({
            "label": annotation["label"],
            "event_position": event_position,
            "start_time": max(0, event_position - context_time),
            "end_time": event_position + context_time,
            "gameTime": annotation["gameTime"],
            "index": idx
        })

    return events

def extract_frames(video_path, start_time, end_time, frame_rate=30):
    """Read a consecutive run of frames starting at ``start_time``.

    Args:
        video_path: Path of the video to read.
        start_time: Seek position in milliseconds.
        end_time: End of the window in milliseconds.
        frame_rate: Frames per second assumed when converting the window
            length into a frame count (the file's own rate is not queried).

    Returns:
        A list of frames as returned by ``VideoCapture.read``. The list is
        empty when the video cannot be opened and shorter than requested when
        the stream ends early.
    """
    capture = cv2.VideoCapture(video_path)
    if not capture.isOpened():
        print(f"Error: Could not open video {video_path}")
        return []

    # Seek by timestamp, then pull a fixed number of frames from that point.
    capture.set(cv2.CAP_PROP_POS_MSEC, start_time)
    frames_wanted = int((end_time - start_time) / 1000 * frame_rate)

    collected = []
    while len(collected) < frames_wanted:
        ok, image = capture.read()
        if not ok:
            break
        collected.append(image)

    capture.release()
    return collected

def save_video(frames, output_path, frame_rate=30):
    """Encode a list of frames into an mp4v video file.

    Args:
        frames: Sequence of image arrays; the first frame defines the output
            width and height.
        output_path: Destination file path.
        frame_rate: Playback rate written into the file.

    Returns:
        None. Prints a message describing the outcome. Nothing is written
        when ``frames`` is empty or the writer cannot be opened.
    """
    if not frames:
        print(f"No frames to create video at {output_path}.")
        return

    height, width = frames[0].shape[:2]
    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), frame_rate, (width, height))

    # A writer that failed to open (bad path, missing codec) silently drops
    # every frame, so report it instead of claiming success.
    if not writer.isOpened():
        writer.release()
        print(f"Error: Could not open video writer for {output_path}")
        return

    try:
        for frame in frames:
            writer.write(frame)
    finally:
        writer.release()

    print(f"Video saved at {output_path}")

def process_match(json_file_path, video_dir, output_dir, context_time=10000):
    """Cut one clip per annotated event of a match and file it by label.

    Args:
        json_file_path: Path of the label JSON file.
        video_dir: Directory holding the ``<half>_720p.mkv`` source videos.
        output_dir: Root directory under which one folder per label is made.
        context_time: Context passed on to ``extract_events_with_context``.

    Returns:
        None. Prints a message and returns early when the JSON file is
        missing; events whose source video is missing are skipped.
    """
    if not os.path.exists(json_file_path):
        print(f"No JSON file found at {json_file_path}.")
        return

    with open(json_file_path) as handle:
        match_data = json.load(handle)

    for clip_info in extract_events_with_context(match_data, context_time):
        label = clip_info["label"]
        # The first character of gameTime selects the half, and with it the file.
        half = clip_info["gameTime"][0]
        source_video = os.path.join(video_dir, f"{half}_720p.mkv")
        if not os.path.exists(source_video):
            print(f"Error: Video file {source_video} does not exist.")
            continue

        # Labels ranked above 10 (or unknown) are pooled in the "nothing" folder.
        if RANKINGS.get(label, 15) <= 10:
            label_folder = label
        else:
            label_folder = "nothing"
        target_dir = os.path.join(output_dir, label_folder)
        os.makedirs(target_dir, exist_ok=True)

        clip_frames = extract_frames(source_video, clip_info["start_time"], clip_info["end_time"])
        clip_name = f"{label_folder}_half_{half}_pos_{clip_info['event_position']}_idx_{clip_info['index']}.mp4"
        save_video(clip_frames, os.path.join(target_dir, clip_name))

def process_directory(json_file_path, video_dir, output_dir):
    """Create the output root, then extract all event clips for one match.

    Args:
        json_file_path: Path of the label JSON file.
        video_dir: Directory containing the source videos.
        output_dir: Directory that receives the per-label clip folders.
    """
    # The destination root must exist before any clip is written.
    os.makedirs(output_dir, exist_ok=True)
    print(f"Processing videos in: {video_dir}")
    process_match(
        json_file_path=json_file_path,
        video_dir=video_dir,
        output_dir=output_dir,
    )

# Example paths for the label JSON file, the source-video directory and the
# directory that receives the extracted clips.
# NOTE(review): clips are written to "...\extracted2", while the
# feature-extraction cell reads from "...\extracted" - confirm which folder
# is the intended hand-over point.
json_file_path = "Labels-v2.json"
video_dir = "D:\\FAI Project\\FAI_Data_Final\\Full_Vids_2"
output_dir = "D:\\FAI Project\\FAI_Data_Final\\extracted2"

# Run the extraction for this match (runs as soon as the cell is executed)
process_directory(json_file_path, video_dir, output_dir)

# Access the "extracted" folder, then create .pt files for Training and Testing (Video + Label pairs)

In [None]:
import json
import os
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from sklearn.model_selection import train_test_split

# Pick the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# If CUDA is available, print additional GPU information
if device.type == "cuda":
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"cuDNN Version: {torch.backends.cudnn.version()}")

# Load a pretrained ResNet-50 and move it to the selected device.
# The complete network is kept (no layers are removed), so extract_features()
# stores whatever the full model outputs for each frame.
resnet = models.resnet50(weights=models.ResNet50_Weights.DEFAULT).to(device)
# Inference mode: freezes batch-norm statistics and disables dropout
resnet.eval()

# Function to extract features from frames of a video clip
def extract_features(video_clip):
    """Run every frame of a clip through the module-level ``resnet``.

    Frames are decoded, resized to 224x224, divided by 255 and fed to the
    network one at a time. Only the current frame is kept in memory instead
    of buffering the whole clip as float64 before inference.

    NOTE(review): frames are used in the channel order OpenCV delivers and
    are not mean/std-normalised, whereas the highlight-generation cell applies
    ``transforms.Normalize`` - confirm both stages are meant to differ.

    Args:
        video_clip: Path of the video file.

    Returns:
        ``np.ndarray`` holding one network output per decoded frame (each
        entry is the output for a batch of one frame). A video that cannot be
        opened or has no frames yields an empty array.
    """
    feature_vectors = []
    cap = cv2.VideoCapture(video_clip)
    try:
        with torch.no_grad():
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                frame = cv2.resize(frame, (224, 224)) / 255.0
                # HWC -> CHW, add the batch axis, then move the tensor to the device
                input_tensor = torch.tensor(frame).permute(2, 0, 1).unsqueeze(0).float().to(device)
                feature = resnet(input_tensor)
                # Move feature back to CPU for numpy conversion
                feature_vectors.append(feature.cpu().numpy())
    finally:
        # Release the decoder even if inference fails part-way through
        cap.release()
    return np.array(feature_vectors)

# Load Label annotations from folder names
def get_label_from_folder(folder_name):
    """Return the class label of a clip folder: the folder name itself."""
    label = folder_name
    return label

# Specify the root directory containing the extracted folders
root_directory = r'D:\\FAI Project\\FAI_Data_Final\\extracted'

# Initialize lists to store features and labels
all_features = []
all_labels = []

# Iterate through each folder inside the extracted directory
for folder_name in tqdm(os.listdir(root_directory), desc="Processing folders"):
    folder_path = os.path.join(root_directory, folder_name)
    if not os.path.isdir(folder_path):  # Ignore stray files next to the label folders
        continue

    # Use folder name as the label
    label = get_label_from_folder(folder_name)

    # Iterate through video files in the folder
    video_files = [f for f in os.listdir(folder_path) if f.endswith(('.mp4', '.avi', '.mov'))]
    for video_file in tqdm(video_files, desc=f"Processing videos in {folder_name}", leave=False):
        video_clip_path = os.path.join(folder_path, video_file)
        features = extract_features(video_clip_path)

        # An unreadable/empty clip produces a shape-(0,) array whose rank
        # differs from the real feature arrays and would break the
        # concatenation below, so leave it out.
        if features.shape[0] == 0:
            print(f"Warning: no frames decoded from {video_clip_path}; skipping.")
            continue

        # Collect all features and their corresponding labels
        all_features.append(features)
        all_labels.extend([label] * features.shape[0])  # Add the label for each frame

if not all_features:
    raise ValueError(f"No video features could be extracted from {root_directory}")

# Convert features to numpy array (frames of all clips stacked along axis 0)
all_features = np.concatenate(all_features)

# Create a mapping from labels to integers.
# Sorting makes the label -> index assignment identical on every run; iterating
# a plain set of strings can yield a different order in each interpreter
# session, which would silently change the meaning of every class index.
unique_labels = sorted(set(all_labels))
print("Unique labels:", unique_labels)
label_to_index = {label: index for index, label in enumerate(unique_labels)}
print("Mapping from labels to integers:", label_to_index)

# Convert all_labels to integers
numeric_labels = [label_to_index[label] for label in all_labels]

# Split features and labels into training and testing sets (80:20 ratio)
# NOTE(review): the split is made per frame, so frames of the same clip can end
# up in both sets - confirm whether a per-clip split is wanted instead.
X_train, X_test, y_train, y_test = train_test_split(
    all_features,
    numeric_labels,
    test_size=0.2,
    random_state=42  # For reproducibility
)

# Convert to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)

# Calculate the number of unique classes for the output layer
output_size = len(unique_labels)
print("Feature extraction completed.")

# Paths for saving training and testing data
# (extracted_features1.pt = training split, extracted_features2.pt = testing split)
# NOTE(review): these are raw strings, so each "\\" stays two backslashes in the
# path - confirm this is intended.
train_save_path = r'D:\\FAI Project\\FAI_Data_Final\\extracted_features1.pt'
test_save_path = r'D:\\FAI Project\\FAI_Data_Final\\extracted_features2.pt'

# Save training features and labels together with the label mapping, so a
# consumer can translate class indices back to label names
torch.save({
    'features': X_train,
    'labels': y_train,
    'label_to_index': label_to_index,
    'unique_labels': unique_labels
}, train_save_path)

# Save testing features and labels (same layout as the training file)
torch.save({
    'features': X_test,
    'labels': y_test,
    'label_to_index': label_to_index,
    'unique_labels': unique_labels
}, test_save_path)

print(f"Training features and labels saved to {train_save_path}")
print(f"Testing features and labels saved to {test_save_path}")

# Print some additional information about the split
print(f"Total number of samples: {len(numeric_labels)}")
print(f"Number of training samples: {len(y_train)}")
print(f"Number of testing samples: {len(y_test)}")

# Training Loop using the .pt files

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import os

# Define the device based on CUDA availability (GPU if present, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Define an enhanced GRU model with more layers and dropout
class EnhancedGRUModel(nn.Module):
    """Stacked GRU encoder followed by a three-layer classification head.

    Args:
        input_size: Size of each input feature vector.
        hidden_size: GRU hidden size; the head widens to twice this value.
        output_size: Number of classes (size of the returned logits).
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability used between GRU layers and in the head.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=3, dropout=0.3):
        super().__init__()

        # Recurrent encoder over the time axis (inputs are batch-first)
        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )

        # Head: hidden -> 2*hidden -> hidden -> classes
        widened = hidden_size * 2
        self.fc1 = nn.Linear(hidden_size, widened)
        self.fc2 = nn.Linear(widened, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return class logits computed from the last time step of ``x``."""
        sequence_out = self.gru(x)[0]
        hidden = sequence_out[:, -1, :]
        # Both hidden layers share the same linear -> ReLU -> dropout pattern
        for layer in (self.fc1, self.fc2):
            hidden = self.dropout(self.relu(layer(hidden)))
        return self.fc3(hidden)

# Load the previously extracted features (the training split written by the
# feature-extraction cell)
features_path = r'D:\\FAI Project\\FAI_Data_Final\\extracted_features1.pt'
loaded_data = torch.load(features_path)

# Extract features and labels
all_features = loaded_data['features']
all_labels = loaded_data['labels']
label_to_index = loaded_data['label_to_index']
unique_labels = loaded_data['unique_labels']

# Print some information about the dataset
print("Loaded Features Shape:", all_features.shape)
print("Loaded Labels Shape:", all_labels.shape)
print("Unique Labels:", unique_labels)
print("Label to Index Mapping:", label_to_index)

# Hyperparameters
# NOTE(review): axis 2 is used as the feature size, so the features must be a
# 3-D tensor; axis 1 is then what the batch-first GRU treats as the time axis -
# confirm its length matches the intended sequence length.
input_size = all_features.shape[2]  # Feature vector size
hidden_size = 2048  # Increased hidden size for better representation
output_size = len(unique_labels)  # Set output size based on the number of unique labels
num_epochs = 50  # Increased epochs for potentially better training
batch_size = 16  # Adjusted batch size
learning_rate = 0.0001

# Prepare the dataset and dataloader (reshuffled every epoch)
dataset = TensorDataset(all_features, all_labels)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Instantiate the model, loss function, and optimizer
model = EnhancedGRUModel(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop. The "best" checkpoint is the epoch with the lowest average
# *training* loss; no validation data is evaluated in this cell.
def _build_checkpoint(loss_value):
    """Bundle the current model/optimizer state with the metadata needed to rebuild the model."""
    return {
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'loss': loss_value,
        'label_to_index': label_to_index,
        'unique_labels': unique_labels,
        'input_size': input_size,
        'output_size': output_size,
    }


best_loss = float('inf')
avg_loss = float('inf')  # Loss of the most recent epoch; stays inf if no epoch runs
for epoch in range(num_epochs):
    model.train()
    total_loss = 0

    for features_batch, labels_batch in dataloader:
        # Move data to the training device
        features_batch, labels_batch = features_batch.to(device), labels_batch.to(device)

        # Zero gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(features_batch)

        # Calculate loss
        loss = criterion(outputs, labels_batch)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Calculate average loss for the epoch
    avg_loss = total_loss / len(dataloader)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Average Loss: {avg_loss:.4f}')

    # Save the best model
    if avg_loss < best_loss:
        best_loss = avg_loss
        torch.save(_build_checkpoint(best_loss), 'best_enhanced_gru_model.pth')
        print(f"Best model saved with loss: {best_loss:.4f}")

# Final model save. This goes to its own file: writing the last-epoch weights
# to 'best_enhanced_gru_model.pth' would overwrite the best checkpoint while
# still labelling it with best_loss.
torch.save(_build_checkpoint(avg_loss), 'final_enhanced_gru_model.pth')

print("Training completed. Models saved.")

# Calculating the accuracy and other metrics on all the categories

In [None]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

# Define the same GRU model architecture
class EnhancedGRUModel(nn.Module):
    """GRU classifier; layer names must match the trained checkpoint's state dict.

    Args:
        input_size: Size of each input feature vector.
        hidden_size: GRU hidden size.
        output_size: Number of classes.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability (GRU inter-layer and head).
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=3, dropout=0.3):
        super().__init__()

        self.gru = nn.GRU(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )

        self.fc1 = nn.Linear(hidden_size, 2 * hidden_size)
        self.fc2 = nn.Linear(2 * hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Classify each sequence in ``x`` from its final GRU output."""
        gru_out, _last_hidden = self.gru(x)
        final_step = gru_out[:, -1, :]
        first = self.dropout(self.relu(self.fc1(final_step)))
        second = self.dropout(self.relu(self.fc2(first)))
        logits = self.fc3(second)
        return logits

# Define the device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the held-out TEST split written by the feature-extraction cell.
# ('extracted_features1.pt' is the training split; measuring accuracy on it
# reports how well the model memorised its training data, not how well it
# generalises.)
features_path = r'D:\\FAI Project\\FAI_Data_Final\\extracted_features2.pt'
loaded_data = torch.load(features_path)

# Extract features and labels
all_features = loaded_data['features']
all_labels = loaded_data['labels']
label_to_index = loaded_data['label_to_index']
unique_labels = loaded_data['unique_labels']

# Prepare the dataset (order is irrelevant for evaluation, so no shuffling)
dataset = TensorDataset(all_features, all_labels)
dataloader = DataLoader(dataset, batch_size=16, shuffle=False)

# Load the best model.
# map_location lets a checkpoint saved on a GPU be opened on a CPU-only machine.
model_path = 'best_enhanced_gru_model.pth'
checkpoint = torch.load(model_path, map_location=device)
model = EnhancedGRUModel(
    input_size=all_features.shape[2],
    hidden_size=2048,  # Must equal the hidden size used during training
    output_size=len(unique_labels)
).to(device)
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()

# Evaluation counters: overall, per class, and a (true, predicted) confusion matrix
num_classes = len(unique_labels)
correct_predictions = 0
total_predictions = 0
class_correct = dict.fromkeys(unique_labels, 0)
class_total = dict.fromkeys(unique_labels, 0)

# Confusion matrix (rows: true class index, columns: predicted class index)
confusion_matrix = torch.zeros(num_classes, num_classes, dtype=torch.int32)

# Disable gradient computation for evaluation
with torch.no_grad():
    for features_batch, labels_batch in dataloader:
        # Move data to the evaluation device
        features_batch = features_batch.to(device)
        labels_batch = labels_batch.to(device)

        # Forward pass, then take the index of the largest logit per sample
        outputs = model(features_batch)
        predicted = torch.max(outputs, 1)[1]

        # Update overall counters
        total_predictions += labels_batch.size(0)
        correct_predictions += (predicted == labels_batch).sum().item()

        # Update the per-sample statistics using plain Python ints
        for true_idx, pred_idx in zip(labels_batch.tolist(), predicted.tolist()):
            confusion_matrix[true_idx, pred_idx] += 1

            true_label = unique_labels[true_idx]
            if true_idx == pred_idx:
                class_correct[true_label] += 1
            class_total[true_label] += 1
# Calculate overall accuracy (percentage of correctly classified samples)
overall_accuracy = 100 * correct_predictions / total_predictions
print(f"\nOverall Accuracy: {overall_accuracy:.2f}%")

# Print class-wise accuracy; classes without any sample are reported as 0
print("\nClass-wise Accuracy:")
for label in unique_labels:
    class_acc = 100 * class_correct[label] / class_total[label] if class_total[label] > 0 else 0
    print(f"{label}: {class_acc:.2f}%")

# Print confusion matrix
print("\nConfusion Matrix:")
print("Rows: True Labels, Columns: Predicted Labels")
print(confusion_matrix.numpy())

# Optional: Visualize confusion matrix as an annotated heatmap
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=(10, 8))
sns.heatmap(confusion_matrix.numpy(), 
            annot=True, 
            fmt='d', 
            xticklabels=unique_labels, 
            yticklabels=unique_labels)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.tight_layout()
plt.show()

# Prediction on a single test video using the saved model

In [None]:
import torch
import torch.nn as nn
import cv2
import numpy as np
import os

# Flexible GRU Model with configurable input size
class FlexibleGRUModel(nn.Module):
    """GRU classifier whose layer names mirror the training model, so its checkpoint loads directly.

    Args:
        input_size: Size of each input feature vector.
        hidden_size: GRU hidden size.
        output_size: Number of classes.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability (GRU inter-layer and head).
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=3, dropout=0.3):
        super().__init__()

        self.gru = nn.GRU(
            input_size,
            hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            batch_first=True,
        )

        expanded = hidden_size * 2
        self.fc1 = nn.Linear(hidden_size, expanded)
        self.fc2 = nn.Linear(expanded, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return class logits for a batch of batch-first feature sequences."""
        per_step, _ = self.gru(x)
        # Keep only the output of the final time step
        summary = per_step.select(1, -1)
        summary = self.dropout(self.relu(self.fc1(summary)))
        summary = self.dropout(self.relu(self.fc2(summary)))
        return self.fc3(summary)

def preprocess_video(video_path, target_frames=100, target_size=(224, 224), feature_reduction=True):
    """Load the first ``target_frames`` frames of a video as flattened pixel rows.

    Each frame is resized to ``target_size``, converted to float32, divided by
    255 and flattened. When the video has fewer frames than requested, the
    result is padded with all-zero frames.

    Args:
        video_path: Path of the video file.
        target_frames: Number of rows in the result.
        target_size: Size passed to ``cv2.resize`` for every frame.
        feature_reduction: Accepted for backward compatibility. No reduction
            is implemented, so both settings return the same array.

    Returns:
        ``np.ndarray`` of shape ``(target_frames, pixels_per_frame)``.

    Raises:
        ValueError: If no frame could be read from the video.
    """
    cap = cv2.VideoCapture(video_path)

    frames = []
    try:
        # Reading stops at target_frames, so the list never needs truncating
        while len(frames) < target_frames and cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_resized = cv2.resize(frame, target_size)
            frames.append(frame_resized.astype(np.float32) / 255.0)
    finally:
        cap.release()

    # Without a first frame there is nothing to size the zero padding from
    if not frames:
        raise ValueError(f"No frames could be read from video: {video_path}")

    # Pad short videos with black frames up to target_frames
    if len(frames) < target_frames:
        frames.extend([np.zeros_like(frames[0])] * (target_frames - len(frames)))

    # One flattened row per frame
    return np.array(frames).reshape(target_frames, -1)

def extract_features(video_path, target_feature_size=1000):
    """Turn a video into a ``(1, frames, target_feature_size)`` float tensor.

    The flattened pixel rows from ``preprocess_video`` are brought to the
    requested width. Wider rows are subsampled at evenly spaced column
    positions (no averaging takes place); narrower rows are zero-padded on
    the right.

    NOTE(review): the values are raw pixels, while the training cell feeds
    ResNet outputs to the GRU - confirm the checkpoint used with this
    function was trained on matching features.

    Args:
        video_path: Path of the video file.
        target_feature_size: Number of values kept per frame.

    Returns:
        ``torch.FloatTensor`` with a leading batch axis of size 1.
    """
    frame_matrix = preprocess_video(video_path)
    width = frame_matrix.shape[1]

    if width > target_feature_size:
        # Pick target_feature_size evenly spaced columns
        columns = np.linspace(0, width - 1, target_feature_size).astype(int)
        frame_matrix = frame_matrix[:, columns]
    elif width < target_feature_size:
        # Copy into the left part of a zero matrix of the requested width
        widened = np.zeros((frame_matrix.shape[0], target_feature_size))
        widened[:, :width] = frame_matrix
        frame_matrix = widened

    return torch.FloatTensor(frame_matrix).unsqueeze(0)

def predict_video_label(model, features, label_to_index, unique_labels):
    """Classify one feature tensor and return the label name.

    Args:
        model: Trained model; it is switched to eval mode.
        features: Input tensor holding a single sample (batch size 1).
        label_to_index: Label mapping; not used by this function.
        unique_labels: Sequence of label names indexed by class index.

    Returns:
        The entry of ``unique_labels`` at the index of the largest model output.
    """
    model.eval()

    with torch.no_grad():
        # Run on whichever device the model's parameters live on
        target_device = next(model.parameters()).device
        logits = model(features.to(target_device))
        best_index = torch.max(logits, 1)[1].item()

    return unique_labels[best_index]

def main(video_path, model_path):
    """Load a saved checkpoint and predict the label of one video clip.

    NOTE(review): ``extract_features`` in this cell builds its input from raw
    pixel values - confirm the checkpoint was trained on the same kind of
    features, otherwise the prediction is not meaningful.

    Args:
        video_path: Path of the clip to classify.
        model_path: Path of the checkpoint written by the training cell.

    Returns:
        The predicted label name.
    """
    # Determine device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {device}")

    # The training cell stores only tensors and plain Python containers, so
    # the restricted loader is sufficient. weights_only=False would unpickle
    # arbitrary objects and execute code embedded in a tampered file.
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)

    # Retrieve necessary information from checkpoint
    label_to_index = checkpoint['label_to_index']
    unique_labels = checkpoint['unique_labels']

    # Set up model parameters
    hidden_size = 2048  # Must equal the hidden size used during training
    output_size = len(unique_labels)

    # Use the input size recorded at training time; 1000 is the fallback for
    # checkpoints that do not carry it
    input_size = checkpoint.get('input_size', 1000)

    # Initialize model and load the trained weights
    model = FlexibleGRUModel(input_size, hidden_size, output_size).to(device)
    model.load_state_dict(checkpoint['model_state_dict'])

    # Extract features from the video, sized to what the model expects
    video_features = extract_features(video_path, target_feature_size=input_size)

    # Predict label
    predicted_label = predict_video_label(model, video_features, label_to_index, unique_labels)

    print(f"Predicted Label: {predicted_label}")
    return predicted_label

# Example usage
if __name__ == "__main__":
    video_path = r"Corner_half_1_pos_2545244_idx_92.mp4"
    model_path = 'best_enhanced_gru_model.pth'

    main(video_path, model_path)

# Final Stitching Logic by taking the Big Video as the input (Options - 3/5 min highlight)

In [None]:
import os
import torch
import torch.nn as nn
import cv2
import numpy as np
from torchvision import transforms
from torchvision.models import resnet50
import torch.nn.functional as F
from collections import Counter
import itertools

class EnhancedGRUModel(nn.Module):
    """GRU classifier preceded by a linear layer that maps inputs to 1000 features.

    Args:
        input_size: Size of each incoming feature vector.
        hidden_size: GRU hidden size.
        output_size: Number of classes.
        num_layers: Number of stacked GRU layers.
        dropout: Dropout probability (GRU inter-layer and head).
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=3, dropout=0.3):
        super().__init__()

        reduced_size = 1000
        # Project every time step down to reduced_size features
        self.feature_reducer = nn.Linear(input_size, reduced_size)

        self.gru = nn.GRU(
            reduced_size,
            hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )

        self.fc1 = nn.Linear(hidden_size, hidden_size * 2)
        self.fc2 = nn.Linear(hidden_size * 2, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return class logits computed from the last time step of ``x``."""
        reduced = self.feature_reducer(x)
        states = self.gru(reduced)[0]
        head = states[:, -1, :]
        for layer in (self.fc1, self.fc2):
            head = self.dropout(self.relu(layer(head)))
        return self.fc3(head)

class HighlightGenerator:
    """Cut a long video into fixed-length clips, classify each clip and stitch
    the highest-ranked ones into a highlight video.

    NOTE(review): every decoded frame of the input video is kept in memory
    until the highlight file is written - confirm this fits the intended
    video lengths.
    """

    # Length of each analysed clip, in seconds
    CLIP_DURATION_SECONDS = 20

    def __init__(self, model_path='best_enhanced_gru_model.pth'):
        """Load the classifier checkpoint and set up the frame feature extractor.

        Args:
            model_path: Path of the checkpoint written by the training cell;
                it must contain ``output_size``, ``model_state_dict`` and
                ``label_to_index``.
        """
        # Rankings dictionary (a lower number is picked earlier)
        self.rankings = {
            "Kick-off": 1, "Goal": 2, "Shots on target": 3, "Red card": 4, 
            "Corner": 5, "Yellow card": 6, "Shots off target": 7, 
            "Foul": 8, "Direct free-kick": 9, "Offside": 10, "nothing": 11
        }

        # Device configuration
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load the model with weights_only=True
        checkpoint = torch.load(model_path, map_location=self.device, weights_only=True)

        # Model parameters
        input_size = 2048  # ResNet feature size
        hidden_size = 2048
        output_size = checkpoint['output_size']

        # Initialize model
        self.model = EnhancedGRUModel(input_size, hidden_size, output_size).to(self.device)

        # strict=False tolerates parameters that are absent from the
        # checkpoint, but such layers keep their random initial weights.
        # Report them so the mismatch cannot go unnoticed.
        load_result = self.model.load_state_dict(checkpoint['model_state_dict'], strict=False)
        if load_result.missing_keys:
            print(f"Warning: checkpoint has no weights for {load_result.missing_keys}; "
                  "these parameters stay randomly initialised.")
        if load_result.unexpected_keys:
            print(f"Warning: checkpoint entries not used by the model: {load_result.unexpected_keys}")
        self.model.eval()

        # Store label mapping
        self.label_to_index = checkpoint['label_to_index']
        self.index_to_label = {v: k for k, v in self.label_to_index.items()}

        # ResNet feature extractor: every child module except the last one
        resnet = resnet50(pretrained=True)
        self.feature_extractor = torch.nn.Sequential(*list(resnet.children())[:-1])
        self.feature_extractor.to(self.device)
        self.feature_extractor.eval()

        # Image transformations
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

    def extract_features(self, video_path):
        """Split the video into consecutive clips and compute features for each.

        Args:
            video_path: Path of the video file.

        Returns:
            ``(video_features, video_clips)``: a list with one feature tensor
            per clip and a list with the frames of each clip. The last clip
            is padded by repeating its final frame. Both lists are empty when
            no frame can be read.
        """
        cap = cv2.VideoCapture(video_path)

        video_features = []
        video_clips = []
        current_clip = []

        try:
            fps = cap.get(cv2.CAP_PROP_FPS)
            clip_frames = int(fps * self.CLIP_DURATION_SECONDS)

            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                current_clip.append(frame)

                # A full clip is processed immediately and a new one started
                if len(current_clip) == clip_frames:
                    video_features.append(self.process_clip(current_clip))
                    video_clips.append(current_clip)
                    current_clip = []
        finally:
            cap.release()

        # Handle last incomplete clip if exists
        if current_clip:
            # Pad the clip to the expected length by repeating the last frame
            current_clip.extend([current_clip[-1]] * (clip_frames - len(current_clip)))
            video_features.append(self.process_clip(current_clip))
            video_clips.append(current_clip)

        return video_features, video_clips

    def process_clip(self, clip):
        """Compute one feature vector per frame of a clip.

        Args:
            clip: Sequence of frames as read by OpenCV.

        Returns:
            Float tensor with a leading batch axis of size 1, followed by one
            row per frame.
        """
        clip_features = []
        with torch.no_grad():
            for frame in clip:
                input_tensor = self.transform(frame).unsqueeze(0).to(self.device)
                features = self.feature_extractor(input_tensor)
                clip_features.append(features.squeeze().cpu().numpy())

        # Stack in numpy first: building a tensor from a list of arrays is slow
        return torch.tensor(np.array(clip_features)).float().unsqueeze(0)

    def predict_highlights(self, video_features):
        """Predict one label per clip.

        Args:
            video_features: List of per-clip feature tensors.

        Returns:
            List of label names, in clip order.
        """
        predictions = []
        with torch.no_grad():
            for features in video_features:
                features = features.to(self.device)
                output = self.model(features)
                prob = F.softmax(output, dim=1)
                pred = torch.argmax(prob, dim=1).item()
                predicted_label = self.index_to_label[pred]
                predictions.append(predicted_label)
                print(f"Predicted label for the clip: {predicted_label}")  # Print predicted label for each clip

        return predictions

    def _select_clips(self, labeled_clips, clip_duration, max_duration):
        """Choose clips for the highlight reel.

        One Kick-off clip (if any) comes first. After that, clips are taken
        round-robin over the labels in ranking order, one clip per label per
        pass, until ``max_duration`` is reached or no clips are left.

        Args:
            labeled_clips: Mapping of label -> list of clips in video order.
                It is not modified.
            clip_duration: Duration counted for every selected clip.
            max_duration: Time budget, in the same unit as ``clip_duration``.

        Returns:
            The selected clips in selection order.
        """
        from collections import deque

        queues = {label: deque(clips) for label, clips in labeled_clips.items()}
        selected = []
        total_duration = 0

        # 1. Start with a Kick-off clip if available
        kickoffs = queues.get("Kick-off")
        if kickoffs:
            selected.append(kickoffs.popleft())
            total_duration += clip_duration

        sorted_labels = sorted(self.rankings, key=self.rankings.get)

        # 2. Fill the remaining budget, best-ranked labels first
        while total_duration < max_duration:
            added_any = False
            for label in sorted_labels:
                if total_duration >= max_duration:
                    break
                queue = queues.get(label)
                if queue:
                    selected.append(queue.popleft())
                    total_duration += clip_duration
                    added_any = True
            # A pass that adds nothing means all clips are used up. Without
            # this exit the loop would spin forever on short videos.
            if not added_any:
                break

        return selected

    def create_highlights(self, video_path, highlight_duration_minutes):
        """Generate a highlight video and return its file name.

        Clips are classified, then stitched in selection order (see
        ``_select_clips``): a Kick-off clip first, then round-robin by label
        ranking. The result is therefore not necessarily chronological.

        Args:
            video_path: Path of the source video.
            highlight_duration_minutes: Target highlight length in minutes.
                The result is shorter when the video has too few clips.

        Returns:
            Path of the written file, ``highlights_<minutes>min.mp4``.

        Raises:
            ValueError: If the video yields no frames, reports no valid frame
                rate, or no clip could be selected.
            RuntimeError: If the output video cannot be opened for writing.
        """
        # Extract features and predict labels
        video_features, video_clips = self.extract_features(video_path)
        if not video_clips:
            raise ValueError(f"No frames could be read from {video_path}")
        predictions = self.predict_highlights(video_features)

        # Group clips by label, keeping video order within each label
        labeled_clips = {}
        for label, clip in zip(predictions, video_clips):
            labeled_clips.setdefault(label, []).append(clip)

        # Get video capture details
        cap = cv2.VideoCapture(video_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        cap.release()
        if fps <= 0:
            raise ValueError(f"Could not determine the frame rate of {video_path}")
        clip_duration = len(video_clips[0]) / fps
        max_duration = highlight_duration_minutes * 60

        highlight_clips = self._select_clips(labeled_clips, clip_duration, max_duration)
        if not highlight_clips:
            raise ValueError("No clips were selected for the highlight video")

        # Video writing process
        output_path = f'highlights_{highlight_duration_minutes}min.mp4'
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')

        # Create video writer sized like the first selected frame
        height, width = highlight_clips[0][0].shape[:2]
        out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        if not out.isOpened():
            out.release()
            raise RuntimeError(f"Could not open video writer for {output_path}")

        # Stitch clips together by writing frames
        try:
            for clip in highlight_clips:
                for frame in clip:
                    out.write(frame)
        finally:
            out.release()

        return output_path
        
def main():
    """Ask for a highlight length (3 or 5 minutes) and build the highlight video.

    The source video path is fixed to ``Test_7min.mp4``. The function returns
    early with a message when that file does not exist.
    """
    # Source video (hard-coded)
    video_path = 'Test_7min.mp4'

    # Validate video path
    if not os.path.exists(video_path):
        print("Invalid video path. Please check and try again.")
        return

    # Keep asking until the user enters one of the supported durations
    duration = None
    while duration is None:
        try:
            answer = int(input("How many minutes of highlights do you want? (3/5): "))
        except ValueError:
            print("Please enter a valid number.")
            continue

        if answer in (3, 5):
            duration = answer
        else:
            print("Please choose 3 or 5 minutes.")

    # Generate highlights
    generator = HighlightGenerator()
    output_video = generator.create_highlights(video_path, duration)

    print(f"Highlights generated successfully: {output_video}")

if __name__ == "__main__":
    main()