In [None]:
# Imports
import os
import pandas as pd
import numpy as np
import cv2
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import StratifiedKFold
from IPython.display import display, Image
import imageio

In [None]:
# Class name -> integer id used as the classification target.
label_to_int = {
    'corner': 0,
    'longpass': 1,
    'ontarget': 2,
    'penalty': 3,
    'substitution': 4,
    'throw-in': 5,
}
# Integer id -> class name, derived from the mapping above so the two stay in sync.
int_to_label = {class_id: class_name for class_name, class_id in label_to_int.items()}

In [None]:
class VideoDataset(Dataset):
    """Dataset of video clips laid out as ``<video_folder>/<label>/<file>``.

    Indexing returns ``(video_tensor, label)``: ``video_tensor`` stacks
    ``num_frames`` frames sampled across the video along a new leading
    dimension, and ``label`` is the integer id obtained from the module-level
    ``label_to_int`` mapping.

    Args:
        video_folder: Root directory holding one sub-directory per class.
        num_frames: Number of frames sampled from every video (must be >= 1).
        transform: Optional callable applied to every selected RGB frame; it
            must return a tensor. When omitted, the raw frame arrays are
            converted to tensors unchanged.

    Raises:
        ValueError: If ``num_frames`` is smaller than 1.
        KeyError: If a class directory name is missing from ``label_to_int``.
    """

    def __init__(self, video_folder, num_frames=5, transform=None):
        if num_frames < 1:
            raise ValueError(f'num_frames must be >= 1, got {num_frames}')

        self.video_folder = video_folder
        self.num_frames = num_frames
        self.transform = transform

        self.file_names = []   # '<video_folder>/<label>/<file>', parallel to video_files
        self.labels_dic = {}   # '<label>/<file>' -> integer class id
        self.video_files = []  # '<label>/<file>'
        self.labels = []       # integer class ids, parallel to video_files

        # Sort the listings so sample order (and therefore any index-based
        # split) does not depend on the filesystem's enumeration order.
        for label in sorted(os.listdir(video_folder)):
            label_dir = f'{video_folder}/{label}'
            # Ignore stray files and hidden directories such as
            # '.ipynb_checkpoints' that are not class folders.
            if label.startswith('.') or not os.path.isdir(label_dir):
                continue
            class_id = label_to_int[label]
            # List each class directory once instead of once per attribute.
            for f in sorted(os.listdir(label_dir)):
                if f.startswith('.'):
                    continue
                relative_path = f'{label}/{f}'
                self.video_files.append(relative_path)
                self.labels.append(class_id)
                self.file_names.append(f'{label_dir}/{f}')
                self.labels_dic[relative_path] = class_id

    def __len__(self):
        """Return the number of videos found under ``video_folder``."""
        return len(self.video_files)

    def __getitem__(self, idx):
        """Load video ``idx`` and return ``(video_tensor, label)``.

        Raises:
            ValueError: If no frame could be decoded from the video.
        """
        relative_path = self.video_files[idx]
        video_path = os.path.join(self.video_folder, relative_path)
        frames = self.read_frames(video_path)

        selected_frames = self._select_frames(frames, video_path)

        if self.transform is not None:
            selected_frames = [self.transform(frame) for frame in selected_frames]
        else:
            # torch.stack only accepts tensors, so convert the raw arrays.
            selected_frames = [
                torch.from_numpy(np.ascontiguousarray(frame)) for frame in selected_frames
            ]

        # Stack frames along a new leading dimension to form one clip tensor.
        video_tensor = torch.stack(selected_frames, dim=0)

        label = self.labels_dic[relative_path]
        return video_tensor, label

    def _select_frames(self, frames, video_path=''):
        """Pick exactly ``self.num_frames`` frames spread across ``frames``."""
        total = len(frames)
        if total == 0:
            raise ValueError(f'No frames could be read from {video_path!r}')

        if total >= self.num_frames:
            # Regular case: fixed stride, then keep the first num_frames picks.
            step = total // self.num_frames
            return frames[::step][:self.num_frames]

        # Short clip: a stride of 0 is invalid, so spread the indices over the
        # available frames and repeat frames as needed to reach num_frames.
        return [frames[(i * total) // self.num_frames] for i in range(self.num_frames)]

    def read_frames(self, video_path):
        """Decode every frame of ``video_path`` and return them as RGB arrays.

        Returns an empty list when the file cannot be opened or decoded.
        """
        cap = cv2.VideoCapture(video_path)
        frames = []
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                # OpenCV uses BGR by default; convert to RGB so the channel
                # order matches the display code in this notebook.
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        finally:
            # Release the capture even if decoding raises.
            cap.release()
        return frames

# Example usage: build the dataset from the video folder.
video_folder = './SoccerAct10Dataset'

# Per-frame preprocessing: array -> PIL image -> 64x64 resize -> tensor.
# Alternative size tried previously: transforms.Resize((224, 224)).
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(size=(64, 64)),
    transforms.ToTensor(),
])

dataset = VideoDataset(video_folder=video_folder, num_frames=5, transform=transform)

In [None]:
# Pre-trained VGG16 from torchvision.
vgg16 = models.vgg16(pretrained=True)

# Chain every top-level child module of VGG16 except the last one.
vgg16 = nn.Sequential(*tuple(vgg16.children())[:-1])

# Switch the truncated network to evaluation mode.
vgg16.eval()

# Define your own model
class CustomModel(nn.Module):
    """Clip classifier: per-frame CNN features -> dense layers -> GRU -> logits.

    Args:
        vgg_features: Module applied to every frame (a batch of
            ``(channels, height, width)`` images).
        num_classes: Number of output logits.
        num_time_steps: Number of frames consumed from each clip.
        input_shape: Optional ``(channels, height, width)`` of a single frame,
            used to size the first linear layer. When omitted, the shape is
            read from the first item of the notebook-level ``dataset``
            (the original behaviour).
    """

    def __init__(self, vgg_features, num_classes, num_time_steps, input_shape=None):
        super(CustomModel, self).__init__()
        self.num_time_steps = num_time_steps
        self.vgg_features = vgg_features

        # Probe the feature extractor instead of hard-coding its output size.
        vgg_output_size = self._calculate_vgg_output_size(input_shape)
        self.reduce_vgg = nn.Linear(vgg_output_size, 256)
        # One dense layer per time step (weights are not shared across steps).
        self.time_distributed_fc = nn.ModuleList([
            nn.Linear(256, 256) for _ in range(num_time_steps)
        ])

        self.flat = nn.Flatten()
        # The GRU consumes the concatenation of all per-frame features.
        self.gru = nn.GRU(256 * num_time_steps, 20, 2)
        self.dropout1 = nn.Dropout(0.25)
        self.fc = nn.Linear(20, num_classes)

    def _calculate_vgg_output_size(self, input_shape=None):
        """Return the flattened per-frame feature size produced by ``vgg_features``."""
        if input_shape is None:
            # Fall back to the notebook-level dataset: the last three
            # dimensions of a clip are (channels, height, width).
            input_shape = tuple(dataset[0][0].shape[-3:])
        channels, height, width = input_shape

        # A single dummy frame is enough to measure the output size; no
        # gradients are needed for this probe.
        sample_input = torch.zeros(1, channels, height, width)
        with torch.no_grad():
            vgg_output = self.vgg_features(sample_input)

        return vgg_output[0].numel()

    def forward(self, x):
        """Compute class logits for a batch of clips.

        Args:
            x: Tensor of shape ``(batch, num_frames, channels, height, width)``.
                Only the first ``num_time_steps`` frames are used.

        Returns:
            Tensor of shape ``(batch, num_classes)``.

        Raises:
            ValueError: If a clip has fewer than ``num_time_steps`` frames.
        """
        batch_size, num_frames, channels, height, width = x.size()
        if num_frames < self.num_time_steps:
            raise ValueError(
                f'Expected at least {self.num_time_steps} frames per clip, got {num_frames}'
            )

        # Fold frames into the batch dimension so the CNN sees plain images.
        frames = x.reshape(batch_size * num_frames, channels, height, width)
        features = self.vgg_features(frames).reshape(batch_size, num_frames, -1)

        # Per-time-step dense layers. Stacking the results keeps them on the
        # input's device/dtype and avoids in-place writes into a CPU buffer.
        per_step = [
            F.relu(step_fc(self.reduce_vgg(features[:, step, :])))
            for step, step_fc in enumerate(self.time_distributed_fc)
        ]
        x = torch.stack(per_step, dim=1)  # (batch, num_time_steps, 256)
        x = self.flat(x)                  # (batch, num_time_steps * 256)

        # nn.GRU interprets a 2-D input as ONE unbatched sequence, which would
        # carry hidden state from sample to sample inside a batch. Add an
        # explicit sequence dimension of length 1 so every sample is processed
        # independently; parameter shapes are unchanged.
        # NOTE(review): feeding the per-frame features as a length-T sequence
        # would be the more conventional design but changes the GRU's
        # input_size (and thus saved checkpoints) - confirm before changing.
        x, _ = self.gru(x.unsqueeze(0))   # (1, batch, 20)
        x = x.squeeze(0)
        x = self.dropout1(x)

        # Final classification layer.
        return self.fc(x)

# Instantiate the classifier on top of the truncated VGG16 defined above.
num_classes = 6  # number of target classes; change to match your task
num_time_steps = 5  # frames consumed per clip
custom_model = CustomModel(
    vgg_features=vgg16,
    num_classes=num_classes,
    num_time_steps=num_time_steps,
)

In [None]:
# --- Cross-validation configuration ---
num_folds = 5  # number of stratified folds; adjust as needed
skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=42)

# --- Optimisation hyper-parameters ---
num_epochs = 5
learning_rate = 0.001

# Stratified K-fold over the sample indices; a fresh model is trained per fold.
# (Enable torch.autograd.set_detect_anomaly(True) here when debugging gradients.)
for fold, (train_index, val_index) in enumerate(skf.split(range(len(dataset)), dataset.labels)):
    print(f'Fold {fold + 1}/{num_folds}')

    # Index-based views of the dataset for this fold and their loaders.
    train_dataset = torch.utils.data.Subset(dataset, train_index)
    val_dataset = torch.utils.data.Subset(dataset, val_index)
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # New model built on the `features` part of a pre-trained VGG16.
    custom_model = CustomModel(models.vgg16(pretrained=True).features, num_classes, num_time_steps)

    # Loss and optimiser for this fold.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(custom_model.parameters(), lr=learning_rate)

    # ---- Training ----
    for epoch in range(num_epochs):
        custom_model.train()
        running_loss = 0.0

        for videos, labels in train_loader:
            optimizer.zero_grad()
            outputs = custom_model(videos)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Mean training loss over the batches of this epoch.
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {running_loss / len(train_loader)}')

    # ---- Validation ----
    custom_model.eval()
    val_loss, correct, total = 0.0, 0, 0

    with torch.no_grad():
        for videos, labels in val_loader:
            outputs = custom_model(videos)
            val_loss += criterion(outputs, labels).item()

            # Index of the largest logit per sample is the predicted class.
            predicted = outputs.max(dim=1).indices
            total += labels.shape[0]
            correct += (predicted == labels).sum().item()

    # Mean validation loss per batch and accuracy over the fold.
    print(f'Validation Loss: {val_loss / len(val_loader)}, Accuracy: {100 * correct / total}%')

print('Cross-validation finished')

In [None]:
# Persist the weights of the model currently bound to `custom_model`.
torch.save(obj=custom_model.state_dict(), f='model1.pth')

In [None]:
# Function to create a gif from frames
def create_gif(frames, gif_path):
    """Write ``frames`` to ``gif_path`` as an animation via ``imageio.mimsave``.

    Args:
        frames: Sequence of images handed to imageio.
        gif_path: Destination path of the GIF file.
    """
    frame_duration = 0.1  # forwarded as imageio's ``duration`` argument
    imageio.mimsave(gif_path, frames, duration=frame_duration)

def predict_on_video(video_path):
    """Predict class probabilities for a video that belongs to ``dataset``.

    Args:
        video_path: Path exactly as stored in ``dataset.file_names``.

    Returns:
        Tuple ``(probabilities, actual_label)`` where ``probabilities`` maps
        every class name to its softmax probability and ``actual_label`` is the
        ground-truth class name.

    Raises:
        ValueError: If ``video_path`` is not listed in ``dataset.file_names``.
    """
    idx = dataset.file_names.index(video_path)

    # Index the dataset once: every access decodes the whole video again.
    clip, actual_label = dataset[idx]

    # Run inference in eval mode so dropout cannot randomise the prediction,
    # then restore whatever mode the model was in.
    was_training = custom_model.training
    custom_model.eval()
    try:
        with torch.no_grad():
            predictions = custom_model(clip.unsqueeze(0))
            # Convert logits to probabilities.
            probabilities = torch.nn.functional.softmax(predictions, dim=1)
    finally:
        custom_model.train(was_training)

    # Drop the batch dimension and convert to plain Python floats.
    probabilities_list = probabilities.squeeze(0).cpu().tolist()

    # Map class names to their probabilities.
    result = {int_to_label[i]: probability for i, probability in enumerate(probabilities_list)}

    return result, int_to_label[actual_label]


# Function to display videos with labels and predictions
def display_video_with_predictions(video_path):
    """Show a video as a GIF together with its true label and predictions.

    Decodes the video, writes it to ``temp.gif``, prints the ground-truth
    label and the per-class probabilities returned by ``predict_on_video``,
    and displays the GIF in the notebook.

    Args:
        video_path: Path of the video; it must be one of the dataset's files.

    Raises:
        ValueError: If no frame could be decoded from ``video_path``.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            # Convert frame to RGB (OpenCV uses BGR by default).
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    finally:
        # Release the capture even if decoding raises.
        cap.release()

    if not frames:
        raise ValueError(f'No frames could be read from {video_path!r}')

    # Save frames as GIF through the shared helper.
    gif_path = 'temp.gif'
    create_gif(frames, gif_path)

    predicted_labels, actual_label = predict_on_video(video_path)

    print(f'Actual Label: {actual_label}')

    for class_name, probability in predicted_labels.items():
        print(f'Predicted Probability for {class_name}: {probability}')

    # Display GIF in Jupyter Notebook
    display(Image(filename=gif_path, width=500))

# Show one example video (the first listed file) for every class folder.
for label in os.listdir(video_folder):
    first_files = os.listdir(f'{video_folder}/{label}')[:1]
    for file in first_files:
        display_video_with_predictions(f'{video_folder}/{label}/{file}')