### 1. Set Up Environment

First, ensure your environment is set up with the necessary libraries:


In [None]:
!conda install -c pytorch-nightly torchvision
!pip install img2vec_pytorch


### 2. Load and Process Video Frames

We'll write a Python function that opens a video and samples its frames at (approximately) a specified frame rate, returning them as PIL images. This is the first step in preparing the dataset:



In [None]:
from pathlib import Path

import cv2
import numpy as np
import torch
from img2vec_pytorch import Img2Vec
from PIL import Image

def load_video_frames(video_path, fps=8):
    """Decode a video and return a subsample of its frames as RGB PIL images.

    Every ``frame_skip``-th decoded frame is kept, where
    ``frame_skip = max(1, round(native_fps / fps))``. The effective sampling
    rate is therefore only approximately ``fps`` and never exceeds the
    video's native frame rate.

    Args:
        video_path: Location of the video file. ``pathlib.Path`` objects are
            converted to ``str`` before being handed to OpenCV.
        fps: Target sampling rate in frames per second; must be positive.

    Returns:
        list: The sampled frames, in playback order, as PIL images. The list
        is empty when no frame could be read from ``video_path``.

    Raises:
        ValueError: If ``fps`` is zero or negative.
    """
    # Fail early with a clear message instead of a ZeroDivisionError below.
    if fps <= 0:
        raise ValueError(f"fps must be positive, got {fps!r}")

    if isinstance(video_path, Path):
        video_path = str(video_path)

    video = cv2.VideoCapture(video_path)
    frames = []
    try:
        frame_rate = video.get(cv2.CAP_PROP_FPS)
        frame_skip = max(1, round(frame_rate / fps))

        count = 0
        success, frame = video.read()
        while success:
            if count % frame_skip == 0:
                # OpenCV decodes to BGR; PIL expects RGB channel order.
                frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(Image.fromarray(frame_rgb))
            success, frame = video.read()
            count += 1
    finally:
        # Always free the capture handle, even if decoding/conversion fails.
        video.release()

    return frames


### 3. Frame Embedding

Use the `Img2Vec` library to replace images with their embedded representations:

In [None]:
def get_frame_embeddings(frames, img2vec):
    """Embed every frame with ``img2vec`` and return the results in frame order.

    Args:
        frames: Iterable of images to embed.
        img2vec: Object exposing ``get_vec(image, tensor=True)``.

    Returns:
        list: One embedding per input frame, as returned by ``get_vec``.
    """
    embeddings = []
    for image in frames:
        embeddings.append(img2vec.get_vec(image, tensor=True))
    return embeddings

### 4. Prepare Training Data

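Generate input-target pairs from the frame embeddings: each input is the three embeddings preceding a frame and the target is that frame's own embedding. Positions before the start of the video (indices < 0) are filled with an all-zero null frame: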

In [None]:
def prepare_data(embedded_frames, window=3):
    """Build (context, target) training pairs from a sequence of embeddings.

    For each frame ``i`` the input is the ``window`` embeddings that precede
    it, ordered oldest to newest, and the target is the embedding of frame
    ``i`` itself. Context slots that fall before the first frame are filled
    with an all-zero "null" embedding.

    Args:
        embedded_frames: Sequence of equally-shaped tensors, one per frame.
        window: Number of preceding frames used as context. Defaults to 3.

    Returns:
        tuple: ``(X, y)`` where ``X`` stacks the contexts along a new leading
        dimension (``len(embedded_frames)``, ``window``, *embedding shape*)
        and ``y`` stacks the targets (``len(embedded_frames)``, *embedding
        shape*). For an empty input both tensors have a leading dimension of
        0, since the embedding shape is unknown.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be at least 1, got {window!r}")

    n_frames = len(embedded_frames)
    if n_frames == 0:
        # Nothing to pair up; return empty tensors rather than failing on [0].
        return torch.empty(0, window), torch.empty(0)

    # zeros_like keeps the padding on the same dtype/device as the embeddings.
    null_frame = torch.zeros_like(embedded_frames[0])

    contexts = []
    for i in range(n_frames):
        # Offsets run window..1 so the context is ordered oldest -> newest.
        contexts.append(torch.stack([
            embedded_frames[i - offset] if i - offset >= 0 else null_frame
            for offset in range(window, 0, -1)
        ]))

    return torch.stack(contexts), torch.stack(list(embedded_frames))

### Dataloader:

Create a custom dataset that takes a list of video paths, loads each video, samples its frames, and applies the embedding model to each frame. A helper function collects the MP4 files in a given directory.

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import os

class VideoFrameDataset(Dataset):
    """In-memory dataset of (context, target) embedding pairs from videos.

    All videos are decoded, embedded and turned into training pairs when the
    dataset is constructed, so instantiation does the expensive work and
    indexing is a plain list lookup.

    Args:
        video_paths: Iterable of video file paths to load.
        img2vec: Embedding model passed through to ``get_frame_embeddings``.
        transform: Stored on the instance as ``self.transform``; it is not
            applied anywhere in this class.
        fps: Target frame sampling rate passed to ``load_video_frames``.
    """

    def __init__(self, video_paths, img2vec, transform=None, fps=8):
        self.video_paths = video_paths
        self.img2vec = img2vec
        self.transform = transform
        self.fps = fps
        self.dataset = self.load_and_process_videos()

    def load_and_process_videos(self):
        """Return a list with one ``(context, target)`` pair per sampled frame.

        Videos that yield no frames are skipped.
        """
        dataset = []
        for video_path in self.video_paths:
            frames = load_video_frames(video_path, self.fps)
            if not frames:
                # Unreadable or empty video: nothing to embed or pair up.
                continue
            embedded_frames = get_frame_embeddings(frames, self.img2vec)
            inputs, targets = prepare_data(embedded_frames)
            # Pair the tensors row by row. Extending with the (inputs, targets)
            # tuple itself would add just two items per video.
            dataset.extend(zip(inputs, targets))
        return dataset

    def __len__(self):
        """Number of (context, target) pairs across all videos."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return the ``(context, target)`` pair stored at position ``idx``."""
        return self.dataset[idx]

def load_video_paths(training_path):
    """List the MP4 files directly inside ``training_path``.

    Entries are returned in sorted filename order so that the result is
    reproducible across runs and platforms (``os.listdir`` order is
    arbitrary). The ``.mp4`` extension is matched case-insensitively.

    Args:
        training_path: Directory to scan (not searched recursively).

    Returns:
        list[str]: ``training_path`` joined with each matching filename.
    """
    return [
        os.path.join(training_path, name)
        for name in sorted(os.listdir(training_path))
        if name.lower().endswith('.mp4')
    ]

### Data Instantiation:

In [None]:
# Initialize Img2Vec with CUDA if available
# Settings for this run, gathered in one place.
training_path = 'videos/'
EMBEDDING_MODEL = "efficientnet_b0"
TARGET_FPS = 8
BATCH_SIZE = 16

# Build the embedding model, using CUDA when it is available.
use_cuda = torch.cuda.is_available()
img2vec = Img2Vec(cuda=use_cuda, model=EMBEDDING_MODEL)

# Collect the MP4 files found in the training directory.
video_paths = load_video_paths(training_path)

# Constructing the dataset loads and embeds every video up front.
dataset = VideoFrameDataset(video_paths, img2vec, fps=TARGET_FPS)

# Serve the dataset in shuffled mini-batches.
data_loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)


### 5. Neural Network Training

