In [None]:
#-- Install Libraries -------------------------------------------------------------------------------------------
# !pip install torchsummary
!pip install pytorchvideo

from IPython import display
display.clear_output()
#-----------------------------------------------------------------------------------------------------------------

In [None]:
#-- Imports ------------------------------------------------------------------------------------------------------
import torch

# import torch.nn as nn
# import torch.nn.functional as F
# import torch.optim as optim
# from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.transforms as transforms

from sklearn.model_selection import train_test_split

# from torchsummary import summary

import cv2
# import matplotlib.pyplot as plt

import numpy as np
import random

import os
import shutil
import copy
#-----------------------------------------------------------------------------------------------------------------

In [None]:
#-- Initialize ---------------------------------------------------------------------------------------------------
# Root of the raw dataset; the split step looks for one sub-folder per CLASS_NAMES entry here.
ds_input_path = '/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset/'

# Root of the preprocessed output (trailing slash kept: the split paths are built by concatenation).
ds_preprocessed_path = '/kaggle/working/ds/'
train_path = f'{ds_preprocessed_path}train'
val_path = f'{ds_preprocessed_path}val'
test_path = f'{ds_preprocessed_path}test'

# Clip geometry: frames kept per video and the frame width/height in pixels.
NUM_FRAMES = 16
FRAME_W = FRAME_H = 256

# Class sub-folder names, in the order they are processed.
CLASS_NAMES = ['Violence', 'NonViolence']
#-----------------------------------------------------------------------------------------------------------------

In [None]:
#-- Create Folders  ----------------------------------------------------------------------------------------------
# Make sure the three split folders exist; exist_ok keeps the cell safe to re-run.
for split_dir in (train_path, val_path, test_path):
    os.makedirs(split_dir, exist_ok=True)
#-----------------------------------------------------------------------------------------------------------------

In [None]:
#-- Check Number of Frames and Resolution of Videos ------------------------------------------------------------
# Print frame count and resolution for the first few .mp4 files found under the
# dataset root, as a quick sanity check before preprocessing.
MAX_VIDEOS_TO_CHECK = 10
num_checked = 0

for root, dirs, files in os.walk(ds_input_path):

    for filename in files:
        if not filename.endswith('.mp4'):
            continue

        file_path = os.path.join(root, filename)
        cap = cv2.VideoCapture(file_path)
        try:
            # These are container-reported properties read without decoding any frame.
            width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            # Release the video capture object even if a property query fails.
            cap.release()

        print(f"Video: {filename} | number of frames: {num_frames} - Resolution: {width} x {height}")

        num_checked += 1
        if num_checked >= MAX_VIDEOS_TO_CHECK:
            break

    # The inner `break` only leaves the per-directory loop; stop walking the tree
    # as well, otherwise one more video is printed for every remaining directory.
    if num_checked >= MAX_VIDEOS_TO_CHECK:
        break
#-------------------------------------------------------------------------------------------------------------

In [None]:
#-- Function to Preprocess videos -------------------------------------------------------------------------------
def preprocess_video(video_path, output_path, num_frames=NUM_FRAMES, resize=(FRAME_W, FRAME_H)):
    """Decode a video, resample it to a fixed number of RGB frames and save it with ``np.save``.

    Every frame that can be read is converted from BGR to RGB and resized. Then:

    * more than ``num_frames`` frames  -> ``num_frames`` evenly spaced frames are kept
      (first and last frame included);
    * fewer than ``num_frames`` frames -> the clip is padded at the end with all-zero frames;
    * exactly ``num_frames`` frames    -> the clip is saved unchanged.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.
        output_path: Destination handed to ``np.save``.
        num_frames: Number of frames in the saved clip.
        resize: Target frame size as ``(width, height)`` -- the order ``cv2.resize``
            expects. The saved frames are therefore ``(height, width, 3)`` arrays.

    Raises:
        ValueError: If no frame could be read from ``video_path`` (missing, corrupt
            or unsupported file).
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(cv2.resize(frame, resize))
    finally:
        # Always free the decoder, even if a frame conversion raised.
        cap.release()

    if not frames:
        # Without this guard the padding branch below fails with an obscure
        # dimension-mismatch error from np.concatenate.
        raise ValueError(f"No frames could be read from video: {video_path}")

    frames = np.array(frames)
    total_frames = len(frames)

    #-- Sample frames --
    if total_frames > num_frames:
        indices = np.linspace(0, total_frames - 1, num_frames).astype(int)
        sampled_frames = frames[indices]
    elif total_frames < num_frames:
        # Build the padding from the frames' own shape and dtype: np.zeros defaults to
        # float64, which would silently upcast the whole clip, and (*resize, 3) is
        # (width, height, 3) whereas resized frames are (height, width, 3).
        padding = np.zeros((num_frames - total_frames, *frames.shape[1:]), dtype=frames.dtype)
        sampled_frames = np.concatenate((frames, padding), axis=0)
    else:
        sampled_frames = frames

    #-- Save preprocessed frames --
    np.save(output_path, sampled_frames)
#-------------------------------------------------------------------------------------------------------------

In [None]:
#-- Preprocess and Split Data ------------------------------------------------------------------------------------
def split_and_preprocess_videos(input_folder, train_dir, val_dir, test_dir, frame_size):
    """Split each class's videos 80/10/10 into train/val/test and preprocess them.

    For every name in ``CLASS_NAMES`` the videos found in ``input_folder/<class>`` are
    split with ``train_test_split`` (20% held out, then halved into val and test), and
    each video is passed to ``preprocess_video``, which writes
    ``<split_dir>/<class>/<video name without extension>.npy``.

    Args:
        input_folder: Folder containing one sub-folder per class name.
        train_dir: Output root for the training split.
        val_dir: Output root for the validation split.
        test_dir: Output root for the test split.
        frame_size: Frame size forwarded to ``preprocess_video`` as its ``resize``
            argument. The number of frames per clip is taken from ``NUM_FRAMES``.
    """
    split_dirs = {'train': train_dir, 'val': val_dir, 'test': test_dir}

    #-- Create output directories if they don't exist --
    for split_dir in split_dirs.values():
        os.makedirs(split_dir, exist_ok=True)

    for class_name in CLASS_NAMES:
        class_folder = os.path.join(input_folder, class_name)

        #-- Get all video files in the class folder --
        # os.listdir returns entries in arbitrary order; sort them so the seeded
        # split below assigns the same videos to the same split on every run.
        videos = sorted(f for f in os.listdir(class_folder) if f.endswith(('.mp4', '.avi', '.mov')))

        #-- Split into train, val, test --
        train_videos, temp_videos = train_test_split(videos, test_size=0.2, random_state=42)
        val_videos, test_videos = train_test_split(temp_videos, test_size=0.5, random_state=42)

        splits = {'train': train_videos, 'val': val_videos, 'test': test_videos}

        for split, split_videos in splits.items():
            split_folder = os.path.join(split_dirs[split], class_name)
            os.makedirs(split_folder, exist_ok=True)

            for video in split_videos:
                video_path = os.path.join(class_folder, video)
                # Swap the real extension for '.npy'. A plain replace('.mp4', ...) leaves
                # '.avi'/'.mov' names untouched (np.save then writes 'x.avi.npy') and would
                # also rewrite '.mp4' occurring in the middle of a name.
                # NOTE: two videos differing only by extension would map to the same file.
                output_path = os.path.join(split_folder, os.path.splitext(video)[0] + '.npy')
                preprocess_video(video_path, output_path, NUM_FRAMES, frame_size)
                print(f'Processed and saved {video} to {split_folder}')
#-------------------------------------------------------------------------------------------------------------

In [None]:
#-- Run the preprocessing and splitting --
# frame_size is passed through to preprocess_video as its (width, height) resize target.
split_and_preprocess_videos(
    input_folder=ds_input_path,
    train_dir=train_path,
    val_dir=val_path,
    test_dir=test_path,
    frame_size=(FRAME_W, FRAME_H),
)
#-------------------------------------------------------------------------------------------------------------

In [None]:
#-- Zip Data -----------------------------------------------------------------------------------------------------
# Archive base name without extension; make_archive adds '.zip' itself.
ds_zip_file = '/kaggle/working/data'
shutil.make_archive(base_name=ds_zip_file, format='zip', root_dir=ds_preprocessed_path)
# Drop the uncompressed copy once it has been archived.
shutil.rmtree(ds_preprocessed_path)
#-------------------------------------------------------------------------------------------------------------