In [None]:
# -- Imports ------------------------------------------------------------------------------------------------------
import os
import pickle
from sklearn.model_selection import KFold
import random
#-----------------------------------------------------------------------------------------------------------------

In [None]:
#-- Initialize ---------------------------------------------------------------------------------------------------
#-- Class folder names; a name's position in this list is its integer label --
CLASS_NAMES = [
    'Violence',
    'NonViolence',
]

#-- Root directory that is scanned for dataset folders --
input_path = '/kaggle/input/'

#-- Output pickles (relative to the current working directory) --
data_file = 'video_paths_labels.pkl'
folds_indices_file = 'folds_indices.pkl'

#-----------------------------------------------------------------------------------------------------------------

In [None]:
#-- Get paths for all data  --------------------------------------------------------------------------------------
ds_directories = []
# os.listdir() returns entries in arbitrary order. Sorting keeps the directory
# list -- and therefore the sample order and fold indices derived from it --
# identical from one run to the next.
for item in sorted(os.listdir(input_path)):
    item_path = os.path.join(input_path, item)
    if os.path.isdir(item_path):
        # Class folders are later looked up under '<dataset>/data/'.
        ds_directories.append(item_path + '/data/')

print(ds_directories)
#-----------------------------------------------------------------------------------------------------------------

In [None]:
#-- Function to prepare data ------------------------------------------------------------------------------------
def prepare_data(dataset_dir, class_names=None):
    """Collect video file paths and their integer class labels.

    Args:
        dataset_dir: Iterable of directory paths; each one is searched for a
            sub-folder per class name. A single path (str or os.PathLike) is
            also accepted and treated as a one-element list instead of being
            iterated character by character.
        class_names: Ordered class folder names. The position of a name in
            this sequence is the label given to every entry of that folder.
            Defaults to the module-level CLASS_NAMES.

    Returns:
        Tuple (video_paths, labels) of two parallel lists. Directories are
        visited in the order given, classes in `class_names` order, and the
        entries of each class folder in sorted order, so the result is
        reproducible across runs. Missing class folders are skipped.
    """
    if class_names is None:
        class_names = CLASS_NAMES
    if isinstance(dataset_dir, (str, os.PathLike)):
        dataset_dir = [dataset_dir]

    video_paths = []
    labels = []
    for folder_path in dataset_dir:
        #-- With the default CLASS_NAMES: 0 for Violence, 1 for NonViolence --
        for label, class_name in enumerate(class_names):
            class_path = os.path.join(folder_path, class_name)
            if not os.path.isdir(class_path):
                continue
            # os.listdir() order is arbitrary; sort so that saved fold indices
            # keep pointing at the same videos when the listing is rebuilt.
            for video in sorted(os.listdir(class_path)):
                video_paths.append(os.path.join(class_path, video))
                labels.append(label)

    return video_paths, labels
#-----------------------------------------------------------------------------------------------------------------

In [None]:
#-- Prepare Data as list ---------------------------------------------------------------------------------------
#-- Gather paths and labels from every discovered dataset directory --
all_video_paths, all_labels = prepare_data(ds_directories)

#-- The two counts should match, since the lists are built in parallel --
print(
    f'Number of Videos: {len(all_video_paths)}\nNumber of Labels:{len(all_labels)}'
)

#-- Save both lists together as one (paths, labels) tuple --
with open(data_file, 'wb') as data_fh:
    pickle.dump((all_video_paths, all_labels), data_fh)
#-----------------------------------------------------------------------------------------------------------------

In [None]:
#-- Set up K-Fold and save indices --------------------------------------------------------------------------------
#-- 5 folds, shuffled with a fixed seed so the split is repeatable --
kf = KFold(n_splits=5, shuffle=True, random_state=42)

#-- One (train_idx, test_idx) pair per fold, in fold order --
# NOTE(review): all_labels is not passed to the splitter, so the folds are not
# stratified by class -- confirm that this is intended.
all_folds_indices = [
    (train_idx, test_idx)
    for train_idx, test_idx in kf.split(all_video_paths)
]

#-- Save Folds Indices --
with open(folds_indices_file, 'wb') as folds_fh:
    pickle.dump(all_folds_indices, folds_fh)
#-----------------------------------------------------------------------------------------------------------------