## Hyperparameters and data preparation

In [1]:
import os

# Original dataset: one sub-directory of videos per class.
whole_dataset = '/home/jadebustos/tfm/datasets/real-life-violence-situations/'

# Destination of the videos once they are split into train/validation/test.
video_path_train, video_path_validation, video_path_test = (
    '/home/jadebustos/tfm/datasets/video_tmp/full/train',
    '/home/jadebustos/tfm/datasets/video_tmp/full/validation',
    '/home/jadebustos/tfm/datasets/video_tmp/full/test',
)

# Destination of the frames extracted from the train/validation videos.
imgs_path_train, imgs_path_validation = (
    '/home/jadebustos/tfm/datasets/imgs/lstm-16-frames/train',
    '/home/jadebustos/tfm/datasets/imgs/lstm-16-frames/validation',
)

# Hyperparams
# Extracted frames are resized to a square of IMAGE_SIZE x IMAGE_SIZE pixels.
IMAGE_SIZE = 200
IMAGE_WIDTH = IMAGE_HEIGHT = IMAGE_SIZE

#input_shape = (IMAGE_WIDTH, IMAGE_HEIGHT, 3)
#input_shape = (IMAGE_WIDTH, IMAGE_HEIGHT, 1)

# Number of frames sampled from every video.
frames_per_video = 16

# Class names and the numeric value associated with each of them.
labels = ['violence', 'nonviolence']
labels_dict = {'violence': 0., 'nonviolence': 1.}

# get files in directory
def get_files_dir(directory):
    """Return the names of the entries found in ``directory``, sorted.

    ``os.listdir`` yields entries in arbitrary (filesystem-dependent) order.
    Sorting makes the result deterministic, which matters when the caller
    picks entries by random index and wants a reproducible selection.

    Args:
        directory: Path of the directory to list.

    Returns:
        A new list with the entry names (files and sub-directories alike,
        without the directory prefix) in ascending order.

    Raises:
        OSError: If ``directory`` does not exist or cannot be read.
    """
    return sorted(os.listdir(directory))

Split the original dataset into three subsets:

* Train
* Validation
* Test

In [3]:
import random
import shutil
import os

# Share (in percent) of the videos of each class assigned to every subset.
train_percent, validation_percent, test_percent = 60, 20, 20

# remove old video directories and recreate them
def create_dirs(path, labels):
    """Reset ``path`` to a tree holding one empty sub-directory per label.

    Whatever currently exists at ``path`` (directory tree, file or symlink)
    is deleted first; a missing ``path`` is not an error. Implemented with
    ``os``/``shutil`` rather than shell commands so that paths containing
    spaces or shell metacharacters are handled correctly and failures raise
    instead of being silently ignored.

    Args:
        path: Root directory to delete and recreate.
        labels: Iterable of sub-directory names to create under ``path``.
            When it is empty, ``path`` is removed and nothing is created.

    Raises:
        OSError: If the old content cannot be removed or a directory cannot
            be created (e.g. insufficient permissions).
    """
    # Same effect as `rm -Rf path`: links and plain files are unlinked,
    # directories are removed recursively, a missing path is skipped.
    if os.path.islink(path) or os.path.isfile(path):
        os.remove(path)
    elif os.path.isdir(path):
        shutil.rmtree(path)

    # Same effect as `mkdir -p`: intermediate directories are created as needed.
    for item in labels:
        os.makedirs(os.path.join(path, item), exist_ok=True)

def copy_random_videos(src, dst, video_list, number):
    """Copy ``number`` randomly chosen videos from ``src`` to ``dst``.

    The chosen names are removed from ``video_list`` in place, so successive
    calls that share the same list never select the same video twice. The
    relative order of the remaining names is preserved.

    Args:
        src: Directory containing the files named in ``video_list``.
        dst: Existing directory the chosen files are copied into.
        video_list: Mutable list of file names still available for selection.
        number: How many files to pick; 0 <= number <= len(video_list).

    Raises:
        ValueError: If ``number`` is negative or larger than the amount of
            videos left in ``video_list`` (this used to loop forever).
        OSError: If a file cannot be copied.
    """
    if not 0 <= number <= len(video_list):
        raise ValueError(
            "cannot pick %r videos out of %d available"
            % (number, len(video_list))
        )

    # Draw distinct positions in one go instead of rejection sampling.
    picked = random.sample(range(len(video_list)), number)

    # copy videos
    for i in picked:
        name = video_list[i]
        shutil.copyfile(os.path.join(src, name), os.path.join(dst, name))

    # Drop the copied entries while keeping the caller's list object.
    picked_positions = set(picked)
    video_list[:] = [
        name for i, name in enumerate(video_list) if i not in picked_positions
    ]
        
# Seed the RNG so that re-running the notebook yields the same
# train/validation/test membership (otherwise every run reshuffles the split).
SPLIT_SEED = 42
random.seed(SPLIT_SEED)

create_dirs(video_path_train, labels)
create_dirs(video_path_validation, labels)
create_dirs(video_path_test, labels)

# Directory listings come back in arbitrary order; sorting them is what makes
# the seeded selection reproducible.
violence_files = sorted(get_files_dir(os.path.join(whole_dataset, labels[0])))
nonviolence_files = sorted(get_files_dir(os.path.join(whole_dataset, labels[1])))

# Same procedure for both classes: violence first, then nonviolence.
for label, label_files in zip(labels, (violence_files, nonviolence_files)):
    # Sizes are computed up front because copy_random_videos() shrinks
    # label_files as it goes. The test subset takes whatever is left after
    # rounding, so every video ends up in exactly one subset.
    validation_videos = int(len(label_files) * (validation_percent/100))
    train_videos = int(len(label_files) * (train_percent/100))
    test_videos = len(label_files) - validation_videos - train_videos

    src = os.path.join(whole_dataset, label)
    for split_path, split_videos in (
        (video_path_validation, validation_videos),
        (video_path_train, train_videos),
        (video_path_test, test_videos),
    ):
        dst = os.path.join(split_path, label)
        copy_random_videos(src, dst, label_files, split_videos)

Run the following cell to print the number of videos in each subset:

In [2]:
import numpy as np
import string
import os

# (message template, subset root, class) for every line of the report.
# A leading "\n" in a template separates one subset from the previous one.
size_report = (
    ("Training videos (violence): %d", video_path_train, 'violence'),
    ("Training videos (nonviolence): %d", video_path_train, 'nonviolence'),
    ("\nValidation videos (violence): %d", video_path_validation, 'violence'),
    ("Validation videos (nonviolence): %d", video_path_validation, 'nonviolence'),
    ("\nTest videos (violence): %d", video_path_test, 'violence'),
    ("Test videos (nonviolence): %d", video_path_test, 'nonviolence'),
)

for message, subset_root, class_name in size_report:
    files_dir = get_files_dir(os.path.join(subset_root, class_name))
    print(message % len(files_dir))

Training videos (violence): 600
Training videos (nonviolence): 600

Validation videos (violence): 200
Validation videos (nonviolence): 200

Test videos (violence): 200
Test videos (nonviolence): 200


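Convert each video to frames. `frames_per_video` images are sampled per video, one every **N** frames, where N = (total frames in the video) // `frames_per_video`: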

In [9]:
import glob
import cv2
import os

# extract up to `nframes` evenly spaced frames from every video
def video2frames(input_dir, output_dir, nframes):
    """Sample ``nframes`` evenly spaced, resized frames from each video.

    For every file matched by ``input_dir`` a directory named after the video
    (file name without its extension) is created under ``output_dir``. The
    sampled frames are resized to ``IMAGE_WIDTH`` x ``IMAGE_HEIGHT`` and
    written there as ``<video>-<frame index, 5 digits>.png``.

    Frame indexes are multiples of ``frame_count // nframes``. When a video
    has fewer than ``nframes`` frames the step is 0 and only the first frame
    is written. Frames that cannot be decoded are reported and skipped, so a
    damaged video can produce fewer than ``nframes`` images.

    Args:
        input_dir: Glob pattern selecting the video files (e.g. ``/videos/*``).
        output_dir: Directory that receives one sub-directory per video.
        nframes: Number of frames to sample from each video; must be >= 1.

    Raises:
        ValueError: If ``nframes`` is smaller than 1.
    """
    if nframes < 1:
        raise ValueError("nframes must be >= 1, got %r" % (nframes,))

    for item in glob.glob(input_dir):
        # splitext copes with names containing several dots (or none), which
        # a plain split('.') does not.
        videoname = os.path.splitext(os.path.basename(item))[0]

        # directory to store video frames
        dir_name = os.path.join(output_dir, videoname)
        os.makedirs(dir_name, exist_ok=True)

        vidcap = cv2.VideoCapture(item)
        try:
            # Some containers report a bogus (zero/negative) frame count.
            length = max(int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)), 0)
            # distance among video frames
            step = length // nframes
            # frames to capture; duplicates (step == 0) are collapsed because
            # they would all be written to the same file name anyway
            frames = list(dict.fromkeys(i * step for i in range(nframes)))

            for i in frames:
                if i > 0:
                    # Frame 0 is read from the freshly opened capture. For the
                    # others the capture is positioned at i - 1, as the
                    # original code did, so the extracted images stay the same.
                    vidcap.set(cv2.CAP_PROP_POS_FRAMES, i - 1)
                success, image = vidcap.read()
                if not success or image is None:
                    print("WARNING: could not read frame %d of %s, skipping it"
                          % (i, item))
                    continue
                filename = videoname + '-' + format(i, '05d') + '.png'
                resized = cv2.resize(image, (IMAGE_WIDTH, IMAGE_HEIGHT))
                output_file = os.path.join(dir_name, filename)
                cv2.imwrite(output_file, resized)
        finally:
            # close video capture even if decoding or writing failed
            vidcap.release()

import shutil

# delete the frame directories left by a previous run and recreate one empty
# directory per class (pure Python, so paths with spaces are handled too)
for imgs_root in (imgs_path_train, imgs_path_validation):
    shutil.rmtree(imgs_root, ignore_errors=True)
    for item in labels:
        directory = os.path.join(imgs_root, item)
        os.makedirs(directory, exist_ok=True)

# glob pattern of the source videos / destination of the frames, per subset
# and class
input_dir_train_violence = os.path.join(video_path_train, 'violence' + '/*')
output_dir_train_violence = os.path.join(imgs_path_train, 'violence')

input_dir_train_nonviolence = os.path.join(video_path_train, 'nonviolence' + '/*')
output_dir_train_nonviolence = os.path.join(imgs_path_train, 'nonviolence')

input_dir_validation_violence = os.path.join(video_path_validation, 'violence' + '/*')
output_dir_validation_violence = os.path.join(imgs_path_validation, 'violence')

input_dir_validation_nonviolence = os.path.join(video_path_validation, 'nonviolence' + '/*')
output_dir_validation_nonviolence = os.path.join(imgs_path_validation, 'nonviolence')

# (progress message, summary template, input pattern, output directory)
frame_jobs = (
    ("Getting frames for training dataset (violence) ...",
     "Training images (violence): %d",
     input_dir_train_violence, output_dir_train_violence),
    ("Getting frames for training dataset (nonviolence) ...",
     "Training images (nonviolence): %d",
     input_dir_train_nonviolence, output_dir_train_nonviolence),
    ("Getting frames for validation dataset (violence) ...",
     "Validation images (violence): %d",
     input_dir_validation_violence, output_dir_validation_violence),
    ("Getting frames for validation dataset (nonviolence) ...",
     "Validation images (nonviolence): %d",
     input_dir_validation_nonviolence, output_dir_validation_nonviolence),
)

# sample the videos of every subset/class
for start_message, summary_message, frames_input, frames_output in frame_jobs:
    print(start_message)
    video2frames(frames_input, frames_output, frames_per_video)
    # NOTE: video2frames() writes one sub-directory per video, so this figure
    # is the number of videos processed, not the number of image files.
    files_dir = get_files_dir(frames_output)
    print(summary_message % len(files_dir))

Getting frames for training dataset (violence) ...
Training images (violence): 600
Getting frames for training dataset (nonviolence) ...
Training images (nonviolence): 600
Getting frames for validation dataset (violence) ...
Validation images (violence): 200
Getting frames for validation dataset (nonviolence) ...
Validation images (nonviolence): 200
