# Violence Detection using a CNN + LSTM neural network

## Imports

In [1]:
#!pip install kagglehub

In [2]:
%matplotlib inline
import cv2
import os
import numpy as np
import keras
import matplotlib.pyplot as plt
import kagglehub
import shutil

# import download
from random import shuffle
import tensorflow as tf

import sys
import h5py

# Remember the directory the notebook was started from and show it.
current_dir = os.getcwd()
print(current_dir)

# Extend the module search path with the `utils` folder located under that directory.
sys.path += [current_dir + '/utils']
from utils.datamanager import download_dataset, label_video_names
from utils.pipeline import getLSTM


c:\Users\Cesar\Desktop\maestria\TFM


## Check if gpu enabled

In [3]:
# Ask TensorFlow for the GPU devices it can see, then report how many and which ones.
gpus = tf.config.list_physical_devices('GPU')
for caption, detail in (("Num GPUs Available:", len(gpus)), ("GPU Details:", gpus)):
    print(caption, detail)

Num GPUs Available: 1
GPU Details: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## Download dataset

In [4]:
#current_dir = os.getcwd()

#download_dataset(current_dir)

## Load Data

First, we define the directory where the video dataset is stored.

In [5]:
# Dataset directory, given as a path relative to the working directory.
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
in_dir = "data"

Copy some of the data-dimensions for convenience.

In [6]:
# Frame size (frames are square: the same value is used for both sides)
img_size = 224

# Two-element (side, side) tuple
img_size_touple = (img_size,) * 2

# Number of channels (RGB)
num_channels = 3

# Three-element tuple: the two sides followed by the channel count
img_size_tiplet = img_size_touple + (num_channels,)

# Number of values in one flattened frame
img_size_flat = num_channels * img_size ** 2

# Number of classes for classification (Violence-No Violence)
num_classes = 2

# `_images_per_file` is kept as an alias holding the same value as `frames_per_file`.
# NOTE(review): process_alldata groups rows in runs of 20, while this value is 10 —
# confirm which of the two is the intended sequence length.
frames_per_file = _images_per_file = 10


# Video extension
video_exts = ".avi"

To load the saved transfer values into RAM, we use the following function:

In [7]:
def process_alldata(file, frames_num=20):
    """Load rows from an HDF5 file and group them into fixed-length samples.

    The file must expose two datasets, ``data`` and ``labels``, which are read
    fully into memory. Rows (presumably one per video frame — TODO confirm) are
    split into consecutive, non-overlapping runs of ``frames_num`` rows. Each
    run becomes one sample and takes the label of its first row. Trailing rows
    that do not fill a complete run are dropped.

    Args:
        file: Path of the HDF5 file, passed straight to ``h5py.File``.
        frames_num: Number of consecutive rows that form one sample. Must be
            at least 1. Defaults to 20, the value that used to be hard-coded.

    Returns:
        A ``(data, target)`` tuple of two lists of equal length: ``data[i]`` is
        the slice of ``data`` rows belonging to sample ``i`` and ``target[i]``
        is the matching label wrapped with ``np.array``.

    Raises:
        ValueError: If ``frames_num`` is smaller than 1.
    """
    if frames_num < 1:
        raise ValueError(f"frames_num must be >= 1, got {frames_num!r}")

    with h5py.File(file, 'r') as f:
        # `[:]` copies the datasets into memory so they outlive the file handle.
        X_batch = f['data'][:]
        y_batch = f['labels'][:]

    # Only complete runs are kept; an incomplete tail is ignored.
    n_samples = len(X_batch) // frames_num

    data = []
    target = []
    for start in range(0, n_samples * frames_num, frames_num):
        data.append(X_batch[start:start + frames_num])
        # The label of the first row stands for the whole run.
        target.append(np.array(y_batch[start]))

    return data, target

## Recurrent Neural Network

### Define LSTM architecture

The LSTM architecture is defined by `getLSTM` in `utils/pipeline.py`.

## Model training


In [8]:
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
# Passed as the third argument of getLSTM inside train_model.
# NOTE(review): presumably the sequence length the LSTM expects — confirm against utils/pipeline.py.
n_chunks = _images_per_file

# Shared early-stopping callback: watches the validation loss, waits 3 epochs
# without sufficient improvement and restores the best weights when it stops.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    min_delta=0.005,          # a val_loss improvement smaller than 0.005 does not count as progress
    restore_best_weights=True
)

# Seed the NumPy and TensorFlow global random generators.
np.random.seed(42)
tf.random.set_seed(42)

def train_model(data, target, input_size, cells, batchs = 16, epochs=200):
    """Build an LSTM with ``getLSTM`` and fit it on an 80/20 split of the samples.

    Args:
        data: Sequence of samples; converted to a NumPy array before fitting.
        target: Sequence of labels aligned with ``data``.
        input_size: Forwarded to ``getLSTM`` as its second argument.
        cells: Forwarded to ``getLSTM`` as its first argument.
        batchs: Batch size used by ``fit``.
        epochs: Upper bound on the number of epochs; the module-level
            ``early_stopping`` callback may end training sooner.

    Returns:
        ``(model, history)``: the fitted model and the object returned by ``fit``.
    """
    lstm_model = getLSTM(cells, input_size, n_chunks)

    # Fixed random_state keeps the train/validation partition identical between runs.
    partitions = train_test_split(data, target, test_size=0.2, random_state=42)
    train_x, val_x, train_y, val_y = (np.array(part) for part in partitions)

    fit_history = lstm_model.fit(
        train_x,
        train_y,
        epochs=epochs,
        validation_data=(val_x, val_y),
        batch_size=batchs,
        verbose=2,
        callbacks=[early_stopping],
    )
    return lstm_model, fit_history

## Test the model and print results

We are going to test the model with 20% of the total videos. These videos have not been used to train the network.

In [10]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def _save_curve(x, y, *, label, color, title, ylabel, out_path):
    """Draw one labelled line plot and save it to ``out_path`` at 300 dpi.

    The figure is closed right after saving so figures do not pile up while
    the outer loop walks over several feature files.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(x, y, label=label, color=color)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.set_ylim(bottom=0)
    ax.grid(True)
    ax.legend()
    fig.savefig(out_path, dpi=300)
    plt.close(fig)


# Directories
script_dir = os.getcwd()
training_dir = os.path.join(script_dir, "processedData/training/")
testing_dir = os.path.join(script_dir, "processedData/testing/")
graphs_dir = os.path.join(script_dir, "graphs/")
os.makedirs(graphs_dir, exist_ok=True)

# Range of LSTM cell counts to try
cells_start = 1
cells_end = 10
cells_list = list(range(cells_start, cells_end))  # 1 to 9 cells

# os.listdir returns entries in arbitrary order and also lists sub-directories.
# Sorting makes the processing order reproducible; the isfile filter skips folders.
feature_files = sorted(
    name for name in os.listdir(training_dir)
    if os.path.isfile(os.path.join(training_dir, name))
)

for filename in feature_files:
    print(f"Processing CNN Model: {filename}")
    cnn_name = filename.split('.')[0]  # Base name for plots / CSV

    # The test features must live under the same file name in the testing folder.
    test_path = os.path.join(testing_dir, filename)
    if not os.path.isfile(test_path):
        print(f"  Skipping {filename}: no matching file in {testing_dir}")
        continue

    # Load data
    data, target = process_alldata(os.path.join(training_dir, filename))
    data_test, target_test = process_alldata(test_path)
    if not data or not data_test:
        print(f"  Skipping {filename}: no complete samples found")
        continue

    # Loop-invariant work: the test arrays and the feature width do not depend
    # on the number of LSTM cells, so build them once per feature file.
    X_test = np.array(data_test)
    y_test = np.array(target_test)
    input_size = data[0].shape[1]

    model_metrics = []   # Per-epoch metrics (for plots)
    final_metrics = []   # Final metrics per configuration (for CSV)

    for cells in cells_list:
        print(f"  → LSTM cells: {cells}")

        # Release Keras global state left by the previous configuration before
        # building a new model; done here (not after evaluation) so the last
        # trained model stays usable once the loop has finished.
        tf.keras.backend.clear_session()
        model, history = train_model(data, target, input_size, cells)
        epoch_log = history.history

        # Store per-epoch metrics
        for epoch, (loss, acc, val_loss, val_acc) in enumerate(zip(
            epoch_log['loss'],
            epoch_log['accuracy'],
            epoch_log['val_loss'],
            epoch_log['val_accuracy']
        ), start=1):
            model_metrics.append({
                'cnn_model': cnn_name,
                'lstm_cells': cells,
                'epoch': epoch,
                'train_loss': loss,
                'train_accuracy': acc,
                'val_loss': val_loss,
                'val_accuracy': val_acc
            })

        # Evaluate on the test set
        test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

        # Store final metrics.
        # NOTE(review): the train/validation figures below come from the LAST epoch,
        # while the early-stopping callback is created with restore_best_weights=True,
        # so the weights evaluated on the test set may belong to an earlier epoch —
        # confirm this mix is intended.
        final_metrics.append({
            'Celdas Bi-LSTM': cells,
            'Perdida entrenamiento': round(epoch_log['loss'][-1], 2),
            'Accuracy entrenamiento': round(epoch_log['accuracy'][-1], 2),
            'Perdida validacion': round(epoch_log['val_loss'][-1], 2),
            'Accuracy validacion': round(epoch_log['val_accuracy'][-1], 2),
            'Perdida prueba': round(test_loss, 2),
            'Accuracy prueba': round(test_accuracy, 2)
        })

    # Build and save the final metrics as CSV
    df_final = pd.DataFrame(final_metrics)
    df_final.to_csv(os.path.join(graphs_dir, f"{cnn_name}_metrics.csv"), index=False)

    # Build the DataFrame with per-epoch metrics
    df_model = pd.DataFrame(model_metrics)

    # === Find the best configuration ===
    # NOTE(review): the winner is picked by TEST accuracy (rounded to 2 decimals, first
    # one wins on ties). Selecting on the test split biases the reported test score;
    # consider selecting on validation accuracy instead.
    best_index = df_final['Accuracy prueba'].idxmax()
    best_cells = df_final.loc[best_index, 'Celdas Bi-LSTM']
    best_model_subset = df_model[df_model['lstm_cells'] == best_cells]

    # Prepend a synthetic epoch-0 point (accuracy 0, loss 1) so the curves start at the
    # axis. Each frame only carries the column it plots.
    best_model_subset_val_acc = pd.concat(
        [pd.DataFrame({'epoch': [0], 'val_accuracy': [0]}),
         best_model_subset[['epoch', 'val_accuracy']]],
        ignore_index=True,
    )
    best_model_subset_val_loss = pd.concat(
        [pd.DataFrame({'epoch': [0], 'val_loss': [1]}),
         best_model_subset[['epoch', 'val_loss']]],
        ignore_index=True,
    )

    # === Plot accuracy ===
    _save_curve(
        best_model_subset_val_acc['epoch'],
        best_model_subset_val_acc['val_accuracy'],
        label=f'{best_cells} cells (best)',
        color='green',
        title=f'Best Validation Accuracy Score Curve - {cnn_name}',
        ylabel='Validation Accuracy',
        out_path=os.path.join(graphs_dir, f"{cnn_name}_best_val_accuracy.png"),
    )

    # === Plot loss ===
    _save_curve(
        best_model_subset_val_loss['epoch'],
        best_model_subset_val_loss['val_loss'],
        label=f'{best_cells} cells (best)',
        color='red',
        title=f'Best Validation Loss Curve - {cnn_name}',
        ylabel='Validation Loss',
        out_path=os.path.join(graphs_dir, f"{cnn_name}_best_val_loss.png"),
    )


Processing CNN Model: efficientnetb0.h5
  → LSTM cells: 1
Epoch 1/200
20/20 - 7s - loss: 0.5860 - accuracy: 0.8031 - val_loss: 0.3970 - val_accuracy: 0.8750 - 7s/epoch - 356ms/step
Epoch 2/200
20/20 - 0s - loss: 0.2997 - accuracy: 0.9031 - val_loss: 0.2288 - val_accuracy: 0.9125 - 435ms/epoch - 22ms/step
Epoch 3/200
20/20 - 0s - loss: 0.2024 - accuracy: 0.9406 - val_loss: 0.2688 - val_accuracy: 0.8875 - 404ms/epoch - 20ms/step
Epoch 4/200
20/20 - 0s - loss: 0.1744 - accuracy: 0.9469 - val_loss: 0.1933 - val_accuracy: 0.9250 - 371ms/epoch - 19ms/step
Epoch 5/200
20/20 - 0s - loss: 0.1358 - accuracy: 0.9625 - val_loss: 0.1607 - val_accuracy: 0.9500 - 313ms/epoch - 16ms/step
Epoch 6/200
20/20 - 0s - loss: 0.1085 - accuracy: 0.9656 - val_loss: 0.1650 - val_accuracy: 0.9500 - 316ms/epoch - 16ms/step
Epoch 7/200
20/20 - 0s - loss: 0.0851 - accuracy: 0.9812 - val_loss: 0.1879 - val_accuracy: 0.9375 - 311ms/epoch - 16ms/step
Epoch 8/200
20/20 - 0s - loss: 0.0663 - accuracy: 0.9844 - val_loss: 