## Importing Libraries and Modules

In [1]:
import os
import numpy as np
import cv2
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv3D, MaxPooling3D
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split




## Define the path to the dataset directory

In [2]:
# Root folder of the dataset. This is a relative path, so it is resolved
# against the notebook's current working directory.
dataset_dir = "Real Life Violence Dataset"

# Define the paths to the "Violence" and "NonViolence" subfolders
# (one folder per class label).
violence_dir = os.path.join(dataset_dir, "Violence")
non_violence_dir = os.path.join(dataset_dir, "NonViolence")

## Define constants and configuration

In [3]:
# Define constants and configuration
# Target frame size passed to cv2.resize; also the spatial part of the model's input shape.
IMG_SIZE = (224, 224)
# Number of frames sampled per video; also the leading (temporal) dimension of the model's input shape.
NUM_FRAMES = 16
# Mini-batch size passed to model.fit.
BATCH_SIZE = 32
# Number of training epochs passed to model.fit.
EPOCHS = 15

## Function to extract video frames and preprocess them

In [5]:
# Function to extract video frames and preprocess them
def extract_frames(video_path, num_frames=NUM_FRAMES):
    """Sample evenly spaced frames from a video and resize them to ``IMG_SIZE``.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        num_frames: Number of frame positions to sample, spread evenly from
            the first to the last frame of the video.

    Returns:
        A list of resized frames in sampling order. The list is shorter than
        ``num_frames`` when some reads fail, and empty when the video cannot
        be opened or reports no frames. Callers must check the length.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # An unreadable file yields no usable frames; bail out early instead
        # of seeking through a capture that was never opened.
        if not cap.isOpened():
            return []

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0:
            return []

        # Evenly spaced positions from the first to the last frame index.
        frame_indices = np.linspace(0, frame_count - 1, num_frames, dtype=int)

        frames = []
        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if ret:
                frames.append(cv2.resize(frame, IMG_SIZE))
        return frames
    finally:
        # Always free the capture handle, even if decoding or resizing raises.
        cap.release()

## Load and preprocess the dataset

In [6]:
# Build the dataset: one list of NUM_FRAMES resized frames per video,
# labelled 1 for "Violence" and 0 for "NonViolence".
X = []
y = []
for label, label_dir in [("Violence", violence_dir), ("NonViolence", non_violence_dir)]:
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded train/val/test split) reproducible.
    for video_filename in sorted(os.listdir(label_dir)):
        video_path = os.path.join(label_dir, video_filename)
        frames = extract_frames(video_path)
        # Keep only videos that yielded the full frame count so that every
        # sample has the same shape; anything else is skipped.
        if len(frames) == NUM_FRAMES:
            X.append(frames)
            y.append(1 if label == "Violence" else 0)
# NOTE(review): pixel values are stored as returned by cv2.resize, with no
# rescaling applied anywhere in this notebook — confirm this is intended.
X = np.array(X)
y = np.array(y)

## Split data into train, validation, and test sets

In [7]:
# Two-stage split: hold out 20% for testing, then carve 10% of the remainder
# off for validation (roughly 72% train / 8% validation / 20% test overall).
SPLIT_SEED = 42  # same seed for both stages, so the split is repeatable for a given X, y
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.1, random_state=SPLIT_SEED
)

## Define the 3D CNN model

In [8]:
# One input sample is a clip: NUM_FRAMES frames of IMG_SIZE with 3 channels.
clip_shape = (NUM_FRAMES, *IMG_SIZE, 3)

# Two Conv3D + max-pooling stages applied to the clip.
feature_layers = [
    Conv3D(32, kernel_size=(3, 3, 3), activation='relu', input_shape=clip_shape),
    MaxPooling3D(pool_size=(2, 2, 2)),
    Conv3D(64, kernel_size=(3, 3, 3), activation='relu'),
    MaxPooling3D(pool_size=(2, 2, 2)),
]

# Dense head with dropout, ending in a single sigmoid unit for the binary label.
classifier_layers = [
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
]

model = Sequential(feature_layers + classifier_layers)

# Compile the model
model.compile(
    optimizer=Adam(),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)




## Train the model

In [9]:

# Train on the training split, passing the validation split for monitoring.
# Kept as the cell's last expression so the returned History object is shown.
model.fit(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(X_val, y_val),
)


Epoch 1/15


Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.src.callbacks.History at 0x1eb0913c990>

## Evaluate the model

In [52]:
# Score the model on the held-out test split. The result unpacks into the loss
# followed by the 'accuracy' metric configured in model.compile.
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

Test Loss: 1.0837340354919434, Test Accuracy: 0.7925000190734863


## Extract and display frames from a video

In [53]:
def display_frames(video_path, num_frames=5):
    """Show sampled frames of a video one after another in OpenCV windows.

    Args:
        video_path: Path of the video to preview.
        num_frames: Number of frames requested from ``extract_frames``.

    Each frame gets its own window titled "Frame <n>" (1-based). The window
    is closed again before the next frame is shown.
    """
    sampled = extract_frames(video_path, num_frames)
    for position, image in enumerate(sampled, start=1):
        window_title = f"Frame {position}"
        cv2.imshow(window_title, image)
        # Wait up to 1000 ms (or until a key press), then close the window.
        cv2.waitKey(1000)
        cv2.destroyAllWindows()



## Function to display video

In [54]:
from IPython.display import Video

def display_video(video_path):
    """Build an IPython ``Video`` display object for the given file.

    Args:
        video_path: Path of the video to show.

    Returns:
        A ``Video`` object created with ``width=640``, ``height=480`` and
        ``embed=True``; it renders when it is the last expression of a cell.
    """
    player = Video(video_path, width=640, height=480, embed=True)
    return player

## Example usage

In [55]:
# NOTE(review): this path is relative to the working directory, whereas the
# prediction cell further down looks for V_999.mp4 inside violence_dir —
# confirm that a copy of the file exists at this location.
video_path = "V_999.mp4"
# Frame-by-frame OpenCV preview, left disabled.
#display_frames(video_path)
# Last expression of the cell, so the returned Video object is rendered as its output.
display_video(video_path)

## Function to predict violence based on frames

In [58]:
def predict_violence(video_path):
    """Classify a video as "Violent" or "Non-violent" with the trained model.

    Args:
        video_path: Path of the video to classify.

    Returns:
        "Violent" when the model's output for the clip is greater than 0.5,
        otherwise "Non-violent".

    Raises:
        ValueError: If the video did not yield exactly ``NUM_FRAMES`` frames
            (for example because it is missing, unreadable or truncated).
    """
    frames = extract_frames(video_path)
    # The model expects clips of exactly NUM_FRAMES frames; fail with a clear
    # message instead of an obscure shape error from inside model.predict.
    if len(frames) != NUM_FRAMES:
        raise ValueError(
            f"Expected {NUM_FRAMES} frames from {video_path!r}, got {len(frames)}"
        )

    # Wrap the single clip in a batch of size 1.
    batch = np.array([frames])
    # predict returns one row per clip with one sigmoid output; take the scalar.
    probability = float(model.predict(batch)[0][0])
    return "Violent" if probability > 0.5 else "Non-violent"
    
# Pick a sample clip from the "Violence" folder.
# NOTE(review): this folder is also the source of the training data, so the
# clip may have been seen during training — not an independent check.
video_path = os.path.join(violence_dir, "V_999.mp4")

# Predict whether the video is violent or not
prediction = predict_violence(video_path)
print("Prediction:", prediction)

Prediction: Violent


### Hence, the model classifies videos with a test accuracy of around 80%