In [12]:
# Cell 1: Importing necessary libraries

import cv2
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Bidirectional, BatchNormalization, Dropout, Dense
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from sklearn.metrics import classification_report


In [5]:

def extract_frame_features(frame, pretrained_model):
    # Expand the dimensions of the frame for model compatibility
    img = np.expand_dims(frame, axis=0)
    # Use the pre-trained feature extraction model to obtain the feature vector
    feature_vector = pretrained_model.predict(img, verbose=0)
    # Return the extracted feature vector
    return feature_vector


In [7]:

def extract_video_frames(video_path, sequence_length=16, image_width=299, image_height=299):
    frames_list = []
    # Open the video file for reading
    video_reader = cv2.VideoCapture(video_path)
    # Get the total number of frames in the video
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    # Calculate the number of frames to skip in order to achieve the desired sequence length
    skip_frames_window = max(int(video_frames_count / sequence_length), 1)

    # Loop through each frame in the sequence
    for frame_counter in range(sequence_length):
        # Set the position of the video reader to the current frame
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)
        # Read the frame
        success, frame = video_reader.read()
        # Break if unable to read the frame
        if not success:
            break
        # Convert the frame to RGB and resize it
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resized_frame = cv2.resize(frame_rgb, (image_height, image_width))
        # Append the resized frame to the frames list
        frames_list.append(resized_frame)
    
    # Release the video reader
    video_reader.release()
    # Return the list of frames
    return frames_list


In [9]:

def extract_features_from_videos(video_paths, total_videos, pretrained_model):
    all_video_features = []
    # Loop through each video
    for pos in tqdm(range(total_videos)):
        frames_list = []
        # Extract frames from the current video
        frames = extract_video_frames(video_paths[pos])
        # Extract features from each frame
        for frame in frames:
            features = extract_frame_features(frame, pretrained_model)
            frames_list.append(features)
        all_video_features.append(frames_list)
    return np.array(all_video_features)


In [13]:
# Define violence and non-violence directories
import os
violence_dir = '/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset/Violence'
nonviolence_dir = '/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset/NonViolence'

# Create paths to individual videos
violence_path = [os.path.join(violence_dir, name) for name in os.listdir(violence_dir)]
nonviolence_path = [os.path.join(nonviolence_dir, name) for name in os.listdir(nonviolence_dir)]

# Extract features from videos
violence_features = extract_features_from_videos(violence_path[:500], len(violence_path[:500]), pretrained_model)
non_violence_features = extract_features_from_videos(nonviolence_path[:500], len(nonviolence_path[:500]), pretrained_model)

# Save extracted features

np.save('/kaggle/working/violence_features.npy', violence_features)
np.save('/kaggle/working/non_violence_features.npy', non_violence_features)


100%|██████████| 500/500 [35:00<00:00,  4.20s/it]
 99%|█████████▉| 497/500 [30:00<00:13,  4.37s/it][h264 @ 0x5c5d1e0a1a40] mb_type 104 in P slice too large at 98 31
[h264 @ 0x5c5d1e0a1a40] error while decoding MB 98 31
[h264 @ 0x5c5d1e0a1a40] mb_type 104 in P slice too large at 98 31
[h264 @ 0x5c5d1e0a1a40] error while decoding MB 98 31
[h264 @ 0x5c5d1e0a1a40] mb_type 104 in P slice too large at 98 31
[h264 @ 0x5c5d1e0a1a40] error while decoding MB 98 31
[h264 @ 0x5c5d1e0a1a40] mb_type 104 in P slice too large at 98 31
[h264 @ 0x5c5d1e0a1a40] error while decoding MB 98 31
100%|██████████| 500/500 [30:14<00:00,  3.63s/it]


In [14]:
# Load features and labels
violence_features = np.load('/kaggle/working/violence_features.npy')
non_violence_features = np.load('/kaggle/working/non_violence_features.npy')

# Creating labels
violence_labels = np.zeros(len(violence_features))
non_violence_labels = np.ones(len(non_violence_features))

# Combining features and labels
X = np.concatenate([violence_features, non_violence_features], axis=0)
y = np.concatenate([violence_labels, non_violence_labels], axis=0)

# Splitting data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=32)

# Reshaping data for LSTM input
X_train_reshaped = X_train.reshape((X_train.shape[0], 16, 2048))
X_test_reshaped = X_test.reshape((X_test.shape[0], 16, 2048))


In [15]:
inputs = Input(shape=(16, 2048))
x = Bidirectional(LSTM(200, return_sequences=True))(inputs)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Bidirectional(LSTM(100))(x)
x = BatchNormalization()(x)
x = Dropout(0.3)(x)
x = Dense(200, activation='relu')(x)
outputs = Dense(1, activation='sigmoid')(x)

model = Model(inputs=inputs, outputs=outputs)
model.summary()

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(X_train_reshaped, y_train, validation_data=(X_test_reshaped, y_test), epochs=5, batch_size=32)


Epoch 1/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 199ms/step - accuracy: 0.5407 - loss: 0.8267 - val_accuracy: 0.4900 - val_loss: 0.6891
Epoch 2/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 169ms/step - accuracy: 0.5614 - loss: 0.7396 - val_accuracy: 0.5450 - val_loss: 0.6928
Epoch 3/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 169ms/step - accuracy: 0.6212 - loss: 0.6663 - val_accuracy: 0.5250 - val_loss: 0.6882
Epoch 4/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 183ms/step - accuracy: 0.6459 - loss: 0.6308 - val_accuracy: 0.5500 - val_loss: 0.6880
Epoch 5/5
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 172ms/step - accuracy: 0.6959 - loss: 0.5818 - val_accuracy: 0.5450 - val_loss: 0.6847


<keras.src.callbacks.history.History at 0x7a8f7b5d5b40>

In [16]:

accuracy = model.evaluate(X_test_reshaped, y_test)
print("Test Accuracy:", accuracy[1])


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step - accuracy: 0.5220 - loss: 0.6827
Test Accuracy: 0.5450000166893005


In [17]:

violence_features_test = extract_features_from_videos(violence_path[500:510], len(violence_path[500:510]), pretrained_model)
non_violence_features_test = extract_features_from_videos(nonviolence_path[500:510], len(nonviolence_path[500:510]), pretrained_model)

# Reshape the features for LSTM input
test_violence = violence_features_test.reshape((violence_features_test.shape[0], 16, 2048))
test_non_violence = non_violence_features_test.reshape((non_violence_features_test.shape[0], 16, 2048))

# Define class names
class_names = ['violence', 'non_violence']

# Predictions for test videos
predicted_non_violence = [class_names[1] if i > 0.5 else class_names[0] for i in model.predict(test_non_violence)]
predicted_violence = [class_names[1] if i > 0.5 else class_names[0] for i in model.predict(test_violence)]

print("Predicted labels for non-violence videos:", predicted_non_violence)
print("Predicted labels for violence videos:", predicted_violence)


100%|██████████| 10/10 [00:42<00:00,  4.30s/it]
100%|██████████| 10/10 [00:37<00:00,  3.71s/it]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 670ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
Predicted labels for non-violence videos: ['non_violence', 'violence', 'violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'non_violence', 'violence', 'violence']
Predicted labels for violence videos: ['violence', 'violence', 'violence', 'violence', 'non_violence', 'non_violence', 'violence', 'non_violence', 'violence', 'non_violence']


In [18]:
def classify_video(video_path, model, pretrained_model):
    """
    Classifies a single video as either 'violence' or 'non_violence'.
    
    Args:
    - video_path (str): The path to the video to be classified.
    - model: The trained LSTM model for classification.
    - pretrained_model: The pre-trained feature extraction model used for preparing the video data.
    
    Returns:
    - str: The predicted label for the video ('violence' or 'non_violence').
    """
    # Extract features from the given video
    video_features = extract_features_from_videos([video_path], 1, pretrained_model)
    
    # Reshape the features for LSTM input
    video_features_reshaped = video_features.reshape((1, 16, 2048))  # Assuming the model expects inputs of shape (batch_size, 16, 2048)
    
    # Make a prediction
    prediction = model.predict(video_features_reshaped)
    predicted_label = 'non_violence' if prediction > 0.5 else 'violence'
    
    # Return the predicted label
    return predicted_label

# Example usage:
# video_path = 'path/to/your/video.mp4'
label = 
print(f"The video is predicted to be: {label}")


100%|██████████| 1/1 [00:04<00:00,  4.24s/it]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 624ms/step
The video is predicted to be: non_violence


In [19]:
classify_video('/kaggle/input/real-life-violence-situations-dataset/Real Life Violence Dataset/NonViolence/NV_1000.mp4', model, pretrained_model)

100%|██████████| 1/1 [00:04<00:00,  4.17s/it]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step





'non_violence'