In [None]:
import tensorflow as tf
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
import numpy as np
import cv2
import os
import xml.etree.ElementTree as ET

# Define the path to the directory containing the JPG and XML files
train_directory_path = r'/content/drive/MyDrive/thesis/dataset/roboflow-cnn/train'
valid_directory_path = r'/content/drive/MyDrive/thesis/dataset/roboflow-cnn/valid'

# Define the batch size
batch_size = 32

# Define the input shape of the model
input_shape = (224, 224, 3)

In [None]:
# Load the VGG16 model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

# Add a custom output layer for bounding box regression (4 values)
x = base_model.output
x = Flatten()(x)
x = Dense(4, activation='linear')(x)  # 4 values for xmin, ymin, xmax, ymax
model = Model(inputs=base_model.input, outputs=x)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Compile the model
model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), metrics=['accuracy'])
save_best = tf.keras.callbacks.ModelCheckpoint("/content/drive/MyDrive/thesis/cnn_models/VGG16Modelv1.h5", monitor='accuracy', save_best_only=True, verbose=1)

In [None]:
# Define a data generator that yields batches of images and their corresponding annotations
def data_generator(directory_path, batch_size, input_shape):
    while True:
        data = []
        train_annotation = []
        for filename in os.listdir(directory_path):
            if filename.endswith('.jpg'):
                # Extract the file path of the JPG image
                image_path = os.path.join(directory_path, filename)
                # Construct the file path of the corresponding XML annotation file
                xml_path = os.path.join(directory_path, filename[:-4] + '.xml')

                # Parse the XML annotation file
                tree = ET.parse(xml_path)
                root = tree.getroot()
                # Check if the bndbox element is present
                bndbox = root.find('object/bndbox')
                if bndbox is not None:
                    # Extract the bounding box coordinates
                    xmin = int(bndbox.find('xmin').text)
                    xmax = int(bndbox.find('xmax').text)
                    ymin = int(bndbox.find('ymin').text)
                    ymax = int(bndbox.find('ymax').text)
                    # Normalize the bounding box coordinates to the range [0, 1]
                    train_annotation.append([xmin / input_shape[0], ymin / input_shape[1],
                                             xmax / input_shape[0], ymax / input_shape[1]])

                    # Load the image
                    image = cv2.resize(cv2.imread(image_path), input_shape[:2])
                    data.append(image)

                    # Yield the batch when the data and train_annotation lists are of length batch_size
                    if len(data) == batch_size:
                        yield np.array(data), np.array(train_annotation)
                        data = []
                        train_annotation = []

        # Yield the last batch if it is smaller than batch_size
        if len(data) > 0:
            yield np.array(data), np.array(train_annotation)


In [None]:
# Train the model
model.fit(data_generator(train_directory_path, batch_size, input_shape),
          steps_per_epoch=len(os.listdir(train_directory_path)) // batch_size,
          epochs=25,
          verbose=1,
          validation_data=data_generator(valid_directory_path, batch_size, input_shape),
          validation_steps=len(os.listdir(valid_directory_path)) // batch_size,
          callbacks=[save_best])

Epoch 1/25
Epoch 1: accuracy improved from -inf to 0.57587, saving model to /content/drive/MyDrive/thesis/cnn_models/VGG16Modelv1.h5
Epoch 2/25
Epoch 2: accuracy improved from 0.57587 to 0.65591, saving model to /content/drive/MyDrive/thesis/cnn_models/VGG16Modelv1.h5
Epoch 3/25
Epoch 3: accuracy improved from 0.65591 to 0.69585, saving model to /content/drive/MyDrive/thesis/cnn_models/VGG16Modelv1.h5
Epoch 4/25
Epoch 4: accuracy improved from 0.69585 to 0.72734, saving model to /content/drive/MyDrive/thesis/cnn_models/VGG16Modelv1.h5
Epoch 5/25
Epoch 5: accuracy improved from 0.72734 to 0.74398, saving model to /content/drive/MyDrive/thesis/cnn_models/VGG16Modelv1.h5
Epoch 6/25
Epoch 6: accuracy improved from 0.74398 to 0.76395, saving model to /content/drive/MyDrive/thesis/cnn_models/VGG16Modelv1.h5
Epoch 7/25
Epoch 7: accuracy improved from 0.76395 to 0.77624, saving model to /content/drive/MyDrive/thesis/cnn_models/VGG16Modelv1.h5
Epoch 8/25
Epoch 8: accuracy improved from 0.77624 

<keras.callbacks.History at 0x7cc91c7f9e40>

In [21]:
import tensorflow as tf
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt

# Define a data generator that yields batches of test images and their corresponding annotations
def test_data_generator(directory_path, batch_size, input_shape):
    while True:
        data = []
        test_annotation = []
        for filename in os.listdir(directory_path):
            if filename.endswith('.jpg'):
                # Extract the file path of the JPG image
                image_path = os.path.join(directory_path, filename)
                # Construct the file path of the corresponding XML annotation file
                xml_path = os.path.join(directory_path, filename[:-4] + '.xml')

                # Parse the XML annotation file
                tree = ET.parse(xml_path)
                root = tree.getroot()
                # Check if the bndbox element is present
                bndbox = root.find('object/bndbox')
                if bndbox is not None:
                    # Extract the bounding box coordinates
                    xmin = int(bndbox.find('xmin').text)
                    xmax = int(bndbox.find('xmax').text)
                    ymin = int(bndbox.find('ymin').text)
                    ymax = int(bndbox.find('ymax').text)
                    # Normalize the bounding box coordinates to the range [0, 1]
                    test_annotation.append([xmin / input_shape[0], ymin / input_shape[1],
                                            xmax / input_shape[0], ymax / input_shape[1]])

                    # Load the image
                    image = cv2.resize(cv2.imread(image_path), input_shape[:2])
                    data.append(image)

                    # Yield the batch when the data and test_annotation lists are of length batch_size
                    if len(data) == batch_size:
                        yield np.array(data), np.array(test_annotation)
                        data = []
                        test_annotation = []

        # Yield the last batch if it is smaller than batch_size
        if len(data) > 0:
            yield np.array(data), np.array(test_annotation)


# Load the saved model
model_path = '/content/drive/MyDrive/thesis/cnn_models/VGG16Modelv1.h5'
Model = tf.keras.models.load_model(model_path)

# Define the path to the directory containing the test JPG files
test_directory_path = r'/content/drive/MyDrive/thesis/dataset/roboflow-cnn/test'

# Assuming you have 32 test samples, change this to the actual number of test samples
num_test_samples = 32

# Initialize the data generator for the test set
batch_size = 1  # Set the batch size for testing (you can adjust it as needed)
input_shape = (224, 224)  # Set the input shape based on the model's input requirements
# Get the list of image file names in the test directory
test_image_files = [filename for filename in os.listdir(test_directory_path) if filename.endswith('.jpg')]

test_generator = test_data_generator(test_directory_path, batch_size, input_shape)

# Loop through the test dataset and make predictions
for i in range(num_test_samples):
    images, annotations = next(test_generator)

    predictions = Model.predict(images)

    image = images[0].astype('uint8')  # Assuming batch size is 1
    annotation = annotations[0]
    pred = predictions[0]

    # Convert the normalized coordinates to pixel values for 224x224 input shape
    xmin, ymin, xmax, ymax = [int(anno * input_shape[i // 2]) for i, anno in enumerate(annotation)]
    xmin1, ymin1, xmax1, ymax1 = [int(p * input_shape[i // 2]) for i, p in enumerate(pred)]

    # Determine the class label based on the prediction score
    class_label = "Violence" if pred[0] > 0.5 else "NonViolence"

    # Set the color of the bounding box based on the class label
    box_color = (0, 255, 0)  # Green color for NonViolence
    if class_label == "Violence":
        box_color = (255, 0, 0)  # Red color for Violence

    cv2.rectangle(image, (xmin1, ymin1), (xmax1, ymax1), box_color, 2)
    cv2.putText(image, class_label, (xmin1, ymin1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1)
    plt.imshow(image)
    plt.title('Prediction from VGG16Modelv1')
    plt.show()

    box1 = [xmin, ymin, xmax, ymax]  # coordinates of the ground truth bounding box
    box2 = [xmin1, ymin1, xmax1, ymax1]  # coordinates of the predicted bounding box

    # Function to calculate the Intersection over Union (IoU)
    def calculate_iou(box1, box2):
        x5 = max(box1[0], box2[0])
        y5 = max(box1[1], box2[1])
        x6 = min(box1[2], box2[2])
        y6 = min(box1[3], box2[3])

        # calculate the area of intersection
        inter_area = max(0, x6 - x5) * max(0, y6 - y5)

        # calculate the area of union
        box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
        box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
        union_area = box1_area + box2_area - inter_area

        # calculate the IoU
        iou = inter_area / union_area
        return iou

    iou = calculate_iou(box1, box2)
    print(f"IoU for image {i+1}: {iou}")

Output hidden; open in https://colab.research.google.com to view.

In [25]:
import tensorflow as tf
import numpy as np
import cv2
import os
import xml.etree.ElementTree as ET

# Define a data generator that yields batches of frames and their corresponding annotations
def video_data_generator(video_path, batch_size, input_shape):
    cap = cv2.VideoCapture(video_path)
    while True:
        data = []
        video_frame = []
        while len(data) < batch_size:
            ret, frame = cap.read()
            if not ret:
                break  # Break the loop if the video ends

            video_frame.append(frame)
            if len(video_frame) == batch_size:
                # Preprocess the frames and add them to the data list
                data.extend([cv2.resize(frame, input_shape[:2]) for frame in video_frame])
                video_frame = []

        if len(data) == 0:
            break

        yield np.array(data)

    cap.release()

# Load the saved model
model_path = '/content/drive/MyDrive/thesis/cnn_models/VGG16Modelv1.h5'
Model = tf.keras.models.load_model(model_path)

# Define the path to the video file
video_path = '/content/drive/MyDrive/thesis/dataset/mixed_videos/NV_1.mp4'

# Initialize the data generator for the video
batch_size = 1  # Set the batch size for testing (you can adjust it as needed)
input_shape = (224, 224)  # Set the input shape based on the model's input requirements

video_generator = video_data_generator(video_path, batch_size, input_shape)

# Define the output video path
output_video_path = '/content/drive/MyDrive/thesis/cnn_models/NV_1_output.mp4'

# Initialize the VideoWriter
output_shape = (input_shape[1], input_shape[0])  # Width, Height
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
output_video = cv2.VideoWriter(output_video_path, fourcc, 30.0, output_shape)

# Loop through the video frames and make predictions
frame_idx = 0
try:
    while True:
        frames = next(video_generator)
        if len(frames) == 0:
            break  # Break the loop if there are no more frames

        predictions = Model.predict(frames)

        for frame_idx, frame in enumerate(frames):
            pred = predictions[frame_idx]

            # Convert the normalized coordinates to pixel values for 224x224 input shape
            xmin, ymin, xmax, ymax = [int(p * input_shape[i // 2]) for i, p in enumerate(pred)]

            # Determine the class label based on the prediction score
            class_label = "Violence" if pred[0] > 0.5 else "NonViolence"

            # Set the color of the bounding box based on the class label
            box_color = (0, 255, 0)  # Green color for NonViolence
            if class_label == "Violence":
                box_color = (255, 0, 0)  # Red color for Violence

            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), box_color, 2)
            cv2.putText(frame, class_label, (xmin, ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, box_color, 1)

            # Write the frame with annotations to the output video
            output_video.write(frame)

except StopIteration:
    pass

# Release the video writer and close the output video file
output_video.release()

