In [1]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical, Sequence
from tensorflow.keras import layers, models, optimizers
from sklearn.utils import shuffle

from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau ,EarlyStopping

In [2]:
class_labels = ['abuse', 'arrest', 'arson', 'assault', 'burglary', 'explosion', 'fighting', 
                'normal', 'roadaccidents', 'robbery', 'shooting', 'shoplifting', 'stealing', 'vandalism']

# Dataset directories
data_dir = '/kaggle/input/real-time-anomaly-detection-in-cctv-surveillance/data'  # Replace with correct path
batch_size = 1
epochs = 30
img_size = (224, 224)
frame_count = 64
num_classes = len(class_labels)


In [3]:
class VideoDataGenerator(Sequence):
    def __init__(self, data_dir, class_labels, batch_size=8, img_size=(224, 224), frame_count=64, 
                 shuffle=True, max_videos_per_class=None):
        self.data_dir = data_dir
        self.batch_size = batch_size
        self.img_size = img_size
        self.frame_count = frame_count
        self.shuffle = shuffle
        self.class_labels = class_labels
        self.max_videos_per_class = max_videos_per_class
        self.video_paths, self.labels = self._load_video_paths()
        self.on_epoch_end()
    
    def _load_video_paths(self):
        video_paths = []
        labels = []
        for label in self.class_labels:
            folder_path = os.path.join(self.data_dir, label)
            count = 0  # Initialize a counter for the number of videos added
            for file in os.listdir(folder_path):
                if file.endswith('.mp4'):
                    if self.max_videos_per_class is None or count < self.max_videos_per_class:
                        video_paths.append(os.path.join(folder_path, file))
                        labels.append(self.class_labels.index(label))
                        count += 1  # Increment the counter
                if self.max_videos_per_class is not None and count >= self.max_videos_per_class:
                    break  # Stop if we reached the maximum for this class
        return video_paths, labels
    
    def __len__(self):
        return len(self.video_paths) // self.batch_size

    def __getitem__(self, index):
        batch_video_paths = self.video_paths[index * self.batch_size:(index + 1) * self.batch_size]
        batch_labels = self.labels[index * self.batch_size:(index + 1) * self.batch_size]
        X, y = self._load_videos(batch_video_paths, batch_labels)
        return X, y

    def _load_videos(self, video_paths, labels):
        X = np.empty((self.batch_size, self.frame_count, *self.img_size, 3), dtype=np.float32)
        y = np.empty((self.batch_size), dtype=int)
        
        for i, video_path in enumerate(video_paths):
            frames = self._load_video_frames(video_path)
            X[i] = frames
            y[i] = labels[i]
        
        return X, to_categorical(y, num_classes=len(self.class_labels))

    def _load_video_frames(self, video_path):
        cap = cv2.VideoCapture(video_path)
        frames = []
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        step = max(1, total_frames // self.frame_count)
        for i in range(0, total_frames, step):
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                break
            frame = cv2.resize(frame, self.img_size)
            frame = frame / 255.0
            frames.append(frame)
            if len(frames) == self.frame_count:
                break
        cap.release()
        # If not enough frames, pad with last frame
        while len(frames) < self.frame_count:
            frames.append(frames[-1])
        return np.array(frames)

    def on_epoch_end(self):
        if self.shuffle:
            self.video_paths, self.labels = shuffle(self.video_paths, self.labels)

In [4]:
# Define the maximum number of videos per class to use as a subset
max_videos_per_class = 10  # Set this to the desired subset size

# Initialize the training data generator with the subset
train_generator = VideoDataGenerator(
    data_dir=data_dir, 
    class_labels=class_labels, 
    batch_size=batch_size,
    img_size=img_size, 
    frame_count=frame_count, 
    shuffle=True,
    max_videos_per_class=max_videos_per_class
)

# Initialize the validation data generator with the subset
val_generator = VideoDataGenerator(
    data_dir=data_dir, 
    class_labels=class_labels, 
    batch_size=batch_size,
    img_size=img_size, 
    frame_count=frame_count, 
    shuffle=False,
    max_videos_per_class=max_videos_per_class
)


In [5]:
from tensorflow.keras.layers import Conv3D, BatchNormalization, MaxPooling3D, GlobalAveragePooling3D, Dense, Dropout, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

def create_3d_cnn_model(num_classes):
    inputs = Input(shape=(64, 224, 224, 3))  # 64 frames, 224x224 image size, 3 channels (RGB)

    # First 3D convolutional block
    x = Conv3D(64, kernel_size=(3, 7, 7), activation='relu', padding="same")(inputs)
    x = BatchNormalization()(x)
    x = MaxPooling3D(pool_size=(1, 2, 2))(x)

    # Second 3D convolutional block
    x = Conv3D(128, kernel_size=(3, 5, 5), activation='relu', padding="same")(x)
    x = BatchNormalization()(x)
    x = MaxPooling3D(pool_size=(2, 2, 2))(x)

    # Third 3D convolutional block
    x = Conv3D(256, kernel_size=(3, 3, 3), activation='relu', padding="same")(x)
    x = BatchNormalization()(x)
    x = MaxPooling3D(pool_size=(2, 2, 2))(x)

    # Fourth 3D convolutional block
    x = Conv3D(512, kernel_size=(3, 3, 3), activation='relu', padding="same")(x)
    x = BatchNormalization()(x)
    x = MaxPooling3D(pool_size=(2, 2, 2))(x)

    # Global pooling and output layer
    x = GlobalAveragePooling3D()(x)
    x = Dropout(0.5)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs)
    return model

# Define class labels
class_labels = ["abuse", "arrest", "arson", "assault", "burglary", "explosion", "fighting", "normal", "roadaccidents", "robbery", "shooting", "shoplifting", "stealing", "vandalism"]

num_classes = len(class_labels)
model_3dcnn = create_3d_cnn_model(num_classes)

# Compile the model
model_3dcnn.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])
model_3dcnn.summary()


In [6]:
checkpoint = ModelCheckpoint('best_model.keras', monitor='val_accuracy', save_best_only=True, mode='max')
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)


In [7]:
history = model_3dcnn.fit(train_generator,
                    epochs=epochs,
                    validation_data=val_generator,
                    callbacks=[checkpoint, early_stopping])

# Summary of the model
model_3dcnn.summary()

Epoch 1/30


  self._warn_if_super_not_called()
I0000 00:00:1730831445.796638      66 service.cc:145] XLA service 0x7c0be00136e0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1730831445.796695      66 service.cc:153]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
2024-11-05 18:30:49.935970: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng0{} for conv (f32[1,64,64,224,224]{4,3,2,1,0}, u8[0]{0}) custom-call(f32[1,3,64,224,224]{4,3,2,1,0}, f32[64,3,3,7,7]{4,3,2,1,0}, f32[64]{0}), window={size=3x7x7 pad=1_1x3_3x3_3}, dim_labels=bf012_oi012->bf012, custom_call_target="__cudnn$convBiasActivationForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kRelu","side_input_scale":0,"leakyrelu_alpha":0}} is taking a while...
2024-11-05 18:30:50.437685: E external/local_xla/xla/service/slow_operation_alarm

[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m671s[0m 4s/step - accuracy: 0.0909 - loss: 3.1725 - val_accuracy: 0.1000 - val_loss: 3.1314
Epoch 2/30
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m614s[0m 4s/step - accuracy: 0.1145 - loss: 2.6911 - val_accuracy: 0.0857 - val_loss: 3.6590
Epoch 3/30
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m605s[0m 4s/step - accuracy: 0.0852 - loss: 2.6005 - val_accuracy: 0.1286 - val_loss: 2.9542
Epoch 4/30
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m605s[0m 4s/step - accuracy: 0.0900 - loss: 2.6231 - val_accuracy: 0.0857 - val_loss: 3.3583
Epoch 5/30
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m605s[0m 4s/step - accuracy: 0.1974 - loss: 2.6103 - val_accuracy: 0.1500 - val_loss: 2.9943
Epoch 6/30
[1m140/140[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m606s[0m 4s/step - accuracy: 0.2086 - loss: 2.5520 - val_accuracy: 0.1214 - val_loss: 3.0117
Epoch 7/30
[1m140/140[0m [32m━

In [8]:
model_save_path = 'trained_3dcnn_model.h5'
print("\nTraining Metrics:")
for key, values in history.history.items():
    print(f"{key}: {values[-1]}")

# Save the final model
model_3dcnn.save(model_save_path)
print(f"\nModel saved at {model_save_path}")

# To print out the training and validation accuracy and loss for each epoch
train_loss = history.history['loss']
train_accuracy = history.history['accuracy']
val_loss = history.history['val_loss']
val_accuracy = history.history['val_accuracy']

print("\nDetailed Epoch Metrics:")
for i in range(len(train_loss)):
    print(f"Epoch {i+1}:")
    print(f"  - Training loss: {train_loss[i]:.4f}, Training accuracy: {train_accuracy[i]:.4f}")
    print(f"  - Validation loss: {val_loss[i]:.4f}, Validation accuracy: {val_accuracy[i]:.4f}")


Training Metrics:
accuracy: 0.16428571939468384
loss: 2.3651695251464844
val_accuracy: 0.12857143580913544
val_loss: 3.471968412399292

Model saved at trained_3dcnn_model.h5

Detailed Epoch Metrics:
Epoch 1:
  - Training loss: 2.9582, Training accuracy: 0.0857
  - Validation loss: 3.1314, Validation accuracy: 0.1000
Epoch 2:
  - Training loss: 2.7307, Training accuracy: 0.0857
  - Validation loss: 3.6590, Validation accuracy: 0.0857
Epoch 3:
  - Training loss: 2.6901, Training accuracy: 0.1000
  - Validation loss: 2.9542, Validation accuracy: 0.1286
Epoch 4:
  - Training loss: 2.6251, Training accuracy: 0.1000
  - Validation loss: 3.3583, Validation accuracy: 0.0857
Epoch 5:
  - Training loss: 2.6555, Training accuracy: 0.1357
  - Validation loss: 2.9943, Validation accuracy: 0.1500
Epoch 6:
  - Training loss: 2.5717, Training accuracy: 0.1714
  - Validation loss: 3.0117, Validation accuracy: 0.1214
Epoch 7:
  - Training loss: 2.5472, Training accuracy: 0.1929
  - Validation loss: 2.9