# CNN-LSTM with Grad-CAM

In [18]:
# Imports
import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tqdm import tqdm
import matplotlib.cm as cm
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import (Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout,
                                     TimeDistributed, LSTM)
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

## Preprocess videos

In [19]:
# Preprocessing pipeline (extract 10 frames per video)
def extract_frames(video_path, num_frames=10, size=(100, 100)):
    """Sample a fixed number of resized, normalised RGB frames from a video.

    Frames are read at positions ``0, step, 2 * step, ...`` where
    ``step = max(total_frames // num_frames, 1)``, stopping as soon as
    ``num_frames`` frames have been decoded. Positions that fail to decode
    are skipped.

    Args:
        video_path: Path of the video file to open with OpenCV.
        num_frames: Number of frames to return; must be at least 1.
        size: Target ``(width, height)`` handed to ``cv2.resize``.

    Returns:
        An array of shape ``(num_frames, height, width, 3)`` in RGB channel
        order with pixel values divided by 255 (i.e. in ``[0, 1]`` for 8-bit
        video), or ``None`` when the video cannot be opened or yields fewer
        than ``num_frames`` readable frames.

    Raises:
        ValueError: If ``num_frames`` is smaller than 1.
    """
    if num_frames < 1:
        raise ValueError(f"num_frames must be >= 1, got {num_frames}")

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            return None
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        step = max(total_frames // num_frames, 1)
        for position in range(0, total_frames, step):
            cap.set(cv2.CAP_PROP_POS_FRAMES, position)
            ret, frame = cap.read()
            if not ret:
                continue
            frame = cv2.resize(frame, size)
            # OpenCV decodes to BGR; convert so that matplotlib (which expects
            # RGB) displays the frames with the correct colours.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame / 255.0)
            if len(frames) == num_frames:
                break
    finally:
        # Always free the decoder, even if resizing/decoding raised.
        cap.release()
    return np.array(frames) if len(frames) == num_frames else None

In [20]:
# Load dataset from directory structure: dataset/ClassName/*.mp4
def load_dataset(data_dir):
    """Load every video under ``data_dir/<ClassName>/`` as a frame sequence.

    Only sub-directories of ``data_dir`` are treated as classes; they are
    numbered ``0..n-1`` in sorted name order, so stray files (for example
    ``.DS_Store``) cannot create gaps in the label indices. Videos inside each
    class are visited in sorted order to keep the sample order deterministic.
    Videos for which ``extract_frames`` returns ``None`` are skipped.

    Args:
        data_dir: Root directory containing one sub-directory per class.

    Returns:
        A tuple ``(X, y, class_map)`` where ``X`` stacks the frame arrays of
        all usable videos, ``y`` holds the matching one-hot labels with one
        column per class directory, and ``class_map`` maps each label index
        to its class directory name.

    Raises:
        ValueError: If ``data_dir`` contains no sub-directories.
    """
    class_names = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    if not class_names:
        raise ValueError(f"No class sub-directories found in {data_dir!r}")

    class_map = dict(enumerate(class_names))
    X, y = [], []
    for idx, class_name in class_map.items():
        class_path = os.path.join(data_dir, class_name)
        for video_file in tqdm(sorted(os.listdir(class_path)), desc=f"{class_name}"):
            video_path = os.path.join(class_path, video_file)
            frames = extract_frames(video_path)
            if frames is not None:
                X.append(frames)
                y.append(idx)
    # Fix the one-hot width to the number of classes so that a class without
    # any usable video still gets its own column.
    return np.array(X), to_categorical(y, num_classes=len(class_map)), class_map

In [None]:
# Load all class folders under the dataset root: X holds the frame sequences,
# y the one-hot labels and class_map maps each label index to its folder name.
X, y, class_map = load_dataset('../CKT Dataset')
# Hold out 20% of the videos for validation; the fixed seed makes the shuffle
# deterministic for a given sample order.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

## Define and train CNN-LSTM

In [27]:
# Define CNN-LSTM model
# Per-frame CNN (each layer wrapped in TimeDistributed) -> LSTM over the frame
# sequence -> dense head with a 5-way softmax.
# With 'same' padding every stride-2 layer halves the spatial size (rounded
# up): 100 -> 50 -> 25 -> 13 -> 7 -> 4 -> 2 -> 1 -> 1.
model = Sequential()
model.add(Input(shape=(10, 100, 100, 3)))  # explicitly define input shape here

# (conv filters, pooling window, optional layer name) for each conv/pool stage.
# The third convolution is named so Grad-CAM can look it up later.
conv_pool_stack = [
    (16, (4, 4), None),
    (32, (4, 4), None),
    (64, (2, 2), 'target_layer'),
    (128, (2, 2), None),
]
for filters, pool_size, conv_name in conv_pool_stack:
    conv = Conv2D(filters, (3, 3), strides=(2, 2), padding='same', activation='relu')
    model.add(TimeDistributed(conv, name=conv_name))
    pool = MaxPooling2D(pool_size, strides=(2, 2), padding='same')
    model.add(TimeDistributed(pool))

# Flatten each frame's feature map, then summarise the sequence with the LSTM.
model.add(TimeDistributed(Flatten()))
model.add(LSTM(128))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(5, activation='softmax'))

model.compile(
    optimizer=Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the CNN-LSTM for 60 epochs with mini-batches of 16 videos, reporting
# loss/accuracy on the held-out validation split.
model.fit(X_train, y_train, epochs=60, batch_size=16, validation_data=(X_val, y_val))

## Grad-CAM implementation and visualization

In [29]:
# For more details, check out: https://keras.io/examples/vision/grad_cam/
# Grad-CAM
def make_gradcam_heatmap(sequential_model, video, class_index, frame_index,
                         target_layer_name="target_layer"):
    """Compute a Grad-CAM heatmap for one frame of a video.

    The Sequential model is re-wired as a functional model that also exposes
    the output of the layer called ``target_layer_name``; the gradient of the
    selected class score with respect to that output weights its channels.

    Args:
        sequential_model: Built Keras Sequential model to explain.
        video: Array holding a single video in the model's input shape
            (without the batch dimension).
        class_index: Index of the output unit whose score is explained.
        frame_index: Index of the frame whose activation map is weighted.
        target_layer_name: Name of the layer whose output is visualised.

    Returns:
        A NumPy array with the target layer's spatial resolution, rectified
        and scaled by its maximum (all zeros when no activation contributes
        positively).

    Raises:
        ValueError: If the model has no layer named ``target_layer_name``.
    """
    # Recreate the functional version of the Sequential model, taking the
    # input shape from the model itself instead of hard-coding it.
    input_layer = Input(shape=sequential_model.input_shape[1:])
    x = input_layer
    target_output = None
    for layer in sequential_model.layers:
        x = layer(x)
        if layer.name == target_layer_name:
            target_output = x
    if target_output is None:
        raise ValueError(f"Model has no layer named {target_layer_name!r}")

    grad_model = Model(inputs=input_layer, outputs=[target_output, x])

    # Forward pass (add a batch dimension of 1)
    input_tensor = tf.convert_to_tensor(video[np.newaxis])
    with tf.GradientTape() as tape:
        conv_outputs, predictions = grad_model(input_tensor)
        loss = predictions[:, class_index]

    # Backpropagation; [0] drops the batch dimension.
    grads = tape.gradient(loss, conv_outputs)[0]
    # NOTE(review): for the TimeDistributed target used in this notebook the
    # remaining axes are (frame, height, width, channel), so the channel
    # weights are averaged over all frames and shared between them — confirm
    # this is intended rather than pooling only grads[frame_index].
    pooled_grads = tf.reduce_mean(grads, axis=(0, 1, 2))
    conv_output = conv_outputs[0][frame_index]
    heatmap = conv_output @ pooled_grads[..., tf.newaxis]
    # Drop only the trailing singleton so size-1 spatial dims are preserved.
    heatmap = tf.squeeze(heatmap, axis=-1)
    heatmap = tf.maximum(heatmap, 0)
    # divide_no_nan yields 0 instead of NaN when the rectified map is all zero.
    heatmap = tf.math.divide_no_nan(heatmap, tf.reduce_max(heatmap))
    return heatmap.numpy()


In [None]:
# Visualize Grad-CAM
video = X_val[10]
frame_index = 0  # single source of truth for both the heatmap and the shown frame
predicted_class = np.argmax(model.predict(video[np.newaxis]), axis=1)[0]
heatmap = make_gradcam_heatmap(model, video, predicted_class, frame_index=frame_index)
original_frame = video[frame_index]

# Upsample the coarse heatmap to the frame resolution; cv2.resize takes
# (width, height), so derive it from the frame instead of hard-coding it.
frame_height, frame_width = original_frame.shape[:2]
heatmap_resized = cv2.resize(heatmap, (frame_width, frame_height))
overlay = cm.jet(heatmap_resized)[..., :3]  # colormap returns RGBA; drop alpha
superimposed = (overlay * 0.4 + original_frame).clip(0, 1)

fig, axes = plt.subplots(1, 3, figsize=(10, 3))
panels = (
    ("Original", original_frame, None),
    ("Heatmap", heatmap_resized, 'jet'),
    ("Overlay", superimposed, None),
)
for ax, (title, image, cmap) in zip(axes, panels):
    ax.set_title(title)
    ax.imshow(image, cmap=cmap)
    ax.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Visualize Grad-CAM for all frames in video
video = X_val[70]  # one video: (frames, height, width, channels)
num_frames = len(video)
frame_height, frame_width = video.shape[1:3]
predicted_class = np.argmax(model.predict(video[np.newaxis]), axis=1)[0]

# Generate one heatmap per frame for the predicted class
heatmaps = [
    make_gradcam_heatmap(model, video, predicted_class, frame_index=i)
    for i in range(num_frames)
]

# One row per frame; squeeze=False keeps axs two-dimensional even for a
# single-frame video, and the figure height scales with the number of rows.
fig, axs = plt.subplots(num_frames, 3, figsize=(12, 2 * num_frames), squeeze=False)
for i, (original_frame, heatmap) in enumerate(zip(video, heatmaps)):
    # cv2.resize expects (width, height)
    heatmap_resized = cv2.resize(heatmap, (frame_width, frame_height))
    overlay = cm.jet(heatmap_resized)[..., :3]  # colormap returns RGBA; drop alpha
    superimposed = (overlay * 0.4 + original_frame).clip(0, 1)

    panels = (
        (f"Frame {i}", original_frame, None),
        (f"Heatmap {i}", heatmap_resized, 'jet'),
        (f"Superimposed {i}", superimposed, None),
    )
    for ax, (title, image, cmap) in zip(axs[i], panels):
        ax.imshow(image, cmap=cmap)
        ax.axis('off')
        ax.set_title(title)

plt.suptitle(f"Grad-CAM for Class: {predicted_class}", fontsize=16)
plt.tight_layout(rect=[0, 0.03, 1, 0.97])  # Leave space for title
plt.show()