# Grad-CAM

In [2]:
import torch
import torch.nn.functional as F
from torchvision import models, transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

In [3]:
class GradCAM:
    def __init__(self, model):
        self.model = model
        self.model.eval()
        # Get the last convolutional layer
        self.target_layer = model.layer4[-1].conv3
        self.gradients = None
        self.activations = None
        
        # Register hooks
        def forward_hook(module, input, output):
            self.activations = output
            
        def backward_hook(module, grad_input, grad_output):
            self.gradients = grad_output[0]
            
        self.target_layer.register_forward_hook(forward_hook)
        self.target_layer.register_backward_hook(backward_hook)
        
    def generate_cam(self, input_image, target_class=None):
        # Forward pass
        output = self.model(input_image)
        
        if target_class is None:
            target_class = output.argmax(dim=1).item()
        
        # Zero gradients
        self.model.zero_grad()
        
        # Target for backprop
        one_hot = torch.zeros_like(output)
        one_hot[0][target_class] = 1
        
        # Backward pass
        output.backward(gradient=one_hot)
        
        # Get weights
        pooled_gradients = torch.mean(self.gradients, dim=[2, 3])
        
        # Weight the channels by corresponding gradients
        for i in range(self.activations.shape[1]):
            self.activations[:, i, :, :] *= pooled_gradients[0, i]
            
        # Generate heatmap
        heatmap = torch.mean(self.activations, dim=1).squeeze()
        heatmap = F.relu(heatmap)  # ReLU to only keep positive influence
        heatmap = heatmap / torch.max(heatmap)  # Normalize
        
        return heatmap.detach().cpu().numpy()


In [14]:
def preprocess_image(image_path):
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
    ])
    
    image = Image.open(image_path)
    input_tensor = transform(image).unsqueeze(0)
    return input_tensor, image

def visualize_cam(image_path, model, layer_name):
    # Load and preprocess image
    input_tensor, original_image = preprocess_image(image_path)
    
    # Initialize GradCAM
    grad_cam = GradCAM(model,layer_name)
    
    # Generate heatmap
    heatmap = grad_cam.generate_cam(input_tensor)
    
    # Resize heatmap to match original image
    heatmap = np.uint8(255 * heatmap)
    heatmap = Image.fromarray(heatmap).resize(
        (original_image.size[0], original_image.size[1]),
        Image.Resampling.BILINEAR
    )
    
    # Convert to RGB for overlay
    heatmap = np.array(heatmap)
    heatmap = np.uint8(plt.cm.jet(heatmap)[..., :3] * 255)
    
    # Overlay heatmap on original image
    original_array = np.array(original_image)
    overlaid = 0.6 * original_array + 0.4 * heatmap
    overlaid = np.clip(overlaid, 0, 255).astype(np.uint8)
    
    # Display results
    plt.figure(figsize=(12, 4))
    
    plt.subplot(1, 3, 1)
    plt.imshow(original_array)
    plt.title('Original Image')
    plt.axis('off')
    
    plt.subplot(1, 3, 2)
    plt.imshow(heatmap)
    plt.title('Grad-CAM Heatmap')
    plt.axis('off')
    
    plt.subplot(1, 3, 3)
    plt.imshow(overlaid)
    plt.title('Overlay')
    plt.axis('off')
    
    plt.tight_layout()
    plt.show()


In [16]:
# Load pre-trained ResNet50 with latest weights
model = models.resnet50(pretrained=True)
model.eval()

# Define image paths
image_paths = [
    "./subject4/LIME/data/African_elephant/ILSVRC2012_val_00048781.JPEG",
    "./subject4/LIME/data/African_elephant/ILSVRC2012_val_00039678.JPEG",
    "./subject4/LIME/data/black_bear/ILSVRC2012_val_00014576.JPEG"
]

for image_path in image_paths:
    visualize_cam(image_path, model, layer_name='conv5_block3_out')

other_model = model = models.vgg16(pretrained=True)

for image_path in image_paths:
    visualize_cam(image_path, other_model)

NameError: name 'tf' is not defined

# Grad-CAM

In [None]:
class FEM:
    def __init__(self, model, k_sigma=2):
        self.model = model
        self.model.eval()
        self.k_sigma = k_sigma
        # Get the last convolutional layer
        self.target_layer = model.layer4[-1].conv3
        self.activations = None
        
        # Register forward hook
        def forward_hook(module, input, output):
            self.activations = output
            
        self.target_layer.register_forward_hook(forward_hook)
        
    def generate_heatmap(self, input_image):
        # Forward pass to get feature maps
        with torch.no_grad():
            output = self.model(input_image)
            predicted_class = output.argmax(dim=1).item()
        
        # Get feature maps from last conv layer
        feature_maps = self.activations.squeeze(0)  # Remove batch dimension
        
        # Calculate mean and std for each feature map
        means = []
        stds = []
        thresholded_maps = []
        
        # Process each feature map
        for feat_map in feature_maps:
            feat_map_np = feat_map.cpu().numpy()
            mean = np.mean(feat_map_np)
            std = np.std(feat_map_np)
            
            # Apply K-sigma thresholding
            threshold = mean + (self.k_sigma * std)
            thresholded = np.where(feat_map_np > threshold, feat_map_np, 0)
            thresholded_maps.append(thresholded)
            
            means.append(mean)
            stds.append(std)
        
        # Stack thresholded maps
        all_thresholded = np.stack(thresholded_maps)
        
        # Combine feature maps
        combined_map = np.sum(all_thresholded, axis=0)
        
        # Normalize
        if combined_map.max() != 0:
            combined_map = (combined_map - combined_map.min()) / (combined_map.max() - combined_map.min())
            
        return combined_ma

In [None]:
def visualize_fem(image_path, model, k_sigma=2):
    # Load and preprocess image
    input_tensor, original_image = preprocess_image(image_path)
    
    # Initialize FEM
    fem = FEM(model, k_sigma=k_sigma)
    
    # Generate heatmap
    heatmap, predicted_class = fem.generate_heatmap(input_tensor)
    
    # Get class name
    with open('imagenet_classes.txt') as f:
        classes = [line.strip() for line in f.readlines()]
    predicted_class_name = classes[predicted_class]
    
    # Resize heatmap to match original image
    heatmap_resized = Image.fromarray(np.uint8(255 * heatmap)).resize(
        (original_image.size[0], original_image.size[1]),
        Image.Resampling.BILINEAR
    )
    
    # Convert to RGB for overlay
    heatmap_colored = np.uint8(plt.cm.jet(np.array(heatmap_resized))[..., :3] * 255)
    
    # Create overlay
    original_array = np.array(original_image)
    overlaid = 0.7 * original_array + 0.3 * heatmap_colored
    overlaid = np.clip(overlaid, 0, 255).astype(np.uint8)
    
    # Visualization
    plt.figure(figsize=(15, 5))
    
    plt.subplot(1, 3, 1)
    plt.imshow(original_array)
    plt.title(f'Original Image\nPredicted: {predicted_class_name}')
    plt.axis('off')
    
    plt.subplot(1, 3, 2)
    plt.imshow(heatmap_colored)
    plt.title(f'FEM Heatmap\n(k={k_sigma}σ)')
    plt.axis('off')
    
    plt.subplot(1, 3, 3)
    plt.imshow(overlaid)
    plt.title('Overlay')
    plt.axis('off')
    
    plt.tight_layout()
    plt.show()
    
    return heatmap, predicted_class_name

In [None]:
def compare_fem_gradcam(image_path, model):
    from gradcam import GradCAM  # Import from previous implementation
    
    # Load and preprocess image
    input_tensor, original_image = preprocess_image(image_path)
    
    # Generate FEM heatmap
    fem = FEM(model)
    fem_heatmap, predicted_class = fem.generate_heatmap(input_tensor)
    
    # Generate Grad-CAM heatmap
    grad_cam = GradCAM(model)
    grad_cam_heatmap, _ = grad_cam.generate_cam(input_tensor)
    
    # Get class name
    with open('imagenet_classes.txt') as f:
        classes = [line.strip() for line in f.readlines()]
    predicted_class_name = classes[predicted_class]
    
    # Visualization
    plt.figure(figsize=(15, 8))
    
    # Original image
    plt.subplot(2, 3, 2)
    plt.imshow(np.array(original_image))
    plt.title(f'Original Image\nPredicted: {predicted_class_name}')
    plt.axis('off')
    
    # FEM results
    plt.subplot(2, 3, 4)
    plt.imshow(fem_heatmap, cmap='jet')
    plt.title('FEM Heatmap')
    plt.axis('off')
    
    # Grad-CAM results
    plt.subplot(2, 3, 5)
    plt.imshow(grad_cam_heatmap, cmap='jet')
    plt.title('Grad-CAM Heatmap')
    plt.axis('off')
    
    # Overlays
    fem_overlay = 0.7 * np.array(original_image) + 0.3 * np.uint8(plt.cm.jet(fem_heatmap)[..., :3] * 255)
    grad_cam_overlay = 0.7 * np.array(original_image) + 0.3 * np.uint8(plt.cm.jet(grad_cam_heatmap)[..., :3] * 255)
    
    plt.subplot(2, 3, 6)
    plt.imshow(np.clip(fem_overlay, 0, 255).astype(np.uint8))
    plt.title('FEM Overlay')
    plt.axis('off')
    
    plt.subplot(2, 3, 6)
    plt.imshow(np.clip(grad_cam_overlay, 0, 255).astype(np.uint8))
    plt.title('Grad-CAM Overlay')
    plt.axis('off')
    
    plt.tight_layout()
    plt.show()


In [None]:
model = models.resnet50(pretrained=True)

# Path to your African Elephant image
image_path = "./subject4/LIME/data/African_elephant/ILSVRC2012_val_00048781.JPEG"

# Generate and visualize FEM
visualize_fem(image_path, model)

# Compare with Grad-CAM
compare_fem_gradcam(image_path, model)