# Import modules

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils import data

import torchvision
from torchvision import transforms, datasets
import matplotlib.pyplot as plt
import numpy as np

import os
import cv2

# Define Target Directory

In [2]:
# Directory that receives the Grad-CAM images written later in the notebook.
target_dir = './grad_cam_result'

# exist_ok=True makes the cell idempotent on re-runs and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(target_dir, exist_ok=True)

In [3]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Optimisation settings.
learning_rate = 1e-4
epochs = 1
batch_size = 256

# Number of training batches between two progress reports.
log_interval = 20


# Prepare dataset

In [4]:
# Pre-processing applied to every image: resize to 224 pixels, then convert
# the PIL image to a tensor. No mean/std normalisation is applied here.
transform = transforms.Compose(
    [transforms.Resize(224), transforms.ToTensor()]
)

In [5]:
# Build the training split first and the test split second; both share the
# same on-disk root and the same pre-processing pipeline.
cifar_train, cifar_test = (
    torchvision.datasets.CIFAR10(
        './data/CIFAR10',
        train=is_train_split,
        download=True,
        transform=transform,
    )
    for is_train_split in (True, False)
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Training batches are reshuffled every epoch; the test set keeps its order.
train_loader = data.DataLoader(cifar_train, batch_size=batch_size, shuffle=True)
test_loader = data.DataLoader(cifar_test, batch_size=batch_size, shuffle=False)

# Define model

In [7]:
class VGG(nn.Module):
    """Pretrained torchvision VGG-11 with a 10-way head, instrumented for Grad-CAM.

    The network is split into ``features_conv`` (the first 20 modules of the
    torchvision feature extractor), an explicit max-pool, and the classifier
    whose last layer is replaced by a fresh ``Linear(4096, 10)``.

    During ``forward`` a backward hook is attached to the output of
    ``features_conv`` so that, after ``backward()``, the gradient of the loss
    (or of any scalar derived from the output) with respect to those
    activations is available through ``get_activations_gradient()``.
    """

    def __init__(self):
        super(VGG, self).__init__()

        # NOTE(review): newer torchvision releases prefer the `weights=`
        # argument over `pretrained=True` - confirm against the installed version.
        self.vgg = torchvision.models.vgg11(pretrained=True)

        # Feature modules 0..19. In torchvision's VGG-11 the one module left
        # out (index 20) is the trailing max-pool, which is re-applied
        # explicitly below so the hook can sit between the two.
        self.features_conv = self.vgg.features[:20]

        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

        # Reuse the pretrained classifier, swapping only its last layer for a
        # newly initialised 10-class output.
        self.classifier = self.vgg.classifier
        self.classifier[6] = nn.Linear(in_features=4096, out_features=10, bias=True)

        # Most recent gradient w.r.t. the `features_conv` output (None until
        # the first backward pass through a hooked forward).
        self.gradients = None

    def activations_hook(self, grad):
        """Tensor hook: remember the gradient flowing into the conv activations."""
        self.gradients = grad

    def forward(self, x):
        """Return class logits for a batch ``x`` and arm the gradient hook.

        The hook is registered whenever autograd is tracking the activations,
        independent of train/eval mode: Grad-CAM back-propagates through a
        model that is in eval mode. (``self.train`` is a method and therefore
        always truthy, so it cannot serve as a mode flag; ``self.training`` is
        the boolean flag.) Under ``torch.no_grad()`` the activations do not
        require grad and no hook is registered.
        """
        x = self.features_conv(x)

        if x.requires_grad:
            x.register_hook(self.activations_hook)

        x = self.max_pool(x)
        # flatten() also copes with non-contiguous layouts, unlike view().
        x = torch.flatten(x, 1)
        x = self.classifier(x)

        return x

    def get_activations_gradient(self):
        """Return the gradient captured by the last backward pass (or None)."""
        return self.gradients

    def get_activations(self, x):
        """Return the ``features_conv`` activations for ``x`` (pre max-pool)."""
        return self.features_conv(x)
    


In [8]:
# Instantiate the model and move its parameters to the selected device.
vgg = VGG().to(device)

In [9]:
# Show the module tree to check the layer layout of the wrapped network.
print(vgg)

VGG(
  (vgg): VGG(
    (features): Sequential(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
      (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (4): ReLU(inplace=True)
      (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (7): ReLU(inplace=True)
      (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (9): ReLU(inplace=True)
      (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (12): ReLU(inplace=True)
      (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (14): ReLU(inplace=True)
      (15): MaxPool2d(kernel_size=2, str

In [10]:
# Multi-class classification loss computed directly on the raw logits.
criterion = nn.CrossEntropyLoss()

# Adam over every parameter of the model.
optimizer = torch.optim.Adam(params=vgg.parameters(), lr=learning_rate)

# Training model

In [None]:
for epoch in range(epochs):
    # ---- training -------------------------------------------------------
    vgg.train()

    # Running statistics for the current logging window.
    loss_value = 0
    matches = 0
    samples_seen = 0
    for idx, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        outs = vgg(images)
        preds = torch.argmax(outs, dim=-1)
        loss = criterion(outs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        loss_value += loss.item()
        matches += (preds == labels).sum().item()
        # Count real samples instead of assuming every batch holds
        # `batch_size` items (the last batch of an epoch may be shorter).
        samples_seen += labels.size(0)

        if (idx + 1) % log_interval == 0:
            train_loss = loss_value / log_interval
            train_acc = matches / samples_seen
            print(
                f"Epoch[{epoch}/{epochs}]({idx + 1}/{len(train_loader)}) || "
                f"training loss {train_loss:4.4} || training accuracy {train_acc:4.2%}"
            )

            loss_value = 0
            matches = 0
            samples_seen = 0

    # ---- validation -----------------------------------------------------
    vgg.eval()

    val_loss_total = 0.0
    val_matches = 0
    val_samples = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            outs = vgg(images)
            preds = torch.argmax(outs, dim=-1)
            loss = criterion(outs, labels)

            # `loss` is the mean over the batch; weight it by the batch size
            # so a short final batch does not distort the average.
            n_batch = labels.size(0)
            val_loss_total += loss.item() * n_batch
            val_matches += (preds == labels).sum().item()
            val_samples += n_batch

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    val_loss = val_loss_total / max(val_samples, 1)
    val_acc = val_matches / max(val_samples, 1)

    print(f"epoch:[{epoch}/{epochs}] val_acc : {val_acc:4.2%}, val_loss: {val_loss:4.2} ")

Epoch[0/1](20/196) || training loss 1.553 || training accuracy 45.45%
Epoch[0/1](40/196) || training loss 0.8202 || training accuracy 70.72%
Epoch[0/1](60/196) || training loss 0.6037 || training accuracy 79.24%
Epoch[0/1](80/196) || training loss 0.5439 || training accuracy 81.43%
Epoch[0/1](100/196) || training loss 0.4746 || training accuracy 83.38%
Epoch[0/1](120/196) || training loss 0.4268 || training accuracy 84.73%
Epoch[0/1](140/196) || training loss 0.4235 || training accuracy 85.49%
Epoch[0/1](160/196) || training loss 0.3927 || training accuracy 86.60%
Epoch[0/1](180/196) || training loss 0.3689 || training accuracy 87.52%


# Grad-CAM

In [None]:
def generate_grad_cam(model, img, target_dir='./', index=0, target_class=None):
    """Compute a Grad-CAM heatmap for one image and save three JPEG files.

    The image is passed through ``model`` in eval mode, the logit of
    ``target_class`` is back-propagated, and the gradient captured by the
    model's activation hook is spatially averaged to weight each channel of
    the last convolutional activations. The ReLU of the weighted channel mean
    is normalised to [0, 1], resized to the image size and colour-mapped.

    Files written to ``target_dir`` (in OpenCV's BGR convention, so they show
    the right colours in any image viewer):
      * ``heatmap_{index}.jpg``  - the colour-mapped heatmap
      * ``original_{index}.jpg`` - the input image
      * ``blending_{index}.jpg`` - 0.4 * heatmap + image, clipped to [0, 255]

    Args:
        model: network exposing ``get_activations(x)`` and
            ``get_activations_gradient()``, whose ``forward`` hooks the
            activations returned by ``get_activations``.
        img: a single image tensor of shape (3, H, W) with values in [0, 1]
            in RGB order (no batch dimension).
        target_dir: output directory; created if missing.
        index: integer/string used in the output file names.
        target_class: class index to explain; defaults to the predicted class.

    Raises:
        RuntimeError: if no gradient matching the activations was captured.
    """
    model.eval()

    # Use the device the model lives on rather than a notebook global.
    model_device = next(model.parameters()).device

    # Add the batch dimension. Tracking gradients on the input guarantees that
    # the activations require grad (so the model's hook fires) even if the
    # backbone parameters are frozen.
    batch = img.detach().to(model_device).unsqueeze(0).clone().requires_grad_(True)

    # Forward + backward for THIS image, so the captured gradient belongs to
    # it and not to whatever batch was back-propagated last.
    with torch.enable_grad():
        model.zero_grad()
        logits = model(batch)
        if target_class is None:
            target_class = int(logits.argmax(dim=1).item())
        logits[0, target_class].backward()
    gradients = model.get_activations_gradient()
    # Do not leave Grad-CAM gradients behind in the parameters.
    model.zero_grad()

    with torch.no_grad():
        activations = model.get_activations(batch)

    if gradients is None or gradients.shape != activations.shape:
        raise RuntimeError(
            "No gradient matching the activations was captured; "
            "the model's forward pass must hook its convolutional activations."
        )

    # One weight per activation channel: the spatially averaged gradient.
    channel_weights = gradients.mean(dim=(0, 2, 3))
    weighted = activations * channel_weights.view(1, -1, 1, 1)

    # Channel mean -> ReLU -> normalise to [0, 1] (skip if the map is all zero).
    heatmap = torch.relu(weighted.mean(dim=1)).squeeze(0).cpu()
    peak = heatmap.max()
    if peak > 0:
        heatmap = heatmap / peak

    # (3, H, W) float RGB in [0, 1] -> (H, W, 3) uint8 BGR for OpenCV.
    img_rgb = batch[0].detach().cpu().permute(1, 2, 0).numpy()
    img_u8 = np.ascontiguousarray(np.clip(255 * img_rgb, 0, 255).astype(np.uint8))
    img_bgr = cv2.cvtColor(img_u8, cv2.COLOR_RGB2BGR)

    height, width = img_u8.shape[:2]
    resized_heatmap = cv2.resize(heatmap.numpy(), (width, height))
    resized_heatmap = np.uint8(255 * np.clip(resized_heatmap, 0, 1))
    resized_heatmap = cv2.applyColorMap(resized_heatmap, cv2.COLORMAP_JET)

    # Blend in a float buffer, then clip so bright pixels do not wrap or
    # saturate unpredictably when written as 8-bit JPEG.
    superimposed_img = np.clip(resized_heatmap * 0.4 + img_bgr, 0, 255).astype(np.uint8)

    os.makedirs(target_dir, exist_ok=True)
    cv2.imwrite(os.path.join(target_dir, f'heatmap_{index}.jpg'), resized_heatmap)
    cv2.imwrite(os.path.join(target_dir, f'original_{index}.jpg'), img_bgr)
    cv2.imwrite(os.path.join(target_dir, f'blending_{index}.jpg'), superimposed_img)


In [None]:
# Take the first batch of the test loader; the labels are not needed here.
images, _ = next(iter(test_loader))

# Sanity check of the batch layout before picking a single image from it.
print(images.shape)

In [None]:
# Write the heatmap/original/blending JPEGs for the image at position 3 of
# the batch into `target_dir`, using file suffix 0.
generate_grad_cam(vgg, images[3], target_dir, index=0)

In [None]:
# cv2.imread returns None when the file cannot be read; fail with a clear
# message instead of an obscure imshow error.
original_path = os.path.join(target_dir, 'original_0.jpg')
original_img = cv2.imread(original_path)
if original_img is None:
    raise FileNotFoundError(f"Could not read image: {original_path}")

# OpenCV decodes to BGR while matplotlib expects RGB, so swap the channels
# before display (assumes the file was saved in OpenCV's usual BGR convention).
original_img = cv2.cvtColor(original_img, cv2.COLOR_BGR2RGB)
plt.imshow(original_img)

In [None]:
# cv2.imread returns None when the file cannot be read; fail with a clear
# message instead of an obscure imshow error.
heatmap_path = os.path.join(target_dir, 'heatmap_0.jpg')
heatmap_img = cv2.imread(heatmap_path)
if heatmap_img is None:
    raise FileNotFoundError(f"Could not read image: {heatmap_path}")

# OpenCV decodes to BGR while matplotlib expects RGB; without the swap the
# JET colours are inverted (hot regions would show as blue).
heatmap_img = cv2.cvtColor(heatmap_img, cv2.COLOR_BGR2RGB)
plt.imshow(heatmap_img)

In [None]:
# cv2.imread returns None when the file cannot be read; fail with a clear
# message instead of an obscure imshow error.
blending_path = os.path.join(target_dir, 'blending_0.jpg')
blending_img = cv2.imread(blending_path)
if blending_img is None:
    raise FileNotFoundError(f"Could not read image: {blending_path}")

# OpenCV decodes to BGR while matplotlib expects RGB, so swap the channels
# before display.
blending_img = cv2.cvtColor(blending_img, cv2.COLOR_BGR2RGB)
plt.imshow(blending_img)