In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
from PIL import Image, ImageDraw
from torchvision import models

In [25]:
def preprocess_image(img):
    """Turn an H x W x 3 image array into a normalized 1 x 3 x H x W tensor.

    The channel axis is reversed (callers in this notebook pass BGR images
    loaded with ``cv2.imread``, so the result is RGB), each channel is
    normalized with the hard-coded mean/std below, the array is rearranged to
    channels-first, and a leading batch dimension is added.

    The caller's array is left untouched: the channel reversal is done on a
    copy, because a plain ``img[:, :, ::-1]`` slice is only a view and
    normalizing it in place would overwrite ``img`` itself.

    Args:
        img: array of shape (H, W, 3). Callers here pass float32 values
            scaled to [0, 1]; other dtypes are converted to float32.

    Returns:
        A float32 ``torch.Tensor`` of shape (1, 3, H, W) with
        ``requires_grad`` enabled so gradients can flow back to the input.
    """
    means = np.array([0.485, 0.456, 0.406], dtype=np.float32)
    stds = np.array([0.229, 0.224, 0.225], dtype=np.float32)

    # astype() always allocates a new array, so the input is never modified.
    reversed_channels = img[:, :, ::-1].astype(np.float32)
    normalized = (reversed_channels - means) / stds

    # HWC -> CHW; from_numpy needs a contiguous buffer after the transpose.
    chw = np.ascontiguousarray(np.transpose(normalized, (2, 0, 1)))
    batch = torch.from_numpy(chw).unsqueeze(0)
    return batch.requires_grad_(True)

# NOTE: preprocess_image is intentionally not called in this cell. `img` is
# first assigned in the image-loading cell below, which performs the same call
# right after the image is read; calling it here fails on a fresh kernel.

<function preprocess_image at 0x00000229C8E3E948>


In [50]:
# Load the image as 3-channel BGR (flag 1), resize to 224x224 and scale to [0, 1].
img = cv2.imread('both.png', 1)
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None rather
    # than raising, so fail here with a clear message.
    raise FileNotFoundError("cv2.imread could not read 'both.png'")
img = np.float32(cv2.resize(img, (224, 224))) / 255
input = preprocess_image(img)
img[:,:,2].shape

<function preprocess_image at 0x00000229C8E3E948>


(224, 224)

In [4]:
def show_cam_on_image(img, mask):
    """Blend a class-activation mask over an image, save it and display it.

    The mask is colour-mapped with OpenCV's JET colormap, added to the image,
    and the sum is rescaled by its maximum. The result is written to
    ``cam.jpg`` in the working directory and shown with matplotlib.

    Args:
        img: H x W x 3 image array; assumed to be in OpenCV's BGR order with
            values in [0, 1], as produced by the loading cells in this
            notebook.
        mask: H x W array; it is multiplied by 255 and cast to uint8, so
            values are expected in [0, 1].
    """
    heatmap = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    heatmap = np.float32(heatmap) / 255
    cam = heatmap + np.float32(img)
    cam = cam / np.max(cam)
    cv2.imwrite("cam.jpg", np.uint8(255 * cam))

    # The overlay is BGR (OpenCV convention, which imwrite expects), whereas
    # matplotlib interprets the last axis as RGB -- flip it for display only.
    plt.imshow(cam[:, :, ::-1])

In [None]:
# First, extract the features -> this is the groundwork for computing the alpha weights.
# Then capture the gradient with respect to the last conv layer.

In [61]:
# List every child of layer4: its registered name first, then its repr on the next line.
for name, module in model.layer4._modules.items():
    print(name, module, sep="\n")

0
Bottleneck(
  (conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
  (bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (downsample): Sequential(
    (0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
    (1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
)
1
Bottleneck(
  (conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), 

In [None]:
class FeatureExtractor():
    """Run a module's children in order, capturing activations and gradients.

    The wrapped module's direct children are applied one after another. For
    every child whose registered name is listed in ``target_layers``, the
    child's output is kept and a tensor hook is attached to it so that its
    gradient is stored during the backward pass.
    """

    def __init__(self, model, target_layers):
        """
        Args:
            model: the module whose children are executed sequentially
                (in this notebook, a ResNet ``layer4``).
            target_layers: container of child names (strings such as ``'2'``)
                whose outputs and gradients should be recorded.
        """
        self.model = model
        self.target_layers = target_layers
        self.gradients = []

    def save_gradient(self, grad):
        """Tensor hook: append the incoming gradient each time it fires."""
        self.gradients.append(grad)

    def __call__(self, x):
        """Forward ``x`` through the wrapped module's children.

        The hook is registered on the output tensor of a target child, not on
        the child module itself.

        Returns:
            A tuple ``(outputs, x)``: the list of recorded target activations
            and the output of the last child.
        """
        outputs = []
        # Drop gradients left over from a previous forward/backward pass.
        self.gradients = []
        # Iterate over the module handed to the constructor rather than a
        # notebook-level global, so the extractor works for any module.
        for name, module in self.model._modules.items():
            x = module(x)
            if name in self.target_layers:
                x.register_hook(self.save_gradient)
                outputs += [x]
        return outputs, x
        
        

In [None]:
class ModelOutputs():
    """Forward pass over a model that taps one of its sub-modules.

    The model's direct children are run in registration order. The child that
    equals ``feature_module`` is executed through a ``FeatureExtractor`` so
    that its target activations (and, after backward, their gradients) are
    recorded; every other child is called normally.
    """

    def __init__(self, model, feature_module, target_layers):
        """
        Args:
            model: the full network.
            feature_module: the child of ``model`` to tap (e.g. ``layer4``).
            target_layers: names of the children inside ``feature_module``
                whose outputs should be recorded.
        """
        self.model = model
        self.feature_module = feature_module
        self.feature_extractor = FeatureExtractor(self.feature_module, target_layers)

    def get_gradients(self):
        """Return the gradient list collected by the feature extractor."""
        return self.feature_extractor.gradients

    def __call__(self, x):
        """Run the model on ``x``.

        Returns:
            A tuple ``(target_activations, x)``: the activations recorded
            inside the feature module and the model's final output.
        """
        recorded = []
        for child_name, child in self.model._modules.items():
            if child == self.feature_module:
                # Delegate to the extractor so hooks get attached.
                recorded, x = self.feature_extractor(x)
                continue
            x = child(x)
            if 'avgpool' in child_name.lower():
                # Flatten the pooled map to (batch, features) for the layers after it.
                x = x.view(x.size(0), -1)
        return recorded, x


In [None]:
# Tap the child named '2' inside layer4 of the model.
extractor = ModelOutputs(
    model=model,
    feature_module=model.layer4,
    target_layers=['2'],
)

In [None]:
class GradCam:
    """Compute a Grad-CAM style heatmap for one class score.

    The class score is back-propagated to the recorded target activation;
    the gradient is averaged over the spatial axes to give one weight per
    channel, and the weighted sum of the activation channels is rectified,
    resized to the input's spatial size and scaled to [0, 1].
    """

    def __init__(self, model, feature_module, target_layer_names, use_cuda):
        """
        Args:
            model: the full network; it is switched to eval mode.
            feature_module: the child of ``model`` that contains the target
                layer(s).
            target_layer_names: names of the children of ``feature_module``
                to record.
            use_cuda: if truthy, the model and the inputs are moved to CUDA.
        """
        self.model = model
        self.feature_module = feature_module
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()

        self.extractor = ModelOutputs(self.model, self.feature_module, target_layer_names)

    def __call__(self, input, index=None):
        """Return the heatmap for class ``index``.

        Args:
            input: batch tensor of shape (N, C, H, W); only the first sample's
                activation is used for the map.
            index: class index to explain. When ``None``, the arg-max of the
                model output is used.

        Returns:
            A 2-D float32 numpy array of shape (H, W) with values in [0, 1].
            It is all zeros when the rectified map has no positive response.
        """
        if self.cuda:
            input = input.cuda()
        features, output = self.extractor(input)  # (target activations, model output)

        if index is None:
            index = int(np.argmax(output.detach().cpu().numpy()))

        # Select the class score with a one-hot mask built directly on the
        # output's device/dtype; the mask itself needs no gradient.
        one_hot = torch.zeros((1, output.size(-1)), dtype=output.dtype, device=output.device)
        one_hot[0, index] = 1
        score = torch.sum(one_hot * output)

        self.feature_module.zero_grad()
        self.model.zero_grad()
        score.backward(retain_graph=True)

        # Gradient stored by the hook on the target activation.
        grads_val = self.extractor.get_gradients()[-1].detach().cpu().numpy()

        # Activation of the first sample: (channels, h, w).
        target = features[-1].detach().cpu().numpy()[0]

        # One weight per channel: spatial mean of the gradient.
        weights = np.mean(grads_val, axis=(2, 3))[0]
        # Weighted sum over channels -> (h, w).
        cam = np.tensordot(weights, target, axes=1).astype(np.float32)

        cam = np.maximum(cam, 0)
        # cv2.resize takes dsize as (width, height), while the tensor's
        # trailing dimensions are (height, width).
        height, width = int(input.shape[2]), int(input.shape[3])
        cam = cv2.resize(cam, (width, height))
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            # Skip the division for an all-zero map to avoid 0/0 = NaN.
            cam = cam / peak
        return cam

AttributeError: 'float' object has no attribute 'cpu'

In [None]:
model = models.resnet50(pretrained=True)  # ResNet-50 with ImageNet-pretrained weights

# GradCam requires all four constructor arguments. Target the child named '2'
# of layer4 (the same target used for `extractor`) and use the GPU only when
# one is available.
grad_cam = GradCam(
    model=model,
    feature_module=model.layer4,
    target_layer_names=['2'],
    use_cuda=torch.cuda.is_available(),
)

In [None]:
# We only want to use the last layer.


In [70]:
# Load the image as 3-channel BGR (flag 1), resize to 224x224 and scale to [0, 1].
img = cv2.imread('both.png', 1)
if img is None:
    # cv2.imread signals an unreadable/missing file by returning None rather
    # than raising, so fail here with a clear message.
    raise FileNotFoundError("cv2.imread could not read 'both.png'")
img = np.float32(cv2.resize(img, (224, 224))) / 255
input = preprocess_image(img)

<function preprocess_image at 0x00000229C8E3E948>


1번 클래스: FeatureExtractor
뽑고 싶은 모델의 layer를 선택 -> 그 layer 안에서의 마지막 layer
gradient를 뽑기 위해 hook function을 사용 -> backward 할 때 호출된다
gradient를 저장하는 함수를 만든다.
마지막 layer의 feature와 gradient를 뽑는다.

2번 클래스: ModelOutputs
target layer의 피쳐맵과 모델의 최종 결과값을 return 한다.

3번 클래스: GradCam -> 알파(gradient의 채널별 평균)와 피쳐맵을 곱해서 더해줌