# Grad-CAM
- https://github.com/betashort/pytorch-grad-cam/blob/master/gradcam.py

In [51]:
import torch
import torch.nn as nn
import torch.optim as optim

from torch.autograd import Function

## ResNet
- https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py

## PyTorch's Global Pooling
https://www.テクめも.com/entry/pytorch-pooling


- adaptive_max_pool2d(x, (1,1))
- adaptive_avg_pool2d(x, (1,1))

ResNetのConv層=>Dense層は、adaptive_avg_pool2d((1,1))になっているので、Global Average Poolingである。

In [None]:
# End-to-end demo: Grad-CAM and Guided Backpropagation on a pretrained ResNet-50.
# The model is walked child by child (see ModelOutputs); `feature_module`
# identifies the child whose inner activations and gradients are captured.
# NOTE(review): `models`, `args`, `cv2` and `np` are not imported/defined in the
# visible part of this file - presumably torchvision.models, parsed CLI
# arguments, OpenCV and NumPy; confirm before running.
model = models.resnet50(pretrained=True)


grad_cam = GradCam(model=model, 
                   feature_module=model.layer4,
                   target_layer_names=["2"], 
                   use_cuda=args.use_cuda)

# feature_module => layer4 is the last convolutional stage of ResNet
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
# target_layer_names => names of the children of feature_module whose output is hooked
# use_cuda => whether to run on the GPU

# ==== Load and preprocess the image ====
img = cv2.imread(args.image_path, 1)
# Resize to 224x224 and scale pixel values to [0, 1] as float32.
img = np.float32(cv2.resize(img, (224, 224))) / 255
input = preprocess_image(img)

# ==== Grad-CAM ====
# If None, returns the map for the highest scoring category.
# Otherwise, targets the requested index.

# Leave this as None when the target class of the image is unknown.

target_index = None
mask = grad_cam(input, target_index)

# Writes the heat-map overlay to cam.jpg.
show_cam_on_image(img, mask)

# ==== Guided Backpropagation ====
# Constructing the wrapper replaces the ReLU modules of `model` in place.
gb_model = GuidedBackpropReLUModel(model=model, use_cuda=args.use_cuda)
print(model._modules.items())

# gb is returned as a NumPy array => transpose to move the channel axis last
gb = gb_model(input, index=target_index)
gb = gb.transpose((1, 2, 0))

# Turn the single-channel mask into a 3-channel image.
cam_mask = cv2.merge([mask, mask, mask])
# Multiply the Grad-CAM mask with the Guided Backpropagation result.
cam_gb = deprocess_image(cam_mask*gb)
gb = deprocess_image(gb)

cv2.imwrite('gb.jpg', gb)
cv2.imwrite('cam_gb.jpg', cam_gb)

# \_\_call\_\_とは？

- https://qiita.com/ko-da-k/items/439d8cc3a0424c45214a

クラスインスタンスを関数として呼び出すことができる

\_\_init\_\_は、インスタンス生成時に呼び出される

In [59]:
class A:
    """Toy class contrasting ``__init__``, ``__call__`` and a regular method.

    Each entry point stores its argument on the instance and prints it with
    a label naming the entry point that ran.
    """

    def __init__(self, x):
        """Runs once, when the instance is created."""
        print("__init__:", x)
        self.x = x

    def __call__(self, y):
        """Runs when the instance itself is called like a function."""
        print("__call__:", y)
        self.y = y

    def a_method(self, z):
        """Ordinary method; must be invoked by name."""
        print("a_func:", z)
        self.z = z
        

In [60]:
# Creating an instance runs __init__, which stores x and prints it.
a = A(1)

__init__: 1


In [61]:
# Calling the instance like a function dispatches to __call__.
a(2)

__call__: 2


In [62]:
# A regular method, by contrast, has to be invoked by name.
a.a_method(3)

a_func: 3


# FeatureExtractor

In [None]:
class FeatureExtractor():
    """Run a module child by child and capture selected activations.

    For every direct child whose name appears in ``target_layers`` the
    output tensor is kept, and a hook is attached to it so that the
    gradient reaching that tensor is stored during the backward pass.
    """

    def __init__(self, model, target_layers):
        self.model = model
        self.target_layers = target_layers
        self.gradients = []

    def save_gradient(self, grad):
        """Tensor hook: remember the gradient that reached a captured output."""
        self.gradients.append(grad)

    def __call__(self, x):
        """Feed ``x`` through the children of ``self.model`` in order.

        Returns:
            A pair ``(activations, x)``: the list of captured outputs and
            the output of the last child.
        """
        # Gradients recorded by a previous pass are discarded.
        self.gradients = []
        activations = []

        # nn.Module initialises ``_parameters`` and ``_modules`` as
        # OrderedDicts in __init__; ``_modules`` maps each attribute name to
        # the layer assigned to it.
        # https://blog.snowhork.com/2018/08/pytorch-parameters
        for layer_name, layer in self.model._modules.items():
            x = layer(x)
            if layer_name not in self.target_layers:
                continue
            # register_hook on a tensor calls the given function with the
            # gradient w.r.t. that tensor when backward() is run.
            x.register_hook(self.save_gradient)
            activations.append(x)
        return activations, x

# self.model._modulesとは？

In [4]:
class Net(nn.Module):
    """Small 1-D CNN used to illustrate what ``nn.Module._modules`` contains.

    Architecture: two Conv1d+BatchNorm stages, then three fully connected
    layers (the first two followed by BatchNorm), each followed by ReLU.

    Args:
        input_length: Length of the 1-D input signal. ``conv1`` (kernel 5,
            no padding) shortens it by 4 and ``conv2`` (kernel 1) keeps the
            length, so ``fc1`` takes ``16 * (input_length - 4)`` features.
            The default of 1000 gives the original ``16 * 996``.

    Raises:
        ValueError: If ``input_length`` is shorter than the conv1 kernel.
    """

    def __init__(self, input_length: int = 1000):
        super().__init__()
        if input_length < 5:
            raise ValueError("input_length must be at least 5 (conv1 kernel size)")
        self.conv1 = nn.Conv1d(1, 6, 5)
        self.bn1 = nn.BatchNorm1d(6)
        self.conv2 = nn.Conv1d(6, 16, 1)
        self.bn2 = nn.BatchNorm1d(16)
        self.fc1 = nn.Linear(16 * (input_length - 4), 120)
        self.bn3 = nn.BatchNorm1d(120)
        self.fc2 = nn.Linear(120, 84)
        self.bn4 = nn.BatchNorm1d(84)
        self.fc3 = nn.Linear(84, 4)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map a ``(batch, 1, input_length)`` signal to ``(batch, 4)`` outputs."""
        # ``F`` (torch.nn.functional) is never imported in this file, so the
        # functional ReLU is reached through the already-imported ``nn``.
        relu = nn.functional.relu
        x = relu(self.bn1(self.conv1(x)))
        x = relu(self.bn2(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = relu(self.bn3(self.fc1(x)))
        x = relu(self.bn4(self.fc2(x)))
        x = relu(self.fc3(x))
        return x

In [5]:
# Instantiate the toy network so its registered submodules can be inspected.
model = Net()

In [10]:
# _modules maps each attribute name to the layer assigned to it in __init__;
# the listing below follows the order of assignment.
for i, s in model._modules.items():
    print(i, s)

conv1 Conv1d(1, 6, kernel_size=(5,), stride=(1,))
bn1 BatchNorm1d(6, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
conv2 Conv1d(6, 16, kernel_size=(1,), stride=(1,))
bn2 BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
fc1 Linear(in_features=15936, out_features=120, bias=True)
bn3 BatchNorm1d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
fc2 Linear(in_features=120, out_features=84, bias=True)
bn4 BatchNorm1d(84, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
fc3 Linear(in_features=84, out_features=4, bias=True)


# register_hookとは？

- https://www.kaggle.com/sironghuang/understanding-pytorch-hooks

register_hookを使うと、逆伝播(backward)の際にそのテンソル(パラメータ)に流れてきた勾配を受け取り、表示・記録することができる

by : https://discuss.pytorch.org/t/what-are-hooks-used-for/40020

In [40]:
# 1x1 toy problem: input x, weight w and target y. Only w tracks gradients.
x = torch.randn(1, 1)
w = torch.randn(1, 1, requires_grad=True)
y = torch.randn(1, 1)

In [41]:
# Register a hook on w that prints the gradient it receives during backward.
# The label is bound through a default argument; the lambda's own `x` is the
# incoming gradient, not the input tensor defined earlier.
w.register_hook(lambda x, param_name="w_params" : print(param_name, x))

<torch.utils.hooks.RemovableHandle at 0x7fcbc519f4d0>

In [42]:
# Forward pass: a single element-wise product of input and weight.
out = x * w

In [43]:
# Squared error between the prediction and the target.
loss = (out - y)**2

In [44]:
# Same hook pattern as for w, this time attached to the intermediate tensor
# `loss`, then run backward so both hooks fire.
my_param = "Loss"
loss.register_hook(lambda x, my_param=my_param: print(my_param, x))
# backward() is seeded with 0.1 instead of the default 1.0; the hook on
# `loss` prints first, followed by the hook on `w`.
loss.mean().backward(gradient=torch.tensor(0.1)) 

Loss tensor([[0.1000]])
w_params tensor([[0.0715]])


# ModelOutputs

In [None]:
class ModelOutputs():
    """Run a full forward pass while capturing data from one sub-module.

    Calling an instance yields:
    1. The network output.
    2. Activations from the targeted intermediate layers.
    Gradients for those layers are available through ``get_gradients()``
    once a backward pass has been run.
    """

    def __init__(self, model, feature_module, target_layers):
        self.model = model
        self.feature_module = feature_module
        # Only the feature module is run layer by layer with hooks attached.
        self.feature_extractor = FeatureExtractor(self.feature_module, target_layers)

    def get_gradients(self):
        """Return the gradients recorded by the feature extractor."""
        return self.feature_extractor.gradients

    def __call__(self, x):
        """Feed ``x`` through the children of ``self.model`` in order.

        Returns:
            A pair ``(target_activations, x)``: the activations captured
            inside the feature module (an empty list if that module is never
            reached) and the final output.
        """
        captured = []
        for child_name, child in self.model._modules.items():
            if child == self.feature_module:
                # Delegate so that the targeted inner layers get hooked.
                captured, x = self.feature_extractor(x)
                continue
            x = child(x)
            if "avgpool" in child_name.lower():
                # Flatten the pooled feature map before the following layers.
                x = x.view(x.size(0), -1)

        return captured, x


# Grad-CAM

In [None]:
class GradCam:
    """Compute Grad-CAM heat maps for one sub-module of a model.

    Args:
        model: Network to explain (e.g. resnet50). It is put in eval mode and
            moved to the GPU when ``use_cuda`` is true.
        feature_module: Child of ``model`` whose inner layers are inspected
            (e.g. ``model.layer4``).
        target_layer_names: Names of the children of ``feature_module`` whose
            activations and gradients are captured (e.g. ``["2"]``).
        use_cuda: Whether to run the model on the GPU.
    """

    def __init__(self, model, feature_module, target_layer_names, use_cuda):
        self.model = model
        self.feature_module = feature_module
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()

        # ModelOutputs runs the forward pass and uses a FeatureExtractor to
        # capture the targeted activations and their gradients.
        self.extractor = ModelOutputs(self.model, self.feature_module, target_layer_names)

    def forward(self, input):
        """Plain forward pass through the wrapped model."""
        return self.model(input)

    def __call__(self, input, index=None):
        """Return the Grad-CAM map for ``input``.

        Args:
            input: Image batch; the map is computed for the first sample and
                resized to ``input.shape[2:]``.
            index: Class index to explain. If None, the highest scoring
                category is used.

        Returns:
            2-D float array scaled to [0, 1]; all zeros when no location has
            a positive contribution.
        """
        if self.cuda:
            features, output = self.extractor(input.cuda())
        else:
            features, output = self.extractor(input)

        if index is None:
            index = np.argmax(output.detach().cpu().numpy())

        # One-hot selector for the target class, created directly on the
        # device of the output so no separate CUDA branch is needed.
        one_hot = torch.zeros((1, output.size()[-1]),
                              dtype=output.dtype, device=output.device)
        one_hot[0, int(index)] = 1
        score = torch.sum(one_hot * output)

        self.feature_module.zero_grad()
        self.model.zero_grad()
        score.backward(retain_graph=True)

        grads_val = self.extractor.get_gradients()[-1].detach().cpu().numpy()
        target = features[-1].detach().cpu().numpy()[0, :]

        # Channel weights: spatial mean of the gradients of the first sample.
        weights = np.mean(grads_val, axis=(2, 3))[0, :]
        # Weighted sum of the activation maps over the channel axis.
        cam = (weights[:, None, None] * target).sum(axis=0)
        cam = cam.astype(np.float32, copy=False)

        # ReLU: keep only the positive contributions.
        cam = np.maximum(cam, 0)

        # cv2.resize takes dsize as (width, height) while the tensor shape is
        # (..., height, width); passing the shape as-is would transpose the
        # map for non-square inputs.
        height, width = input.shape[2:]
        cam = cv2.resize(cam, (int(width), int(height)))

        # Normalise to [0, 1]; skip the division for an all-zero map, which
        # would otherwise turn into NaNs.
        cam = cam - np.min(cam)
        peak = np.max(cam)
        if peak > 0:
            cam = cam / peak

        return cam

# zero_grad()とは？

# Guided BackPropagation

# Guided BackPropagationReLUの定義

In [52]:
class GuidedBackpropReLU(Function):
    """ReLU with the Guided Backpropagation rule in its backward pass.

    Forward is an ordinary ReLU. Backward lets a gradient through only where
    the forward input was positive and the incoming gradient is positive.
    """

    @staticmethod
    def forward(ctx, input):
        """Zero out the non-positive entries of ``input``."""
        keep = (input > 0).type_as(input)
        output = torch.addcmul(torch.zeros_like(input), input, keep)
        ctx.save_for_backward(input, output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        """Gate ``grad_output`` by both positivity masks."""
        input, _ = ctx.saved_tensors

        forward_positive = (input > 0).type_as(grad_output)
        gradient_positive = (grad_output > 0).type_as(grad_output)

        # First gate by the forward mask, then by the gradient mask.
        gated = torch.addcmul(torch.zeros_like(input), grad_output, forward_positive)
        return torch.addcmul(torch.zeros_like(input), gated, gradient_positive)


# torch.autograd.Functionとは？
- https://pytorch.org/tutorials/beginner/examples_autograd/two_layer_net_custom_function.html

## A. 新しい自動微分の関数を作るときに使う(継承)

# @staticmethodとは

- https://torch.classcat.com/2018/05/26/pytorch-docs-notes-extending/

forward と backward の両者は @staticmethod である

- https://qiita.com/msrks/items/fdc9afd12effc2cba1bc


- classmethod: クラス変数にアクセスすべきときや、継承クラスで動作が変わるべきときは classmethodを使おう。
- staticmethod: 継承クラスでも動作が変わらないときはstaticmethodを使おう

# Guided BackPropagationReLU Module

In [None]:
class _GuidedBackpropReLUModule(nn.Module):
    """nn.Module wrapper around ``GuidedBackpropReLU.apply``.

    Storing a real module (instead of the bare function) in ``_modules``
    keeps module traversal such as ``.eval()``, ``.cuda()`` and
    ``.zero_grad()`` working on the patched model.
    """

    def forward(self, input):
        return GuidedBackpropReLU.apply(input)


class GuidedBackpropReLUModel:
    """Wrap a model so that its input gradient follows Guided Backpropagation.

    Every ``nn.ReLU`` inside ``model`` is replaced IN PLACE by a module that
    applies ``GuidedBackpropReLU``; the model object passed in is modified.

    Args:
        model: Network to wrap. It is put in eval mode and moved to the GPU
            when ``use_cuda`` is true.
        use_cuda: Whether to run the model on the GPU.
    """

    def __init__(self, model, use_cuda):
        self.model = model
        self.model.eval()
        self.cuda = use_cuda
        if self.cuda:
            self.model = model.cuda()

        def recursive_relu_apply(module_top):
            # Snapshot the items because entries are replaced while looping.
            for idx, module in list(module_top._modules.items()):
                if module is None:
                    continue
                recursive_relu_apply(module)
                if isinstance(module, nn.ReLU):
                    replacement = _GuidedBackpropReLUModule()
                    replacement.train(module.training)
                    module_top._modules[idx] = replacement

        # replace ReLU with GuidedBackpropReLU
        recursive_relu_apply(self.model)

    def forward(self, input):
        """Plain forward pass through the patched model."""
        return self.model(input)

    def __call__(self, input, index=None):
        """Return the guided-backprop gradient of one class score w.r.t. ``input``.

        Args:
            input: Leaf tensor with ``requires_grad=True``. Its ``.grad`` is
                reset before the pass and holds this pass's gradient after.
            index: Class index to explain. If None, the highest scoring
                category is used.

        Returns:
            NumPy array holding the gradient for the first sample of the
            batch (a copy, independent of ``input.grad``).

        Raises:
            ValueError: If no gradient reached ``input``.
        """
        # Gradients accumulate in .grad across backward passes; drop anything
        # left from an earlier pass so the result reflects this pass only.
        input.grad = None

        if self.cuda:
            output = self.forward(input.cuda())
        else:
            output = self.forward(input)

        if index is None:
            index = np.argmax(output.detach().cpu().numpy())

        # One-hot selector for the target class on the device of the output.
        one_hot = torch.zeros((1, output.size()[-1]),
                              dtype=output.dtype, device=output.device)
        one_hot[0, int(index)] = 1
        score = torch.sum(one_hot * output)

        score.backward(retain_graph=True)

        if input.grad is None:
            raise ValueError(
                "no gradient reached `input`; it must be a leaf tensor "
                "created with requires_grad=True"
            )

        # Copy so that later backward passes cannot alter the returned array.
        return input.grad.detach().cpu().numpy()[0].copy()

# 画像に関するメソッド

## 画像の前処理

In [None]:
def preprocess_image(img):
    """Turn an H x W x 3 image array into a normalised 1 x 3 x H x W tensor.

    The channel order is reversed, each channel is shifted and scaled by the
    fixed per-channel mean/std below, the axes are moved to channel-first,
    and a leading batch axis is added. ``img`` itself is not modified.

    Returns:
        torch.Tensor with ``requires_grad`` enabled.
    """
    channel_means = [0.485, 0.456, 0.406]
    channel_stds = [0.229, 0.224, 0.225]

    # Work on a copy, viewed with the last (channel) axis reversed.
    flipped = img.copy()[:, :, ::-1]
    for ch, (mean, std) in enumerate(zip(channel_means, channel_stds)):
        plane = flipped[:, :, ch]
        plane[...] = plane - mean
        plane[...] = plane / std

    # HWC -> CHW; from_numpy needs a contiguous array (no negative strides).
    chw = np.ascontiguousarray(np.transpose(flipped, (2, 0, 1)))
    tensor = torch.from_numpy(chw)
    tensor.unsqueeze_(0)
    return tensor.requires_grad_(True)

## 画像とCAMを重ねる(元の画像+CAMのヒートマップ)

In [None]:
def show_cam_on_image(img, mask):
    """Overlay the CAM heat map on the image and write it to ``cam.jpg``.

    ``mask`` is scaled by 255 and colour-mapped with JET; the coloured map
    (rescaled to [0, 1]) is added to ``img``, and the sum is divided by its
    maximum before being saved as an 8-bit image.
    """
    colored = cv2.applyColorMap(np.uint8(255 * mask), cv2.COLORMAP_JET)
    overlay = np.float32(colored) / 255 + np.float32(img)
    overlay = overlay / np.max(overlay)
    cv2.imwrite("cam.jpg", np.uint8(255 * overlay))

## 画像を前処理の前に戻す

In [None]:
def deprocess_image(img):
    """Rescale an arbitrary-range array into a displayable uint8 image.

    The array is centred on its mean, divided by its standard deviation
    (plus 1e-5 to avoid division by zero), scaled to a standard deviation of
    0.1 around 0.5, clipped to [0, 1] and converted to 0-255.

    See https://github.com/jacobgil/keras-grad-cam/blob/master/grad-cam.py#L65
    """
    centered = img - np.mean(img)
    standardized = centered / (np.std(centered) + 1e-5)
    shifted = standardized * 0.1 + 0.5
    return np.uint8(np.clip(shifted, 0, 1) * 255)