런타임 유형 GPU로 변경

In [None]:
# Move into the project folder on Google Drive. The path is relative to the
# current working directory (assumes Drive is already mounted — TODO confirm).
%cd drive/MyDrive/gradcam-visualization/

/content/drive/MyDrive/gradcam-visualization


In [None]:
# List the directory contents to confirm we are in the project root.
%ls

[0m[01;34m_examples[0m/                   [01;34mpytorch_grad_cam[0m/  [01;34m_results[0m/
gradcam_visualization.ipynb  README.md


라이브러리 설치 및 불러오기

In [None]:
!pip install timm
!pip install ttach

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.12-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 KB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub
  Downloading huggingface_hub-0.12.0-py3-none-any.whl (190 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.3/190.3 KB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: huggingface-hub, timm
Successfully installed huggingface-hub-0.12.0 timm-0.6.12
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ttach
  Downloading ttach-0.0.3-py3-none-any.whl (9.8 kB)
Installing collected packages: ttach
Successfully installed ttach-0.0.3


In [None]:
import math
import os
import argparse
import cv2
import numpy as np
import torch
import timm

from pytorch_grad_cam import GradCAM, \
    ScoreCAM, \
    GradCAMPlusPlus, \
    AblationCAM, \
    XGradCAM, \
    EigenCAM, \
    EigenGradCAM, \
    LayerCAM, \
    FullGrad
from pytorch_grad_cam.utils.image import show_cam_on_image, \
    preprocess_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.ablation_layer import AblationLayerVit

사용 가능한 pretrained model list 확인

In [None]:
# Only list ConvNeXt variants that ship pretrained weights; without
# pretrained=True every registered architecture name is returned.
timm.list_models('convnext*', pretrained=True)

In [None]:
# Only list ResNet variants that ship pretrained weights; without
# pretrained=True every registered architecture name is returned.
timm.list_models('resnet*', pretrained=True)

테스트 이미지 및 method 설정

In [None]:
def get_args(argv=None):
    """Build the run configuration for the CAM visualisation cells.

    Args:
        argv: Optional list of command-line style tokens, for example
            ``['--method', 'layercam', '--eigen_smooth']``. When ``None``
            (the default) an empty list is parsed, so every option keeps its
            default value. The kernel's own ``sys.argv`` is never read.

    Returns:
        argparse.Namespace with the attributes ``use_cuda``, ``image_path``,
        ``image_name``, ``aug_smooth``, ``eigen_smooth`` and ``method``.
        ``use_cuda`` is forced to ``False`` when
        ``torch.cuda.is_available()`` reports no GPU.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--use-cuda', action='store_true', default=True,
                        help='Use NVIDIA GPU acceleration')
    # '--use-cuda' defaults to True and is a store_true flag, so on its own
    # it can never be switched off; '--no-cuda' provides the opt-out.
    parser.add_argument('--no-cuda', dest='use_cuda', action='store_false',
                        help='Run on the CPU even if a GPU is available')
    parser.add_argument(
        '--image-path',
        type=str,
        default='_examples',
        help='Input image folder name')
    parser.add_argument(
        '--image-name',
        type=str,
        default='horses.jpeg',
        help='Input image file name')
    parser.add_argument('--aug_smooth', action='store_true',
                        help='Apply test time augmentation to smooth the CAM')
    parser.add_argument(
        '--eigen_smooth',
        action='store_true',
        help='Reduce noise by taking the first principal component '
        'of cam_weights*activations')
    parser.add_argument(
        '--method',
        type=str,
        default='gradcam',
        choices=['gradcam', 'gradcam++', 'scorecam', 'xgradcam', 'ablationcam',
                 'eigencam', 'eigengradcam', 'layercam', 'fullgrad'])

    # Parse an explicit token list; inside a notebook sys.argv belongs to the
    # kernel launcher and must not be interpreted as our options.
    args = parser.parse_args(args=[] if argv is None else list(argv))
    args.use_cuda = args.use_cuda and torch.cuda.is_available()
    if args.use_cuda:
        print('Using GPU for acceleration')
    else:
        print('Using CPU for computation')

    return args

tensor를 (B, C, H, W) shape으로 변환하는 함수

In [None]:
def reshape_transform(tensor):
    """Rearrange a layer activation into ``(B, C, H, W)`` layout.

    Handled inputs:

    * 4-D with equal sizes on axes 1 and 2: treated as ``(B, H, W, C)`` and
      permuted to ``(B, C, H, W)``.
    * 4-D with equal sizes on axes 2 and 3 (and not the case above): treated
      as already ``(B, C, H, W)`` and returned unchanged.
    * 3-D ``(B, N, C)`` token sequences: if ``N`` is a perfect square the
      tokens are folded into a square grid; otherwise the first token is
      dropped (assumed to be a class token) and ``N - 1`` must be a perfect
      square.

    Args:
        tensor: ``torch.Tensor`` activation with 3 or 4 dimensions.

    Returns:
        ``torch.Tensor`` of shape ``(B, C, H, W)``.

    Raises:
        ValueError: if the tensor is not 3-D/4-D, or its sizes match none of
            the layouts above.
    """
    # Shapes observed at the target layers:
    # convnext      torch.Size([1, 768, 7, 7])
    # resnet18      torch.Size([1, 512, 7, 7])
    # resnet50      torch.Size([1, 2048, 7, 7])
    # Wide_resnet50 torch.Size([1, 2048, 7, 7])
    # ResNext50     torch.Size([1, 2048, 7, 7])
    rank = tensor.dim()

    if rank == 4:
        if tensor.size(1) == tensor.size(2):
            # Channels-last, [B, H, W, C],
            # e.g. torch.Size([1, 7, 7, 768]) -> torch.Size([1, 768, 7, 7])
            return tensor.permute(0, 3, 1, 2)
        if tensor.size(2) == tensor.size(3):
            # Already channels-first, [B, C, H, W],
            # e.g. torch.Size([1, 768, 7, 7])
            return tensor
        raise ValueError(
            "Cannot infer the layout of a 4-D activation with shape "
            f"{tuple(tensor.shape)}; expected square [B, H, W, C] or "
            "[B, C, H, W]")

    if rank == 3:
        batch, tokens, channels = tensor.shape
        side = math.isqrt(tokens)
        if side * side != tokens:
            # Not a square grid: assume one leading class token and drop it.
            side = math.isqrt(tokens - 1)
            if side * side != tokens - 1:
                raise ValueError(
                    f"Cannot fold {tokens} tokens into a square grid, with "
                    "or without a leading class token")
            tensor = tensor[:, 1:, :]
        grid = tensor.reshape(batch, side, side, channels)
        return grid.permute(0, 3, 1, 2)

    raise ValueError(
        "reshape_transform expects a 3-D or 4-D tensor, got shape "
        f"{tuple(tensor.shape)}")

In [None]:
if __name__ == '__main__':
    # Run the selected CAM method on a single image with four pretrained timm
    # backbones (ResNet50, ViT, SwinT, ConvNext) and save the resized input
    # followed by one heat-map overlay per backbone, side by side, to
    # _results/result_<image name without extension>.jpg.

    args = get_args()
    methods = \
        {"gradcam": GradCAM,
         "scorecam": ScoreCAM,
         "gradcam++": GradCAMPlusPlus,
         "ablationcam": AblationCAM,
         "xgradcam": XGradCAM,
         "eigencam": EigenCAM,
         "eigengradcam": EigenGradCAM,
         "layercam": LayerCAM,
         "fullgrad": FullGrad}

    if args.method not in methods:
        raise Exception(f"method should be one of {list(methods.keys())}")

    image_file = os.path.join(args.image_path, args.image_name)
    bgr_img = cv2.imread(image_file, 1)
    if bgr_img is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError(f"Could not read image '{image_file}'")
    bgr_img = cv2.resize(bgr_img, (224, 224))
    # Float RGB image in [0, 1]: the input for both preprocessing and overlay.
    rgb_img = np.float32(cv2.cvtColor(bgr_img, cv2.COLOR_BGR2RGB)) / 255

    # (timm model name, function returning the layer whose activations are
    # visualised). Order here is the left-to-right order of the output panels.
    backbones = [
        # ResNet50
        ('resnet50',
         lambda m: m.layer4),
        # ViT
        ('vit_base_patch16_224',
         lambda m: m.blocks[-1].norm1),
        # SwinT
        ('swin_base_patch4_window7_224',
         lambda m: m.layers[-1].blocks[-1].norm2),
        # ConvNext
        ('convnext_base',
         lambda m: m.stages[-1].blocks[-1].norm),
    ]

    # Everything collected here is uint8 BGR so cv2.imwrite stores true colours.
    panels = [bgr_img]

    for timm_name, pick_target_layer in backbones:
        model = timm.create_model(timm_name, pretrained=True)
        target_layers = [pick_target_layer(model)]

        model.eval()

        if args.use_cuda:
            model = model.cuda()

        # Normalise with the statistics this checkpoint was trained with, as
        # reported by timm, instead of one fixed mean/std for every backbone.
        data_cfg = timm.data.resolve_data_config({}, model=model)
        input_tensor = preprocess_image(rgb_img,
                                        mean=list(data_cfg['mean']),
                                        std=list(data_cfg['std']))

        cam_kwargs = dict(model=model,
                          target_layers=target_layers,
                          use_cuda=args.use_cuda,
                          reshape_transform=reshape_transform)
        if args.method == "ablationcam":
            cam_kwargs["ablation_layer"] = AblationLayerVit()
        cam = methods[args.method](**cam_kwargs)

        # AblationCAM and ScoreCAM have batched implementations.
        # You can override the internal batch size for faster computation.
        cam.batch_size = 32

        # To inspect a specific class C, pass
        # targets=[ClassifierOutputTarget(<class index of C>)].
        # With targets=None the class with the highest classification score
        # is used.
        grayscale_cam = cam(input_tensor=input_tensor,
                            targets=None,
                            eigen_smooth=args.eigen_smooth,
                            aug_smooth=args.aug_smooth)

        # Here grayscale_cam has only one image in the batch
        grayscale_cam = grayscale_cam[0, :]

        # rgb_img is RGB, so blend in RGB and convert once to BGR for OpenCV;
        # blending with the default use_rgb=False would swap red and blue in
        # the saved overlay.
        cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)
        panels.append(cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR))

    os.makedirs('_results', exist_ok=True)
    result_file = '_results/result_{}.jpg'.format(
        os.path.splitext(args.image_name)[0])
    # cv2.imwrite returns False instead of raising when the write fails.
    if not cv2.imwrite(result_file, np.hstack(panels)):
        raise IOError(f"Failed to write '{result_file}'")

Using GPU for acceleration
