In [1]:
import os
import cv2
import torch
import timm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from typing import Callable, Union, Tuple
from torchcam.methods import GradCAM
from torch.utils.data import Dataset, DataLoader
from IPython.display import clear_output
import ipywidgets as widgets

In [2]:
# 데이터셋 불러오기
test_df = pd.read_csv('/home/taeyoung4060ti/바탕화면/level1-imageclassification-cv-01/data/train_original.csv')

In [3]:
# timm 모델 생성
model = timm.create_model('resnext50_32x4d', pretrained=False,num_classes=500)

# 체크포인트 로드 (PyTorch Lightning에서 저장한 ckpt 파일)
ckpt = torch.load('/home/taeyoung4060ti/바탕화면/level1-imageclassification-cv-01/result/resnext50_32x4d-64-0.0005-AdamW-O-v2_09-21_6/fold4/epoch=19-step=2920.ckpt')
state_dict = {'.'.join(key.split('.')[2:]): val for key, val in ckpt['state_dict'].items()}

# state_dict에서 키를 수정하여 fc.1 -> fc로 변경
new_state_dict = {}
for key, val in state_dict.items():
    if 'fc.1' in key:
        new_key = key.replace('fc.1', 'fc')
    else:
        new_key = key
    new_state_dict[new_key] = val

# 새로운 state_dict로 모델 가중치 로드
model.load_state_dict(new_state_dict)
model.cuda()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act2): ReLU(inplace=True)
      (aa): Identity()
      (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_

In [4]:
from torchvision import transforms

common_transforms = transforms.Compose([  # transforms.Compose로 감싸줘야 함
    transforms.Resize((224, 224)),  # 이미지를 224x224 크기로 리사이즈
    transforms.ToTensor(),  # 이미지를 PyTorch 텐서로 변환
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406], 
        std=[0.229, 0.224, 0.225]
    )  # 정규화
])

In [5]:


class CustomDataset(Dataset):
    def __init__(self, root_dir, info_df, transform, is_inference):
        self.root_dir = root_dir
        self.transform = transform
        self.is_inference = is_inference
        self.info_df = info_df
        self.image_paths = self.info_df['image_path'].tolist()
        
        if not self.is_inference:
            self.targets = self.info_df['target'].tolist()

    def __len__(self) -> int:
        return len(self.image_paths)

    def __getitem__(self, index: int) -> Union[Tuple[torch.Tensor, int], torch.Tensor]:
        img_path = os.path.join(self.root_dir, self.image_paths[index])
        image = cv2.imread(img_path, cv2.IMREAD_COLOR)  # 이미지를 numpy로 로드
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR에서 RGB로 변환
        
        # numpy 배열을 PIL 이미지로 변환
        image = Image.fromarray(image)

        # 변환 적용
        image = self.transform(image)  # 'image=image' 대신 'image'만 전달

        if self.is_inference:
            return image
        else:
            target = self.targets[index]
            return image, target, index

In [6]:
train_dataset = CustomDataset('./data/train_original', test_df, common_transforms, False)
train_dataloader = DataLoader(train_dataset, batch_size=64, num_workers=8, shuffle=False)

In [7]:
# GradCAM 추출기 및 모델, 디바이스 등은 함수 외부에서 미리 설정
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
target_layer = model.layer4[-1]
cam_extractor = GradCAM(model, target_layer)
model.eval()  # 모델을 평가 모드로 설정

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (act1): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (act1): ReLU(inplace=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (drop_block): Identity()
      (act2): ReLU(inplace=True)
      (aa): Identity()
      (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_

In [8]:
# Grad-CAM 결과와 예측값, 실제값을 저장할 리스트
gradcam_images = []
original_images = []
overlay_images = []
predicted_labels = []  # 예측값을 저장할 리스트
true_labels = []  # 실제값을 저장할 리스트

# 전체 데이터셋에 대해 Grad-CAM 계산
for batch in train_dataloader:
    inputs, targets, _ = batch  # inputs와 targets을 함께 가져옴
    inputs = inputs.to(device)
    targets = targets.to(device)
    
    outputs = model(inputs)
    _, preds = torch.max(outputs, 1)

    # 예측값과 실제값 비교 후, 예측이 틀린 경우에만 진행
    for i in range(inputs.size(0)):
        if preds[i] != targets[i]:  # 예측값과 실제값이 다를 때만 실행
            # Grad-CAM 계산
            cam = cam_extractor(preds[i].item(), outputs[i].unsqueeze(0), retain_graph=True)[0]
            cam = cam.mean(dim=0).cpu().numpy()
            cam = cv2.resize(cam, (inputs[i].shape[2], inputs[i].shape[1]))
            cam = (cam - cam.min()) / (cam.max() - cam.min())
            cam = np.uint8(255 * cam)
            cam = cv2.applyColorMap(cam, cv2.COLORMAP_JET)
            cam = cv2.cvtColor(cam, cv2.COLOR_BGR2RGB)

            # 원본 이미지 준비
            input_image = inputs[i].cpu().numpy().transpose((1, 2, 0))
            input_image = (input_image - input_image.min()) / (input_image.max() - input_image.min())
            input_image = (input_image * 255).astype(np.uint8)

            # Grad-CAM 오버레이 이미지 생성
            overlay = cv2.addWeighted(input_image, 0.5, cam, 0.5, 0)

            # 결과 저장
            gradcam_images.append(cam)
            original_images.append(input_image)
            overlay_images.append(overlay)
            predicted_labels.append(preds[i].item())  # 예측값 저장
            true_labels.append(targets[i].item())  # 실제값 저장

  cam = (cam - cam.min()) / (cam.max() - cam.min())
  cam = np.uint8(255 * cam)


In [9]:
# 슬라이더로 이미지를 시각화하는 함수
def plot_gradcam(value):
    value = int(value)  # 슬라이더 값 추출
    clear_output(wait=True)  # 이전 출력 제거
    fig, ax = plt.subplots(1, 3, figsize=(18, 6))  # 새 그림 생성

    # 미리 계산된 이미지를 시각화
    ax[0].imshow(original_images[value])
    ax[0].set_title(f"Original Image\nTrue Label: {true_labels[value]}, Predicted Label: {predicted_labels[value]}")
    ax[0].axis("off")

    ax[1].imshow(gradcam_images[value])
    ax[1].set_title(f"Grad-CAM Image\nTrue Label: {true_labels[value]}, Predicted Label: {predicted_labels[value]}")
    ax[1].axis("off")

    ax[2].imshow(overlay_images[value])
    ax[2].set_title(f"Overlay Image\nTrue Label: {true_labels[value]}, Predicted Label: {predicted_labels[value]}")
    ax[2].axis("off")

    plt.tight_layout()
    plt.show()

In [10]:
# 슬라이더 생성 (이미지 개수에 맞게 max 값을 설정)
slider = widgets.IntSlider(min=0, max=len(gradcam_images) - 1, step=1, description='Image Index')

# 슬라이더와 plot_gradcam 함수 연결
widgets.interact(plot_gradcam, value=slider)

interactive(children=(IntSlider(value=0, description='Image Index', max=701), Output()), _dom_classes=('widget…

<function __main__.plot_gradcam(value)>