In [2]:
import os
import sys
root = os.path.dirname(os.getcwd())
sys.path.append(root)
import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')
import torch, yaml, cv2, os, shutil
import numpy as np
np.random.seed(0)
import matplotlib.pyplot as plt
from tqdm import trange
from PIL import Image
from models.yolo import Model
from utils.augmentations import letterbox
from utils.general import xywh2xyxy, non_max_suppression
from models.experimental import attempt_load
from pytorch_grad_cam import GradCAMPlusPlus, GradCAM, XGradCAM, EigenCAM, HiResCAM, LayerCAM, RandomCAM, EigenGradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients

# ModuleNotFoundError: No module named 'pytorch_grad_cam'
# pip install grad-cam

In [25]:
class ActivationsAndGradients:
    """ Class for extracting activations and
    registering gradients from targetted intermediate layers """

    def __init__(self, model, target_layers, reshape_transform):
        self.model = model
        self.gradients = []
        self.activations = []
        self.reshape_transform = reshape_transform
        self.handles = []
        for target_layer in target_layers:
            self.handles.append(
                target_layer.register_forward_hook(self.save_activation))
            # Because of https://github.com/pytorch/pytorch/issues/61519,
            # we don't use backward hook to record gradients.
            self.handles.append(
                target_layer.register_forward_hook(self.save_gradient))

    def save_activation(self, module, input, output):
        activation = output

        if self.reshape_transform is not None:
            activation = self.reshape_transform(activation)
        self.activations.append(activation.cpu().detach())

    def save_gradient(self, module, input, output):
        if not hasattr(output, "requires_grad") or not output.requires_grad:
            # You can only register hooks on tensor requires grad.
            return

        # Gradients are computed in reverse order
        def _store_grad(grad):
            if self.reshape_transform is not None:
                grad = self.reshape_transform(grad)
            self.gradients = [grad.cpu().detach()] + self.gradients

        output.register_hook(_store_grad)

    def post_process(self, result):
        logits_ = result[:, 4:]
        boxes_ = result[:, :4]
        sorted, indices = torch.sort(logits_.max(1)[0], descending=True)
        return torch.transpose(logits_[0], dim0=0, dim1=1)[indices[0]], torch.transpose(boxes_[0], dim0=0, dim1=1)[indices[0]], xywh2xyxy(torch.transpose(boxes_[0], dim0=0, dim1=1)[indices[0]]).cpu().detach().numpy()

  
    def __call__(self, x):
        self.gradients = []
        self.activations = []
        model_output = self.model(x)
        post_result, pre_post_boxes, post_boxes = self.post_process(model_output[0])
        return [[post_result, pre_post_boxes]]

    def release(self):
        for handle in self.handles:
            handle.remove()

In [27]:
class yolov9_target(torch.nn.Module):
    def __init__(self, ouput_type, conf, ratio) -> None:
        super().__init__()
        self.ouput_type = ouput_type
        self.conf = conf
        self.ratio = ratio
    
    def forward(self, data):
        post_result, pre_post_boxes = data
        result = []
        for i in trange(int(post_result.size(0) * self.ratio)):
            if float(post_result[i].max()) < self.conf:
                break
            if self.ouput_type == 'class' or self.ouput_type == 'all':
                result.append(post_result[i].max())
            elif self.ouput_type == 'box' or self.ouput_type == 'all':
                for j in range(4):
                    result.append(pre_post_boxes[i, j])
        return sum(result)

class Yolov9XAI:
    def __init__(self, weight_path, device, method, layer, backward_type, conf_threshold, ratio):
        self.device = torch.device(device)
        weight = torch.load(weight_path)
        model_names = weight['model'].names
        self.model = attempt_load(weight_path, device)


class yolov9_heatmap:
    def __init__(self, weight, device, method, layer, backward_type, conf_threshold, ratio, show_box, renormalize):
        device = torch.device(device)
        ckpt = torch.load(weight)
        model_names = ckpt['model'].names
        model = attempt_load(weight, device)
        for p in model.parameters():
            p.requires_grad_(True)
        model.eval()
        
        target = yolov9_target(backward_type, conf_threshold, ratio)
        target_layers = [model.model[l] for l in layer]
        method = eval(method)(model, target_layers, use_cuda=device.type=='cuda')
        method.activations_and_grads = ActivationsAndGradients(model, target_layers, None)

        colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.int)
        self.__dict__.update(locals())

    def post_process(self, result):
        result = non_max_suppression(result, conf_thres=self.conf_threshold, iou_thres=0.65)[0]
        return result
    
    def draw_detections(self, box, color, name, img):
        xmin, ymin, xmax, ymax = list(map(int, list(box)))
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), tuple(int(x) for x in color), 2)
        cv2.putText(img, str(name), (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, tuple(int(x) for x in color), 2, lineType=cv2.LINE_AA)
        return img

    def renormalize_cam_in_bounding_boxes(self, boxes, image_float_np, grayscale_cam):
        """Normalize the CAM to be in the range [0, 1] 
        inside every bounding boxes, and zero outside of the bounding boxes. """
        renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
        for x1, y1, x2, y2 in boxes:
            x1, y1 = max(x1, 0), max(y1, 0)
            x2, y2 = min(grayscale_cam.shape[1] - 1, x2), min(grayscale_cam.shape[0] - 1, y2)
            renormalized_cam[y1:y2, x1:x2] = scale_cam_image(grayscale_cam[y1:y2, x1:x2].copy())    
        renormalized_cam = scale_cam_image(renormalized_cam)
        eigencam_image_renormalized = show_cam_on_image(image_float_np, renormalized_cam, use_rgb=True)
        return eigencam_image_renormalized
    
    def process(self, img_path, save_path):
        # img process
        img = cv2.imread(img_path)
        img = letterbox(img)[0]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.float32(img) / 255.0
        tensor = torch.from_numpy(np.transpose(img, axes=[2, 0, 1])).unsqueeze(0).to(self.device)
        
        try:
            grayscale_cam = self.method(tensor, [self.target])
        except AttributeError as e:
            return
        
        grayscale_cam = grayscale_cam[0, :]
        cam_image = show_cam_on_image(img, grayscale_cam, use_rgb=True)
        
        with torch.no_grad():
            pred = self.model(tensor)
            pred = self.post_process(pred[0])
        if self.renormalize:
            cam_image = self.renormalize_cam_in_bounding_boxes(pred[:, :4].cpu().detach().numpy().astype(np.int32), img, grayscale_cam)
        if self.show_box:
            for data in pred:
                data = data.cpu().detach().numpy()
                cam_image = self.draw_detections(data[:4], self.colors[int(data[5])], f'{self.model_names[int(data[5])]} {float(data[4]):.2f}', cam_image)
        
        cam_image = Image.fromarray(cam_image)
        cam_image.save(save_path)
    
    def __call__(self, img_path, save_path):
        # remove dir if exist
        if os.path.exists(save_path):
            shutil.rmtree(save_path)
        # make dir if not exist
        os.makedirs(save_path, exist_ok=True)

        if os.path.isdir(img_path):
            for img_path_ in os.listdir(img_path):
                self.process(f'{img_path}/{img_path_}', f'{save_path}/{img_path_}')
        else:
            self.process(img_path, f'{save_path}/result.png')


In [29]:
def get_params():
    params = {
        'weight': '../experiments/gelan-e-1280-scratch-done/weights/gelan-e-1280-best-20240520.pt',
        'device': 'cuda:0',
        'method': 'XGradCAM', # GradCAMPlusPlus, GradCAM, XGradCAM, EigenCAM, HiResCAM, LayerCAM, RandomCAM, EigenGradCAM
        'layer': [11, 14, 17],
        'backward_type': 'all', # class, box, all
        'conf_threshold': 0.2, # 0.6
        'ratio': 0.02, # 0.02-0.1
        'show_box': True,
        'renormalize': False
    }

    return params

In [30]:
model = yolov9_heatmap(**get_params())

Fusing layers... 
gelan-e-sar summary: 690 layers, 57285011 parameters, 0 gradients, 188.6 GFLOPs
Exception ignored in: <function BaseCAM.__del__ at 0x7f7e01a8e2a0>
Traceback (most recent call last):
  File "/home/caerang/miniconda3/envs/torch/lib/python3.11/site-packages/pytorch_grad_cam/base_cam.py", line 196, in __del__
    self.activations_and_grads.release()
    ^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'XGradCAM' object has no attribute 'activations_and_grads'


TypeError: XGradCAM.__init__() got an unexpected keyword argument 'use_cuda'

**입력 이미지에 대해서 forward pass를 한 번 수행하고 검출 결과를 표시**

In [3]:
import torch
import cv2
import numpy as np
import requests
import torchvision.transforms as transforms
from pytorch_grad_cam import EigenCAM
from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image
from PIL import Image

In [None]:
COLORS = np.random.uniform(0, 255, size=(80, 3))

# TODO: yolov9에서 사용하는 prediction results format에 맞게 코드 수정
def parse_detections(results):
    pass

**weights 파일 읽어서 모델 객체 생성하기**

In [17]:
from pathlib import Path

import torch

from models.common import DetectMultiBackend
from utils.general import check_img_size, Profile, increment_path
from utils.dataloaders import LoadImages, IMG_FORMATS

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
weights = '/work/src/yolov9/weights/gelan-e-1280-lion5-best-20240605.pt'
data = '/work/src/yolov9/data/yolo-anatomy.yaml'
imgsz = 1280
half = True
dnn = False

In [8]:
model = DetectMultiBackend(weights, device=device, dnn=dnn, data=data, fp16=half)

Fusing layers... 
gelan-e-sar summary: 690 layers, 57285011 parameters, 0 gradients, 188.6 GFLOPs


In [12]:
stride, names, pt = model.stride, model.names, model.pt
imgsz = check_img_size(imgsz, s=stride)
print(f'imgsz: {imgsz}, stride: {model.stride}, names: {model.names}, model.pt: {model.pt}')

imgsz: 1280, stride: 32, names: {0: 'item'}, model.pt: True


**단일 이미지 데이터 세트 생성**

In [42]:
bs = 4
vid_stride = 1
# source = '/work/dataset/VR-DRONE-v1.0.0.test/20221207/1Class/30m/30c/Crop_0030_030m_30c_3_4_00344.jpg'
source = '/work/dataset/vr-drone-test/images'
dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt, vid_stride=vid_stride)

In [16]:
model.warmup(imgsz=(bs, 3, imgsz, imgsz))

**Start inference**

In [53]:
project = '/work/src/yolov9/runs'
name = 'debug'
visualize = False
exist_ok = True
save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)
augment = False

In [54]:
dt = Profile(), Profile(), Profile()
# im:<class 'numpy.ndarray'> (3, 736, 1280)
for path, im, im0s, vid_cap, s in dataset:
    # print(type(im), im.shape, len(im.shape))
    with dt[0]:
        im = torch.from_numpy(im).to(model.device)
        im = im.half() if model.fp16 else im.float()
        im /= 255
        if len(im.shape) == 3:
            im = im[None]
    with dt[1]:
        visualize = increment_path(save_dir/Path(path).stem, mkdir=True) if visualize else False
        print(visualize)
        # # pred = [Tensor, [Tensor, Tensor, Tensor]]
        # # pred.shapes = [torch.Size([1, 5, 19320]), [torch.Size([1, 65, 92, 160]) torch.Size([1, 65, 46, 80]) torch.Size([1, 65, 23, 40])]]
        pred = model(im, augment=augment, visualize=visualize)
        # print(len(pred), pred[0].shape, len(pred[1]), pred[1][0].shape, pred[1][1].shape, pred[1][2].shape)

    # with dt[2]:
    #     pred = non_max_suppression(pred, conf_thres, out_thres, classes)

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
