In [59]:
import numpy as np
import os
import torch
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
from PIL import Image
from collections import OrderedDict
from tqdm.notebook import tqdm
import cv2

import sys
sys.path.append("F:/skripsi/FAS-Skripsi-4")

from fas_simple_distill.model.divt.divt_mobilevit_v2 import DG_model
from face_detection import FaceDetection, FaceSelectionMethod

from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM, HiResCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

sys.path.remove("F:/skripsi/FAS-Skripsi-4")

In [60]:
class crop_align_face:
    """Callable transform that crops and aligns a face via ``FaceDetection``.

    The input may be a PIL image or any other object the detector accepts
    (presumably a BGR array — confirm against ``FaceDetection``). The crop is
    returned in the same kind of container that was passed in.
    """

    def __init__(
        self,
        use_cuda: bool = True,
        no_rotate: bool = True,
        crop_size: int = 256,
        scale: float = 0.9,
        select_method = FaceSelectionMethod.AREA,
    ) -> None:
        """Create the detector and remember the cropping parameters.

        The detector's third argument (the ONNX switch) is enabled exactly
        when ``use_cuda`` is false.
        """
        self.fd = FaceDetection(use_cuda, no_rotate, not use_cuda)
        self.select_method = select_method
        self.scale = scale
        self.crop_size = crop_size

    def __call__(self, x):
        """Detect, align and crop a face from ``x``; return it in the input's format."""
        input_is_pil = isinstance(x, Image.Image)
        if input_is_pil:
            # PIL images arrive as RGB; the detector is fed BGR channel order.
            x = cv2.cvtColor(np.array(x), cv2.COLOR_RGB2BGR)

        dets, angle = self.fd.predict(x)
        x_crop, _ = self.fd.align_single_face(
            x, dets, angle, self.crop_size, self.scale, self.select_method
        )

        if not input_is_pil:
            return x_crop

        # Convert back to RGB so a PIL caller gets a PIL image again.
        return Image.fromarray(cv2.cvtColor(x_crop, cv2.COLOR_BGR2RGB))

# Evaluation preprocessing: crop/align the face, convert to a tensor, then
# normalise each channel with the mean/std values given below.
eval_transform = T.Compose(
    [
        crop_align_face(),
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)

RuntimeError: CUDA error: unknown error
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [None]:
class SimpleGlobDataset(Dataset):
    """Dataset over all files under ``root`` that match ``glob_patt`` recursively.

    Each item is a pair ``(image, path)``. The image is opened with PIL,
    converted to RGB and, when a ``transform`` was supplied, passed through it.
    """

    def __init__(self, root, glob_patt, transform=None):
        """Collect the matching paths once, in sorted order."""
        self.root = root
        self.glob_patt = glob_patt
        self.transform = transform
        # Sorting fixes the index -> file mapping for the lifetime of the dataset.
        self.images = sorted(Path(root).rglob(glob_patt))

    def __len__(self):
        """Number of matched files."""
        return len(self.images)

    def __getitem__(self, index):
        """Return ``(image, path)`` for the file at ``index``."""
        img_path = self.images[index]
        img = Image.open(img_path).convert("RGB")
        if self.transform is None:
            return img, img_path
        return self.transform(img), img_path

In [None]:
# Run configuration for the CAM visualisation.
# NOTE(review): dataset_name says "casia_mfsd", but the images below are read
# from the oulu_npu/FP folder — confirm which label the output directory should carry.
dataset_name = "casia_mfsd_FP"
model_name = "divt_mobilevits_ICMtoO"
device = "cuda"
camtype = "gradcam"  # one of 'gradcam', 'gradcam++', 'hirescam'
target_class = 0  # 0 selects the "spoof_map" output folder, anything else "live_map"

# The results folder is keyed by the model name, so derive it from model_name
# to avoid keeping two copies of the same string in sync.
dataset = SimpleGlobDataset(
    root=f"F:/skripsi/FAS-Skripsi-4/results/{model_name}/oulu_npu/FP",
    glob_patt="*.png",
    transform=eval_transform,
)

# Fail fast: an empty match would otherwise make the rendering loop a silent no-op.
if len(dataset) == 0:
    raise FileNotFoundError(
        f"No files matching {dataset.glob_patt!r} found under {dataset.root!r}"
    )

In [None]:
# Build the network from its YAML configuration and restore the trained weights.
model = DG_model("F:/skripsi/FAS-Skripsi-4/fas_simple_distill/model/mobilevit_config/mobilevit_s.yaml")

# map_location="cpu" makes restoring independent of the device the checkpoint
# was saved from; load_state_dict copies the values into the model's own
# parameters, so the model's device is unaffected.
ckpt = torch.load(
    f"F:/skripsi/FAS-Skripsi-4/evaluator/weights/{model_name}.pth",
    map_location="cpu",
)
state_dict = ckpt['model']

model.load_state_dict(state_dict)

# Switch to inference mode; as the last expression this also displays the module tree.
model.eval()

DG_model(
  (backbone): Sequential(
    (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False, normalization=BatchNorm2d, activation=Swish)
    (1): Sequential(
      (0): InvertedResidual(in_channels=16, out_channels=32, stride=1, exp=4, dilation=1, skip_conn=False)
    )
    (2): Sequential(
      (0): InvertedResidual(in_channels=32, out_channels=64, stride=2, exp=4, dilation=1, skip_conn=False)
      (1): InvertedResidual(in_channels=64, out_channels=64, stride=1, exp=4, dilation=1, skip_conn=True)
      (2): InvertedResidual(in_channels=64, out_channels=64, stride=1, exp=4, dilation=1, skip_conn=True)
    )
    (3): Sequential(
      (0): InvertedResidual(in_channels=64, out_channels=96, stride=2, exp=4, dilation=1, skip_conn=False)
      (1): MobileViTBlock(
      	 Local representations
      		 Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, normalization=BatchNorm2d, activation=Swish)
      		 Conv2d(96, 144, kernel_size

In [None]:
# Output layout: ./results/<camtype>/<spoof_map|live_map>/<model_name>-<dataset_name>
if camtype not in ("gradcam", "gradcam++", "hirescam"):
    raise RuntimeError("camtype should be either 'gradcam', 'gradcam++', or 'hirescam'")

# target_class 0 is stored under "spoof_map"; every other value under "live_map".
map_kind = "spoof_map" if target_class == 0 else "live_map"
dst_path = Path(f"./results/{camtype}/{map_kind}/{model_name}-{dataset_name}")

# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
dst_path.mkdir(parents=True, exist_ok=True)

In [None]:
# Keep a handle on the model's `backbone` submodule; the bare name on the last
# line makes the notebook display its structure.
main_model = model.backbone
main_model

Sequential(
  (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False, normalization=BatchNorm2d, activation=Swish)
  (1): Sequential(
    (0): InvertedResidual(in_channels=16, out_channels=32, stride=1, exp=4, dilation=1, skip_conn=False)
  )
  (2): Sequential(
    (0): InvertedResidual(in_channels=32, out_channels=64, stride=2, exp=4, dilation=1, skip_conn=False)
    (1): InvertedResidual(in_channels=64, out_channels=64, stride=1, exp=4, dilation=1, skip_conn=True)
    (2): InvertedResidual(in_channels=64, out_channels=64, stride=1, exp=4, dilation=1, skip_conn=True)
  )
  (3): Sequential(
    (0): InvertedResidual(in_channels=64, out_channels=96, stride=2, exp=4, dilation=1, skip_conn=False)
    (1): MobileViTBlock(
    	 Local representations
    		 Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, normalization=BatchNorm2d, activation=Swish)
    		 Conv2d(96, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
    	 Global repre

In [None]:
# Every top-level child of the backbone is used as a CAM target layer.
target_layers = list(main_model)
target_layers

[Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False, normalization=BatchNorm2d, activation=Swish),
 Sequential(
   (0): InvertedResidual(in_channels=16, out_channels=32, stride=1, exp=4, dilation=1, skip_conn=False)
 ),
 Sequential(
   (0): InvertedResidual(in_channels=32, out_channels=64, stride=2, exp=4, dilation=1, skip_conn=False)
   (1): InvertedResidual(in_channels=64, out_channels=64, stride=1, exp=4, dilation=1, skip_conn=True)
   (2): InvertedResidual(in_channels=64, out_channels=64, stride=1, exp=4, dilation=1, skip_conn=True)
 ),
 Sequential(
   (0): InvertedResidual(in_channels=64, out_channels=96, stride=2, exp=4, dilation=1, skip_conn=False)
   (1): MobileViTBlock(
   	 Local representations
   		 Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False, normalization=BatchNorm2d, activation=Swish)
   		 Conv2d(96, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
   	 Global representations with patch size of 2x2
   		 Trans

In [None]:
reshape_transform = None

# Supported camtype values and the pytorch_grad_cam class each one selects.
_cam_classes = {
    "gradcam++": GradCAMPlusPlus,
    "gradcam": GradCAM,
    "hirescam": HiResCAM,
}

if camtype not in _cam_classes:
    raise RuntimeError("camtype should be either 'gradcam', 'gradcam++', or 'hirescam'")

cam = _cam_classes[camtype](
    model=model,
    target_layers=target_layers,
    use_cuda=True,
    reshape_transform=reshape_transform,
)

In [None]:
TARGET_CLASS = [ClassifierOutputTarget(target_class)]
crop_face = crop_align_face()


def _overlay_cam(image_path, heatmap):
    """Re-crop the source image and blend ``heatmap`` over it, returning a PIL image."""
    face = crop_face(Image.open(image_path).convert("RGB"))
    # show_cam_on_image is given the image scaled from 0..255 down to 0..1.
    face_unit = np.asarray(face) / 255.0
    return Image.fromarray(show_cam_on_image(face_unit, heatmap, use_rgb=True))


for imgten, imgpath in tqdm(dataset):
    # Indexing with None adds a leading axis so the single image forms a batch.
    batch = imgten[None, ...]
    # Only the first entry of the CAM output is used (one image per call).
    cam_map = cam(input_tensor=batch, targets=TARGET_CLASS)[0]

    img_cam = _overlay_cam(imgpath, cam_map)

    # Keep the image's parent folder and file name under dst_path, saved as JPEG.
    save_path = dst_path.joinpath(*imgpath.parts[-2:]).with_suffix(".jpg")
    save_path.parent.mkdir(parents=True, exist_ok=True)
    img_cam.save(save_path)

  0%|          | 0/193 [00:00<?, ?it/s]