Dataset

In [1]:
import os
from PIL import Image
import json

def load_images_from_directory(root_path: str):
    """
    Load images from a directory with subfolders named after ImageNet labels.

    Every immediate subfolder of ``root_path`` is treated as one label. Each
    file inside it whose name ends in .png, .jpg or .jpeg (case-insensitive)
    is opened with PIL.

    ``Image.open`` is lazy and keeps the file open until the pixels are read,
    so the pixel data is loaded inside a ``with`` block here. That releases
    the file descriptor before the next file is opened; otherwise one
    descriptor per image stays open and a large dataset can hit the
    per-process open-file limit. The trade-off is that all decoded images
    are held in memory at once.

    Folders and files are visited in sorted order so the returned list is
    deterministic across runs.

    Return a list of (image, label, filename) triples.
    Errors raised by ``os.listdir`` or by PIL while opening/decoding a file
    are not caught here.
    """
    dataset = []
    image_suffixes = ('.png', '.jpg', '.jpeg')

    # Iterate over each subfolder
    for label in sorted(os.listdir(root_path)):
        label_path = os.path.join(root_path, label)

        # Skip anything that is not a folder
        if not os.path.isdir(label_path):
            continue

        # Iterate over each image in the subfolder
        for image_file in sorted(os.listdir(label_path)):
            # Skip anything that does not look like an image file
            if not image_file.lower().endswith(image_suffixes):
                continue

            image_path = os.path.join(label_path, image_file)
            with Image.open(image_path) as img:
                # Read the pixels now; leaving the ``with`` block closes the file
                # but keeps the decoded image usable.
                img.load()
            dataset.append((img, label, image_file))

    return dataset


# Root of the local checkout; the dataset and class-index paths below are built from it.
current_dir = "/home/workstation/code/XAImethods/hf_cam_dev"

# Images live in one subfolder per label (see load_images_from_directory).
dataset_path = f"{current_dir}/ImageNet-Mini/images"
dataset = load_images_from_directory(dataset_path)


# JSON is UTF-8 by specification; naming the encoding keeps the read
# independent of the platform's default locale.
with open(f"{current_dir}/ImageNet-Mini/imagenet_class_index.json", "r", encoding="utf-8") as f:
    imagenet_class_index = json.load(f)


# Invert the class index: each JSON value is a sequence whose first element
# becomes the lookup key and whose second element is kept as the description,
# paired with the original JSON key.
label_to_index_description = {v[0]: (k, v[1]) for k, v in imagenet_class_index.items()}


Functions

In [2]:
import warnings
warnings.filterwarnings('ignore')
from codecarbon import track_emissions
from torchvision import transforms
from datasets import load_dataset
from pytorch_grad_cam import run_dff_on_image
from pytorch_grad_cam import (
    GradCAM, HiResCAM, ScoreCAM, GradCAMPlusPlus,
    AblationCAM, XGradCAM, EigenCAM, EigenGradCAM,
    LayerCAM, FullGrad, GradCAMElementWise
)
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from PIL import Image
import numpy as np
import cv2
import torch
from typing import List, Callable, Optional
# Prefer the GPU whenever PyTorch reports one; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using CUDA!" if device.type == "cuda" else "Using CPU!")
# dataset = load_dataset("huggingface/cats-image")
# image = dataset["test"]["image"][0]
# img_tensor = transforms.ToTensor()(image)

""" Model wrapper to return a tensor"""
class HuggingfaceToTensorModelWrapper(torch.nn.Module):
    """Adapt a model whose output object carries ``logits`` so it returns a tensor.

    Calling the wrapper forwards the input to the wrapped model and returns
    only the ``logits`` attribute of whatever that model produces.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def forward(self, x):
        outputs = self.model(x)
        return outputs.logits

""" Translate the category name to the category index.
    Some models aren't trained on Imagenet but on even larger datasets,
    so we can't just assume that 761 will always be remote-control.

"""
def category_name_to_index(model, category_name):
    """Translate a category name into the model's own category index.

    The lookup inverts ``model.config.id2label``, so it follows whatever
    label set the given model was configured with instead of assuming fixed
    ImageNet indices. If several indices share a name, the last one in the
    mapping's iteration order wins.

    Raises KeyError when no category has the requested name.
    """
    index_by_name = {name: idx for idx, name in model.config.id2label.items()}
    return index_by_name[category_name]
    
""" Helper function to run GradCAM on an image and create a visualization.
    (note to myself: this is probably useful enough to move into the package)
    If several targets are passed in targets_for_gradcam,
    e.g. different categories,
    a visualization for each of them will be created.
    
"""
# def run_grad_cam_on_image(model: torch.nn.Module,
#                           target_layer: torch.nn.Module,
#                           targets_for_gradcam: List[Callable],
#                           reshape_transform: Optional[Callable],
#                           input_tensor: torch.nn.Module=img_tensor,
#                           input_image: Image=image,
#                           method: Callable=GradCAM):
#     with method(model=HuggingfaceToTensorModelWrapper(model),
#                  target_layers=[target_layer],
#                  reshape_transform=reshape_transform) as cam:

#         # Replicate the tensor for each of the categories we want to create Grad-CAM for:
#         repeated_tensor = input_tensor[None, :].repeat(len(targets_for_gradcam), 1, 1, 1)

#         batch_results = cam(input_tensor=repeated_tensor,
#                             targets=targets_for_gradcam)
#         results = []
#         for grayscale_cam in batch_results:
#             visualization = show_cam_on_image(np.float32(input_image)/255,
#                                               grayscale_cam,
#                                               use_rgb=True)
#             # Make it weight less in the notebook:
#             visualization = cv2.resize(visualization,
#                                        (visualization.shape[1]//2, visualization.shape[0]//2))
#             results.append(visualization)
#         return np.hstack(results)
    

# Select the CAM algorithm to use; it becomes the default ``method`` of
# run_grad_cam_on_image.
# Options: GradCAM, HiResCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM,
# EigenCAM, EigenGradCAM, LayerCAM, FullGrad, GradCAMElementWise
CAM_ALGORITHM = GradCAM
# Class name of the selected algorithm, used as a path component of the results directory.
cam_algorithm_name = CAM_ALGORITHM.__name__

@track_emissions()
def run_grad_cam_on_image(model: torch.nn.Module,
                          target_layer: torch.nn.Module,
                          targets_for_gradcam: List[Callable],
                          input_tensor: torch.Tensor,
                          input_image: Image.Image,
                          reshape_transform: Optional[Callable] = None,
                          method: Callable = CAM_ALGORITHM):
    """Run a CAM method on one image and build one visualization per target.

    The model is wrapped in HuggingfaceToTensorModelWrapper so the CAM
    implementation receives plain logits. The input tensor is replicated once
    per entry of ``targets_for_gradcam`` and processed as a single batch.

    Args:
        model: Model to explain; its output must expose ``logits``.
        target_layer: Layer whose activations the CAM method inspects.
        targets_for_gradcam: One target callable per visualization wanted.
        input_tensor: Image tensor without a batch dimension
            (assumed to be (C, H, W) - TODO confirm against callers).
        input_image: The image that the heat map is overlaid on; it is
            converted with ``np.float32(...) / 255`` before the overlay.
        reshape_transform: Optional callable forwarded to the CAM method.
        method: CAM class to instantiate; defaults to ``CAM_ALGORITHM``.

    Returns:
        A tuple ``(visualization, grayscale_cams)``: the half-size overlays
        stacked horizontally with ``np.hstack``, and the list of per-target
        grayscale CAM arrays as returned by the CAM method.

    NOTE(review): ``@track_emissions()`` wraps every call, so emission
    tracking appears to be set up once per image - confirm that this
    granularity is intended.
    """
    with method(model=HuggingfaceToTensorModelWrapper(model),
                target_layers=[target_layer],
                reshape_transform=reshape_transform) as cam:

        # Replicate the tensor for each of the categories we want to create Grad-CAM for:
        repeated_tensor = input_tensor[None, :].repeat(len(targets_for_gradcam), 1, 1, 1)

        batch_results = cam(input_tensor=repeated_tensor,
                            targets=targets_for_gradcam)

        # The scaled image is identical for every target, so convert it once
        # instead of once per loop iteration.
        normalized_image = np.float32(input_image) / 255

        results = []
        grayscale_cams = []
        for grayscale_cam in batch_results:
            visualization = show_cam_on_image(normalized_image,
                                              grayscale_cam,
                                              use_rgb=True)
            # Halve both dimensions so the output weighs less in the notebook:
            visualization = cv2.resize(visualization,
                                       (visualization.shape[1] // 2, visualization.shape[0] // 2))
            results.append(visualization)
            grayscale_cams.append(grayscale_cam)
        return np.hstack(results), grayscale_cams
    
def print_top_categories(model, img_tensor, top_k=5):
    """Print the ``top_k`` highest-scoring categories for one image tensor.

    Args:
        model: Callable model whose output exposes ``logits`` and whose
            ``config.id2label`` maps class indices to names.
        img_tensor: Image tensor without a batch dimension; a batch
            dimension is added before the forward pass.
        top_k: Number of categories to print. Values of zero or less print
            nothing.
    """
    # Inference only: skip autograd bookkeeping so no graph is kept alive.
    with torch.no_grad():
        logits = model(img_tensor.unsqueeze(0)).logits

    # Slice from the front of a descending sort. Slicing the tail of an
    # ascending sort with ``[-top_k:]`` returns every class when top_k is 0.
    top_k = max(top_k, 0)
    indices = logits[0].argsort(descending=True)[:top_k].cpu().tolist()
    for i in indices:
        print(f"Predicted class {i}: {model.config.id2label[i]}")

# Generate targets_for_gradcam based on model's predictions
def get_top_k_targets(model, input_tensor, k=5):
    """Build one ClassifierOutputTarget for each of the model's top ``k`` classes.

    Args:
        model: Callable model whose output exposes ``logits``.
        input_tensor: Image tensor without a batch dimension; a batch
            dimension is added before the forward pass.
        k: Number of targets to create. Values of zero or less give an
            empty list.

    Returns:
        A list of ClassifierOutputTarget objects ordered from the highest
        logit downwards.
    """
    # Only the ranking is needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_tensor.unsqueeze(0)).logits
    # Clamp so a negative k cannot silently slice from the end of the ranking.
    k = max(k, 0)
    top_k_indices = logits[0].argsort(descending=True)[:k].cpu().tolist()
    return [ClassifierOutputTarget(index) for index in top_k_indices]



Using CUDA!


In [3]:
import os
from tqdm import tqdm
from collections import defaultdict
import gc
###################
from transformers import CvtForImageClassification
from functools import partial
import torch
from PIL import Image
import datetime

###################

def reshape_transform_cvt_huggingface(tensor, model, width, height):
    """Turn a token sequence into a layer-normalised, channels-first feature map.

    The first entry along dimension 1 is dropped (presumably the class
    token - confirm). The remaining entries are laid out on a
    ``height`` x ``width`` grid, ``model.layernorm`` is applied, and the last
    dimension is moved to position 1.
    """
    tokens = tensor[:, 1:, :]
    batch_size = tokens.size(0)
    channels = tokens.size(-1)
    grid = tokens.reshape(batch_size, height, width, channels)

    # https://github.com/huggingface/transformers/blob/a2c90a7f7b1f8a2a8217c962a04a1a65638121d5/src/transformers/models/cvt/modeling_cvt.py#L699
    normalized = model.layernorm(grid)
    # (batch, height, width, channels) -> (batch, channels, height, width)
    return normalized.permute(0, 3, 1, 2)

def reshape_gradcam_transform_cvt_huggingface(tensor, model, width, height):
    """Turn a token sequence into a channels-first feature map without normalisation.

    The first entry along dimension 1 is dropped (presumably the class
    token - confirm). The remaining entries are laid out on a
    ``height`` x ``width`` grid and the last dimension is moved to position 1.
    ``model`` is accepted but not used by this function.
    """
    tokens = tensor[:, 1:, :]
    batch_size = tokens.size(0)
    channels = tokens.size(-1)
    grid = tokens.reshape(batch_size, height, width, channels)
    # (batch, height, width, channels) -> (batch, channels, height, width)
    return grid.permute(0, 3, 1, 2)


# Checkpoint identifier; also used as a path component of the results directory.
model_name = "microsoft/cvt-13"

# Wall-clock timestamp taken before processing starts (paired with end_time after the loop).
start_time = datetime.datetime.now()



def ensure_rgb(img):
    """Return ``img`` itself when its mode is 'RGB', else ``img.convert('RGB')``."""
    return img if img.mode == 'RGB' else img.convert('RGB')

def is_valid_image_file(filepath):
    """Check if the file is a valid image file.

    Returns True when PIL can open the file and ``verify()`` raises nothing,
    and False when opening or verifying fails with any ordinary exception.
    """
    try:
        with Image.open(filepath) as img:
            img.verify()  # verify that it is a valid image
        return True
    except Exception:
        # Deliberately broad, because the set of errors PIL can raise is wide.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and SystemExit
        # propagate, so a long validation run can still be interrupted.
        return False

# Number of dataset entries handled per iteration of the outer loop.
BATCH_SIZE = 100
# Ceiling division: a trailing partial batch still gets its own iteration.
num_batches = (len(dataset) + BATCH_SIZE - 1) // BATCH_SIZE

save_dir = f"{current_dir}/results/{model_name}/{cam_algorithm_name}"

# exist_ok=True creates the directory if needed without a separate
# check-then-create step, which can race with another process.
os.makedirs(save_dir, exist_ok=True)


# The checkpoint, the target layers and the preprocessing transforms are the
# same for every batch, so they are built once instead of once per batch
# (or once per image).
model = CvtForImageClassification.from_pretrained(model_name).to(device)

# target_layer_dff is not used in this loop.
# NOTE(review): presumably kept for a DFF run elsewhere - confirm before removing.
target_layer_dff = model.cvt.encoder.stages[-1].layers[-1]
target_layer_gradcam = model.cvt.encoder.stages[-1].layers[-2]

transform = transforms.ToTensor()
resize_transform = transforms.Resize((480, 640))


for batch_num in tqdm(range(num_batches)):
    start_idx = batch_num * BATCH_SIZE
    end_idx = min((batch_num + 1) * BATCH_SIZE, len(dataset))

    for idx in range(start_idx, end_idx):
        img, label, filename = dataset[idx]

        # Resolve the ground-truth class first so an unknown label is skipped
        # before any preprocessing or GPU work is done for the image.
        index_description = label_to_index_description.get(label)
        if index_description is None:
            print(f"Warning: Label '{label}' not found in the JSON file!")
            continue

        index_str, description = index_description
        index = int(index_str)

        try:
            torch.cuda.empty_cache()
            img = ensure_rgb(img)
            img = resize_transform(img)
            img_tensor = transform(img).to(device)

            # Token-grid size handed to the reshape transforms.
            # NOTE(review): 16 is presumably the network's total spatial
            # down-sampling at the target layer - confirm.
            grid_width = img_tensor.shape[2] // 16
            grid_height = img_tensor.shape[1] // 16

            # reshape_transform is not used in this loop.
            # NOTE(review): presumably kept for a DFF run elsewhere - confirm before removing.
            reshape_transform = partial(reshape_transform_cvt_huggingface,
                                        model=model,
                                        width=grid_width,
                                        height=grid_height)
            reshape_transform_gradcam = partial(reshape_gradcam_transform_cvt_huggingface,
                                                model=model,
                                                width=grid_width,
                                                height=grid_height)

            # Explain the ground-truth class of the image.
            dynamic_targets_for_gradcam = [ClassifierOutputTarget(index)]

            gradcam_result, grayscale_cams = run_grad_cam_on_image(
                model=model,
                target_layer=target_layer_gradcam,
                targets_for_gradcam=dynamic_targets_for_gradcam,
                input_tensor=img_tensor,
                input_image=img,
                reshape_transform=reshape_transform_gradcam
            )

            # Only the scores are needed here, so skip autograd bookkeeping
            # and avoid keeping a graph alive between images.
            with torch.no_grad():
                logits = model(img_tensor.unsqueeze(0)).logits
            top_indices = logits[0].argsort(descending=True)[:5].cpu().tolist()
            predictions = {
                class_idx: {"score": logits[0][class_idx].item(),
                            "label": model.config.id2label[class_idx]}
                for class_idx in top_indices
            }

            # One output folder per image, named after the file without its extension.
            img_dir = os.path.join(save_dir, filename.rsplit('.', 1)[0])
            os.makedirs(img_dir, exist_ok=True)

            img_name = os.path.join(img_dir, "original.jpg")
            gradcam_name = os.path.join(img_dir, "gradcam.jpg")
            grayscale_name = os.path.join(img_dir, "grayscale.jpg")
            grayscale_npy_name = os.path.join(img_dir, "grayscale.npy")
            scores_name = os.path.join(img_dir, "scores.npy")
            info_name = os.path.join(img_dir, "info.txt")

            img.save(img_name)
            Image.fromarray(gradcam_result).save(gradcam_name)
            Image.fromarray((grayscale_cams[0] * 255).astype(np.uint8)).save(grayscale_name)
            np.save(grayscale_npy_name, grayscale_cams[0])

            scores = [data["score"] for data in predictions.values()]
            np.save(scores_name, scores)

            with open(info_name, 'w', encoding="utf-8") as f:
                # Distinct names here keep the dataset's ``label`` and ``index`` intact.
                for class_idx, data in predictions.items():
                    predicted_label = data["label"]
                    score = data["score"]
                    f.write(f"Class {class_idx} ({predicted_label}): {score:.2f}\n")

        except RuntimeError as e:
            # Best effort: an out-of-memory failure skips this file only.
            # Any other runtime error is re-raised with its original traceback.
            if "CUDA out of memory" in str(e):
                print(f"CUDA OutOfMemoryError encountered for file: {filename}")
            else:
                raise

    # Release cached GPU blocks and collect garbage between batches.
    torch.cuda.empty_cache()
    gc.collect()


# Wall-clock timestamp taken after all batches have been processed.
end_time = datetime.datetime.now()






  0%|          | 0/39 [00:00<?, ?it/s][codecarbon INFO @ 17:01:56] [setup] RAM Tracking...
[codecarbon INFO @ 17:01:56] [setup] GPU Tracking...
[codecarbon INFO @ 17:01:56] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 17:01:56] [setup] CPU Tracking...
[codecarbon INFO @ 17:01:58] CPU Model on constant consumption mode: Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
[codecarbon INFO @ 17:01:58] >>> Tracker's metadata:
[codecarbon INFO @ 17:01:58]   Platform system: Linux-5.15.90.1-microsoft-standard-WSL2-x86_64-with-glibc2.35
[codecarbon INFO @ 17:01:58]   Python version: 3.10.9
[codecarbon INFO @ 17:01:58]   Available RAM : 15.576 GB
[codecarbon INFO @ 17:01:58]   CPU count: 16
[codecarbon INFO @ 17:01:58]   CPU model: Intel(R) Core(TM) i9-9900K CPU @ 3.60GHz
[codecarbon INFO @ 17:01:58]   GPU count: 1
[codecarbon INFO @ 17:01:58]   GPU model: 1 x NVIDIA GeForce RTX 4090
[codecarbon INFO @ 17:02:05] 
Graceful stopping: collecting and writing information.
Please Allow for a few seconds..

KeyboardInterrupt: 