# Attacking CLIP for image classification

In this notebook we show how to use the experimental tools in ART to attack the CLIP model.


In [1]:
import sys
import numpy as np
import torch

from art.estimators.hf_mm import HFMMPyTorch, MultiModalHuggingFaceInput
from art.attacks.evasion import ProjectedGradientDescent


MEAN = np.asarray([0.48145466, 0.4578275, 0.40821073])
STD = np.asarray([0.26862954, 0.26130258, 0.27577711])

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def get_data():
    """
    We get sample data from the coco dataset.
    """
    from PIL import Image
    import requests
    
    image_list = ['000000039769.jpg',
                  '000000000285.jpg',
                  '000000002006.jpg',
                  '000000002149.jpg']

    # Freetext description of the content of the classes we will try and sort the pictures into.
    text = ["a photo of a cat", "a photo of a bear", "a photo of a car", "a photo of a bus", "apples"]

    # Ground truth labels mapping the images into one of the free-text categories. 
    # Note, we do not have an image of a car in this sample of data
    labels = torch.tensor(np.asarray([0, 1, 3, 4]))

    input_list = []
    for fname in image_list:
        url = 'http://images.cocodataset.org/val2017/' + fname
        input_list.append(Image.open(requests.get(url, stream=True).raw))

    return input_list, text, labels

In [3]:
input_list, text, labels = get_data()

In [4]:
def norm_bound_eps(eps_bound=None):
    """
    Helper function to normalise the l_infinity bounds from 0 - 1 into z normalization.
    """
    if eps_bound is None:
        eps_bound = np.asarray([8 / 255, 8 / 255, 8 / 255])
    eps_bound = np.abs(eps_bound / STD)
    return eps_bound

In [5]:
def attack(input_list, text, labels):
    """
    We now attack the clip model by perturbing the input images using ARTs tools.
    """
    from transformers import CLIPProcessor, CLIPModel

    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

    loss_fn = torch.nn.CrossEntropyLoss()
    inputs = processor(text=text, images=input_list, return_tensors="pt", padding=True)
    original_images = []
    for i in range(3):
        original_images.append(inputs["pixel_values"][i].clone().cpu().detach().numpy())
    original_images = np.concatenate(original_images)

    art_classifier = HFMMPyTorch(
        model, 
        loss=loss_fn,
        nb_classes=5,
        clip_values=(np.min(original_images), np.max(original_images)), 
        input_shape=(3, 224, 224)
    )

    art_input = MultiModalHuggingFaceInput(**inputs)
    clean_preds = art_classifier.predict(art_input)
    clean_acc = np.sum(np.argmax(clean_preds, axis=1) == labels.cpu().detach().numpy()) / len(labels)
    print('The clean accuracy is ', clean_acc)

    attack = ProjectedGradientDescent(
        art_classifier,
        max_iter=10,
        eps=np.ones((3, 224, 224)) * np.reshape(norm_bound_eps(), (3, 1, 1)),
        eps_step=np.ones((3, 224, 224)) * 0.1,
    )
    x_adv = attack.generate(art_input, labels)
    adv_preds = art_classifier.predict(x_adv)
    adv_acc = np.sum(np.argmax(adv_preds, axis=1) == labels.cpu().detach().numpy()) / len(labels)

    print('The adversarial accuracy is ', clean_acc)


In [6]:
# Running the attack we see the performance drop from 100% to 0%.
attack(input_list, text, labels)

2023-10-17 06:11:36.199655: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-17 06:11:36.232269: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-10-17 06:11:36.232299: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-10-17 06:11:36.232327: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-10-17 06:11:36.240857: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: A

The clean accuracy is  1.0


PGD - Iterations: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 10.61it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 39.43it/s]

The adversarial accuracy is  1.0



