# End to End Object Detection with Transformers in ART

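Demo for applying the DEtection TRansformer (DETR) estimator in ART for object detection and attacking the Detection Transformer. The cells in this part of the notebook run the Projected Gradient Descent (PGD) and Fast Gradient Method (FGSM) attacks; the Robust DPatch method is not exercised in them.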

### Define imports, constants and helper functions

In [1]:
import sys 
!{sys.executable} -m pip install --user adversarial-robustness-toolbox
!{sys.executable} -m pip install --user torch
!{sys.executable} -m pip install --user pillow
!{sys.executable} -m pip install --user fiftyone



from art.estimators.object_detection.pytorch_detection_transformer import PyTorchDetectionTransformer
from art.attacks.evasion.adversarial_patch.adversarial_patch_pytorch import AdversarialPatchPyTorch
from torchvision.transforms import transforms
import PIL.Image
import numpy as np
import torch
import cv2
import matplotlib.pyplot as plt
import requests
import pandas as pd

# Class-name lookup table. extract_predictions() indexes this list with the
# integer labels returned by the detector, so the position of a name in the
# list is its label id.
# 'N/A' entries are placeholders -- presumably label ids that the COCO
# category numbering leaves unused (TODO confirm against the DETR mapping).
COCO_CLASSES = [
    'N/A', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack',
    'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass',
    'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A',
    'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush'
]

def extract_predictions(predictions_, conf_thresh):
    """Select the detections of one image whose score exceeds a threshold.

    Args:
        predictions_: Mapping with the keys "labels", "boxes" and "scores",
            each an equally long sequence for a single image. Labels are
            integer indices into COCO_CLASSES; every box is indexable at
            positions 0-3.
        conf_thresh: A detection is kept only when its score is strictly
            greater than this value.

    Returns:
        A tuple ``(classes, boxes, scores)`` of three lists in detection
        order: class names, boxes as ``[(b[0], b[1]), (b[2], b[3])]`` point
        pairs, and the matching scores. All three lists are empty when there
        are no detections or none passes the threshold.
    """
    labels = list(predictions_["labels"])
    if not labels:
        return [], [], []

    boxes = list(predictions_["boxes"])
    scores = list(predictions_["scores"])

    # Select by position. Looking positions up with scores.index(value)
    # returns the first match, so detections sharing a score would all
    # resolve to the first of them (and the lookup would be quadratic).
    kept = [idx for idx, score in enumerate(scores) if score > conf_thresh]
    if not kept:
        return [], [], []

    predictions_class = [COCO_CLASSES[labels[idx]] for idx in kept]
    predictions_boxes = [
        [(boxes[idx][0], boxes[idx][1]), (boxes[idx][2], boxes[idx][3])]
        for idx in kept
    ]
    predictions_scores = [scores[idx] for idx in kept]
    return predictions_class, predictions_boxes, predictions_scores

def plot_image_with_boxes(img, boxes, pred_cls, title):
    """Draw labelled boxes onto an image and show it in a new matplotlib figure.

    Args:
        img: Image array handed to OpenCV for drawing and then to
            ``plt.imshow``. The drawing calls receive ``img`` itself (callers
            in this notebook pass a copy).
        boxes: Sequence of boxes, each indexable as ``box[0]`` / ``box[1]``
            with two coordinates apiece (the two rectangle points).
        pred_cls: Sequence of label strings, aligned with ``boxes`` by index.
        title: Title shown above the figure.
    """
    green = (0, 255, 0)
    font_scale = 2
    font_thickness = 2
    line_thickness = 2

    for idx, box in enumerate(boxes):
        # OpenCV expects integer pixel coordinates.
        first_point = (int(box[0][0]), int(box[0][1]))
        second_point = (int(box[1][0]), int(box[1][1]))
        cv2.rectangle(img, first_point, second_point,
                      color=green, thickness=line_thickness)
        # The label text is anchored at the box's first point.
        cv2.putText(img, pred_cls[idx], first_point, cv2.FONT_HERSHEY_SIMPLEX, font_scale,
                    green, thickness=font_thickness)

    plt.figure()
    plt.axis("off")
    plt.title(title)
    plt.imshow(img)

def filter_boxes(predictions, conf_thresh):
    """Keep the detections of the first prediction whose score reaches a threshold.

    Args:
        predictions: Sequence whose first element is a dict with the keys
            "boxes", "scores" and "labels" (equally long array-likes). Only
            ``predictions[0]`` is read.
        conf_thresh: Minimum score, inclusive (``score >= conf_thresh``).

    Returns:
        A one-element list containing a dict with the filtered "boxes",
        "scores" and "labels" as NumPy arrays. When no detection reaches the
        threshold the arrays are empty (length 0) rather than an error being
        raised.
    """
    first = predictions[0]
    scores = np.asarray(first["scores"])

    # A boolean mask replaces the per-element appends followed by
    # np.vstack / np.hstack: those raise ValueError on an empty list, i.e.
    # whenever nothing passes the threshold.
    keep = scores >= conf_thresh

    dictionary = {
        "boxes": np.asarray(first["boxes"])[keep],
        "scores": scores[keep],
        "labels": np.asarray(first["labels"])[keep],
    }

    return [dictionary]

# Per-channel mean / standard deviation; handed to the detector as its
# `preprocessing=(MEAN, STD)` argument when it is constructed.
# NOTE(review): these look like the usual ImageNet statistics -- confirm.
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
NUMBER_CHANNELS = 3
# (channels, height, width): the detector is created with channels_first=True.
INPUT_SHAPE = (NUMBER_CHANNELS, 800, 800)

# Image preprocessing applied to every PIL image before detection: resize to
# the fixed INPUT_SHAPE height/width using bicubic interpolation, then convert
# to a torch tensor (presumably float values in [0, 1], matching the detector's
# clip_values -- TODO confirm against the torchvision ToTensor docs).
transform = transforms.Compose([
        transforms.Resize([INPUT_SHAPE[1], INPUT_SHAPE[2]], interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.ToTensor()
    ])



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.10 install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.10 install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.10 install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip available: [0m[31;49m22.2.1[0m[39;49m -> [0m[32;49m23.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3.10 install --

In [2]:
# Presumably the iteration budget for a patch-based attack; it is not
# referenced by the cells visible in this part of the notebook.
PATCH_MAX_ITER = 100
# Passed as `max_iter` to the ProjectedGradientDescent attacks in the cells
# that follow.
WHITEBOX_MAX_ITER = 1

### Load COCO images and resize

In [3]:
# COCO val2017 image URLs. None of the three names defined here is referenced
# by the code visible in this notebook section, which loads its images through
# FiftyOne instead.
urls_original = ['http://images.cocodataset.org/val2017/000000039769.jpg',
'http://images.cocodataset.org/val2017/000000397133.jpg',
'http://images.cocodataset.org/val2017/000000037777.jpg',
'http://images.cocodataset.org/val2017/000000454661.jpg',
'http://images.cocodataset.org/val2017/000000094852.jpg']


target_image_url = 'http://images.cocodataset.org/val2017/000000350002.jpg'

urls = ['http://images.cocodataset.org/val2017/000000084170.jpg',
        'http://images.cocodataset.org/val2017/000000088462.jpg',
        'http://images.cocodataset.org/val2017/000000124798.jpg',
        'http://images.cocodataset.org/val2017/000000138979.jpg',
        'http://images.cocodataset.org/val2017/000000142585.jpg',
        'http://images.cocodataset.org/val2017/000000155341.jpg',
        'http://images.cocodataset.org/val2017/000000192607.jpg',
        'http://images.cocodataset.org/val2017/000000269942.jpg',
        'http://images.cocodataset.org/val2017/000000376625.jpg',
        'http://images.cocodataset.org/val2017/000000385190.jpg',
        'http://images.cocodataset.org/val2017/000000350002.jpg'] # last entry is the target image (same URL as target_image_url)


import fiftyone as fo
import fiftyone.zoo as foz



# TODO: implement loading of the annotations file and use it to obtain the
# ground-truth labels.

# Load the COCO-2017 validation split from the FiftyOne dataset zoo: at most
# 50 samples, shuffled with a fixed seed, requesting detection and
# segmentation labels and filtered by the classes "person" and "car".
dataset = foz.load_zoo_dataset(
    "coco-2017",
    split="validation",
    max_samples=50,
    shuffle=True,
    seed=51,
    label_types=["detections", "segmentations"],
    classes=["person", "car"],
)
# Only referenced inside the disabled snippet held in the string literal below.
target_size = (640,480)
# NOTE: the triple-quoted block is a bare string expression, i.e. disabled code
# that never runs (it also refers to new_width / new_height, which are not
# defined anywhere in the visible code).
'''for sample in dataset:
    sample["image"].resize(target_size, method="antialias")  # You can choose the resampling method
    
    # Update the dimensions of the sample
    sample.info["width"] = new_width
    sample.info["height"] = new_height
'''
# Save the dataset (no sample was modified above, because the resize snippet
# is disabled).
dataset.save()


# session = fo.launch_app(dataset.view())  # disabled: does not work on Puhti (translated from the original note)

# Turn every FiftyOne sample into a preprocessed array and collect them in
# coco_images. This replaces the earlier URL-based loading:
#   PIL.Image.open(requests.get(url, stream=True).raw)
coco_images = []

for sample in dataset:
    # Path of the image file on disk.
    image_path = sample.filepath

    # The context manager closes the image file as soon as it has been
    # transformed, instead of leaving one open handle per sample.
    with PIL.Image.open(image_path) as pil_image:
        # Force three channels: a grayscale / palette / RGBA file would
        # otherwise yield a tensor with a different channel count, which
        # prevents stacking into one array and does not match INPUT_SHAPE.
        im = transform(pil_image.convert("RGB")).numpy()

    coco_images.append(im)

# One array with a leading batch axis, as passed to detector.predict().
coco_images = np.array(coco_images)

Downloading split 'validation' to '/users/antilaan/fiftyone/coco-2017/validation' if necessary
Found annotations at '/users/antilaan/fiftyone/coco-2017/raw/instances_val2017.json'
Sufficient images already downloaded
Existing download of split 'validation' is sufficient
Loading existing dataset 'coco-2017-validation-50'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


### Create the detector

In [4]:
# Build the ART DETR estimator for channels-first inputs of INPUT_SHAPE, with
# MEAN/STD as preprocessing and pixel values clipped to the range (0, 1).
detector = PyTorchDetectionTransformer(
    channels_first=True,
    preprocessing=(MEAN, STD),
    input_shape=INPUT_SHAPE,
    clip_values=(0, 1),
)

Using cache found in /users/antilaan/.cache/torch/hub/facebookresearch_detr_main


### Test detector on COCO images

In [None]:
# Run the unmodified images through the detector, then plot and print every
# detection whose score is above 0.8.
print("Testing detector on COCO images")
results = detector.predict(coco_images)
for i, result in enumerate(results):
    preds = extract_predictions(result, 0.8)
    # Channels-first float image -> channels-last uint8 image for drawing.
    im = (coco_images[i].transpose(1, 2, 0) * 255).astype(np.uint8)
    plot_image_with_boxes(
        img=im.copy(),
        boxes=preds[1],
        pred_cls=preds[0],
        title="Predictions on image without patch",
    )
    print(preds[2])

Testing detector on COCO images


### PGD Attack
Targeted PGD attack: the detections from the last image above (elephants) serve as the target, so the generated adversarial examples force DETR to see elephants in the other images.

In [None]:
from art.attacks.evasion import ProjectedGradientDescent

# Targeted PGD; raise WHITEBOX_MAX_ITER for more attack iterations.
attack = ProjectedGradientDescent(detector, targeted=True, max_iter=WHITEBOX_MAX_ITER, eps=0.1)

dets = detector.predict(coco_images)
filtered_dets = [filter_boxes([t], 0.8)[0] for t in dets]

# The confident detections of the last image are the target for every image
# attacked below (one-element list, matching the batch size of 1).
target = filtered_dets[-1:]

print("Starting PGD attack")
# Run the attack on every loaded image, one image per batch.
for idx in range(len(coco_images)):
    x = coco_images[[idx]]  # list index keeps the leading batch axis
    adv = attack.generate(x, y=target)

    adv_dets = detector.predict(adv)
    preds = extract_predictions(adv_dets[0], 0.8)
    print(preds)
    # Convert to a channels-last uint8 image, as done for the clean images, so
    # the 0-255 drawing colours and imshow operate on the same value range.
    im = np.clip(adv[0].transpose(1, 2, 0) * 255, 0, 255).astype(np.uint8)
    plot_image_with_boxes(img=im.copy(), boxes=preds[1], pred_cls=preds[0], title="Detections on PGD generated Adversarial Image")


print("PGD attack done")

In [None]:
#untargeted PGD

In [None]:
from art.attacks.evasion import ProjectedGradientDescent
print("Startring untargeted PGD")

# Untargeted PGD; raise WHITEBOX_MAX_ITER for more attack iterations.
attack = ProjectedGradientDescent(detector, targeted=False, max_iter=WHITEBOX_MAX_ITER, eps=0.1)

# No labels are passed to generate() in this cell, so the detector
# predictions / filtered target that used to be computed here were never
# used; dropping them saves a full inference pass over all images.
# Run the attack on every loaded image, one image per batch.
for idx in range(len(coco_images)):
    x = coco_images[[idx]]  # list index keeps the leading batch axis
    adv = attack.generate(x)

    adv_dets = detector.predict(adv)
    preds = extract_predictions(adv_dets[0], 0.8)
    # Convert to a channels-last uint8 image, as done for the clean images, so
    # the 0-255 drawing colours and imshow operate on the same value range.
    im = np.clip(adv[0].transpose(1, 2, 0) * 255, 0, 255).astype(np.uint8)
    plot_image_with_boxes(img=im.copy(), boxes=preds[1], pred_cls=preds[0], title="Detections on PGD generated Adversarial Image")
print("Untargeted PGD done")


### FGSM Attack

In [None]:
from art.attacks.evasion import FastGradientMethod
print("Starting FGSM")

attack = FastGradientMethod(detector, eps=0.1)

dets = detector.predict(coco_images)
filtered_dets = [filter_boxes([t], 0.8)[0] for t in dets]

# NOTE(review): no `targeted` argument is given to FastGradientMethod above,
# yet every image is attacked with the last image's detections as `y`.
# Confirm whether targeted=True or per-image labels (filtered_dets[idx]) was
# intended; the behaviour is left unchanged here.
target = filtered_dets[-1:]

# Run the attack on every loaded image, one image per batch.
for idx in range(len(coco_images)):
    x = coco_images[[idx]]  # list index keeps the leading batch axis
    adv = attack.generate(x, y=target)

    adv_dets = detector.predict(adv)
    preds = extract_predictions(adv_dets[0], 0.8)
    # Convert to a channels-last uint8 image, as done for the clean images, so
    # the 0-255 drawing colours and imshow operate on the same value range.
    im = np.clip(adv[0].transpose(1, 2, 0) * 255, 0, 255).astype(np.uint8)
    plot_image_with_boxes(img=im.copy(), boxes=preds[1], pred_cls=preds[0], title="Detections on FGSM generated Adversarial Image")
print("FGSM done")
