### Define imports, constants and helper functions

In [None]:
from art.estimators.object_detection.pytorch_detection_transformer import PyTorchDetectionTransformer
from art.attacks.evasion.adversarial_patch.adversarial_patch_pytorch import AdversarialPatchPyTorch
from torchvision.transforms import transforms
import PIL.Image
import numpy as np
import torch
import cv2
import matplotlib.pyplot as plt
import requests
import pandas as pd
import fiftyone as fo

# Class names indexed by the integer label ids found in the detector output
# (extract_predictions looks names up as COCO_CLASSES[label]).
# The 'N/A' entries are placeholders that keep list positions aligned with
# label ids — presumably the unused ids of the COCO id scheme; verify against
# the model's label map before editing this list.
COCO_CLASSES = [
    'N/A', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
    'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack',
    'umbrella', 'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
    'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
    'skateboard', 'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass',
    'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
    'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table', 'N/A',
    'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',
    'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A',
    'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    'toothbrush'
]

def extract_predictions(predictions_, conf_thresh):
    """Select the detections of one image whose score exceeds a threshold.

    Args:
        predictions_: Mapping with the keys ``"labels"``, ``"boxes"`` and
            ``"scores"``. The three sequences are expected to be
            index-aligned; labels are integer indices into ``COCO_CLASSES``
            and every box is indexable with four coordinates.
        conf_thresh: A detection is kept only when ``score > conf_thresh``
            (strictly greater).

    Returns:
        A tuple ``(classes, boxes, scores)`` of three parallel lists: the
        class names, the boxes as ``[(b[0], b[1]), (b[2], b[3])]`` corner
        pairs, and the corresponding scores. All three lists are empty when
        there are no detections or none passes the threshold.
    """
    predictions_class = [COCO_CLASSES[i] for i in list(predictions_["labels"])]
    if not predictions_class:
        return [], [], []

    predictions_boxes = [[(b[0], b[1]), (b[2], b[3])] for b in list(predictions_["boxes"])]
    predictions_score = list(predictions_["scores"])

    # Collect positions with enumerate() rather than list.index(): index()
    # returns the FIRST position holding a value, so two detections with the
    # same score would both resolve to the first one (duplicating its box and
    # dropping the other), and every lookup would rescan the list.
    keep = [pos for pos, score in enumerate(predictions_score) if score > conf_thresh]
    if not keep:
        return [], [], []

    kept_boxes = [predictions_boxes[pos] for pos in keep]
    kept_classes = [predictions_class[pos] for pos in keep]
    kept_scores = [predictions_score[pos] for pos in keep]
    return kept_classes, kept_boxes, kept_scores

def plot_image_with_boxes(img, boxes, pred_cls, title, save, filename):
    """Draw labelled boxes onto an image and show it in a new matplotlib figure.

    Args:
        img: Image array accepted by the OpenCV drawing functions and by
            ``plt.imshow``. It is modified IN PLACE by the drawing calls, so
            pass a copy when the original pixels must stay untouched.
        boxes: Sequence of ``[(x_a, y_a), (x_b, y_b)]`` corner pairs; the
            coordinates are truncated to ``int`` before drawing.
        pred_cls: Label text for each box, index-aligned with ``boxes``.
        title: Title shown above the figure.
        save: When truthy (and ``filename`` is not ``None``) the figure is
            written with ``plt.savefig``.
        filename: Output path handed to ``plt.savefig``; ``None`` disables
            saving even if ``save`` is set.
    """
    text_size = 2
    text_th = 2
    rect_th = 2
    colour = (0, 255, 0)

    for box, label in zip(boxes, pred_cls):
        top_left = (int(box[0][0]), int(box[0][1]))
        bottom_right = (int(box[1][0]), int(box[1][1]))
        cv2.rectangle(img, top_left, bottom_right, color=colour, thickness=rect_th)
        # The label is anchored at the first corner of its box.
        cv2.putText(img, label, top_left, cv2.FONT_HERSHEY_SIMPLEX, text_size,
                    colour, thickness=text_th)

    plt.figure()
    plt.axis("off")
    plt.title(title)
    plt.imshow(img)

    if save and filename is not None:
        print(filename)
        print("image saved")
        plt.savefig(filename)

def filter_boxes(predictions, conf_thresh):
    """Keep the detections of the first prediction that reach a minimum score.

    Args:
        predictions: List of prediction dicts. Only ``predictions[0]`` is
            read; it must provide index-aligned array-likes under the keys
            ``"boxes"``, ``"scores"`` and ``"labels"``.
        conf_thresh: Minimum score; a detection is kept when
            ``score >= conf_thresh`` (inclusive).

    Returns:
        A one-element list holding a dict with the same three keys. Each
        value is a new NumPy array containing only the kept detections. When
        nothing reaches the threshold the arrays are empty (length 0 along
        the first axis) instead of raising.
    """
    first = predictions[0]
    boxes = np.asarray(first["boxes"])
    scores = np.asarray(first["scores"])
    labels = np.asarray(first["labels"])

    # A boolean mask also covers the "no detection kept" case, where stacking
    # an empty list with np.vstack/np.hstack raises ValueError.
    keep = scores >= conf_thresh

    filtered = {
        "boxes": boxes[keep],
        "scores": scores[keep],
        "labels": labels[keep],
    }
    return [filtered]

# Per-channel mean and standard deviation handed to the detector as its
# `preprocessing` argument (presumably the usual ImageNet statistics — confirm).
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
NUMBER_CHANNELS = 3
# Channels-first input shape: (channels, height, width).
INPUT_SHAPE = (NUMBER_CHANNELS, 800, 800)

# Resize every image to INPUT_SHAPE[1] x INPUT_SHAPE[2] with bicubic
# interpolation, then convert the PIL image to a torch tensor.
transform = transforms.Compose([
        transforms.Resize([INPUT_SHAPE[1], INPUT_SHAPE[2]], interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.ToTensor()
    ])


In [None]:
# Run identifier: used as the FiftyOne dataset name and, further down, in the
# names of the output image directory and the result text file.
name="yolo_patch_on_detr_transfer"
dataset_dir="./coco_resized/validation/"
dataset_type = fo.types.COCODetectionDataset
# Import the COCO-format detection dataset found under dataset_dir.
dataset = fo.Dataset.from_dir(dataset_dir, dataset_type, name=name)
dataset.persistent = False
# Smaller 2-sample view, kept for quick test runs:
#predictions_view = dataset.take(2, seed=51)
# Sample of 50 images; the fixed seed keeps the selection the same between runs.
predictions_view = dataset.take(50, seed=51)

### Load COCO images and resize

In [None]:
import os

import PIL.Image

# Parallel lists, one entry per sample of predictions_view (same order):
coco_images = []   # resized images as channels-first arrays
image_ids = []     # file name without the ".jpg" suffix
image_sizes = []   # PIL `im.size` of the image before resizing

for sample in predictions_view:
    image_ids.append(os.path.basename(sample.filepath).replace('.jpg', ''))

    # The context manager closes the underlying file as soon as the pixels
    # have been converted; convert() returns an independent in-memory image,
    # so `im` stays usable after the file is closed.
    with PIL.Image.open(sample.filepath) as opened:
        im = opened.convert('RGB')

    image_sizes.append(im.size)
    coco_images.append(transform(im).numpy())

# Stack into a single batch array: one row per image.
coco_images = np.array(coco_images)


### Create the detector

In [None]:
# Detection-transformer estimator working on channels-first images with pixel
# values clipped to [0, 1]; MEAN/STD are applied as input preprocessing.
detector = PyTorchDetectionTransformer(
    channels_first=True,
    preprocessing=(MEAN, STD),
    input_shape=INPUT_SHAPE,
    clip_values=(0, 1),
)

### Test detector on COCO images

In [None]:
import os

# Output folder for the rendered prediction images of this run.
directory_name = f"run_images_{name}"

# exist_ok=True makes re-running the cell harmless when the folder is already
# there. The previous try/except inspected `errno.EEXIST`, but `errno` is
# never imported, so an existing folder ended in a NameError.
os.makedirs(directory_name, exist_ok=True)


In [None]:
# Run the detector on the unpatched images and save one annotated plot per
# image, keeping only detections with a score above 0.8.
results = detector.predict(coco_images)
for i, prediction in enumerate(results):
    classes, boxes, _scores = extract_predictions(prediction, 0.8)
    plot_image_with_boxes(
        # channels-first -> channels-last; copy because the plot helper draws
        # directly onto the array it is given
        img=coco_images[i].transpose(1, 2, 0).copy(),
        boxes=boxes,
        pred_cls=classes,
        title="Predictions on image without patch",
        save=True,
        filename=f"{directory_name}/benign{i}",
    )


In [None]:
import os

from tqdm import tqdm

# Detector output on the clean images, reduced to detections scoring >= 0.8;
# these serve as the labels handed to the patch optimisation.
dets = detector.predict(coco_images)
y = [filter_boxes([t], 0.8)[0] for t in dets]

# Every image except the last one receives the patch.
x = coco_images[:-1]
# NOTE(review): `target` (labels of the last image, repeated) is built but
# never passed to the attack below, which runs with targeted=False.
target = [y[-1] for _ in range(len(x))]

rotation_max=0.0
scale_min=0.5
scale_max=1
distortion_scale_max=0.0
learning_rate=1.99
max_iter=1
batch_size=16
patch_shape=(3, 200, 200)
patch_location=(400,400)
patch_type="square"
# NOTE(review): `optimizer` is defined but not forwarded to the attack, so the
# attack uses its own default — confirm whether that is intended.
optimizer="adam"

attack = AdversarialPatchPyTorch(estimator=detector, rotation_max=rotation_max,
                      scale_min=scale_min, scale_max=scale_max, distortion_scale_max=distortion_scale_max,
                      learning_rate=learning_rate, max_iter=max_iter, batch_size=batch_size, patch_location=patch_location,
                      patch_shape=patch_shape, patch_type=patch_type, verbose=False, targeted=False)

# Two rounds of patch generation on the first image only; after each round the
# current patch is pasted onto all images in `x`.
for _ in tqdm(range(2)):
    patch = attack.generate(x[[0]], y[:1])
    patched_images = attack.apply_patch(x, scale=0.4) # 0.1

# NOTE(review): this call originally read `detector.predict(transfer)`, but no
# `transfer` variable is defined anywhere in the notebook (NameError). The
# plots below pair these predictions with `patched_images`, so that array is
# used here — confirm this matches the intended experiment.
_y = detector.predict(patched_images)

# Map each image id back to the file path stored in the dataset, instead of
# rebuilding a machine-specific absolute path by hand. The id is derived the
# same way as in the image-loading cell.
filepath_by_id = {
    os.path.basename(view_sample.filepath).replace('.jpg', ''): view_sample.filepath
    for view_sample in predictions_view
}

# Box coordinates are in pixels of the (channels-first) images fed to predict().
image_height, image_width = patched_images.shape[-2:]

for i, prediction in enumerate(_y):
    labels, boxes, scores = extract_predictions(prediction, 0.5)

    # Keep the bounding boxes as FiftyOne detections.
    detections = []
    for label, box, score in zip(labels, boxes, scores):
        (xmin, ymin), (xmax, ymax) = box
        # FiftyOne stores boxes as [top-left-x, top-left-y, width, height],
        # relative to the image size, as plain Python floats.
        bounding_box = [
            float(xmin) / image_width,
            float(ymin) / image_height,
            float(xmax - xmin) / image_width,
            float(ymax - ymin) / image_height,
        ]
        detections.append(
            fo.Detection(
                label=label,
                bounding_box=bounding_box,
                confidence=float(score),
            )
        )

    plot_image_with_boxes(img=patched_images[i].transpose(1,2,0).copy(), boxes=boxes, pred_cls=labels, title="Predictions on image with patch", save=True, filename=f"{directory_name}/patched{i}")

    # Attach the predictions to the matching dataset sample under "detr".
    sample = dataset[filepath_by_id[image_ids[i]]]
    sample["detr"] = fo.Detections(detections=detections)
    sample.save()
print("finished adding predictions")

In [None]:
from fiftyone import ViewField as F


In [None]:
# Within the "detr" field keep only detections with confidence above 0.75;
# only_matches=False is passed so samples are not dropped from the view.
high_conf_view = predictions_view.filter_labels(
    "detr",
    F("confidence") > 0.75,
    only_matches=False,
)

In [None]:
# Show the printed summary of the confidence-filtered view.
print(high_conf_view)

In [None]:
# Print the "detr" predictions of the first sample in the filtered view to
# verify that every remaining confidence is > 0.75
sample = high_conf_view.first()
print(sample.detr)

In [None]:
# Evaluate the predictions in the `detr` field of `predictions_view` (the
# unfiltered view, not `high_conf_view`) with respect to the objects in the
# `detections` ground-truth field, and compute mAP as well.
# NOTE(review): the eval key "yolo_eval" looks like a leftover name from a
# YOLO notebook — confirm before relying on it elsewhere.
results = predictions_view.evaluate_detections(
    "detr",
    gt_field="detections",
    eval_key="yolo_eval",
    compute_mAP=True,
)


In [None]:
# Count how often each ground-truth label occurs in the whole dataset
counts = dataset.count_values("detections.detections.label")

# Order the labels from most to least frequent and keep the first ten
labels_by_frequency = sorted(counts, key=counts.get, reverse=True)
classes_top10 = labels_by_frequency[:10]

# Classification report restricted to those ten classes
results.print_report(classes=classes_top10)

In [None]:
# Print the mAP of the evaluation results.
print(results.mAP())

In [None]:
# Keep the mAP value so the next cell can write it to the result file.
result = results.mAP()

In [None]:
# Write the mAP value to "<name>.txt". The with-block closes the file even if
# the write fails, which a manual open()/close() pair does not guarantee.
with open(f"{name}.txt", "w") as file:
    print(f"saved as: {file}")
    # convert variable to string
    result = str(result)
    file.write("result = " + result + "\n")