## Test Image Augmentation

In [None]:
from datasets import load_dataset
from functools import partial
from PIL import ImageDraw as PImageDraw
from torchvision import tv_tensors
from torchvision.transforms import v2 as T
from transformers import AutoImageProcessor

from finetune_utils.Detr import DetrDataLoader

### Load HF Dataset

In [None]:
# Identifiers used by the rest of the notebook: MODEL_NAME is the checkpoint
# passed to AutoImageProcessor.from_pretrained, DATASET_NAME is passed to
# load_dataset.
MODEL_NAME = "microsoft/conditional-detr-resnet-50"
DATASET_NAME = "acervos-digitais/ft-0915"

In [None]:
# Load the dataset; later cells index its "train" split.
ft0915_ds = load_dataset(DATASET_NAME)

In [None]:
# Class names as declared by the "category" feature of the train split.
_category_feature = ft0915_ds["train"].features["objects"].feature["category"]
categories = _category_feature.names

# Lookup tables in both directions: class index -> name and name -> index.
id2label = dict(enumerate(categories))
label2id = dict(zip(id2label.values(), id2label.keys()))

### Test HF Dataset

In [None]:
# Pick one training example and prepare a drawing context on its image.
# The row is fetched once and reused, rather than indexing the dataset
# separately for the image and for the annotations.
img_id = 11
sample = ft0915_ds["train"][img_id]
image = sample["image"]
annotations = sample["objects"]
draw = PImageDraw.Draw(image)

In [None]:
# Overlay every annotated box and its class name on the raw dataset image.
# Boxes are unpacked as (x, y, width, height) with (x, y) the top-left corner.
for box, class_idx in zip(annotations["bbox"], annotations["category"]):
  x, y, w, h = box
  label = id2label[class_idx]

  draw.rectangle((x, y, x + w, y + h), outline="red", width=1)
  # The label is written twice: black at the box corner, magenta 12 px above it.
  draw.text((x + 2, y), label, fill=(0, 0, 0))
  draw.text((x + 2, y - 12), label, fill=(255, 0, 255))

display(image)

### Define Image transforms

In [None]:
# Augmentation pipeline: every step below is applied with probability 0.5.
_random_affine = T.RandomAffine(
  degrees=(-20, 20),
  translate=(0.1, 0.1),
  scale=(0.95, 1.05),
  shear=(-15, 15),
)
_color_jitter = T.ColorJitter(brightness=0.5, hue=0.3)

_augmentation_steps = [
  T.RandomHorizontalFlip(p=0.5),
  T.RandomAdjustSharpness(sharpness_factor=2, p=0.5),
  T.RandomAutocontrast(p=0.5),
  T.RandomEqualize(p=0.5),
  # RandomAffine and ColorJitter are wrapped in RandomApply to give them
  # the same 0.5 probability as the other steps.
  T.RandomApply(transforms=[_random_affine], p=0.5),
  T.RandomApply(transforms=[_color_jitter], p=0.5),
]

image_transform = T.Compose(_augmentation_steps)

In [None]:
def transform_batch(examples, transform, image_processor, return_pixel_mask=False):
  """Augment a batch of examples and run it through the image processor.

  Args:
    examples: Batch dict with parallel "image_id", "image" and "objects"
      sequences. Each image must provide `.size` and `.convert(...)`; each
      objects entry is a mapping with at least "bbox" (read as XYWH boxes)
      and "category".
    transform: Callable invoked as `transform(image, bboxes, categories)`
      that returns the same triple, or None to skip augmentation.
    image_processor: Callable invoked with `images=`, `annotations=` and
      `return_tensors="pt"`; its result must support `.pop`.
    return_pixel_mask: When False, "pixel_mask" is removed from the result.

  Returns:
    The image processor's output, without "pixel_mask" unless requested.
  """
  images = []
  annotations = []
  for image_id, image, objects in zip(examples["image_id"], examples["image"], examples["objects"]):
    # Shallow copy so the caller's batch entries are not overwritten with
    # the wrapped / augmented values below.
    objects = dict(objects)

    iw, ih = image.size
    # image.size is unpacked as (width, height); canvas_size is given as
    # (height, width).
    bboxes = tv_tensors.BoundingBoxes(objects["bbox"], format="XYWH", canvas_size=(ih, iw))
    categories = objects["category"]
    image = tv_tensors.Image(image.convert("RGB"))

    # apply augmentations
    if transform is not None:
      image, bboxes, categories = transform(image, bboxes, categories)

    objects["bbox"] = bboxes
    objects["category"] = categories
    images.append(image)

    # format annotations in COCO format
    # NOTE(review): any other per-object fields (e.g. an area) are passed
    # through unchanged — confirm as_coco does not rely on them matching
    # the augmented boxes.
    annotations.append(DetrDataLoader.as_coco(image_id, objects))

  # Apply the image processor transformations: resizing, rescaling, normalization
  result = image_processor(images=images, annotations=annotations, return_tensors="pt")

  if not return_pixel_mask:
    result.pop("pixel_mask", None)

  return result

### Apply Image transforms

In [None]:
# Image processor that matches the MODEL_NAME checkpoint.
detr_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)

# Override the normalization statistics with mean 0 / std 1.
# NOTE(review): presumably this turns normalization into a no-op so the
# processed pixel values can be converted back to a viewable image below —
# confirm, and do not carry this override into real training.
detr_processor.image_mean = 0
detr_processor.image_std = 1

# Bind the augmentation pipeline and processor into the on-the-fly transform.
train_transform = partial(transform_batch, transform=image_transform, image_processor=detr_processor, return_pixel_mask=True)
# Alternative without augmentation (swap in to compare against the raw processor output):
# train_transform = partial(transform_batch, transform=None, image_processor=detr_processor, return_pixel_mask=True)
train_ds = ft0915_ds["train"].with_transform(train_transform)

In [None]:
# Preview the augmentation: fetch the same training example NUM_PREVIEWS
# times and draw the boxes returned by the processor on each result.
SAMPLE_INDEX = 15  # the example the original loop reached by skipping 15 items
NUM_PREVIEWS = 1

to_pil = T.ToPILImage()

for cnt in range(NUM_PREVIEWS):
  # Index the example directly: the transform runs when an item is fetched,
  # so stepping an iterator past the first 15 examples would augment and
  # process each of them only to throw the result away.
  data = train_ds[SAMPLE_INDEX]
  image = to_pil(data["pixel_values"])
  # Saved before any drawing, so the file on disk carries no box overlay.
  image.save(f"img_{cnt}.jpg")
  draw = PImageDraw.Draw(image)
  boxes = data["labels"]["boxes"].tolist()
  labels = data["labels"]["class_labels"].tolist()
  iw, ih = image.size

  for box, label in zip(boxes, labels):
    # Boxes are read as (center x, center y, width, height) and scaled by
    # the image size, i.e. treated as fractions of width / height.
    cx, cy, w, h = box
    x0, y0 = int((cx - w / 2) * iw), int((cy - h / 2) * ih)
    x1, y1 = int((cx + w / 2) * iw), int((cy + h / 2) * ih)
    name = id2label[label]

    draw.rectangle((x0, y0, x1, y1), outline="white", width=3)
    draw.text((x0 + 2, y0), name, fill=(0, 0, 0))
    draw.text((x0 + 2, y0 - 12), name, fill=(255, 0, 255))

  display(image)