## Finetune DETR model

In [None]:
import gc
import torch

from PIL import ImageDraw as PImageDraw
from pytorch_lightning import Trainer, loggers as PLLoggers
from torchvision.transforms import v2 as T

from finetune_utils.finetune_0915 import FTUtils
from finetune_utils.Detr import Detr, DetrDataLoader

### Create DataLoader

In [None]:
# Identifier of the pretrained checkpoint; handed to DetrDataLoader and Detr in
# the cells below (presumably a Hugging Face Hub repository id - confirm).
MODEL_NAME = "microsoft/conditional-detr-resnet-50"
# Identifier of the fine-tuning dataset; handed to DetrDataLoader below.
DATASET_NAME = "acervos-digitais/ft-0915"

In [None]:
# Project data-loading wrapper. Later cells read `mDL.ds`, call
# `mDL.getTrain()` / `mDL.getValidation()`, and pass the object to `Detr`.
mDL = DetrDataLoader(DATASET_NAME, MODEL_NAME)

### Test Dataset

In [None]:
# Pick one training row and prepare a drawing context on its image.
img_id = 11
# Fetch the row once so the image and its annotations are guaranteed to come
# from the same materialized sample (and the row is only decoded one time).
sample = mDL.ds["train"][img_id]
image = sample["image"]
annotations = sample["objects"]
draw = PImageDraw.Draw(image)

In [None]:
# Overlay every annotated box and its class name on the sample image.
# Each box is unpacked as (x, y, width, height), with (x, y) the top-left corner.
for box, class_idx in zip(annotations["bbox"], annotations["category"]):
  x, y, w, h = box
  label = FTUtils.ID2LABEL[class_idx]

  draw.rectangle((x, y, x + w, y + h), outline="red", width=1)
  # The label is written twice: black at the box corner and magenta 12 px above.
  draw.text((x + 2, y), label, fill=(0, 0, 0))
  draw.text((x + 2, y - 12), label, fill=(255, 0, 255))

display(image)

### Test DataLoader

In [None]:
# Visual sanity check of one training batch as produced by the data loader.
train_dl = iter(mDL.getTrain())
val_dl = iter(mDL.getValidation())

# Discard the first `img_id` batches so that the batch at index `img_id` is shown.
img_id = 2
for _skipped in range(img_id):
  next(train_dl)

batch = next(train_dl)
# Build the tensor-to-PIL converter once instead of once per image.
to_pil = T.ToPILImage()

# Walk images and their label dicts in lockstep rather than by index.
for pxs, labels in zip(batch["pixel_values"], batch["labels"]):
  image = to_pil(pxs)
  iw, ih = image.size
  draw = PImageDraw.Draw(image)

  for box, class_idx in zip(labels["boxes"].tolist(), labels["class_labels"].tolist()):
    # Boxes are (center x, center y, width, height), scaled here by the image
    # width/height to obtain pixel corner coordinates.
    cx, cy, w, h = box
    x0, y0 = int((cx - w/2)*iw), int((cy - h/2)*ih)
    x1, y1 = int((cx + w/2)*iw), int((cy + h/2)*ih)
    label = FTUtils.ID2LABEL[class_idx]

    draw.rectangle((x0, y0, x1, y1), outline="white", width=3)
    # The label is written twice: black at the box corner and magenta 12 px above.
    draw.text((x0 + 2, y0), label, fill=(0, 0, 0))
    draw.text((x0 + 2, y0 - 12), label, fill=(255, 0, 255))

  display(image)

In [None]:
# Drop any `model` left over from an earlier run of the notebook so that its
# memory can be reclaimed before a new one is created.
try:
  del model
except NameError:
  # `model` is not defined yet (e.g. first run); there is nothing to release.
  # Only NameError is expected here; anything else should surface.
  pass

gc.collect()
torch.cuda.empty_cache()

In [None]:
# Build the project's `Detr` wrapper around the checkpoint named by MODEL_NAME,
# wired to the data loader created above. `lr` and `lr_backbone` are both 1e-5
# here, with a weight decay of 1e-4.
model = Detr(model_name=MODEL_NAME, dataloader=mDL, lr=1e-5, lr_backbone=1e-5, weight_decay=1e-4)

In [None]:
# Resume from a previously saved checkpoint file: read it from disk and copy
# its "state_dict" entry into the freshly built model.
# NOTE(review): torch.load deserializes the file's contents - only point this at
# checkpoints produced by a trusted run.
cp = torch.load("lightning_logs/e105+151+64-augm3/checkpoints/epoch=63-step=4800.ckpt")
model.load_state_dict(cp["state_dict"])

In [None]:
# TensorBoard logger rooted at the current directory with a fixed version label.
mLogger = PLLoggers.TensorBoardLogger(save_dir=".", version="e2-augm3")
# GPU training for 2 epochs with gradient clipping at 0.1.
trainer = Trainer(accelerator="gpu", max_epochs=2, gradient_clip_val=0.1, logger=mLogger)
# No dataloaders are passed here; presumably `Detr` provides them from the
# `dataloader=mDL` it was constructed with - confirm in finetune_utils.Detr.
trainer.fit(model)

In [None]:
# Print the result of the project's `eval_detr` at a threshold of 0.3.
# NOTE(review): which split is evaluated and what is returned is defined in
# finetune_utils.Detr - confirm there.
print(model.eval_detr(thresholds=0.3))

### Save to HF Hub

In [None]:
# Repository name that the fine-tuned model and processor are pushed to in the
# next cell; the "Test Model" section loads the model back from this same name.
OUTPUT_MODEL_NAME = "acervos-digitais/conditional-detr-resnet-50-ft-0915-e256-augm3"

In [None]:
# Push the wrapped model and its processor to the same repository name so they
# can be loaded together later.
model.model.push_to_hub(OUTPUT_MODEL_NAME)
model.processor.push_to_hub(OUTPUT_MODEL_NAME)

### Test Model

In [None]:
import torch

from datasets import load_dataset
from PIL import ImageDraw as PImageDraw
from transformers import AutoImageProcessor, AutoModelForObjectDetection

from finetune_utils.Detr import Detr
from finetune_utils.finetune_0915 import FTUtils

# From here on MODEL_NAME refers to the fine-tuned repository, replacing the
# base checkpoint name assigned at the top of the notebook.
MODEL_NAME = OUTPUT_MODEL_NAME

In [None]:
# Load the evaluation dataset together with the fine-tuned processor and model.
ft0915_ds = load_dataset("acervos-digitais/ft-0915")
detr_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)

# Class-name mappings are taken from the project's FTUtils tables.
label_maps = {"id2label": FTUtils.ID2LABEL, "label2id": FTUtils.LABEL2ID}
model = AutoModelForObjectDetection.from_pretrained(MODEL_NAME, **label_maps)
model = model.to("cuda")

In [None]:
# Evaluate on the train split, then the test split, using eval_dataset's
# default thresholds.
for split in ("train", "test"):
  print(Detr.eval_dataset(model, detr_processor, list(ft0915_ds[split])))

In [None]:
# Evaluate on the train split, then the test split, with one shared threshold.
for split in ("train", "test"):
  print(Detr.eval_dataset(model, detr_processor, list(ft0915_ds[split]), thresholds=0.3))

In [None]:
# Evaluate both splits with a list of three thresholds
# (presumably one per class - confirm against Detr.eval_dataset).
thresholds = [0.27, 0.27, 0.55]
for split in ("train", "test"):
  print(Detr.eval_dataset(model, detr_processor, list(ft0915_ds[split]), thresholds=thresholds))

In [None]:
# Same evaluation as above with a different list of three thresholds
# (presumably one per class - confirm against Detr.eval_dataset).
thresholds = [0.3, 0.3, 0.4]
for split in ("train", "test"):
  print(Detr.eval_dataset(model, detr_processor, list(ft0915_ds[split]), thresholds=thresholds))

### Parameter Tuning

In [None]:
import itertools

import numpy as np

# Exhaustive search over every combination of three thresholds, evaluated on
# the test split. `thresholds[i]` is the combination that produced `results[i]`.

# Materialize the test split once; rebuilding the list for each of the 125
# combinations repeats identical work.
# NOTE(review): assumes Detr.eval_dataset does not modify the samples - confirm.
test_samples = list(ft0915_ds["test"])

# Candidate values 0.35, 0.40, ..., 0.55 (the 0.56 stop keeps 0.55 in range).
steps = np.arange(0.35, 0.56, 0.05)

thresholds = []
results = []

# product(..., repeat=3) visits combinations in the same order as three nested
# loops over `steps`, with the first threshold varying slowest.
for t0, t1, t2 in itertools.product(steps, repeat=3):
  thresholds.append([t0, t1, t2])
  res = Detr.eval_dataset(model, detr_processor, test_samples, thresholds=[t0, t1, t2])
  results.append(res)

In [None]:
# Round everything to 3 decimals for display and pair each threshold triple
# with its result pair (index 0 is ranked as precision, index 1 as recall).
thresholds_ = [[round(v, 3) for v in (t0, t1, t2)] for t0, t1, t2 in thresholds]
results_ = [[round(v, 3) for v in (r0, r1)] for r0, r1 in results]
tr = list(zip(thresholds_, results_))

# Top 30 combinations, highest precision first.
ranked_by_precision = sorted(tr, key=lambda pair: pair[1][0], reverse=True)
display("by precision", ranked_by_precision[:30])

# display("by recall",
#         sorted(tr, key=lambda x: x[1][1], reverse=True)[:5])

# Top 20 combinations, highest precision-recall product first.
ranked_by_product = sorted(tr, key=lambda pair: pair[1][0] * pair[1][1], reverse=True)
display("by recall*precision", ranked_by_product[:20])

## See some images

In [None]:
from itertools import islice

# Run the detector on the first 48 test samples, print predicted vs. annotated
# class names, and show each image with the predicted boxes drawn in red.
# islice stops after 48 rows instead of materializing the whole split first.
for r in islice(ft0915_ds["test"], 48):
  img = r["image"]
  iw, ih = img.size
  draw = PImageDraw.Draw(img)

  inputs = detr_processor(images=img, return_tensors="pt")
  # Follow whichever device the model lives on instead of hard-coding one.
  pixel_values = inputs["pixel_values"].to(model.device)

  with torch.no_grad():
    outputs = model(pixel_values=pixel_values, pixel_mask=None)

  # target_sizes is given as (height, width), so boxes come back in the pixel
  # coordinates of the original image.
  ppo = detr_processor.post_process_object_detection(outputs,
                                                     target_sizes=[(ih, iw)],
                                                     threshold=0.25)[0]

  # Convert tensors to plain Python numbers once, for printing and drawing.
  labels_list = ppo["labels"].tolist()
  scores_list = [round(score, 4) for score in ppo["scores"].tolist()]

  print("pred:", [(FTUtils.ID2LABEL[l],s) for l,s in zip(labels_list, scores_list)])
  print("labels:", [FTUtils.ID2LABEL[c] for c in r["objects"]["category"]])

  # Each predicted box is used as (x0, y0, x1, y1) corner coordinates.
  for x0, y0, x1, y1 in ppo["boxes"].tolist():
    draw.rectangle(((x0, y0), (x1, y1)), outline=(255, 0, 0), width=2)

  display(img)