## Object Detection


## OWLv2
- https://huggingface.co/google/owlv2-base-patch16
- https://huggingface.co/google/owlv2-large-patch14
- https://huggingface.co/google/owlv2-large-patch14-ensemble

#### Results from experiments
- Use larger images and smaller models

In [None]:
import json

from os import listdir, makedirs, path

from PIL import Image as PImage, ImageOps as PImageOps, ImageDraw as PImageDraw

from params.detect import OBJS_LABELS_IN as OBJS_LABELS, OBJS_THOLDS
from models.Owlv2 import Owlv2

In [None]:
# Metadata locations for the Museu Paulista collection.
WIKI_DATA_DIR = "./metadata/json/MuseuPaulista"
WIKI_INFO_PATH = path.join(WIKI_DATA_DIR, "museu_paulista.json")
# Folder that receives one detection-result JSON file per item.
WIKI_OBJECT_DIR = path.join(WIKI_DATA_DIR, "objects")

# Image folders. "500" / "900" are presumably image sizes in pixels -- confirm.
IMG_DIR = "../../imgs/MuseuPaulista"
IMG_DIR_500, IMG_DIR_900 = (path.join(IMG_DIR, size) for size in ("500", "900"))

# Make sure the output folder exists before any results are written.
makedirs(WIKI_OBJECT_DIR, exist_ok=True)

In [None]:
# Load the collection metadata. The keys of the top-level JSON object are
# used as item ids (qids) by the cells below.
wiki_data = {}

# A missing metadata file is tolerated: wiki_data stays empty, and so does qids.
if path.isfile(WIKI_INFO_PATH):
  # Decode explicitly as UTF-8 (the JSON standard encoding) instead of relying
  # on the platform default, which differs between operating systems.
  with open(WIKI_INFO_PATH, "r", encoding="utf-8") as ifp:
    wiki_data = json.load(ifp)

# Sorted ids give a deterministic processing order across runs.
qids = sorted(wiki_data.keys())

In [None]:
# Instantiate the project's Owlv2 wrapper with the "base" checkpoint name rather
# than one of the "large" ones listed in the notes, in line with the
# "smaller models" finding recorded there.
# NOTE(review): the string is presumably a Hugging Face model id that the
# wrapper loads -- confirm in models/Owlv2.
owl = Owlv2("google/owlv2-base-patch16")

In [None]:
# Run the detector over each item image and cache the detections as one JSON
# file per item in WIKI_OBJECT_DIR.
# NOTE(review): only the first 10 ids are processed (qids[:10]), so the progress
# print below can only fire for cnt == 0 -- confirm whether the slice is a
# leftover from testing.
for cnt, qid in enumerate(qids[:10]):
  if cnt % 100 == 0:
    print(cnt)

  input_file_path = path.join(IMG_DIR_900, f"{qid}.jpg")
  output_file_path = path.join(WIKI_OBJECT_DIR, f"{qid}.json")

  # An existing result file acts as a cache entry: the item is not re-processed.
  if path.isfile(output_file_path):
    continue

  # Not every id necessarily has an image on disk; report and move on instead
  # of aborting the whole batch.
  if not path.isfile(input_file_path):
    print(f"missing image, skipping: {input_file_path}")
    continue

  # The context manager closes the underlying file handle; convert() returns a
  # fully loaded copy, so the image stays usable after the file is closed.
  with PImage.open(input_file_path) as src:
    image = PImageOps.exif_transpose(src.convert("RGB"))

  # Query the detector once per (labels, thresholds) pair and pool the results.
  image_boxes = []
  for labels, tholds in zip(OBJS_LABELS, OBJS_THOLDS):
    image_boxes.extend(owl.all_objects(image, labels, tholds))

  # Serialize before opening the output file: if serialization fails, no
  # truncated file is left behind to be mistaken for a finished result.
  payload = json.dumps(image_boxes, sort_keys=True, separators=(',',':'), ensure_ascii=False)
  with open(output_file_path, "w", encoding="utf-8") as of:
    of.write(payload)

### Visualize

In [None]:
# Names of all detection-result files in the output folder, in sorted order.
obj_files = sorted(name for name in listdir(WIKI_OBJECT_DIR) if name.endswith(".json"))

In [None]:
# Draw the stored detection boxes on each image and show it inline.
for fname in obj_files:
  # Result files are written as UTF-8 with non-ASCII characters kept verbatim,
  # so they must be decoded as UTF-8 as well.
  with open(path.join(WIKI_OBJECT_DIR, fname), "r", encoding="utf-8") as inp:
    iboxes = json.load(inp)

  # Nothing to draw for images without detections.
  if not iboxes:
    continue

  image_file_path = path.join(IMG_DIR_900, fname.replace(".json", ".jpg"))
  # Close the file handle right away; convert() yields a fully loaded copy.
  with PImage.open(image_file_path) as src:
    image = PImageOps.exif_transpose(src.convert("RGB"))
  iw, ih = image.size
  draw = PImageDraw.Draw(image)

  for box in iboxes:
    # Coordinates are scaled by the image width/height below, i.e. they are
    # stored as fractions of the image size (apparently in [0, 1] -- confirm).
    x0, y0, x1, y1 = box["box"]
    draw.rectangle(((x0*iw, y0*ih), (x1*iw, y1*ih)), outline=(255, 0, 0), width=2)

  # display() is the notebook (IPython) helper for rendering rich output.
  display(image)

## SigLIP 2

Large patch 16x16:
- https://huggingface.co/google/siglip2-large-patch16-384
- https://huggingface.co/google/siglip2-large-patch16-512

Giant 16x16:
- https://huggingface.co/google/siglip2-giant-opt-patch16-256
- https://huggingface.co/google/siglip2-giant-opt-patch16-384


