# Detect Objects

- People + Trees
- Bus Stops + Signs

In [None]:
import json

from os import listdir, makedirs, path
from PIL import Image as PImage, ImageDraw as PImageDraw

from utils.detect_utils import OBJECT_THRESHOLDS
from models.Dino import Dino
from models.Ocr import Ocr
from models.Yolo import Yolo

In [None]:
# Root folder that receives one sub-folder of detection JSONs per technique.
OBJ_DIR = "./data/objs"
# Root folder holding the source images, one sub-folder per technique.
IMG_DIR = "./imgs"
# Names of the image-capture techniques; each is a sub-folder of IMG_DIR and OBJ_DIR.
TECHNIQUES = [
  "address",
  "address_coords",
  "heading",
  "station_coords",
]

## People + Trees

In [None]:
# Run the Dino detector over every technique's images and store one JSON of
# detections per image under OBJ_DIR/<technique>/people_trees/.
OBJS = "people_trees"
model = Dino()

for t in TECHNIQUES:
  makedirs(path.join(OBJ_DIR, t, OBJS), exist_ok=True)
  fnames = sorted(f for f in listdir(path.join(IMG_DIR, t)) if f.endswith("jpg"))

  for idx,f in enumerate(fnames):
    # Progress report every 16 images (same cadence as the other detection loops).
    if idx>0 and idx%16==0: print(idx, "/", len(fnames))

    img_file = path.join(IMG_DIR, t, f)
    obj_file = path.join(OBJ_DIR, t, OBJS, f.replace(".jpg", ".json"))

    # Resume support: an existing output file means this image was already processed.
    if path.isfile(obj_file): continue

    # Context manager releases the image file handle as soon as detection is done.
    with PImage.open(img_file) as img:
      objs = model.iou_objects(img, OBJECT_THRESHOLDS)

    # Serialize before opening the target so that a serialization error cannot
    # leave a truncated file that the resume check above would later skip.
    payload = json.dumps({ "dino" : objs })
    with open(obj_file, "w") as ofp:
      ofp.write(payload)

## Bus Stops

In [None]:
# Run the Yolo bus-stop model over every technique's images and store one JSON
# of detections per image under OBJ_DIR/<technique>/bus_stop/.
OBJS = "bus_stop"

# Per-label thresholds handed to Yolo.all_objects.
OBJECT_THRESHOLDS_YOLO = {
  "bus_stop": 0.25,
  "bus_sign": 0.25,
}

model = Yolo("./models/bus_20260201_yolo.pt")

for t in TECHNIQUES:
  makedirs(path.join(OBJ_DIR, t, OBJS), exist_ok=True)
  fnames = sorted(f for f in listdir(path.join(IMG_DIR, t)) if f.endswith("jpg"))

  for idx,f in enumerate(fnames):
    # Progress report every 16 images.
    if idx>0 and idx%16==0: print(idx, "/", len(fnames))

    img_file = path.join(IMG_DIR, t, f)
    obj_file = path.join(OBJ_DIR, t, OBJS, f.replace(".jpg", ".json"))

    # Resume support: an existing output file means this image was already processed.
    if path.isfile(obj_file): continue

    # Context manager releases the image file handle as soon as detection is done.
    with PImage.open(img_file) as img:
      objs = model.all_objects(img, OBJECT_THRESHOLDS_YOLO)

    # Serialize before opening the target so that a serialization error cannot
    # leave a truncated file that the resume check above would later skip.
    payload = json.dumps({ "yolo" : objs })
    with open(obj_file, "w") as ofp:
      ofp.write(payload)

## Text

In [None]:
# Run OCR over every technique's images and store the normalized words of each
# image as JSON under OBJ_DIR/<technique>/text/.
OBJS = "text"

# Area and score values handed to Ocr.top_words.
OCR_THRESHOLDS = {
  "area": 500,
  "score": 0.02
}

model = Ocr()

# Iterate over all techniques: the OCR merge step lists the "text" folder of
# every technique, and already-processed images are skipped below, so a full
# pass is safe to re-run.
for t in TECHNIQUES:
  makedirs(path.join(OBJ_DIR, t, OBJS), exist_ok=True)
  fnames = sorted(f for f in listdir(path.join(IMG_DIR, t)) if f.endswith("jpg"))

  for idx,f in enumerate(fnames):
    # Progress report every 16 images.
    if idx>0 and idx%16==0: print(idx, "/", len(fnames))

    img_file = path.join(IMG_DIR, t, f)
    obj_file = path.join(OBJ_DIR, t, OBJS, f.replace(".jpg", ".json"))

    # Resume support: an existing output file means this image was already processed.
    if path.isfile(obj_file): continue

    words = model.top_words(img_file, OCR_THRESHOLDS["area"], OCR_THRESHOLDS["score"])
    # Lowercase and trim surrounding whitespace and punctuation.
    lwords = [w.lower().strip().strip(".?!*,;:'\"") for w in words]

    # Serialize before opening the target so that a serialization error cannot
    # leave a truncated file that the resume check above would later skip.
    payload = json.dumps({ "ocr" : lwords })
    with open(obj_file, "w") as ofp:
      ofp.write(payload)

## Combine JSONs

Object label -> image id -> boxes (written to `label2boxes.json`):

```json
"bus_sign": {
  "address/1000": [{"score": 0.36, "label": "bus_sign", "box": [0.31, 0.40, 0.33, 0.45]}],
  "address/1001": [{"score": 0.42, "label": "bus_sign", "box": [0.36, 0.46, 0.40, 0.54]}],
  ...
},

"tree": {
  ...
},
```

Bus stop id -> object label -> image id -> highest box score and number of boxes (written to `stop2label.json`):

```json
"1000": {
  "bus_sign": {
    "address/1000": {"score": 0.355, "count": 1},
    "heading/1000": {"score": 0.194, "count": 2},
    ...
  },
  "tree": {
    ...
  }
  ...
},

"1001": {
  ...
},
```

In [None]:
import json

from os import listdir, path

# Root folder holding one sub-folder of detection JSONs per technique.
OBJ_DIR = "./data/objs"
# Technique names; each is a sub-folder of OBJ_DIR.
TECHNIQUES = [
  "address",
  "address_coords",
  "heading",
  "station_coords",
]

In [None]:
# Label aliases: detector labels on the left are rewritten to the label on the right.
LABEL = {
  "palm": "tree",
  "palm tree": "tree",
  "palm person tree": "person tree",
}

# file id example: "address/1052"
label2id = {}        # label -> [file id, ...] (one entry per box)
label2idboxes = {}   # label -> file id -> [box, ...]
id2boxes = {}        # file id -> [box, ...]
stop2labelbox = {}   # stop id -> label -> file id -> {"score": max score, "count": boxes}

for t in TECHNIQUES:
  technique_dir = path.join(OBJ_DIR, t)
  obj_dirs = sorted(d for d in listdir(technique_dir) if path.isdir(path.join(technique_dir, d)))
  for obj_dir in obj_dirs:
    # OCR output has a different schema and is merged in its own step.
    if obj_dir == "text": continue
    obj_dir_path = path.join(technique_dir, obj_dir)
    obj_files = sorted(f for f in listdir(obj_dir_path) if f.endswith(".json"))
    for f in obj_files:
      fpath = path.join(obj_dir_path, f)
      sid = f.replace(".json", "")
      fid = t + "/" + sid

      # Only hold the file open while parsing it.
      with open(fpath, "r") as ifp:
        obj_data = json.load(ifp)

      stop_labels = stop2labelbox.setdefault(sid, {})
      for model,boxes in obj_data.items():
        for b in boxes:
          box_label = LABEL.get(b["label"], b["label"])
          b["label"] = box_label
          b["box"] = [round(x, 4) for x in b["box"]]

          # Append in place; rebuilding the list with `+ [x]` on every box
          # copies it each time and grows quadratically.
          label2id.setdefault(box_label, []).append(fid)
          id2boxes.setdefault(fid, []).append(b)
          label2idboxes.setdefault(box_label, {}).setdefault(fid, []).append(b)

          summary = stop_labels.setdefault(box_label, {}).setdefault(fid, {"score":0, "count":0})
          summary["count"] += 1
          summary["score"] = max(summary["score"], b["score"])

In [None]:
# Persist the label -> file id -> boxes index as JSON.
with open("./data/objs/label2boxes.json", "w") as ofp:
  ofp.write(json.dumps(label2idboxes))

In [None]:
# Persist the stop id -> label -> file id -> {score, count} index as JSON.
with open("./data/objs/stop2label.json", "w") as ofp:
  ofp.write(json.dumps(stop2labelbox))

## Combine OCR JSONs

Cleaned OCR word -> image ids (written to `word2images.json`)

In [None]:
import json
import re

from os import listdir, path

# Root folder holding one sub-folder of OCR JSONs per technique.
OBJ_DIR = "./data/objs"
# Technique names; each is a sub-folder of OBJ_DIR.
TECHNIQUES = [
  "address",
  "address_coords",
  "heading",
  "station_coords",
]

# Character class of punctuation, digits and slashes that are deleted from OCR words.
symbols = r"[!@#$%^&*().?!,;:0-9\-+\\/'\"]"
# One or more consecutive spaces.
spaces = r"[ ]+"
# Template for "a lowercase letter followed by repeats of itself"; the literal
# "N" is replaced with a quantifier range before the pattern is used.
repeats = r"([a-z])\1{N}"

In [None]:
# Build word2id: cleaned OCR word -> list of file ids whose image contains it.
# file id example: "address/1052"
word2id = {}

for t in TECHNIQUES:
  obj_dir_path = path.join(OBJ_DIR, t, "text")
  # A technique without OCR output contributes nothing instead of aborting the merge.
  if not path.isdir(obj_dir_path): continue

  obj_files = sorted(f for f in listdir(obj_dir_path) if f.endswith(".json"))
  for f in obj_files:
    fpath = path.join(obj_dir_path, f)
    fid = t + "/" + f.replace('.json', '')

    # Only hold the file open while parsing it.
    with open(fpath, "r") as ifp:
      obj_data = json.load(ifp)

    # De-duplicate AFTER cleaning: distinct raw strings (e.g. "stop" and
    # "stop1") can normalize to the same word and must list the image only once.
    cleaned = set()
    for model,words in obj_data.items():
      for raw in words:
        word = re.sub(spaces, " ", re.sub(symbols, "", raw)).strip()
        lw = len(word)
        # Length check comes first so the quantifier range below is never negative.
        if lw < 4 or word == "google": continue

        # Drop words that start with one letter repeated for (almost) the whole word.
        most_repeats = re.match(repeats.replace("N", f"{lw-3},{lw-1}"), word)
        if most_repeats is not None: continue

        cleaned.add(word)

    # Sorted iteration keeps the key order of word2id reproducible between runs.
    for word in sorted(cleaned):
      word2id.setdefault(word, []).append(fid)

In [None]:
with open("./data/objs/word2images.json", "w") as ofp:
  json.dump(word2id, ofp)

## Stats

In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Load the stop id -> label -> file id -> {score, count} index.
with open("./data/objs/stop2label.json", "r") as ifp:
  data = json.loads(ifp.read())

In [None]:
# Aggregate box counts from `data` (stop id -> label -> file id -> {score, count}).
obj_cnt = {}               # label -> total boxes
tech2obj_cnt = {}          # technique -> label -> total boxes
stop_id2obj_cnt = {}       # stop id -> label -> total boxes
stop_id2max_obj_cnt = {}   # stop id -> label -> best image's {count, score, from}

for stop_id, labels in data.items():
  stop_totals = stop_id2obj_cnt.setdefault(stop_id, {})
  stop_best = stop_id2max_obj_cnt.setdefault(stop_id, {})

  for obj_label, images in labels.items():
    obj_cnt.setdefault(obj_label, 0)
    stop_totals.setdefault(obj_label, 0)
    stop_best.setdefault(obj_label, {"count":0, "score":0})

    for fid, info in images.items():
      technique, sid = fid.split("/")
      assert sid == stop_id

      tech_totals = tech2obj_cnt.setdefault(technique, {})
      tech_totals.setdefault(obj_label, 0)

      n_boxes = info["count"]
      obj_cnt[obj_label] += n_boxes
      stop_totals[obj_label] += n_boxes
      tech_totals[obj_label] += n_boxes

      # Best image for this stop/label: most boxes, ties broken by higher score.
      best = stop_best[obj_label]
      if (n_boxes, info["score"]) > (best["count"], best["score"]):
        stop_best[obj_label] = {
          "count": n_boxes,
          "score": info["score"],
          "from": technique,
        }

In [None]:
# Build the per-stop table: coordinates, plus the image chosen for each stop.
stop_df = pd.read_csv("./data/stops.tgh.csv")
noll = stop_df[stop_df["google.lat"] == 0]

# Rows whose google.lat is 0 take both coordinates from the city.* columns.
for axis in ("lat", "lon"):
  stop_df.loc[noll.index, f"google.{axis}"] = stop_df.loc[noll.index, f"city.{axis}"]

stop_df = stop_df.drop(columns=["city.lat","city.lon","city_google.distance"])
stop_df.columns = ["id", "address", "neighborhood", "lat", "lon"]

# Labels in order of preference; the first one present for a stop decides its image.
label_priority = ("bus_stop", "person", "person tree", "bus_sign", "tree")

for stop_id,label2max_count in stop_id2max_obj_cnt.items():
  chosen = next((lbl for lbl in label_priority if lbl in label2max_count), None)
  if chosen is None:
    continue
  technique = label2max_count[chosen]["from"]
  row = stop_df[stop_df["id"] == int(stop_id)]
  stop_df.loc[row.index, "image"] = f"{technique}/{stop_id}.jpg"

# Keep only stops that were assigned an image.
stop_df = stop_df[stop_df["image"].notna()]

# NaN is not valid JSON, so it is mapped to None (null) before export.
stop_dict = stop_df.replace({ np.nan: None }).to_dict(orient="records")

with open("./data/stops.tgh.json", "w") as ofp:
  json.dump(stop_dict, ofp, ensure_ascii=False)

### Plots

In [None]:
# Bar chart of the total number of boxes per label, labels in alphabetical order.
xl = sorted(obj_cnt)
heights = [obj_cnt[label] for label in xl]

plt.bar(xl, heights)
plt.show()

In [None]:
# Grouped bar chart: for each technique (x axis), one bar per label with the
# total number of boxes detected.
xl = sorted(tech2obj_cnt.keys())
# Collect labels from every technique, not just one, so that a label missing
# from a single technique is still plotted; "person tree" is left out.
yl = sorted({
  label
  for label2cnt in tech2obj_cnt.values()
  for label in label2cnt
  if label != "person tree"
})

ys = np.array([
  [tech2obj_cnt[kx].get(ky, 0) for kx in xl]
  for ky in yl
])

x = np.arange(len(xl))
width = 1.0 / (len(yl) + 1)
# Offset of the first bar so that each group of len(yl) bars is centred on its tick.
center = (len(yl) - 1) / 2

plt.figure(figsize=(9,6))

for idx,y in enumerate(ys):
  plt.bar(x + (idx - center) * width, y, width, label=yl[idx])

plt.xticks(x, xl)
plt.legend(loc="upper left")
plt.grid(axis="y")
# plt.ylim((0,2000))
plt.show()

In [None]:
# For every label, count how many stops have their best image (per
# stop_id2max_obj_cnt) coming from each technique, then plot it as grouped bars.
label2tech_cnt = {}

for stop_id,label2max_count in stop_id2max_obj_cnt.items():
  for label,count_info in label2max_count.items():
    tech = count_info["from"]
    tech_cnt = label2tech_cnt.setdefault(label, {})
    tech_cnt[tech] = tech_cnt.get(tech, 0) + 1

# Number of stops in which each label was found at least once.
unique_obj_counts = {k:sum(v.values()) for k,v in label2tech_cnt.items()}
display(unique_obj_counts)

# Collect techniques from every label rather than from "tree" alone, so the
# plot neither fails when no tree was found nor drops a technique.
xl = sorted({tech for tech_cnt in label2tech_cnt.values() for tech in tech_cnt})
yl = [label for label in sorted(label2tech_cnt.keys()) if label != "person tree"]

ys = np.array([
  [label2tech_cnt[ky].get(kx, 0) for kx in xl]
  for ky in yl
])

x = np.arange(len(xl))
width = 1.0 / (len(yl) + 1)
# Offset of the first bar so that each group of len(yl) bars is centred on its tick.
center = (len(yl) - 1) / 2

plt.figure(figsize=(9,6))

for idx,y in enumerate(ys):
  plt.bar(x + (idx - center) * width, y, width, label=yl[idx])

plt.xticks(x, xl)
plt.legend(loc="upper left")
plt.grid(axis="y")
# plt.ylim((0,2000))
plt.show()


In [None]:
# Map each stop id to its chosen image, then count chosen images per technique.
stop_id2img_id = {}
for pair in stop_df[["id", "image"]].values:
  stop_id2img_id[pair[0]] = pair[1]

unique_technique_cnts = {}

for imgid in stop_id2img_id.values():
  # The technique is the part of the image id before the first "/".
  tech = imgid.partition("/")[0]
  if tech not in unique_technique_cnts:
    unique_technique_cnts[tech] = 0
  unique_technique_cnts[tech] += 1

display(unique_technique_cnts)