# Caption

## Ollama: Llama3.2-Vision

In [None]:
import base64
import json

from os import makedirs, path

from models.LlamaVision import LlamaVision
from models.EnPt import EnPt

In [None]:
# --- Notebook configuration -------------------------------------------------

# Base URL of the local Ollama server.
# NOTE(review): not referenced by the visible cells — confirm it is still needed.
OLLAMA_URL = "http://127.0.0.1:11434"

# Image directories; "500" and "900" are sub-folders of IMG_DIR
# (presumably resized variants — confirm). The captioning loop reads
# "<qid>.jpg" files from IMG_DIR_900.
IMG_DIR = "../../imgs/MuseuPaulista"
IMG_DIR_500, IMG_DIR_900 = (path.join(IMG_DIR, size) for size in ("500", "900"))

# Metadata locations: one JSON index file, plus a folder that receives one
# caption JSON file per entry.
WIKI_DATA_DIR = "./metadata/json/MuseuPaulista"
WIKI_INFO_PATH = path.join(WIKI_DATA_DIR, "museu_paulista.json")
WIKI_CAPTION_DIR = path.join(WIKI_DATA_DIR, "captions")

# Create the caption output folder (and any missing parents) up front;
# exist_ok=True makes re-running this cell harmless.
makedirs(WIKI_CAPTION_DIR, exist_ok=True)

In [None]:
# Load the metadata index. If the file does not exist, fall back to an empty
# mapping so the cells below run and simply process nothing.
wiki_data = {}

if path.isfile(WIKI_INFO_PATH):
  # Decode explicitly as UTF-8 rather than the platform default encoding:
  # the caption files written by this notebook are UTF-8 with
  # ensure_ascii=False, and non-ASCII text would otherwise be mis-decoded
  # (or raise) on systems whose default encoding is not UTF-8.
  with open(WIKI_INFO_PATH, "r", encoding="utf-8") as ifp:
    wiki_data = json.load(ifp)

# Top-level keys of the index, sorted so the processing order is
# deterministic across runs. They are used as image / caption file stems.
qids = sorted(wiki_data)

In [None]:
# Captioning wrapper used by the loop below (via `llama_vision.caption(...)`),
# constructed with its default arguments.
# NOTE(review): OLLAMA_URL is defined earlier but is not passed here — confirm
# that LlamaVision's default endpoint is the intended one.
llama_vision = LlamaVision()

In [None]:
# Caption each image with Llama 3.2 Vision, translate the caption, and write
# the result to "<WIKI_CAPTION_DIR>/<qid>.json".
#
# NOTE(review): only the first 10 qids are processed, so the every-100
# progress print fires just once (cnt == 0) — confirm whether the [:10]
# slice is a leftover debugging limit.
for cnt, qid in enumerate(qids[:10]):
  if cnt % 100 == 0:
    print(cnt)

  img_path = path.join(IMG_DIR_900, f"{qid}.jpg")
  caption_path = path.join(WIKI_CAPTION_DIR, f"{qid}.json")

  # Read the raw bytes and close the file immediately, so the handle is not
  # held open during the (slow) model inference and translation below.
  with open(img_path, "rb") as ifp:
    img_data = ifp.read()

  # The image is handed to the model as a base64-encoded string.
  img = base64.b64encode(img_data).decode()
  llama_vision_caption_en = llama_vision.caption(img)

  # Translate every item of every caption field, keeping the same keys.
  # Assumes caption() returns a mapping of field -> list of strings — TODO confirm.
  llama_vision_caption_pt = {
    k: [EnPt.translate(w) for w in v]
    for k, v in llama_vision_caption_en.items()
  }

  cap_data = {
    "llama3.2": {
      "en": llama_vision_caption_en,
      "pt": llama_vision_caption_pt,
    }
  }

  # Compact separators keep the files small; ensure_ascii=False together
  # with the UTF-8 file encoding stores accented characters verbatim.
  with open(caption_path, "w", encoding="utf-8") as ofp:
    json.dump(cap_data, ofp, sort_keys=True, separators=(',',':'), ensure_ascii=False)