# Download Metadata/Images

De várias fontes...

## [Museu Paulista](https://pt.wikipedia.org/wiki/Wikipédia:GLAM/Museu_Paulista)

Queries:
- negative: https://w.wiki/C$kL
- positive: https://w.wiki/C$kK

In [None]:
import json

from os import path, listdir, makedirs

from wiki_utils import Wikidata

from params.collections import MUSEUMS

In [None]:
# Settings of the collection processed by the Wikidata cells below; its
# 'dir', 'file' and 'label' entries are read further down.
# NOTE(review): the section heading names Museu Paulista but the key selected
# here is "pinacoteca" — confirm which collection is intended.
wiki_museum = MUSEUMS["pinacoteca"]

In [None]:
# Directory holding this collection's metadata, and the JSON file inside it.
WIKI_DATA_DIR = f"./metadata/json/{wiki_museum['dir']}"
WIKI_INFO_PATH = path.join(WIKI_DATA_DIR, f"{wiki_museum['file']}.json")

# Create the directory (and any missing parents); no error if it exists.
makedirs(WIKI_DATA_DIR, exist_ok=True)

In [None]:
# Start from the metadata saved by a previous run; stays empty when the
# JSON file does not exist yet.
wiki_data = {}

if path.isfile(WIKI_INFO_PATH):
  with open(WIKI_INFO_PATH, "r") as cache_file:
    wiki_data = json.loads(cache_file.read())

### Prep Metadata

In [None]:
# Build / extend `wiki_data` (keyed by Wikidata QID) from the category query.

# Stand-in for optional fields (date, creator) missing from a query row.
defval = {"value": "unknown"}

category = "painting"
location = wiki_museum['label']
cQuery = Wikidata.prep_category_query(category, location)
cResults = Wikidata.run_query(cQuery)

for cnt, result in enumerate(cResults):
  # Progress indicator every 100 rows.
  if cnt % 100 == 0:
    print(cnt)

  # Named `qid` rather than `id` so the builtin id() is not shadowed for the
  # rest of the notebook session.
  qid = result["qid"]["value"]

  cat = {
    "en": result["cat_en"]["value"],
    "pt": result["cat_pt"]["value"]
  }

  if qid in wiki_data:
    # Item already recorded (from the cache file or an earlier row): only
    # merge this row's category labels and skip the depicts queries.
    # dict.fromkeys() removes duplicates while keeping insertion order, so
    # the saved lists are stable between runs; a set -> list round-trip
    # would reorder them with every new interpreter hash seed.
    for lang in ("en", "pt"):
      known = wiki_data[qid]["categories"][lang]
      wiki_data[qid]["categories"][lang] = list(dict.fromkeys([*known, cat[lang]]))
    continue

  # New item: fetch the labels of what it depicts in both languages.
  dResultsEn = Wikidata.run_depicts_query(qid, "en")
  dResultsPt = Wikidata.run_depicts_query(qid, "pt")

  wiki_data[qid] = {
    "id": qid,
    "categories": {
      "en": [cat["en"]],
      "pt": [cat["pt"]]
    },
    "depicts": {
      "en": [d["depictsLabel"]["value"] for d in dResultsEn],
      "pt": [d["depictsLabel"]["value"] for d in dResultsPt]
    },
    "title": result["itemLabel"]["value"],
    # date and creator fall back to "unknown" when absent from the row.
    "date": result.get("date", defval)["value"],
    "creator": result.get("creatorLabel", defval)["value"],
    "image": result["image"]["value"]
  }

In [None]:
# Persist the collected metadata as compact JSON with sorted keys.
# Serialize first: opening with "w" truncates the file immediately, so a
# failure while encoding would otherwise wipe the existing cache.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used even though ensure_ascii=False emits non-ASCII text — consider
# encoding="utf-8" here and in the cells that load this file, together.
wiki_json = json.dumps(wiki_data, separators=(',',':'), sort_keys=True, ensure_ascii=False)

with open(WIKI_INFO_PATH, "w") as ofp:
  ofp.write(wiki_json)

### Download Images

In [None]:
# Root of this collection's image tree, with one sub-directory per stored
# size ("full", "900", "500").
IMG_DIR = f"../../imgs/{wiki_museum['dir']}"

IMG_DIR_FULL, IMG_DIR_900, IMG_DIR_500 = (
  path.join(IMG_DIR, size_name) for size_name in ("full", "900", "500")
)

# Create each size directory; existing ones are left untouched.
for size_dir in (IMG_DIR_FULL, IMG_DIR_900, IMG_DIR_500):
  makedirs(size_dir, exist_ok=True)

In [None]:
# Reload the saved metadata from disk; stays empty when the JSON file
# does not exist.
wiki_data = {}

if path.isfile(WIKI_INFO_PATH):
  with open(WIKI_INFO_PATH, "r") as saved_file:
    wiki_data = json.loads(saved_file.read())

In [None]:
# Download each item's image and store it at up to three sizes, skipping
# any size whose file is already on disk.
for index, (qid, info) in enumerate(wiki_data.items()):
  # Progress indicator every 100 items.
  if index % 100 == 0:
    print(index)

  # (destination file, bounding-box side), largest first. thumbnail() is
  # called for its effect on `pimg` (its return value is unused), so each
  # smaller size is derived from the image as left by the previous step.
  targets = [
    (path.join(IMG_DIR_FULL, f"{qid}.jpg"), 4096),
    (path.join(IMG_DIR_900, f"{qid}.jpg"), 900),
    (path.join(IMG_DIR_500, f"{qid}.jpg"), 500),
  ]
  img_url = info["image"]

  # Nothing to do when every size already exists.
  if all(path.isfile(target) for target, _ in targets):
    continue

  try:
    pimg = Wikidata.download_image(img_url)
  except Exception as e:
    # Report the failing item and move on to the next one.
    print(qid)
    print(img_url)
    print(e)
    continue

  for target, side in targets:
    if path.isfile(target):
      continue
    pimg.thumbnail([side, side])
    pimg.save(target)

## [Brasiliana Museus](https://brasiliana.museus.gov.br/)

Queries:
- [pinturas](https://brasiliana.museus.gov.br/classificacao/02-artes-visuais-cinematografica-02-6-pintura/?perpage=96&view_mode=masonry&paged=1&order=ASC&orderby=date&fetch_only_meta=&fetch_only=thumbnail%2Ccreation_date%2Ctitle%2Cdescription&taxquery%5B0%5D%5Btaxonomy%5D=tnc_tax_27&taxquery%5B0%5D%5Bterms%5D%5B0%5D=1076&taxquery%5B0%5D%5Bcompare%5D=IN)

In [None]:
import json

from os import path, listdir, makedirs

from brasiliana_utils import Brasiliana

from models.EnPt import PtEn

from params.collections import MUSEUMS

In [None]:
# Settings of the Brasiliana collection; its 'dir' and 'file' entries are
# read by the cells below.
museum_info = MUSEUMS["brasiliana"]

In [None]:
# Directory holding this collection's metadata, and the JSON file inside it.
MUSEUM_DATA_DIR = f"./metadata/json/{museum_info['dir']}"
MUSEUM_INFO_PATH = path.join(MUSEUM_DATA_DIR, f"{museum_info['file']}.json")

# Create the directory (and any missing parents); no error if it exists.
makedirs(MUSEUM_DATA_DIR, exist_ok=True)

In [None]:
# Start from the metadata saved by a previous run; stays empty when the
# JSON file does not exist yet.
museum_data = {}

if path.isfile(MUSEUM_INFO_PATH):
  with open(MUSEUM_INFO_PATH, "r") as cache_file:
    museum_data = json.loads(cache_file.read())

### Prep Metadata

In [None]:
# Fetch the Brasiliana results for the "painting" category; the rows are
# consumed by the metadata loop in the next cell.
category = "painting"
qResults = Brasiliana.run_category_query(category)

In [None]:
# Build / refresh `museum_data` from the Brasiliana query results.
for cnt, result in enumerate(qResults):
  # Progress indicator every 100 rows.
  if cnt % 100 == 0:
    print(cnt)

  # Keys loaded back from the JSON cache are always strings. Normalising
  # the key keeps lookups matching cached entries and avoids mixing key
  # types, which json.dump(sort_keys=True) cannot sort. str() is a no-op
  # when the id is already a string. Named `item_id` so the builtin id()
  # is not shadowed for the rest of the notebook session.
  item_id = str(result["id"])

  item_data = {
    "id": result["id"],
    "image": result["document"]["value"]
  }

  # ITEM_DATA_FIELDS maps each output key (k) to the source field (v)
  # looked up in result["data"].
  for k, v in Brasiliana.ITEM_DATA_FIELDS.items():
    item_data[k] = result["data"][v]["value"]
    # Translated fields are indexed by language code here, so their value
    # is presumably a dict with a "pt" entry — TODO confirm. An English
    # version is added only when the Portuguese text is non-empty.
    if v in Brasiliana.FIELDS_TO_TRANSLATE and len(item_data[k]["pt"]) > 0:
      item_data[k]["en"] = PtEn.translate(item_data[k]["pt"])

  # Fresh values override cached ones; cached keys not produced here
  # are kept.
  museum_data[item_id] = museum_data.get(item_id, {}) | item_data

In [None]:
# Persist the collected metadata as compact JSON with sorted keys.
# Serialize first: opening with "w" truncates the file immediately, so a
# failure while encoding would otherwise wipe the existing cache.
# NOTE(review): no explicit encoding is passed, so the platform default is
# used even though ensure_ascii=False emits non-ASCII text — consider
# encoding="utf-8" here and in the cells that load this file, together.
museum_json = json.dumps(museum_data, separators=(',',':'), sort_keys=True, ensure_ascii=False)

with open(MUSEUM_INFO_PATH, "w") as ofp:
  ofp.write(museum_json)

### Download Images

In [None]:
# Root of this collection's image tree, with one sub-directory per stored
# size ("full", "900", "500").
IMG_DIR = f"../../imgs/{museum_info['dir']}"

IMG_DIR_FULL, IMG_DIR_900, IMG_DIR_500 = (
  path.join(IMG_DIR, size_name) for size_name in ("full", "900", "500")
)

# Create each size directory; existing ones are left untouched.
for size_dir in (IMG_DIR_FULL, IMG_DIR_900, IMG_DIR_500):
  makedirs(size_dir, exist_ok=True)

In [None]:
# Reload the saved metadata from disk; stays empty when the JSON file
# does not exist.
museum_data = {}

if path.isfile(MUSEUM_INFO_PATH):
  with open(MUSEUM_INFO_PATH, "r") as saved_file:
    museum_data = json.loads(saved_file.read())

In [None]:
# Download each item's image and store it at up to three sizes, skipping
# any size whose file is already on disk.
for index, (qid, info) in enumerate(museum_data.items()):
  # Progress indicator every 100 items.
  if index % 100 == 0:
    print(index)

  # (destination file, bounding-box side), largest first. thumbnail() is
  # called for its effect on `pimg` (its return value is unused), so each
  # smaller size is derived from the image as left by the previous step.
  targets = [
    (path.join(IMG_DIR_FULL, f"{qid}.jpg"), 4096),
    (path.join(IMG_DIR_900, f"{qid}.jpg"), 900),
    (path.join(IMG_DIR_500, f"{qid}.jpg"), 500),
  ]
  img_url = info["image"]

  # Nothing to do when every size already exists.
  if all(path.isfile(target) for target, _ in targets):
    continue

  try:
    pimg = Brasiliana.download_image(img_url)
  except Exception as e:
    # Report the failing item and move on to the next one.
    print(qid)
    print(img_url)
    print(e)
    continue

  for target, side in targets:
    if path.isfile(target):
      continue
    pimg.thumbnail([side, side])
    pimg.save(target)