# Pre-Process

In [None]:
from params.collections import MUSEUMS

from Museum import Museum

## Color Palette

In [None]:
# Run the color step (Museum.get_colors) on every entry of MUSEUMS,
# printing the entry's key first so progress is visible in the cell output.
for collection_name, collection_info in MUSEUMS.items():
  print("color:", collection_name)
  Museum.get_colors(collection_info)

## Embeddings

In [None]:
# Run the embedding step (Museum.get_embeddings) on every entry of MUSEUMS,
# printing the entry's key first so progress is visible in the cell output.
for collection_name in MUSEUMS:
  print("embeddings:", collection_name)
  Museum.get_embeddings(MUSEUMS[collection_name])

## PCA and t-SNE

In [None]:
import json

from os import listdir, makedirs, path

from embeddings import pca_kmeans, tsne_kmeans
from embeddings import plot_clusters, visualize_pca_clusters, visualize_tsne_clusters

from params.collections import MUSEUMS

In [None]:
# Key of the MUSEUMS entry that the following cells operate on.
museum_key = "brasiliana"
museum_info = MUSEUMS[museum_key]

In [None]:
# All locations below are derived from the selected museum_info entry.
museum_subdir = museum_info["dir"]

# Image folders: the collection's image root and its "500" subfolder.
IMG_DIR = f"../../imgs/{museum_subdir}"
IMG_DIR_500 = path.join(IMG_DIR, "500")

# Metadata folder, the embeddings folder inside it, and the collection's
# JSON file.
MUSEUM_DATA_DIR = f"./metadata/json/{museum_subdir}"
MUSEUM_EMBED_DIR = path.join(MUSEUM_DATA_DIR, "embeddings")
MUSEUM_INFO_PATH = path.join(MUSEUM_DATA_DIR, f"{museum_info['file']}.json")

In [None]:
# Collect, in sorted file-name order, the "clip" embedding stored in each
# JSON file of MUSEUM_EMBED_DIR together with the path of the same-named
# .jpg inside IMG_DIR_500. raw_embs[i] and image_paths[i] refer to the same
# item.
embeding_files = sorted(
  file_name for file_name in listdir(MUSEUM_EMBED_DIR) if file_name.endswith(".json")
)

raw_embs = []
image_paths = []

for io_file in embeding_files:
  # Drop only the trailing ".json" (guaranteed present by the filter above).
  # str.replace would also remove ".json" occurring earlier in the name and
  # yield a wrong key / image name for such files.
  qid = io_file[:-len(".json")]
  img_file = f"{qid}.jpg"

  embeding_path = path.join(MUSEUM_EMBED_DIR, io_file)
  with open(embeding_path, "r", encoding="utf8") as f:
    m_embs = json.load(f)

  # Each file is expected to hold a mapping keyed by its own base name, whose
  # value contains a "clip" entry; a missing key raises KeyError here.
  raw_embs.append(m_embs[qid]["clip"])
  image_paths.append(path.join(IMG_DIR_500, img_file))

In [None]:
# Cluster the loaded embeddings with pca_kmeans (3 clusters), then plot the
# returned values colored by cluster assignment.
pca_vals, pca_clusters, pca_centers = pca_kmeans(
  raw_embs,
  n_clusters=3,
)
plot_clusters(
  pca_clusters,
  pca_vals,
  title="clip pca",
)

In [None]:
# Cluster the loaded embeddings with tsne_kmeans using 3 components and
# 4 clusters, then plot the result.
tsne3_vals, tsne3_clusters, tsne3_centers = tsne_kmeans(
  raw_embs,
  n_clusters=4,
  n_components=3,
)
plot_clusters(
  tsne3_clusters,
  tsne3_vals,
  title="clip tsne 3D",
)

In [None]:
# Cluster the loaded embeddings with tsne_kmeans using 2 components and
# 4 clusters, then plot the result.
tsne2_vals, tsne2_clusters, tsne2_centers = tsne_kmeans(
  raw_embs,
  n_clusters=4,
  n_components=2,
)
plot_clusters(
  tsne2_clusters,
  tsne2_vals,
  title="clip tsne 2D",
)

In [None]:
# Show the images behind the embeddings grouped by PCA cluster
# (3 clusters, grid_dim=6).
visualize_pca_clusters(
  raw_embs,
  image_paths,
  n_clusters=3,
  grid_dim=6,
)

In [None]:
# Show the images behind the embeddings grouped by t-SNE cluster
# (5 clusters, grid_dim=6).
# NOTE(review): the tsne_kmeans cells in this notebook use n_clusters=4 while
# this call uses 5 - confirm the difference is intentional.
visualize_tsne_clusters(
  raw_embs,
  image_paths,
  n_clusters=5,
  grid_dim=6,
)