## Image Embeddings

In [None]:
import json
import matplotlib.pyplot as plt
import numpy as np

from os import listdir, makedirs, path

from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from PIL import Image as PImage, ImageOps as PImageOps

from parameters.arquigrafia import IMAGES_PATH
from parameters.embeddings import EMBEDDINGS_PATH

from models.embedding_models import Clip, EfficientNet, ResNet, Vit

makedirs(EMBEDDINGS_PATH, exist_ok=True)

### Run Embeddings

In [None]:
# Compute and cache the embeddings of every JPEG in IMAGES_PATH.
# One JSON file per image is written to EMBEDDINGS_PATH; images that already
# have a JSON file are skipped, so the cell can be re-run to resume.
input_files = sorted([f for f in listdir(IMAGES_PATH) if f.endswith("jpg")])

for idx, io_file in enumerate(input_files):
  input_file_path = path.join(IMAGES_PATH, io_file)
  output_file_path = path.join(EMBEDDINGS_PATH, io_file.replace(".jpg", ".json"))

  if path.isfile(output_file_path):
    continue

  # Progress trace; only printed for indices that actually get processed.
  if idx % 100 == 0:
    print(idx, IMAGES_PATH, io_file)

  # convert() returns a new, fully loaded image, so the source file can be
  # closed right away instead of leaking one open handle per iteration.
  with PImage.open(input_file_path) as src:
    image = PImageOps.exif_transpose(src.convert("RGB"))

  # One embedding per model, converted with .tolist() so json can serialize it.
  raw_embs = {
    "clip": Clip.get_embedding(image).tolist(),
    "efficient": EfficientNet.get_embedding(image).tolist(),
    "resnet": ResNet.get_embedding(image).tolist(),
    "vit": Vit.get_embedding(image).tolist(),
  }

  image_embs = {"raw": raw_embs}

  with open(output_file_path, "w", encoding="utf-8") as of:
    json.dump(image_embs, of, sort_keys=True, separators=(',',':'), ensure_ascii=False)

### Load Embeddings

In [None]:
# Read the cached per-image JSON files back and collect, for every model key
# found under "raw", one embedding per image in sorted file-name order.
# Images without a JSON file are skipped.
input_files = sorted(f for f in listdir(IMAGES_PATH) if f.endswith("jpg"))

raw_embs = {}

for idx, io_file in enumerate(input_files):
  output_file_path = path.join(EMBEDDINGS_PATH, io_file.replace(".jpg", ".json"))

  if path.isfile(output_file_path):
    if idx % 100 == 0:
      print(idx, IMAGES_PATH, io_file)

    with open(output_file_path, "r", encoding="utf8") as f:
      image_embs = json.load(f)

    for model_key, embedding in image_embs["raw"].items():
      raw_embs.setdefault(model_key, []).append(embedding)

### Cluster

In [None]:
def pca_kmeans(emb_raw, n_clusters=8, n_components=128):
  """Standardize, PCA-reduce and k-means-cluster a set of embeddings.

  Args:
    emb_raw: embeddings, one row per sample.
    n_clusters: number of k-means clusters.
    n_components: number of principal components to keep.

  Returns:
    A tuple (reduced embeddings, cluster label per row, cluster centers).
    k-means is fitted on the reduced data, so the centers are expressed in
    the PCA-reduced space.
  """
  standardized = StandardScaler().fit_transform(emb_raw)
  reduced = PCA(n_components=n_components).fit_transform(standardized)

  kmeans = KMeans(n_clusters=n_clusters)
  labels = kmeans.fit_predict(reduced)

  return reduced, labels, kmeans.cluster_centers_

def tsne_kmeans(emb_raw, n_clusters=8, n_components=3, perplexity=30):
  """Standardize, t-SNE-project and k-means-cluster a set of embeddings.

  Args:
    emb_raw: embeddings, one row per sample.
    n_clusters: number of k-means clusters.
    n_components: dimensionality of the t-SNE projection.
    perplexity: perplexity passed to t-SNE.

  Returns:
    A tuple (projected embeddings, cluster label per row, cluster centers).
    k-means is fitted on the projection, so the centers are expressed in
    the t-SNE space.
  """
  standardized = StandardScaler().fit_transform(emb_raw)
  projector = TSNE(n_components=n_components, perplexity=perplexity)
  projected = projector.fit_transform(standardized)

  kmeans = KMeans(n_clusters=n_clusters)
  labels = kmeans.fit_predict(projected)

  return projected, labels, kmeans.cluster_centers_

In [None]:
def plot_clusters(clusters, pcas, title=""):
  """Scatter-plot reduced embeddings coloured by cluster label.

  Draws one 2-D figure per pair of the first (up to) three dimensions and,
  when there are more than two dimensions, one 3-D figure of the first three.

  Args:
    clusters: cluster label per row of `pcas`.
    pcas: 2-D array of reduced embeddings (rows are samples).
    title: title used for every figure.
  """
  # Negative labels get a marker size of 0, which hides those points.
  marker_sizes = [0 if label < 0 else 24 for label in clusters]
  scatter_style = dict(marker='o', s=marker_sizes, c=clusters, alpha=0.35,
                       cmap="tab10", edgecolors="none")

  n_dims = pcas.shape[1]
  shown = min(n_dims, 3)
  axis_pairs = [(a, b) for a in range(shown) for b in range(a + 1, shown)]

  for a, b in axis_pairs:
    plt.scatter(pcas[:, a], pcas[:, b], **scatter_style)
    plt.title(title)
    plt.show()

  # The 3-D view needs at least three dimensions.
  if n_dims <= 2:
    return

  fig = plt.figure(figsize=(8, 8))
  ax = fig.add_subplot(projection='3d')
  ax.scatter(pcas[:, 0], pcas[:, 1], pcas[:, 2], **scatter_style)
  ax.set_title(title)
  plt.show()

In [None]:
def plot_cluster_elbows(embs, title, min_clusters=2, max_clusters=10):
  """Plot the silhouette score of pca_kmeans against the number of clusters.

  Args:
    embs: embeddings, one row per sample.
    title: prefix of the plot title.
    min_clusters: smallest cluster count tried (inclusive).
    max_clusters: largest cluster count tried (inclusive).
  """
  def score_for(cluster_count):
    reduced, labels, _ = pca_kmeans(embs, n_clusters=cluster_count)
    return silhouette_score(reduced, labels)

  cluster_counts = range(min_clusters, max_clusters + 1)
  scores = [score_for(count) for count in cluster_counts]

  plt.plot(cluster_counts, scores)
  plt.title(f"{title} - Silhouette Score")
  plt.show()

In [None]:
def plot_pca_elbows(embs, title):
  """Plot the silhouette score of 8-cluster k-means against PCA size.

  The component counts tried are the powers of two from 1 to 1024, capped at
  min(number of samples, number of features), plus that maximum itself.

  Args:
    embs: embeddings, one row per sample; must be non-empty because the
      feature count is read from the first row.
    title: prefix of the plot title.
  """
  max_components = min(len(embs), len(embs[0]))

  # Capping turns every power of two above the maximum into the same value.
  # A set keeps each count once, so no identical PCA + k-means fit is
  # repeated and the curve has a single point per x value.
  candidates = {min(max_components, 2 ** i) for i in range(11)}
  candidates.add(max_components)
  ncs = sorted(candidates)

  sil = []
  for nc in ncs:
    _pca, _clusters, _ = pca_kmeans(embs, n_components=nc, n_clusters=8)
    sil.append(silhouette_score(_pca, _clusters))

  plt.plot(ncs, sil)
  plt.title(f"{title} - Silhouette Score")
  plt.show()

In [None]:
# Silhouette-vs-cluster-count plot for each embedding model.
for model_name in ("clip", "efficient", "resnet", "vit"):
  plot_cluster_elbows(raw_embs[model_name], model_name)

In [None]:
# Silhouette-vs-PCA-size plot for each embedding model.
for model_name in ("clip", "efficient", "resnet", "vit"):
  plot_pca_elbows(raw_embs[model_name], model_name)

In [None]:
# Cluster every model's embeddings with the default settings and plot them.
for model_name in ("clip", "efficient", "resnet", "vit"):
  reduced, labels, _ = pca_kmeans(raw_embs[model_name])
  plot_clusters(labels, reduced, title=model_name)

In [None]:
# PCA-reduce and cluster the CLIP embeddings (pca_kmeans defaults).
clip_pca, clip_clusters, clip_centers = pca_kmeans(raw_embs["clip"])

In [None]:
# PCA-reduce and cluster the EfficientNet embeddings (pca_kmeans defaults).
efficient_pca, efficient_clusters, efficient_centers = pca_kmeans(raw_embs["efficient"])

In [None]:
# PCA-reduce and cluster the ResNet embeddings (pca_kmeans defaults).
resnet_pca, resnet_clusters, resnet_centers = pca_kmeans(raw_embs["resnet"])

In [None]:
# PCA-reduce and cluster the ViT embeddings (pca_kmeans defaults).
vit_pca, vit_clusters, vit_centers = pca_kmeans(raw_embs["vit"])

In [None]:
# Sorted names of the files in IMAGES_PATH that end in "jpg"; the next cell
# indexes this list to open the images it displays.
input_files = sorted([f for f in listdir(IMAGES_PATH) if f.endswith("jpg")])

In [None]:
# Show, for every ResNet cluster, an 8x8 grid with the (up to) 64 images
# closest to the cluster center in PCA space, nearest first.
m_pca, m_clusters, m_centers = pca_kmeans(raw_embs["resnet"])

# raw_embs only holds rows for images whose embedding JSON exists, so row i
# belongs to the i-th *embedded* file, not to input_files[i]. Rebuild that
# list with the same existence check that was used when raw_embs was loaded.
embedded_files = [
  f for f in input_files
  if path.isfile(path.join(EMBEDDINGS_PATH, f.replace(".jpg", ".json")))
]

for c in np.unique(m_clusters):
  cluster_center = m_centers[c]
  cluster_idxs = np.where(m_clusters == c)[0]
  cluster_pcas = m_pca[cluster_idxs]
  pca_center_dists = np.linalg.norm(cluster_pcas - cluster_center, axis=1)
  cluster_idxs_sorted = cluster_idxs[pca_center_dists.argsort()]

  fig, axes = plt.subplots(nrows=8, ncols=8)
  fig.set_size_inches(10, 10)
  fig.set_dpi(72)

  fig.suptitle(f"Cluster {c}")

  # zip stops at the shorter sequence: a cluster with fewer than 64 members
  # leaves the remaining cells empty instead of raising IndexError.
  for ax, iidx in zip(axes.flat, cluster_idxs_sorted):
    # convert() yields a loaded copy, so the file can be closed immediately.
    with PImage.open(path.join(IMAGES_PATH, embedded_files[iidx])) as src:
      img = PImageOps.exif_transpose(src.convert("RGB")).resize((128,128))
    ax.imshow(img)

  # Hide the axes of every cell, including the ones left empty.
  for ax in axes.flat:
    ax.axis("off")

  plt.tight_layout()
  plt.show()

### Load Clip Embeddings

In [None]:
# Load only the CLIP embedding of every image that has a cached JSON file,
# keyed by the file name with ".jpg" removed, rounded to 8 decimals.
input_files = sorted(f for f in listdir(IMAGES_PATH) if f.endswith("jpg"))

clip_embs = {}

for idx, io_file in enumerate(input_files):
  img_id = io_file.replace(".jpg", "")
  output_file_path = path.join(EMBEDDINGS_PATH, io_file.replace(".jpg", ".json"))

  if path.isfile(output_file_path):
    if idx % 100 == 0:
      print(idx, IMAGES_PATH, io_file)

    with open(output_file_path, "r", encoding="utf8") as f:
      image_embs = json.load(f)

    entry = {}
    clip_embs[img_id] = entry
    entry["raw"] = [round(value, 8) for value in image_embs["raw"]["clip"]]

In [None]:
# Raw CLIP vectors as a list, in clip_embs insertion order.
clip_raw_vals = [e["raw"] for e in clip_embs.values()]

In [None]:
# PCA + k-means on the CLIP vectors (pca_kmeans defaults), then plot.
clip_pca_vals, clip_pca_clusters, clip_pca_centers = pca_kmeans(clip_raw_vals)
plot_clusters(clip_pca_clusters, clip_pca_vals, title="clip pca")

In [None]:
# t-SNE + k-means on the CLIP vectors (tsne_kmeans defaults), then plot.
clip_tsne_vals, clip_tsne_clusters, clip_tsne_centers = tsne_kmeans(clip_raw_vals)
plot_clusters(clip_tsne_clusters, clip_tsne_vals, title="clip tsne")

In [None]:
# Attach the PCA and t-SNE coordinates (rounded to 8 decimals) to every entry
# of clip_embs.
# clip_pca_vals / clip_tsne_vals have one row per entry of clip_embs, in dict
# order, because they derive from clip_raw_vals, which is built from
# clip_embs.values(). Iterating the dict keeps ids and rows in step;
# enumerating input_files would raise KeyError, or pair rows with the wrong
# image, as soon as one image has no embedding file.
for img_id, pca_row, tsne_row in zip(clip_embs, clip_pca_vals, clip_tsne_vals):
  clip_embs[img_id]["pca"] = [round(float(x),8) for x in pca_row]
  clip_embs[img_id]["tsne"] = [round(float(x),8) for x in tsne_row]

In [None]:
# Write all CLIP entries (raw / pca / tsne) to one compact JSON file.
output_file_path = "/kaggle/working/embeddings_clip.json"
with open(output_file_path, "w", encoding="utf-8") as of:
  serialized = json.dumps(clip_embs, sort_keys=True, separators=(',',':'), ensure_ascii=False)
  of.write(serialized)

In [None]:
# Open six sample images for the ad-hoc embedding checks in the cells below.
# Paths are built with path.join, like everywhere else in this notebook,
# instead of string concatenation.
sample_names = ["100.jpg", "101.jpg", "1000.jpg", "1001.jpg", "1010.jpg", "1011.jpg"]
imgs = [PImage.open(path.join(IMAGES_PATH, name)) for name in sample_names]

# Single image used by the one-image checks.
img = imgs[0]

# Preview each sample as a 128x128 thumbnail.
for i in imgs:
  display(i.resize((128,128)))

In [None]:
# Embed all sample images with ResNet in one call and print the result shape.
emb = ResNet.get_embedding(imgs)
print(emb.shape)

# Euclidean distance from every embedding to the second one (emb[1]).
# The .pow / .sum(dim=1) calls suggest emb is a 2-D torch tensor — TODO confirm.
emb_diff = (emb - emb[1]).pow(2).sum(dim=1).pow(0.5)
# Cell output: sample indices ordered by that distance.
emb_diff.argsort()

In [None]:
# Same model on the single image `img`; prints the result shape.
emb = ResNet.get_embedding(img)
print(emb.shape)

In [None]:
# Embed all sample images with EfficientNet in one call and print the shape.
emb = EfficientNet.get_embedding(imgs)
print(emb.shape)

# Euclidean distance from every embedding to the second one (emb[1]).
# The .pow / .sum(dim=1) calls suggest emb is a 2-D torch tensor — TODO confirm.
emb_diff = (emb - emb[1]).pow(2).sum(dim=1).pow(0.5)
# Cell output: sample indices ordered by that distance.
emb_diff.argsort()

In [None]:
# Same model on the single image `img`; prints the result shape.
emb = EfficientNet.get_embedding(img)
print(emb.shape)

In [None]:
# Embed all sample images with ViT in one call and print the result shape.
emb = Vit.get_embedding(imgs)
print(emb.shape)

# Euclidean distance from every embedding to the second one (emb[1]).
# The .pow / .sum(dim=1) calls suggest emb is a 2-D torch tensor — TODO confirm.
emb_diff = (emb - emb[1]).pow(2).sum(dim=1).pow(0.5)
# Cell output: sample indices ordered by that distance.
emb_diff.argsort()

In [None]:
# Same model on the single image `img`; prints the result shape.
emb = Vit.get_embedding(img)
print(emb.shape)

In [None]:
# Embed all sample images with CLIP in one call and print the result shape.
emb = Clip.get_embedding(imgs)
print(emb.shape)

# Euclidean distance from every embedding to the second one (emb[1]).
# The .pow / .sum(dim=1) calls suggest emb is a 2-D torch tensor — TODO confirm.
emb_diff = (emb - emb[1]).pow(2).sum(dim=1).pow(0.5)
# Cell output: sample indices ordered by that distance.
emb_diff.argsort()

In [None]:
# Same model on the single image `img`; prints the result shape.
emb = Clip.get_embedding(img)
print(emb.shape)