In [4]:
# Reload imported modules automatically before each cell executes, so edits to
# the project's `src` package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Local VectorDB

In [1]:
from src.storage.client import DatabaseManager

# Manager for the local vector database used by the rest of this notebook.
db_manager = DatabaseManager()
# NOTE(review): remove_if_exists=True presumably discards an existing local
# database so every run starts empty — confirm in DatabaseManager.create_client.
db_manager.create_client(remove_if_exists=True)

In [2]:
from src.storage.collection import ImageCollection

# Collection settings. The dimension is 2 only because the dummy embeddings
# defined further down are 2-D.
# NOTE(review): it must be changed to the real embedding size once real
# embeddings are inserted.
img_dim = 2
# NOTE(review): the variable name is misspelled ("similiary"); it is left as-is
# here because other cells may reference it.
img_similiary_metric = "COSINE"
img_collection = ImageCollection(db_manager.get_client(), dimension=img_dim, metric_type=img_similiary_metric)

# Insert with Dummy Embeddings

In [3]:
from src.storage.dataset import Dataset

# Point the dataset at the CelebA image folder and its identity annotation file.
dataset = Dataset(
    img_folder_path="./data/img_align_celeba",
    img_identity_map_path="./data/identity_CelebA.txt"
)

# Maps a celebrity identity to the list of its image paths
# (see the printed sample in this cell's output).
img_identity_collection = dataset.images_by_identity()

keys_list = list(img_identity_collection.keys())
for key in keys_list[:2]:  # Slice to preview only the first two identities
    print(f'{key}: {img_identity_collection[key]}\n')

2880: ['./data/img_align_celeba/000001.jpg', './data/img_align_celeba/000404.jpg', './data/img_align_celeba/003415.jpg', './data/img_align_celeba/004390.jpg', './data/img_align_celeba/018062.jpg', './data/img_align_celeba/025244.jpg', './data/img_align_celeba/027771.jpg', './data/img_align_celeba/039393.jpg', './data/img_align_celeba/047978.jpg', './data/img_align_celeba/049142.jpg', './data/img_align_celeba/052385.jpg', './data/img_align_celeba/052623.jpg', './data/img_align_celeba/053184.jpg', './data/img_align_celeba/053311.jpg', './data/img_align_celeba/055834.jpg', './data/img_align_celeba/058188.jpg', './data/img_align_celeba/061431.jpg', './data/img_align_celeba/068154.jpg', './data/img_align_celeba/084705.jpg', './data/img_align_celeba/090937.jpg', './data/img_align_celeba/096324.jpg', './data/img_align_celeba/100990.jpg', './data/img_align_celeba/103728.jpg', './data/img_align_celeba/108341.jpg', './data/img_align_celeba/110376.jpg', './data/img_align_celeba/122439.jpg', './da

In [4]:
# Pick the image lists of the two celebrity identities used in this demo.
id1_images = img_identity_collection[2880]
id2_images = img_identity_collection[2937]

# Report how many images are available for each of the two identities.
print(f"# Celebrity-ID 2880  #img={len(id1_images)}")
print(f"# Celebrity-ID 2937  #img={len(id2_images)}")

# Celebrity-ID 2880  #img=30
# Celebrity-ID 2937  #img=30


In [5]:
# TODO: these are DUMMY embeddings — replace them with a real img2vec embedding!
import numpy as np

# Seed NumPy's global random state so the dummy vectors drawn by the functions
# below are repeatable when the notebook is run top to bottom.
np.random.seed(7)

def embedding_celebrity_id1(img_path: str, dim: int):
    """Return a dummy embedding vector for celebrity 1.

    Placeholder until a real image embedding is wired in. Each component is
    drawn from a normal distribution with mean 100 and standard deviation 10,
    using NumPy's global random state (so ``np.random.seed`` controls the
    result).

    Args:
        img_path: Path of the image. Currently unused; the dummy vector does
            not depend on the image content.
        dim: Number of components of the returned vector.

    Returns:
        A 1-D ``numpy.ndarray`` of length ``dim``.
    """
    # The mean used to be passed as a fixed 2-element list, so any dim other
    # than 2 failed with a broadcasting ValueError. np.resize repeats the
    # 2-element pattern until it has `dim` entries; for dim == 2 the mean (and
    # therefore the drawn values) is exactly what it was before.
    loc = np.resize([100, 100], dim)
    vector = np.random.normal(size=dim, loc=loc, scale=10)
    return vector

def embedding_celebrity_id2(img_path: str, dim: int):
    """Return a dummy embedding vector for celebrity 2.

    Placeholder until a real image embedding is wired in. Components are drawn
    from normal distributions with standard deviation 50 whose means alternate
    between -100 and 30 (component 0 -> -100, component 1 -> 30, component 2 ->
    -100, ...), using NumPy's global random state (so ``np.random.seed``
    controls the result).

    Args:
        img_path: Path of the image. Currently unused; the dummy vector does
            not depend on the image content.
        dim: Number of components of the returned vector.

    Returns:
        A 1-D ``numpy.ndarray`` of length ``dim``.
    """
    # The mean used to be passed as a fixed 2-element list, so any dim other
    # than 2 failed with a broadcasting ValueError. np.resize repeats the
    # 2-element pattern until it has `dim` entries; for dim == 2 the mean (and
    # therefore the drawn values) is exactly what it was before.
    loc = np.resize([-100, 30], dim)
    vector = np.random.normal(size=dim, loc=loc, scale=50)
    return vector

In [24]:
# TODO: dummy embeddings for the first five images of celebrities 1 and 2.
# Rows are built for celebrity 1 first and celebrity 2 second, so the random
# draws happen in that order. Each row carries the celebrity label, the image
# path and the embedding vector.
dummy_data = [
    {'celeb_id': 1, 'img_path': path, 'vector': embedding_celebrity_id1(path, dim=img_dim)}
    for path in id1_images[:5]
] + [
    {'celeb_id': 2, 'img_path': path, 'vector': embedding_celebrity_id2(path, dim=img_dim)}
    for path in id2_images[:5]
]
dummy_data

[{'celeb_id': 1,
  'img_path': './data/img_align_celeba/000001.jpg',
  'vector': array([97.57250921, 85.46758588])},
 {'celeb_id': 1,
  'img_path': './data/img_align_celeba/000404.jpg',
  'vector': array([105.54580312, 101.23880905])},
 {'celeb_id': 1,
  'img_path': './data/img_align_celeba/003415.jpg',
  'vector': array([102.74459924,  84.73475468])},
 {'celeb_id': 1,
  'img_path': './data/img_align_celeba/004390.jpg',
  'vector': array([116.50699691, 101.54335535])},
 {'celeb_id': 1,
  'img_path': './data/img_align_celeba/018062.jpg',
  'vector': array([ 96.12860057, 120.29072221])},
 {'celeb_id': 2,
  'img_path': './data/img_align_celeba/000002.jpg',
  'vector': array([-102.26930149,  -42.53393496])},
 {'celeb_id': 2,
  'img_path': './data/img_align_celeba/011437.jpg',
  'vector': array([-120.26139277,  -84.4157551 ])},
 {'celeb_id': 2,
  'img_path': './data/img_align_celeba/016335.jpg',
  'vector': array([-47.53017253,   9.17628407])},
 {'celeb_id': 2,
  'img_path': './data/img_ali

In [7]:
# Write the dummy rows into the image collection; the value returned by
# insert() is displayed as the cell output.
# NOTE(review): the saved output reports insert_count=6, but the cell above now
# builds 5 + 5 rows — re-run the notebook to refresh the output.
img_collection.insert(dummy_data)

{'insert_count': 6, 'ids': [455058925097058304, 455058925097058305, 455058925097058306, 455058925097058307, 455058925097058308, 455058925097058309], 'cost': 0}

# Query Vector DB

In [8]:
# Simulate an uploaded photo of celebrity 1: draw a fresh dummy embedding with
# the same generator that produced that celebrity's stored vectors and use it
# as the query vector.
query_vector = embedding_celebrity_id1("uploaded_image.jpg", img_dim)
query_vector

array([105.05299374,  97.38643585])

In [9]:
# Search the collection with the single query vector, asking for at most 2 hits.
result = img_collection.search(query_vectors=[query_vector], limit=2)

In [10]:
# `result` holds one list of hits per query vector; print every hit's id and
# distance followed by the celebrity label and image path stored with it.
for hits in result:
    for hit in hits:
        hit_id = hit['id']
        hit_distance = hit['distance']
        print(f"{hit_id=}, {hit_distance=}")

        entity = hit['entity']
        celeb_id = entity['celeb_id']
        img_path = entity['img_path']
        # One call emits both detail lines plus the blank separator line.
        print(f" > celeb_id={celeb_id}", f" > img_path={img_path}", "", sep="\n")

hit_id=455058925097058305, hit_distance=0.9984222054481506
 > celeb_id=1
 > img_path=./data/img_align_celeba/000404.jpg

hit_id=455058925097058304, hit_distance=0.9979906678199768
 > celeb_id=1
 > img_path=./data/img_align_celeba/000001.jpg



In [21]:
# Show the raw hits for the first (and only) query vector, including the
# entity fields stored with each hit.
result[0]

[{'id': 455058925097058305,
  'distance': 0.9984222054481506,
  'entity': {'celeb_id': 1,
   'img_path': './data/img_align_celeba/000404.jpg',
   'vector': [100.32820129394531, 104.07516479492188]}},
 {'id': 455058925097058304,
  'distance': 0.9979906678199768,
  'entity': {'celeb_id': 1,
   'img_path': './data/img_align_celeba/000001.jpg',
   'vector': [116.90525817871094, 95.34062957763672]}}]

In [11]:
from src.embeddings.pretrained import FaceEmbeddings

# Instantiate the pretrained face-embedding wrapper (the saved log output shows
# insightface models being loaded with the CPU execution provider).
em = FaceEmbeddings()

# Compute the embedding of a single CelebA image.
embedding = em.get_embedding(path="data/img_align_celeba/000001.jpg")
# NOTE(review): this prints the whole vector; consider showing only its shape
# or a short slice to keep the notebook output readable.
print(embedding)

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: /Users/thomasraunegger/.insightface/models/buffalo_m/1k3d68.onnx landmark_3d_68
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: /Users/thomasraunegger/.insightface/models/buffalo_m/2d106det.onnx landmark_2d_106
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/thomasraunegger/.insightface/models/buffalo_m/det_2.5g.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: /Users/thomasraunegger/.insightface/models/buffalo_m/genderage.onnx genderage
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/thomasraunegger/.insightface/models/buffalo_m/w600k_r50.onnx recognition ['None', 3, 112, 112] 127.5 127.5
set det-size: (640, 640)


array([ 7.29187429e-02,  8.79215151e-02, -4.11809683e-02,  5.32334531e-03,
        8.37004930e-02, -5.01929857e-02,  1.57786813e-02, -3.62129486e-03,
       -5.04453890e-02, -3.48747522e-02,  1.85586954e-03,  4.91254739e-02,
       -4.53895554e-02, -3.29272710e-02, -1.21375784e-01, -2.49558724e-02,
       -5.77462204e-02, -4.01164480e-02, -2.60713720e-03,  3.47793959e-02,
       -7.03772483e-03,  4.72001992e-02, -4.03232202e-02, -1.06775258e-02,
       -6.43924624e-02,  1.71399284e-02, -9.35843866e-03,  4.89064306e-02,
       -1.71282850e-02,  3.32633257e-02, -4.36806828e-02,  1.24259980e-03,
        1.62741616e-01, -8.59953910e-02, -2.42856275e-02,  4.91061360e-02,
        1.33769000e-02, -2.25466490e-02,  1.36992941e-02,  5.58406971e-02,
       -3.48553360e-02, -2.00011898e-02, -8.03039409e-03,  1.72506496e-02,
        3.30759585e-02,  5.08579165e-02,  1.40875764e-03,  9.87517368e-03,
        1.17490264e-02, -1.14871599e-02,  5.55703510e-03, -3.37671116e-02,
        5.84146790e-02,  