In [1]:
%load_ext autoreload
%autoreload 2

# Local VectorDB

In [1]:
from src.storage.client import DatabaseManager

db_manager = DatabaseManager()
db_manager.create_client(remove_if_exists=True)

In [2]:
from src.storage.collection import ImageCollection

img_dim = 512
img_similiary_metric = "COSINE"
img_collection = ImageCollection(db_manager.get_client(), dimension=img_dim, metric_type=img_similiary_metric)

# Insert Embeddings

In [3]:
from src.embeddings.pretrained import FaceEmbeddings
from src.storage.dataset import Dataset

em = FaceEmbeddings()

dataset = Dataset(
    img_folder_path="./data/img_align_celeba",
    img_identity_map_path="./data/identity_CelebA.txt"
)

img_identity_collection = dataset.images_by_identity()

keys_list = list(img_identity_collection.keys())
for key in keys_list[:2]:  # Using slicing to get the first five keys
    print(f'{key}: {img_identity_collection[key]}\n')

Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: /home/alex/.insightface/models/buffalo_s/1k3d68.onnx landmark_3d_68
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: /home/alex/.insightface/models/buffalo_s/2d106det.onnx landmark_2d_106
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/alex/.insightface/models/buffalo_s/det_500m.onnx detection [1, 3, '?', '?'] 127.5 128.0
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
model ignore: /home/alex/.insightface/models/buffalo_s/genderage.onnx genderage
Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /home/alex/.insightface/models/buffalo_s/w600k_mbf.onnx recognition ['None', 3, 112, 112] 127.5 127.5
set det-size: (640, 640)
2880: ['./data/img_align_celeba/000001.jpg', './data/img_align_celeb

In [4]:
counter = 0
data = []

for celebrity_id in list(img_identity_collection.keys()):
    # Extend the data list with the celebrity ID, image path and a random vector (for now)
    data.extend(
        [
            {
                'celeb_id': celebrity_id,
                'img_path': img_path,
                'vector': em.get_embedding(path=img_path)
             }
            for img_path in img_identity_collection[celebrity_id][:5]
        ]
    )
    # Break after the first two celebrity IDs just to make sure it works as expected
    counter = counter + 1
    if counter == 5:
        break

In [5]:
data

[{'celeb_id': 2880,
  'img_path': './data/img_align_celeba/000001.jpg',
  'vector': array([-1.70279536e-02, -7.99110830e-02,  8.12005028e-02,  4.69671749e-02,
         -5.50703108e-02,  3.44057344e-02, -2.47755647e-02, -9.05997213e-03,
         -1.66148003e-02,  6.55048639e-02, -6.41638860e-02,  6.75162254e-03,
         -6.28141984e-02,  5.96703365e-02,  6.05062069e-03,  1.32727074e-02,
         -1.53193166e-02, -4.89652157e-02, -3.47750783e-02, -7.85027221e-02,
         -2.64537036e-02, -2.79622003e-02,  6.00087084e-03,  6.53448254e-02,
         -1.85057335e-02,  2.49041822e-02,  6.49571344e-02, -5.22831567e-02,
          5.44828512e-02,  6.14354983e-02, -4.57557384e-03,  1.35326525e-02,
         -5.15108928e-02, -7.04545900e-02,  3.18055116e-02,  6.21082447e-03,
         -6.68223798e-02, -3.82706039e-02, -4.39109951e-02,  3.23894620e-02,
          3.90737094e-02, -7.77765140e-02, -7.06836507e-02,  4.37517054e-02,
         -8.75677764e-02, -3.75570394e-02,  3.63902114e-02,  9.06359497

In [6]:
img_collection.insert(data)

{'insert_count': 25, 'ids': [455146647896457216, 455146647896457217, 455146647896457218, 455146647896457219, 455146647896457220, 455146647896457221, 455146647896457222, 455146647896457223, 455146647896457224, 455146647896457225, 455146647896457226, 455146647896457227, 455146647896457228, 455146647896457229, 455146647896457230, 455146647896457231, 455146647896457232, 455146647896457233, 455146647896457234, 455146647896457235, 455146647896457236, 455146647896457237, 455146647896457238, 455146647896457239, 455146647896457240], 'cost': 0}

# Query Vector DB

In [15]:
celebrity_id = 9295

# Celeb-ID 1 query vector embedding
query_vector = em.get_embedding(path=img_identity_collection[celebrity_id][10])
query_vector

array([-2.19094548e-02,  3.56321819e-02,  9.98362228e-02, -4.69097160e-02,
       -7.57556558e-02, -3.11212055e-02, -1.50399143e-02, -5.38482554e-02,
        7.88982660e-02,  8.77124742e-02, -2.74403542e-02, -3.85986753e-02,
       -4.20579351e-02,  4.04883251e-02,  1.46115115e-02, -5.95541447e-02,
       -4.87278998e-02,  7.34511716e-03,  1.49061568e-02, -4.71028686e-02,
       -9.91486013e-03,  3.74555564e-03,  3.78346443e-02,  2.20807400e-02,
        6.99632019e-02,  2.02943329e-02, -7.03185201e-02, -4.12158035e-02,
       -3.47282775e-02, -3.03129572e-02, -9.21143405e-03, -1.20333452e-02,
        3.49684134e-02,  4.42640185e-02, -5.11276117e-03,  4.14904244e-02,
       -8.69397819e-03, -7.87083339e-03,  1.60562526e-02, -7.82208331e-03,
       -2.93548722e-02, -1.23641547e-02,  1.03151724e-02,  2.71753464e-02,
        1.80910435e-02, -5.55295870e-03, -3.91308777e-02, -7.36109316e-02,
       -2.80657317e-02,  6.34450745e-03, -2.09335587e-03, -4.19454277e-02,
       -1.35975154e-02,  

In [16]:
# output max 2 entries
result = img_collection.search(query_vectors=[query_vector], limit=5)

In [17]:
for hits in result:
    for hit in hits:
        hit_id, hit_distance = hit['id'], hit['distance']
        print(f"{hit_id=}, {hit_distance=}")

        celeb_id, img_path = hit['entity']['celeb_id'], hit['entity']['img_path']
        print(f" > celeb_id={celeb_id}")
        print(f" > img_path={img_path}")
        print()

hit_id=455146647896457239, hit_distance=0.4329594671726227
 > celeb_id=9295
 > img_path=./data/img_align_celeba/016680.jpg

hit_id=455146647896457238, hit_distance=0.41477036476135254
 > celeb_id=9295
 > img_path=./data/img_align_celeba/014427.jpg

hit_id=455146647896457240, hit_distance=0.3502352833747864
 > celeb_id=9295
 > img_path=./data/img_align_celeba/026162.jpg

hit_id=455146647896457236, hit_distance=0.2926146984100342
 > celeb_id=9295
 > img_path=./data/img_align_celeba/000005.jpg

hit_id=455146647896457224, hit_distance=0.06818772852420807
 > celeb_id=2937
 > img_path=./data/img_align_celeba/017121.jpg



In [18]:
result[0]

[{'id': 455146647896457239,
  'distance': 0.4329594671726227,
  'entity': {'celeb_id': 9295,
   'img_path': './data/img_align_celeba/016680.jpg',
   'vector': [0.0028901430778205395,
    0.11355150490999222,
    0.08416572213172913,
    0.048379961401224136,
    -0.05742311850190163,
    -0.02248903177678585,
    -0.006451957859098911,
    -0.033324796706438065,
    0.03338199481368065,
    0.015504986047744751,
    -0.024137074127793312,
    -0.05719401687383652,
    -0.07414713501930237,
    -0.013198482804000378,
    -0.03628082945942879,
    -0.024480514228343964,
    -0.0743715688586235,
    0.03042081743478775,
    -0.009984602220356464,
    -0.009694404900074005,
    -0.04199519753456116,
    0.015895260497927666,
    -0.05156148225069046,
    -0.009907947853207588,
    0.051396213471889496,
    0.04438859596848488,
    0.013241210952401161,
    -0.00867944210767746,
    -0.012734821997582912,
    -0.12816780805587769,
    -0.014178120531141758,
    -0.026360897347331047,
    -0