In [None]:
# Download CACD2000_embeddings.tar.gz from Google Drive.
# The URL is quoted so the shell passes `?` through literally instead of
# treating it as a glob character.
!gdown "https://drive.google.com/uc?id=13zidA52_u1N1CDuQtSyg0-SNvVZu-Cyp"

Downloading...
From: https://drive.google.com/uc?id=13zidA52_u1N1CDuQtSyg0-SNvVZu-Cyp
To: /content/CACD2000_embeddings.tar.gz
100% 3.99G/3.99G [00:39<00:00, 102MB/s]


In [None]:
# Unpack the embeddings archive into the current working directory.
!tar -xf CACD2000_embeddings.tar.gz

In [None]:
# Download CACD2000_refined.tar from Google Drive.
# The URL must be quoted: an unquoted `&` ends the shell command there, which
# runs gdown in the background and drops the `export=download` parameter.
!gdown "https://drive.google.com/uc?id=1JWTqMEiUZ2yNUJJl_5Ctq8SuskVocn51&export=download"

Downloading...
From: https://drive.google.com/uc?id=1JWTqMEiUZ2yNUJJl_5Ctq8SuskVocn51
To: /content/CACD2000_refined.tar
100% 1.99G/1.99G [00:29<00:00, 68.6MB/s]


In [None]:
# Unpack the refined image archive into the current working directory.
!tar -xf CACD2000_refined.tar

In [None]:
# Use the %pip magic (not !pip) so the package is installed into the
# environment of the running kernel.
%pip install annoy

In [None]:
# Use the %pip magic (not !pip) so the package is installed into the
# environment of the running kernel.
%pip install deepface

In [None]:
from deepface import DeepFace
import json

def image_to_embedding(image_path):
    """Build an embedding record for a single image using DeepFace.

    Calls ``DeepFace.represent`` on ``image_path`` and keeps only the first
    object it returns.

    Args:
        image_path: Image location, passed unchanged to
            ``DeepFace.represent`` as ``img_path``.

    Returns:
        A dict containing ``'image_name'`` (the given ``image_path``) merged
        with the first object returned by ``DeepFace.represent``, or ``None``
        if the call raised or returned no objects. Failures are reported on
        stdout rather than raised, so callers can skip unusable images.
    """
    try:
        embedding_json = {'image_name': image_path}
        embedding_objs = DeepFace.represent(img_path=image_path)
        # Only the first returned object is kept; any others are ignored.
        embedding_json.update(embedding_objs[0])
        return embedding_json
    except Exception as exc:
        # Best-effort by design: report and skip this image. Catching
        # Exception (instead of a bare ``except:``) lets KeyboardInterrupt and
        # SystemExit propagate, so a long batch can still be interrupted.
        # ``format`` is used so a non-string ``image_path`` cannot make the
        # error report itself fail.
        print("Error at {}: {!r}".format(image_path, exc))
        return None

Directory  /root /.deepface created
Directory  /root /.deepface/weights created


In [None]:
from annoy import AnnoyIndex

def calculate_similarity_scores(target_embedding, other_embeddings, n_neighbors=10,
                                n_trees=50, metric='euclidean'):
    """Find the nearest neighbours of one embedding with an Annoy index.

    A fresh ``AnnoyIndex`` is built from ``other_embeddings`` on every call
    and then queried with ``target_embedding``.

    Args:
        target_embedding: Query vector; its length sets the index
            dimensionality.
        other_embeddings: Iterable of candidate vectors. Each is added to the
            index under its position in this iterable.
        n_neighbors: Number of neighbours to request from the index.
        n_trees: Number of trees passed to ``AnnoyIndex.build``.
        metric: Distance metric name passed to ``AnnoyIndex``.

    Returns:
        tuple: ``(similar_img_ids, distances)`` exactly as returned by
        ``AnnoyIndex.get_nns_by_vector(..., include_distances=True)``. The ids
        are positions in ``other_embeddings``.
    """
    index = AnnoyIndex(len(target_embedding), metric=metric)

    # Item ids are the enumeration positions, so returned ids map back to
    # ``other_embeddings`` by index.
    for item_id, vector in enumerate(other_embeddings):
        index.add_item(item_id, vector)
    index.build(n_trees)

    similar_img_ids, distances = index.get_nns_by_vector(
        target_embedding, n_neighbors, include_distances=True
    )
    return similar_img_ids, distances


In [None]:
def process_and_calculate_similarity(target_image_path, other_image_paths, n_neighbors=10):
    """Embed a target image and candidate images, then return neighbour distances.

    Args:
        target_image_path: Path of the query image.
        other_image_paths: Iterable of candidate image paths. Entries equal
            to ``target_image_path`` are skipped.
        n_neighbors: Number of neighbours to request, and also the minimum
            number of candidates that must embed successfully. Defaults to 10.

    Returns:
        The distances returned by ``calculate_similarity_scores`` for the
        target against the successfully embedded candidates, or ``None`` if
        the target could not be embedded or fewer than ``n_neighbors``
        candidates could be embedded. The neighbour ids are discarded.
    """
    target_embedding = image_to_embedding(target_image_path)
    if target_embedding is None:
        return None

    other_embeddings = []
    for image_path in other_image_paths:
        if image_path == target_image_path:
            continue  # never compare the target with itself
        embedding = image_to_embedding(image_path)
        if embedding is not None:
            other_embeddings.append(embedding)

    # The threshold follows ``n_neighbors`` instead of a separate hard-coded
    # 10, so the guard and the query size cannot drift apart.
    if len(other_embeddings) < n_neighbors:
        print("Error: only {} of the required {} comparison images could be processed.".format(
            len(other_embeddings), n_neighbors))
        return None

    _, distances = calculate_similarity_scores(
        target_embedding['embedding'],
        [emb['embedding'] for emb in other_embeddings],
        n_neighbors=n_neighbors,
    )

    return distances


In [None]:
# Example usage: compare one target image against ten candidate images and
# print the resulting neighbour distances.
target_image_path = "/content/CACD2000/58_Olivia_Hussey_0015.jpg"  # Replace with target image path
# Candidate images to compare against the target.
other_image_paths = ["/content/CACD2000/59_David_Keith_0002.jpg",
                     "/content/CACD2000/59_Barbara_Niven_0001.jpg",
                     "/content/CACD2000/59_Anjelica_Huston_0001.jpg",
                     "/content/CACD2000/58_Tony_Todd_0002.jpg",
                     "/content/CACD2000/58_Patrick_Bergin_0002.jpg",
                     "/content/CACD2000/58_Patricia_Richardson_0003.jpg",
                     "/content/CACD2000/58_Pamela_Sue_Martin_0003.jpg",
                     "/content/CACD2000/58_Pam_Dawber_0001.jpg",
                     "/content/CACD2000/58_Ornella_Muti_0002.jpg",
                     "/content/CACD2000/58_Oprah_Winfrey_0001.jpg"
                     ]

# Returns a list of distances, or None when processing failed.
similarities = process_and_calculate_similarity(target_image_path, other_image_paths)

# Print only when a result was produced.
if similarities is not None:
    print(similarities)


[0.6252131462097168, 0.7003781795501709, 0.7080273628234863, 0.7092623710632324, 0.724470317363739, 0.7511918544769287, 0.755162239074707, 0.7579216361045837, 0.8266392350196838, 0.8495576977729797]
