In [None]:
# Uncomment on first run to install ChromaDB into the kernel's environment:
# %pip install chromadb

In [2]:
# Imports
import os
import torch
import numpy as np
from PIL import Image
from torchvision import transforms
from facenet_pytorch import MTCNN, InceptionResnetV1
from sklearn.preprocessing import LabelEncoder, Normalizer
import chromadb
from chromadb.utils import embedding_functions

In [3]:
# Paths
base_path = r"C:\Users\nasir\OneDrive\Desktop\facedata_croped"

# One sub-folder per identity. The position of a folder name in this list is
# used as that identity's integer label, so the order must be reproducible:
# os.listdir() returns entries in arbitrary order, hence the explicit sort.
# Plain files sitting next to the identity folders are skipped because the
# loading loop calls os.listdir() on every entry and would fail on a file.
all_identities = sorted(
    entry
    for entry in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, entry))
)
print(f"Identities found: {len(all_identities)}")


Identities found: 49


In [4]:
# Face detector
# MTCNN instance shared by the rest of the notebook, configured with
# image_size=160 and no extra margin (margin=0).
mtcnn = MTCNN(
    image_size=160,
    margin=0,
)

In [5]:
# Function to extract face
def extract_face(img_path):
    """Run the module-level ``mtcnn`` detector on the image stored at ``img_path``.

    Parameters
    ----------
    img_path : str or path-like
        Location of the image file to read.

    Returns
    -------
    Whatever ``mtcnn`` returns for the RGB-converted image, or ``None`` when
    ``mtcnn`` returns ``None`` or when the file cannot be opened/decoded as an
    image (a message is printed in that case).
    """
    try:
        # The context manager closes the underlying file as soon as the RGB
        # copy has been made; convert() returns a new, independent image.
        with Image.open(img_path) as img:
            rgb = img.convert('RGB')
    except OSError as exc:
        # Pillow's UnidentifiedImageError is an OSError subclass, so this also
        # covers non-image files. Report and skip instead of aborting a long
        # loading loop because of a single bad file.
        print(f"Skipping unreadable image {img_path}: {exc}")
        return None
    return mtcnn(rgb)

In [6]:
# Load faces
# Walk every identity folder; the folder's position in all_identities is the
# integer label recorded for each face found inside it.
data, labels = [], []
for label, identity in enumerate(all_identities):
    identity_path = os.path.join(base_path, identity)
    for img_name in os.listdir(identity_path):
        img_path = os.path.join(identity_path, img_name)
        face = extract_face(img_path)
        if face is None:
            # Nothing usable came back for this image, so it is left out.
            continue
        data.append(face)
        labels.append(label)
print(f"Total faces loaded: {len(data)}")

Total faces loaded: 1148


In [18]:
# Stack data
# Combine the collected face tensors along a new leading dimension and turn
# the label list into a NumPy array.
faces = torch.stack(data, dim=0)
labels = np.array(labels)

In [19]:
# Load FaceNet model
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# InceptionResnetV1 with the 'vggface2' pretrained weights, switched to eval
# mode and then moved onto the selected device.
facenet = InceptionResnetV1(pretrained='vggface2')
facenet.eval()
facenet = facenet.to(device)

In [20]:
# Get embeddings
# Each face is pushed through FaceNet on its own (batch of one) with gradient
# tracking disabled; the per-face outputs are stacked row-wise into one array.
with torch.no_grad():
    embeddings = np.vstack([
        facenet(face.unsqueeze(0).to(device)).cpu().numpy()
        for face in faces
    ])

In [None]:

from chromadb.utils import embedding_functions

import chromadb
# Define a path for persistent storage for your ChromaDB
# Make sure this directory exists or can be created by the script.
db_path = r"C:\Users\nasir\OneDrive\Desktop\my_chroma_db"
client = chromadb.PersistentClient(path=db_path)

# Collection holding the face embeddings. It has to be created here because
# the cells below use `collection` and nothing else in the notebook defines it.
# get_or_create_collection makes the cell safe to re-run against the same
# on-disk database. Cosine space is requested because the prediction code
# reports `1 - distance` as a similarity, which only holds for cosine distance.
# NOTE(review): the collection name is a new choice; if an existing database
# already uses a different name or distance space, adjust it here.
collection = client.get_or_create_collection(
    name="face_embeddings",
    metadata={"hnsw:space": "cosine"},
)

In [28]:
# Add embeddings to ChromaDB
# L2-normalise the FaceNet embeddings. The same `in_encoder` object is what the
# prediction function applies to a query embedding, so it is defined here, once.
# Normalizer is stateless, so transform() can be called without a prior fit().
in_encoder = Normalizer(norm='l2')
embeddings_norm = in_encoder.transform(embeddings)

# `labels` already holds each face's integer position in all_identities (it
# comes from enumerate(all_identities)), so it is used directly as the lookup
# index. Re-encoding it could renumber labels whenever an identity contributed
# no faces, which would make all_identities[label] point at the wrong name.
labels_enc = labels

# Write in batches rather than one call per vector, and upsert instead of add
# so that re-running this cell against the persistent database overwrites the
# entries with ids "0".."N-1" instead of colliding with them.
# NOTE(review): entries with higher ids left over from an earlier, larger run
# are not removed by this cell.
BATCH_SIZE = 500
for start in range(0, len(embeddings_norm), BATCH_SIZE):
    stop = min(start + BATCH_SIZE, len(embeddings_norm))
    batch_labels = labels_enc[start:stop]
    collection.upsert(
        ids=[str(idx) for idx in range(start, stop)],
        embeddings=embeddings_norm[start:stop].tolist(),
        metadatas=[{"label": str(label)} for label in batch_labels],
        documents=[all_identities[label] for label in batch_labels],
    )
print("Embeddings stored in ChromaDB.")

Embeddings stored in ChromaDB.


In [25]:
# Predict function using ChromaDB
def predict_identity_chroma(image_path, facenet, mtcnn, in_encoder, collection, device, all_identities, threshold=0.6):
    """Identify the face in an image by nearest-neighbour lookup in a Chroma collection.

    Parameters
    ----------
    image_path : str or path-like
        Image file to classify.
    facenet : callable
        Embedding model; called on the face tensor with a leading batch
        dimension added, after the tensor is moved to ``device``.
    mtcnn : callable
        Face detector; called on the RGB image, returns a face tensor or ``None``.
    in_encoder : object with ``transform``
        Applied to the query embedding before the lookup.
    collection : Chroma collection
        Queried with the transformed embedding for its single nearest entry.
    device : torch.device
        Device the face tensor is moved to before calling ``facenet``.
    all_identities : list
        Not used by this function; kept so existing callers keep working.
    threshold : float, default 0.6
        The nearest entry is accepted only if its distance is strictly below
        this value.

    Returns
    -------
    tuple
        ``(name, confidence)`` where ``name`` is the ``documents`` value of the
        nearest stored entry and ``confidence`` is ``(1 - distance) * 100``.
        ``(None, None)`` when no face is detected, the query returns no
        entries, or the nearest distance is not below ``threshold``.

    Notes
    -----
    ``1 - distance`` is a similarity only if the collection uses cosine
    distance; verify the collection's distance space when reading the
    confidence value.
    """
    # Close the file handle as soon as the RGB copy exists.
    with Image.open(image_path) as img:
        rgb = img.convert('RGB')

    face = mtcnn(rgb)
    if face is None:
        return None, None

    with torch.no_grad():
        embedding = facenet(face.unsqueeze(0).to(device)).cpu().numpy()
    embedding = in_encoder.transform(embedding)

    result = collection.query(
        query_embeddings=embedding.tolist(),
        n_results=1
    )

    # An empty collection yields an empty inner list; indexing [0][0] on it
    # would raise IndexError, so treat "no neighbours" as "not found".
    distances = (result or {}).get('distances') or [[]]
    if not distances[0]:
        return None, None

    distance = distances[0][0]
    if distance < threshold:
        name = result['documents'][0][0]
        similarity = 1 - distance
        return name, similarity * 100
    return None, None
# Example usage
image_path = r"C:\Users\nasir\OneDrive\Pictures\Screenshots\Screenshot 2025-07-04 204102.png"  # Change this to your test image
identity, confidence = predict_identity_chroma(
    image_path=image_path,
    facenet=facenet,
    mtcnn=mtcnn,
    in_encoder=in_encoder,
    collection=collection,
    device=device,
    all_identities=all_identities,
)

# A None identity covers both "no face detected" and "nearest match not below
# the threshold".
if identity is None:
    print("No face detected or identity not found.")
else:
    print(f'Predicted Identity: {identity}, Confidence: {confidence:.2f}%')

Predicted Identity: Elizabeth Olsen, Confidence: 78.46%


In [13]:
import joblib
# Serialise the identity-name list to 'all_identities_CDB.pkl' in the current
# working directory.
# NOTE(review): presumably reloaded elsewhere to map stored labels back to
# names — confirm against the consumer of this file.
joblib.dump(all_identities, 'all_identities_CDB.pkl')

['all_identities_CDB.pkl']