# Face ID Retrieval System for NADRA
**Assignment:** Develop a robust system that retrieves a person's CNIC and information using only their image.  
**Dataset:** LFW (from Kaggle)  
**Requirements:**
- One-shot recognition
- Use matching and non-matching pairs for training
- Modular inference pipeline with FAISS/KNN
- Ability to switch between custom and pretrained backbone
- Evaluation of model performance

## Install Packages & Imports

In [None]:
# ------------------------ Install Required Packages ------------------------
!pip install kagglehub faiss-cpu torch torchvision

# ------------------------ Imports ------------------------
import os, random, zipfile
from PIL import Image
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torchvision.models as models
import numpy as np
import faiss
import kagglehub

## Device & Dataset Download

In [None]:
# ------------------------ Device ------------------------
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

In [None]:
# ------------------------ Download LFW Dataset ------------------------
print("Downloading LFW dataset from Kaggle...")
path = kagglehub.dataset_download("jessicali9530/lfw-dataset")
dataset_dir = path + "/lfw-deepfunneled/lfw-deepfunneled"

if not os.path.exists(dataset_dir):
    zip_path = path + "/lfw-deepfunneled.zip"
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(path)
print("Dataset ready at:", dataset_dir)


## Matching & Non-Matching Pairs

In [None]:
# ------------------------ Pair Creation Functions ------------------------
def create_matching_pairs_with_filtering(dataset_dir, max_pairs=5000):
    matching_pairs, labels = [], []
    for person_name in os.listdir(dataset_dir):
        person_dir = os.path.join(dataset_dir, person_name)
        images = [os.path.join(person_dir, img) for img in os.listdir(person_dir) if img.endswith('.jpg')]
        if len(images) > 1:
            for i in range(len(images)):
                for j in range(i+1, len(images)):
                    matching_pairs.append((images[i], images[j]))
                    labels.append(1)
                if len(matching_pairs) >= max_pairs:
                    break
        if len(matching_pairs) >= max_pairs:
            break
    return matching_pairs[:max_pairs], labels[:max_pairs]

def create_non_matching_pairs_balanced(dataset_dir, num_matching_pairs, max_pairs=5000):
    non_matching_pairs, non_matching_labels = [], []
    people = os.listdir(dataset_dir)
    num_to_generate = min(num_matching_pairs, len(people)*(len(people)-1)//2, max_pairs)
    for _ in range(num_to_generate):
        person_1 = random.choice(people)
        person_2 = random.choice([p for p in people if p != person_1])
        images_1 = [os.path.join(dataset_dir, person_1, img) for img in os.listdir(os.path.join(dataset_dir, person_1)) if img.endswith('.jpg')]
        images_2 = [os.path.join(dataset_dir, person_2, img) for img in os.listdir(os.path.join(dataset_dir, person_2)) if img.endswith('.jpg')]
        if images_1 and images_2:
            img1, img2 = random.choice(images_1), random.choice(images_2)
            non_matching_pairs.append((img1, img2))
            non_matching_labels.append(0)
        if len(non_matching_pairs) >= max_pairs:
            break
    return non_matching_pairs[:max_pairs], non_matching_labels[:max_pairs]

# ------------------------ Generate Pairs ------------------------
matching_pairs, matching_labels = create_matching_pairs_with_filtering(dataset_dir, max_pairs=5000)
non_matching_pairs, non_matching_labels = create_non_matching_pairs_balanced(dataset_dir, len(matching_pairs), max_pairs=5000)
all_pairs = matching_pairs + non_matching_pairs
all_labels = matching_labels + non_matching_labels
print(f"Matching pairs: {len(matching_pairs)}, Non-matching pairs: {len(non_matching_pairs)}")
print(f"Total pairs: {len(all_pairs)}")

## Dataset Class & DataLoader

In [None]:
# ------------------------ Dataset Class ------------------------
class LFWPairsDataset(Dataset):
    def __init__(self, pairs, labels, transform=None):
        self.pairs = pairs
        self.labels = labels
        self.transform = transform
    def __len__(self):
        return len(self.pairs)
    def __getitem__(self, idx):
        img1_path, img2_path = self.pairs[idx]
        label = self.labels[idx]
        img1 = Image.open(img1_path).convert("RGB")
        img2 = Image.open(img2_path).convert("RGB")
        if self.transform:
            img1 = self.transform(img1)
            img2 = self.transform(img2)
        return img1, img2, torch.tensor(label, dtype=torch.float32)


## Siamese Network & Training

In [None]:
# ------------------------ Transforms and DataLoader ------------------------
transform = transforms.Compose([transforms.Resize((100,100)), transforms.ToTensor()])
dataset = LFWPairsDataset(all_pairs, all_labels, transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [None]:
# ------------------------ Pretrained Backbone ------------------------
class Backbone(nn.Module):
    def __init__(self, embedding_dim=128, pretrained=True):
        super(Backbone, self).__init__()
        resnet = models.resnet18(pretrained=pretrained)
        self.features = nn.Sequential(*list(resnet.children())[:-1])
        self.fc = nn.Linear(resnet.fc.in_features, embedding_dim)
    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x


In [None]:
# ------------------------ Contrastive Loss ------------------------
class ContrastiveLoss(nn.Module):
    def __init__(self, margin=1.0):
        super(ContrastiveLoss, self).__init__()
        self.margin = margin
    def forward(self, output1, output2, label):
        euclidean_distance = nn.functional.pairwise_distance(output1, output2)
        loss = label * torch.pow(euclidean_distance, 2) + (1-label) * torch.pow(torch.clamp(self.margin - euclidean_distance, min=0.0),2)
        return loss.mean()

In [15]:
# ------------------------ Siamese Network Training ------------------------
class SiameseNetwork(nn.Module):
    def __init__(self, embedding_dim=128, pretrained=True):
        super(SiameseNetwork, self).__init__()
        self.backbone = Backbone(embedding_dim=embedding_dim, pretrained=pretrained)
    def forward_once(self, x):
        return self.backbone(x)
    def forward(self, x1, x2):
        return self.forward_once(x1), self.forward_once(x2)

model = SiameseNetwork().to(device)
criterion = ContrastiveLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

print("Starting training...")
for epoch in range(5):  # fewer epochs to see output quickly
    model.train()
    total_loss = 0
    for i, (img1, img2, labels) in enumerate(dataloader):
        img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)
        optimizer.zero_grad()
        out1, out2 = model(img1, img2)
        loss = criterion(out1, out2, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        if i % 50 == 0:
            print(f"Epoch {epoch+1}, Batch {i}, Loss: {loss.item():.4f}")
    print(f"Epoch {epoch+1} completed, Average Loss: {total_loss/len(dataloader):.4f}")



Using device: cuda
Downloading LFW dataset from Kaggle...
Using Colab cache for faster access to the 'lfw-dataset' dataset.
Dataset ready at: /kaggle/input/lfw-dataset/lfw-deepfunneled/lfw-deepfunneled




Matching pairs: 5000, Non-matching pairs: 5000
Total pairs: 10000
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 234MB/s]


Starting training...
Epoch 1, Batch 0, Loss: 22.4602
Epoch 1, Batch 50, Loss: 0.2165
Epoch 1, Batch 100, Loss: 1.1209
Epoch 1, Batch 150, Loss: 0.1901
Epoch 1, Batch 200, Loss: 0.1562
Epoch 1, Batch 250, Loss: 0.1562
Epoch 1, Batch 300, Loss: 0.1090
Epoch 1 completed, Average Loss: 1.8603
Epoch 2, Batch 0, Loss: 0.1030
Epoch 2, Batch 50, Loss: 0.0826
Epoch 2, Batch 100, Loss: 0.0885
Epoch 2, Batch 150, Loss: 0.0262
Epoch 2, Batch 200, Loss: 0.0947
Epoch 2, Batch 250, Loss: 0.0211
Epoch 2, Batch 300, Loss: 0.0243
Epoch 2 completed, Average Loss: 0.0760
Epoch 3, Batch 0, Loss: 0.1170
Epoch 3, Batch 50, Loss: 0.0697
Epoch 3, Batch 100, Loss: 0.0081
Epoch 3, Batch 150, Loss: 0.0078
Epoch 3, Batch 200, Loss: 0.0050
Epoch 3, Batch 250, Loss: 0.0266
Epoch 3, Batch 300, Loss: 0.0599
Epoch 3 completed, Average Loss: 0.0418
Epoch 4, Batch 0, Loss: 0.0459
Epoch 4, Batch 50, Loss: 0.0236
Epoch 4, Batch 100, Loss: 0.0493
Epoch 4, Batch 150, Loss: 0.0176
Epoch 4, Batch 200, Loss: 0.0221
Epoch 4, Bat

## Build Embeddings Database (FAISS/KNN)

In [22]:
#------------------------------Embeddings--------------------------------
def build_embedding_db(model, image_dict, transform, device):
    model.eval()
    embeddings, ids = [], []
    for person, imgs in image_dict.items():
        for img_path in imgs:
            img = Image.open(img_path).convert('RGB')
            img = transform(img).unsqueeze(0).to(device)
            with torch.no_grad():
                emb = model.forward_once(img).cpu().numpy()  # <-- fix here
            embeddings.append(emb)
            ids.append(person)
    embeddings = np.vstack(embeddings).astype('float32')
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(embeddings)
    knn_index = embeddings
    return embeddings, ids, faiss_index, knn_index


In [23]:
def recognize_face(model, img_path, transform, device, ids, knn_index=None, faiss_index=None, method='faiss', k=1):
    model.eval()
    img = Image.open(img_path).convert('RGB')
    img = transform(img).unsqueeze(0).to(device)
    with torch.no_grad():
        emb = model.forward_once(img).cpu().numpy().astype('float32')  # <-- use forward_once
    if method=='faiss':
        D,I = faiss_index.search(emb,k)
        return [ids[i] for i in I[0]], D[0]
    elif method=='knn':
        distances = np.linalg.norm(knn_index - emb, axis=1)
        idx = np.argsort(distances)[:k]
        return [ids[i] for i in idx], distances[idx]

In [24]:
# ------------------------ Build Embedding Database ------------------------
# use only 1 image per person (single-shot)
image_dict = {}
for person in os.listdir(dataset_dir):
    person_dir = os.path.join(dataset_dir, person)
    images = [os.path.join(person_dir, img) for img in os.listdir(person_dir) if img.endswith('.jpg')]
    if images:
        image_dict[person] = [images[0]]  # take the first image only for embedding

print("Building embedding database...")
embeddings, ids, faiss_index, knn_index = build_embedding_db(model, image_dict, transform, device)
print("Embedding database ready.")

# ------------------------ Pick a random test image ------------------------
import random
test_person = random.choice(list(image_dict.keys()))
test_img_path = random.choice(image_dict[test_person])
print(f"Testing image from person: {test_person}")
Image.open(test_img_path).show()

# ------------------------ Predict using FAISS ------------------------
pred_ids_faiss, distances_faiss = recognize_face(model, test_img_path, transform, device, ids, faiss_index=faiss_index, method='faiss', k=1)
print("FAISS Prediction:", pred_ids_faiss, "Distance:", distances_faiss)

# ------------------------ Predict using KNN ------------------------
pred_ids_knn, distances_knn = recognize_face(model, test_img_path, transform, device, ids, knn_index=knn_index, method='knn', k=1)
print("KNN Prediction:", pred_ids_knn, "Distance:", distances_knn)

Building embedding database...
Embedding database ready.
Testing image from person: Eliane_Karp
FAISS Prediction: ['Eliane_Karp'] Distance: [0.]
KNN Prediction: ['Eliane_Karp'] Distance: [0.]


## Evaluation

In [25]:
import torch
from sklearn.metrics import accuracy_score, roc_auc_score

def evaluate_siamese(model, dataloader, device, threshold=0.5):
    """
    Evaluate Siamese Network on a dataloader of pairs
    Returns accuracy, ROC-AUC, and prints matching/non-matching performance
    """
    model.eval()
    all_labels = []
    all_preds = []

    with torch.no_grad():
        for img1, img2, labels in dataloader:
            img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)
            out1, out2 = model(img1, img2)
            # Compute Euclidean distance
            dist = nn.functional.pairwise_distance(out1, out2)
            # Predict match if distance < threshold
            preds = (dist < threshold).float()
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(preds.cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    try:
        auc = roc_auc_score(all_labels, -np.array(all_preds))  # negative for distance
    except:
        auc = None
    print(f"Evaluation -> Accuracy: {acc*100:.2f}%, ROC-AUC: {auc}")
    return acc, auc


In [26]:

#using 10% of dataset for evaluation
from torch.utils.data import random_split

val_size = int(0.1 * len(dataset))
train_size = len(dataset) - val_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Evaluate
evaluate_siamese(model, val_loader, device, threshold=1.0)


Evaluation -> Accuracy: 93.90%, ROC-AUC: 0.057874762808349134


(0.939, np.float64(0.057874762808349134))

## Single-Shot Retrieval Evaluation

In [27]:
def evaluate_retrieval(model, image_dict, transform, device, method='faiss', k=1):
    """
    Evaluate single-shot recognition using FAISS or KNN
    Returns top-1 accuracy
    """
    # Build embeddings database
    embeddings, ids, faiss_index, knn_index = build_embedding_db(model, image_dict, transform, device)

    correct = 0
    total = 0

    for person, imgs in image_dict.items():
        for img_path in imgs:
            total += 1
            pred_ids, distances = recognize_face(model, img_path, transform, device, ids, knn_index=knn_index, faiss_index=faiss_index, method=method, k=k)
            if person in pred_ids:  # top-1 match
                correct += 1

    accuracy = correct / total
    print(f"{method.upper()} single-shot retrieval accuracy: {accuracy*100:.2f}%")
    return accuracy


In [28]:
# For demo, use a subset of image_dict if too large
subset_dict = {k: image_dict[k] for i, k in enumerate(list(image_dict.keys())[:50])}

# Evaluate FAISS
evaluate_retrieval(model, subset_dict, transform, device, method='faiss', k=1)

# Evaluate KNN
evaluate_retrieval(model, subset_dict, transform, device, method='knn', k=1)


FAISS single-shot retrieval accuracy: 100.00%
KNN single-shot retrieval accuracy: 100.00%


1.0