In [2]:
import os
import cv2
import numpy as np
from keras_facenet import FaceNet
from tqdm import tqdm
import random
from itertools import combinations
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import numpy as np


In [3]:

data_dir = r"C:\Users\ishan\OneDrive\Desktop\face recog\cropped_faces"

embedder = FaceNet()
image_size = (160, 160)





In [4]:
def preprocess(img):
    img = cv2.resize(img, image_size)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype('float32')
    mean, std = img.mean(), img.std()
    img = (img - mean) / std
    return np.expand_dims(img, axis=0)

embeddings = []
labels = []

In [5]:

for person in os.listdir(data_dir):
    person_dir = os.path.join(data_dir, person)
    if not os.path.isdir(person_dir):
        continue
    for file in os.listdir(person_dir):
        img_path = os.path.join(person_dir, file)
        img = cv2.imread(img_path)
        if img is None:
            continue
        face_input = preprocess(img)
        embedding = embedder.embeddings(face_input)[0]
        embeddings.append(embedding)
        labels.append(person)

print(f"Extracted {len(embeddings)} embeddings.")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 127ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 109ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 107ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 84ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 88ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6

In [6]:

from collections import defaultdict
label_to_embs = defaultdict(list)
for emb, label in zip(embeddings, labels):
    label_to_embs[label].append(emb)

X = []
y = []

# Positive pairs (same person)
for label, embs in label_to_embs.items():
    if len(embs) < 2:
        continue
    for emb1, emb2 in combinations(embs, 2):
        X.append(np.concatenate([emb1, emb2]))
        y.append(1)

# Negative pairs (different people)
all_labels = list(label_to_embs.keys())
for _ in range(len(y)):  # generate as many negatives as positives
    label1, label2 = random.sample(all_labels, 2)
    emb1 = random.choice(label_to_embs[label1])
    emb2 = random.choice(label_to_embs[label2])
    X.append(np.concatenate([emb1, emb2]))
    y.append(0)

X = np.array(X)
y = np.array(y)
print(f"Generated {len(X)} pairs.")


Generated 635914 pairs.


In [7]:


# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# MLP model
model = Sequential([
    Dense(256, activation='relu', input_shape=(X.shape[1],)),
    Dropout(0.3),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(1, activation='sigmoid')
])

model.compile(optimizer=Adam(1e-4), loss='binary_crossentropy', metrics=['accuracy'])

# Train
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

# Evaluate
y_pred = model.predict(X_test).ravel()
y_pred_binary = (y_pred > 0.5).astype(int)
acc = accuracy_score(y_test, y_pred_binary)
print(f"Test Accuracy: {acc:.4f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m14309/14309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 7ms/step - accuracy: 0.5000 - loss: 0.6933 - val_accuracy: 0.4978 - val_loss: 0.6932
Epoch 2/10
[1m14309/14309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m93s[0m 7ms/step - accuracy: 0.4995 - loss: 0.6932 - val_accuracy: 0.4978 - val_loss: 0.6932
Epoch 3/10
[1m14309/14309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m97s[0m 7ms/step - accuracy: 0.5004 - loss: 0.6932 - val_accuracy: 0.5022 - val_loss: 0.6931
Epoch 4/10
[1m14309/14309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m95s[0m 7ms/step - accuracy: 0.4988 - loss: 0.6932 - val_accuracy: 0.4978 - val_loss: 0.6932
Epoch 5/10
[1m14309/14309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m94s[0m 7ms/step - accuracy: 0.5007 - loss: 0.6932 - val_accuracy: 0.4978 - val_loss: 0.6932
Epoch 6/10
[1m14309/14309[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m107s[0m 7ms/step - accuracy: 0.5006 - loss: 0.6932 - val_accuracy: 0.5022 - val_loss: 0.6

In [9]:
import numpy as np
from sklearn.metrics import accuracy_score

# Split X into two embedding halves
emb1s = X[:, :512]
emb2s = X[:, 512:]

# Compute cosine similarity manually (much faster)
dot_products = np.sum(emb1s * emb2s, axis=1)
norms1 = np.linalg.norm(emb1s, axis=1)
norms2 = np.linalg.norm(emb2s, axis=1)
cosine_similarities = dot_products / (norms1 * norms2 + 1e-10)  # Avoid divide-by-zero

# Predict: 1 if similarity > threshold (e.g. 0.5)
y_pred = (cosine_similarities > 0.5).astype(int)

# Accuracy
acc = accuracy_score(y, y_pred)
print(f"Cosine similarity accuracy: {acc:.4f}")


Cosine similarity accuracy: 0.5000


In [11]:
import torch
print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.get_device_name(0))

True
0
NVIDIA GeForce GTX 1650


In [15]:
import tensorflow as tf
print(tf.__version__)
print("GPUs available:", tf.config.list_physical_devices('GPU'))


2.19.0
GPUs available: []
