In [1]:
!pip install tensorflow numpy




In [13]:
!pip install scikit-learn




In [2]:
import tensorflow as tf
import numpy as np

print("TensorFlow version:", tf.__version__)
print("NumPy version:", np.__version__)


TensorFlow version: 2.19.0
NumPy version: 2.1.3


In [3]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array


In [8]:
os.chdir(r"C:\Users\ramesh\anaconda3\envs\facenet_env")
print("Now in:", os.getcwd())


Now in: C:\Users\ramesh\anaconda3\envs\facenet_env


In [19]:
import os

data_path = r"C:\Users\ramesh\anaconda3\envs\facenet_env\dataset\train"

print("Path exists:", os.path.exists(data_path))
print("Subfolders (classes):", os.listdir(data_path))


Path exists: True
Subfolders (classes): ['Akshay Kumar_1.jpg', 'Akshay Kumar_2.jpg', 'Akshay Kumar_3.jpg', 'Alia Bhatt_1.jpg', 'Alia Bhatt_2.jpg', 'Alia Bhatt_3.jpg', 'Amitabh Bachchan_1.jpg', 'Amitabh Bachchan_2.jpg', 'Amitabh Bachchan_3.jpg']


In [20]:
import os
import shutil

src_dir = r"C:\Users\ramesh\anaconda3\envs\facenet_env\dataset\train"
for file in os.listdir(src_dir):
    if file.endswith((".jpg", ".png")):
        name = "_".join(file.split("_")[:-1])  # e.g., Akshay Kumar
        folder = os.path.join(src_dir, name)
        os.makedirs(folder, exist_ok=True)
        shutil.move(os.path.join(src_dir, file), os.path.join(folder, file))


In [21]:
print("Subfolders:", os.listdir(src_dir))
for folder in os.listdir(src_dir):
    path = os.path.join(src_dir, folder)
    print(f"{folder}: {len(os.listdir(path))} images")


Subfolders: ['Akshay Kumar', 'Alia Bhatt', 'Amitabh Bachchan']
Akshay Kumar: 3 images
Alia Bhatt: 3 images
Amitabh Bachchan: 3 images


In [28]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, Input
from sklearn.utils import shuffle

# ========== SETTINGS ==========
IMAGE_SIZE = (160, 160)
DATASET_PATH = r"C:\Users\ramesh\anaconda3\envs\facenet_env\dataset\train"
MARGIN = 0.5  # For triplet loss
EPOCHS = 10
BATCH_SIZE = 2

# ========== LOAD IMAGES ==========
def load_images_from_folder(base_path):
    data = {}
    for person in os.listdir(base_path):
        person_path = os.path.join(base_path, person)
        if os.path.isdir(person_path):
            images = []
            for img_name in os.listdir(person_path):
                img_path = os.path.join(person_path, img_name)
                img = cv2.imread(img_path)
                if img is not None:
                    img = cv2.resize(img, IMAGE_SIZE)
                    img = img.astype("float32") / 255.0
                    images.append(img)
            if len(images) >= 2:  # Need at least 2 per person
                data[person] = images
    return data

# ========== MAKE TRIPLETS ==========
def create_triplets(data_dict):
    anchors, positives, negatives = [], [], []
    people = list(data_dict.keys())
    
    for person in people:
        pos_images = data_dict[person]
        for i in range(len(pos_images) - 1):
            anchor = pos_images[i]
            positive = pos_images[i + 1]
            
            # Pick a negative from another class
            neg_person = np.random.choice([p for p in people if p != person])
            import random  # Add at the top if not already
            negative = random.choice(data_dict[neg_person])
            
            anchors.append(anchor)
            positives.append(positive)
            negatives.append(negative)
    
    print(f"Loaded triplets: {len(anchors)}")
    return np.array(anchors), np.array(positives), np.array(negatives)

# ========== EMBEDDING MODEL ==========
def build_embedding_model():
    inp = Input(shape=(160, 160, 3))
    x = layers.Conv2D(32, (3, 3), activation='relu')(inp)
    x = layers.MaxPooling2D()(x)
    x = layers.Conv2D(64, (3, 3), activation='relu')(x)
    x = layers.MaxPooling2D()(x)
    x = layers.Flatten()(x)
    x = layers.Dense(128)(x)
    out = layers.Lambda(lambda y: tf.math.l2_normalize(y, axis=1))(x)
    return models.Model(inputs=inp, outputs=out)

# ========== TRIPLET LOSS ==========
def triplet_loss(y_true, y_pred):
    anchor, positive, negative = y_pred[:,0,:], y_pred[:,1,:], y_pred[:,2,:]
    pos_dist = tf.reduce_sum(tf.square(anchor - positive), axis=1)
    neg_dist = tf.reduce_sum(tf.square(anchor - negative), axis=1)
    basic_loss = pos_dist - neg_dist + MARGIN
    return tf.reduce_mean(tf.maximum(basic_loss, 0.0))

# ========== TRAINING MODEL ==========
import tensorflow as tf  # Add this if not already

def build_training_model(embedding_model):
    anchor_inp = layers.Input(name="anchor", shape=(160, 160, 3))
    pos_inp = layers.Input(name="positive", shape=(160, 160, 3))
    neg_inp = layers.Input(name="negative", shape=(160, 160, 3))

    anchor_embed = embedding_model(anchor_inp)
    pos_embed = embedding_model(pos_inp)
    neg_embed = embedding_model(neg_inp)

    # Replace K.expand_dims with tf.expand_dims
    anchor_embed = layers.Lambda(
        lambda x: tf.expand_dims(x, axis=1),
        output_shape=lambda s: (s[0], 1, s[1])
    )(anchor_embed)

    pos_embed = layers.Lambda(
        lambda x: tf.expand_dims(x, axis=1),
        output_shape=lambda s: (s[0], 1, s[1])
    )(pos_embed)

    neg_embed = layers.Lambda(
        lambda x: tf.expand_dims(x, axis=1),
        output_shape=lambda s: (s[0], 1, s[1])
    )(neg_embed)

    merged = layers.Concatenate(axis=1)([anchor_embed, pos_embed, neg_embed])

    model = models.Model(inputs=[anchor_inp, pos_inp, neg_inp], outputs=merged)
    model.compile(loss=triplet_loss, optimizer='adam')

    return model

# ========== RUN PIPELINE ==========
data = load_images_from_folder(DATASET_PATH)
anchors, positives, negatives = create_triplets(data)

# shuffle
anchors, positives, negatives = shuffle(anchors, positives, negatives, random_state=42)

embedding_model = build_embedding_model()
training_model = build_training_model(embedding_model)

y_dummy = np.zeros((anchors.shape[0], 1))  # Not used, required by Keras

training_model.fit(
    [anchors, positives, negatives],
    y_dummy,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS
)


Loaded triplets: 6
Epoch 1/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 1s/step - loss: 0.4960
Epoch 2/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - loss: 0.4737
Epoch 3/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1s/step - loss: 0.4672   
Epoch 4/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - loss: 0.3272
Epoch 5/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 757ms/step - loss: 0.0778
Epoch 6/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 766ms/step - loss: 0.0345
Epoch 7/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 610ms/step - loss: 0.0056
Epoch 8/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 846ms/step - loss: 0.0000e+00
Epoch 9/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 838ms/step - loss: 0.0125
Epoch 10/10
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 1s/step - loss: 0.00

<keras.src.callbacks.history.History at 0x1a659486800>

In [30]:
import os
import numpy as np
from PIL import Image
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.metrics.pairwise import cosine_similarity

# Constants
IMG_SIZE = 160
TRAIN_DIR = 'dataset/train'
TEST_DIR = 'dataset/test'

# Preprocessing Function
def preprocess_image(image_path):
    img = Image.open(image_path).convert('RGB')
    img = img.resize((IMG_SIZE, IMG_SIZE))
    img = img_to_array(img)
    img = img / 255.0  # Normalize
    return np.expand_dims(img, axis=0)

# 1. Get embeddings of training images
def get_train_embeddings(embedding_model, train_dir):
    embeddings = []
    labels = []

    for person_name in os.listdir(train_dir):
        person_folder = os.path.join(train_dir, person_name)
        if not os.path.isdir(person_folder):
            continue
        for img_name in os.listdir(person_folder):
            img_path = os.path.join(person_folder, img_name)
            img = preprocess_image(img_path)
            emb = embedding_model.predict(img)
            embeddings.append(emb[0])
            labels.append(person_name)
    return np.array(embeddings), labels

# 2. Get embedding of test image
def get_test_embedding(embedding_model, test_image_path):
    img = preprocess_image(test_image_path)
    emb = embedding_model.predict(img)
    return emb[0]

# 3. Predict
def predict_identity(test_embedding, train_embeddings, train_labels):
    similarities = cosine_similarity([test_embedding], train_embeddings)
    best_match_idx = np.argmax(similarities)
    return train_labels[best_match_idx], similarities[0][best_match_idx]

# ======= Run Testing =======
test_image_path = os.path.join(TEST_DIR, 'test.jpg')

# Step 1: Get embeddings from training data
train_embeddings, train_labels = get_train_embeddings(embedding_model, TRAIN_DIR)

# Step 2: Embed test image
test_embedding = get_test_embedding(embedding_model, test_image_path)

# Step 3: Predict
identity, score = predict_identity(test_embedding, train_embeddings, train_labels)

print(f"Predicted identity: {identity} (similarity: {score:.4f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 126ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 130ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 121ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 112ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 131ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 144ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
Predicted identity: Akshay Kumar (similarity: 0.9589)


In [33]:
!pip uninstall keras-facenet
!pip install keras-facenet --no-cache-dir




Collecting keras-facenet
  Downloading keras-facenet-0.3.2.tar.gz (10 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting mtcnn (from keras-facenet)
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting lz4>=4.3.3 (from mtcnn->keras-facenet)
  Downloading lz4-4.4.4-cp310-cp310-win_amd64.whl.metadata (3.9 kB)
Downloading mtcnn-1.0.0-py3-none-any.whl (1.9 MB)
   ---------------------------------------- 0.0/1.9 MB ? eta -:--:--
   --------------------------------- ------ 1.6/1.9 MB 7.6 MB/s eta 0:00:01
   ---------------------------------------- 1.9/1.9 MB 8.1 MB/s eta 0:00:00
Downloading lz4-4.4.4-cp310-cp310-win_amd64.whl (99 kB)
Building wheels for collected packages: keras-facenet
  Building wheel for keras-facenet (setup.py): started
  Building wheel for keras-facenet (setup.py): finished with status 'done'
  Created wheel for keras-facenet: filename=keras_facenet-0.3.2-py3-none-any.whl size=10388 sha2

  DEPRECATION: Building 'keras-facenet' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'keras-facenet'. Discussion can be found at https://github.com/pypa/pip/issues/6334


In [34]:
from keras_facenet import FaceNet
from sklearn.metrics.pairwise import cosine_similarity
import os
import numpy as np
from PIL import Image
from tensorflow.keras.preprocessing.image import img_to_array

IMG_SIZE = 160
TRAIN_DIR = 'dataset/train'
TEST_PATH = 'dataset/test/test.jpg'

# Preprocessing
def preprocess_image(image_path):
    img = Image.open(image_path).convert('RGB')
    img = img.resize((IMG_SIZE, IMG_SIZE))
    img = img_to_array(img)
    img = img / 255.0
    return np.expand_dims(img, axis=0)

# Load pretrained FaceNet
embedder = FaceNet()

# 1. Get embeddings for training images
def get_train_embeddings(train_dir):
    embeddings = []
    labels = []

    for person in os.listdir(train_dir):
        person_folder = os.path.join(train_dir, person)
        if not os.path.isdir(person_folder): continue

        for img_name in os.listdir(person_folder):
            img_path = os.path.join(person_folder, img_name)
            img = preprocess_image(img_path)
            emb = embedder.embeddings(img)[0]
            embeddings.append(emb)
            labels.append(person)

    return np.array(embeddings), labels

# 2. Get embedding for test image
def get_test_embedding(image_path):
    img = preprocess_image(image_path)
    return embedder.embeddings(img)[0]

# 3. Predict
def predict_identity(test_embedding, train_embeddings, train_labels):
    sims = cosine_similarity([test_embedding], train_embeddings)
    best_idx = np.argmax(sims)
    return train_labels[best_idx], sims[0][best_idx]

# Run
train_embeddings, train_labels = get_train_embeddings(TRAIN_DIR)
test_embedding = get_test_embedding(TEST_PATH)
pred_name, score = predict_identity(test_embedding, train_embeddings, train_labels)

print(f"Predicted: {pred_name} (similarity: {score:.3f})")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 11s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 286ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 263ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 244ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 250ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 257ms/step
Predicted: Amitabh Bachchan (similarity: 1.000)
