Import Statements and Parameters:

--------------------------------------------------------------------

IMPORTANT: Run these pip installs first.

If using miniconda: conda create --name project_env python=3.11

pip install tensorflow keras numpy pandas requests Pillow scikit-learn matplotlib opencv-python jupyter ipykernel

Download dataset here: https://iris.di.ubi.pt/ubipr.html (Original version, Change DATASET_PATH accordingly)

--------------------------------------------------------------------

IMG_SIZE - All Images scaled to this size.

BATCH_SIZE - Number of image pairs in each Siamese training batch.

EPOCHS - number of epochs in training.

DATASET_PATH - Path to dataset folder.

STEPS_PER_EPOCH - Number of training steps (batches) in each epoch.

In [6]:
import tensorflow as tf
from keras import layers, Model
from keras.preprocessing import image
import numpy as np
import os
import random
import keras

# -----------------------------
# PARAMETERS
# -----------------------------
IMG_SIZE = (105, 105)
BATCH_SIZE = 32
EPOCHS = 4  # increase for better results

# DATASET_PATH is read from dataset_path.txt, which is looked up in the
# current working directory and must contain the path to the dataset folder.
dataset_txt = os.path.join(os.getcwd(), "dataset_path.txt")
if not os.path.exists(dataset_txt):
    raise FileNotFoundError(f"dataset_path.txt not found at {dataset_txt}. Create it with the dataset path.")
with open(dataset_txt, "r", encoding="utf-8") as f:
    DATASET_PATH = f.read().strip()
if not DATASET_PATH:
    # A file holding only whitespace is treated the same as an empty file.
    raise ValueError("dataset_path.txt is empty. Put the dataset path inside the file.")
STEPS_PER_EPOCH = 50
# -----------------------------



Image Preparation Definitions:

load_images_by_filename(dataset_path, img_size=IMG_SIZE) - Loads the images in dataset_path, scales them to IMG_SIZE, and groups them by person. Outputs a dictionary in which each person has a list of images.

siamese_batch_generator(images_dict, batch_size=BATCH_SIZE) - For each pair, randomly chooses a case (same person or different people). In the same-person case it picks 2 images of one person and labels the pair 1; otherwise it picks 1 image from each of 2 different people and labels the pair 0. Pairs are generated dynamically while training runs, to avoid large memory usage.

make_tf_dataset(images_dict, batch_size=BATCH_SIZE) - Wraps siamese_batch_generator in a TensorFlow dataset built from images_dict.

In [3]:
# -----------------------------
# UTILITY FUNCTIONS
# -----------------------------
def load_images_by_filename(dataset_path, img_size=IMG_SIZE):
    """
    Read every PNG/JPEG file directly inside dataset_path and bucket the
    pixel arrays by person id.

    The person id is the first two underscore-separated tokens of the file
    name (e.g., C1_S1). Each image is resized to img_size, converted to an
    array and divided by 255.0.

    Returns a dict mapping person id -> list of image arrays.
    Raises ValueError when no matching image file is found.
    """
    valid_suffixes = (".png", ".jpg", ".jpeg")
    grouped = {}
    for file_name in os.listdir(dataset_path):
        if not file_name.lower().endswith(valid_suffixes):
            continue
        key = "_".join(file_name.split("_")[:2])  # e.g., C1_S1
        loaded = image.load_img(os.path.join(dataset_path, file_name), target_size=img_size)
        pixels = image.img_to_array(loaded) / 255.0
        grouped.setdefault(key, []).append(pixels)
    if not grouped:
        raise ValueError("No images found in dataset. Check DATASET_PATH and file names.")
    return grouped

def siamese_batch_generator(images_dict, batch_size=BATCH_SIZE):
    """
    Endlessly yield batches of image pairs for Siamese network training.

    Pairs are sampled on the fly, so only the loaded images (not every
    possible pair) are held in memory. Roughly half of the pairs show the
    same person (label 1) and half show two different people (label 0).

    Args:
        images_dict: mapping person_id -> list of image arrays. All images
            must share one shape; identities with no images are ignored.
        batch_size: number of pairs per yielded batch.

    Yields:
        ((X1, X2), y) where X1 and X2 are float32 arrays of shape
        (batch_size, *image_shape) and y is a float32 array of shape
        (batch_size,) holding 1.0 for same-person pairs and 0.0 otherwise.

    Raises:
        ValueError: if no identity with at least one image is available.
    """
    # len() rather than truthiness: an identity's images may be a NumPy array.
    person_ids = [pid for pid, imgs in images_dict.items() if len(imgs) > 0]
    if not person_ids:
        raise ValueError("No persons found in images_dict.")

    # Size the batch buffers from the data itself so the generator also works
    # for images that were loaded with a non-default size.
    image_shape = np.shape(images_dict[person_ids[0]][0])
    can_sample_negatives = len(person_ids) > 1

    def noisy_copy(img):
        # Tiny noise keeps a duplicated image from producing an exactly-zero
        # distance between the two branches.
        return img + np.random.normal(0, 1e-3, size=np.shape(img))

    while True:
        X1 = np.zeros((batch_size, *image_shape), dtype=np.float32)
        X2 = np.zeros((batch_size, *image_shape), dtype=np.float32)
        y = np.zeros((batch_size,), dtype=np.float32)
        for i in range(batch_size):
            want_same = random.random() < 0.5
            if want_same or not can_sample_negatives:
                # Same class. This is also the fallback when only one identity
                # exists: no genuine negative pair can be formed, and labelling
                # two copies of one image as "different" would train the
                # network to separate identical eyes.
                imgs = images_dict[random.choice(person_ids)]
                if len(imgs) < 2:
                    img1, img2 = noisy_copy(imgs[0]), noisy_copy(imgs[0])
                else:
                    img1, img2 = random.sample(list(imgs), 2)
                label = 1.0
            else:
                # Different class: one image from each of two distinct people.
                person1, person2 = random.sample(person_ids, 2)
                img1 = random.choice(images_dict[person1])
                img2 = random.choice(images_dict[person2])
                label = 0.0
            X1[i] = img1
            X2[i] = img2
            y[i] = label
        yield (X1, X2), y


def make_tf_dataset(images_dict, batch_size=BATCH_SIZE):
    """
    Expose siamese_batch_generator as a tf.data.Dataset.

    Each element is ((left_images, right_images), labels): two float32 image
    batches of shape (batch_size, *IMG_SIZE, 3) and one float32 label vector
    of shape (batch_size,).
    """
    left_spec, right_spec = (
        tf.TensorSpec(shape=(batch_size, *IMG_SIZE, 3), dtype=tf.float32)
        for _ in range(2)
    )
    label_spec = tf.TensorSpec(shape=(batch_size,), dtype=tf.float32)

    def pair_batches():
        # from_generator needs a zero-argument callable, so bind the arguments here.
        yield from siamese_batch_generator(images_dict, batch_size)

    return tf.data.Dataset.from_generator(
        pair_batches,
        output_signature=((left_spec, right_spec), label_spec),
    )

Model Definition:

create_base_cnn(input_shape=(*IMG_SIZE, 3)) - Creates CNN for model to extract features.

euclidean_distance(vects) - Computes euclidean distance between 2 feature vectors to calculate similarity.

create_siamese_model(input_shape=(*IMG_SIZE, 3)) - Creates siamese model which takes 2 input images, extracts their features, and outputs feature euclidean distance.

contrastive_loss(y_true, y_pred, margin=1.0) - 
    Defines a loss with 2 goals:
    1. When images are of the same person, the predicted distance is small.
    2. When images are of different people, the predicted distance is large (at least the margin).
    The loss penalizes the model when either of the above goals is not achieved.



In [4]:
# -----------------------------
# SIAMESE NETWORK MODEL
# -----------------------------
def create_base_cnn(input_shape=(*IMG_SIZE, 3)):
    """
    Build the shared feature extractor applied to each image of a pair.

    Three ReLU convolution + max-pooling stages are followed by a fourth
    ReLU convolution, a flatten, and a 4096-unit sigmoid dense layer whose
    output is the feature vector.
    """
    # (filters, kernel size) of the convolutions that are followed by pooling.
    pooled_stages = ((64, (10, 10)), (128, (7, 7)), (128, (4, 4)))

    inp = layers.Input(shape=input_shape)
    features = inp
    for filters, kernel in pooled_stages:
        features = layers.Conv2D(filters, kernel, activation='relu')(features)
        features = layers.MaxPooling2D()(features)
    features = layers.Conv2D(256, (4, 4), activation='relu')(features)
    features = layers.Flatten()(features)
    features = layers.Dense(4096, activation='sigmoid')(features)
    return Model(inp, features)

@keras.saving.register_keras_serializable()
def euclidean_distance(vects):
    """
    Calculates the similarity of 2 feature vectors as their euclidean distance.

    Args:
        vects: pair (x, y) of feature tensors with identical shapes; the
            squared differences are summed over axis 1.

    Returns:
        Tensor of distances with axis 1 kept as size 1.
    """
    x, y = vects
    sum_square = tf.reduce_sum(tf.square(x - y), axis=1, keepdims=True)
    # The gradient of sqrt is unbounded at 0, so identical feature vectors
    # would produce NaN gradients; clamping to epsilon keeps training stable.
    return tf.sqrt(tf.maximum(sum_square, keras.backend.epsilon()))

def create_siamese_model(input_shape=(*IMG_SIZE, 3)):
    """
    Assemble the two-input Siamese model.

    Both inputs go through one shared base CNN (same weights); the model
    output is the euclidean distance between the two feature vectors.
    """
    encoder = create_base_cnn(input_shape)
    input_a = layers.Input(shape=input_shape)
    input_b = layers.Input(shape=input_shape)
    # Calling the same encoder instance twice is what shares the weights.
    embeddings = [encoder(input_a), encoder(input_b)]
    distance = layers.Lambda(euclidean_distance)(embeddings)
    return Model([input_a, input_b], distance)

# -----------------------------
# CONTRASTIVE LOSS
# -----------------------------
def contrastive_loss(y_true, y_pred, margin=1.0):
    """
    Contrastive loss over predicted pair distances.

    Defines a loss with 2 goals:
    1. Distance small for same person (label 1): penalty is distance^2.
    2. Distance large for different people (label 0): penalty is
       max(margin - distance, 0)^2, i.e. zero once the pair is at least
       `margin` apart.

    Args:
        y_true: pair labels, one per prediction (1 = same, 0 = different).
        y_pred: predicted distances.
        margin: distance beyond which different-person pairs stop being
            penalized.

    Returns:
        Scalar mean loss over all pairs.
    """
    # Give the labels exactly the prediction shape. Without this, rank-1
    # labels multiplied with (batch, 1) predictions broadcast to
    # (batch, batch) and mix every label with every distance.
    y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), tf.shape(y_pred))
    square_pred = tf.square(y_pred)
    margin_square = tf.square(tf.maximum(margin - y_pred, 0))
    return tf.reduce_mean(y_true * square_pred + (1 - y_true) * margin_square)

# -----------------------------
# ACCURACY METRIC
# -----------------------------
@keras.saving.register_keras_serializable()
def siamese_accuracy(y_true, y_pred, threshold=0.5):
    """
    Accuracy for the siamese model.

    A pair is predicted as "same" (1.0) when its distance is below threshold
    and as "different" (0.0) otherwise; those predictions are compared with
    y_true through binary accuracy.
    """
    predicted_same = y_pred < threshold
    return tf.keras.metrics.binary_accuracy(y_true, tf.cast(predicted_same, tf.float32))


Image Preparation: Generates tensorflow dataset for training.

In [5]:
# -----------------------------
# IMAGE PREPARATION
# -----------------------------

# Load every image under DATASET_PATH, grouped by person id.
images_dict = load_images_by_filename(DATASET_PATH)
for person, imgs in images_dict.items():
    print(person, len(imgs))  # sanity check: person id and number of images loaded for it

# Dataset that yields batches of BATCH_SIZE labelled image pairs.
train_ds = make_tf_dataset(images_dict, BATCH_SIZE)

C100_S1 15
C100_S2 15
C101_S1 15
C101_S2 14
C102_S1 15
C102_S2 15
C103_S1 15
C103_S2 15
C104_S1 15
C104_S2 15
C105_S1 15
C105_S2 15
C106_S1 15
C106_S2 15
C107_S1 15
C107_S2 15
C108_S1 15
C108_S2 15
C109_S1 15
C109_S2 15
C10_S1 15
C10_S2 15
C110_S1 15
C110_S2 15
C111_S1 15
C111_S2 15
C112_S1 15
C112_S2 15
C113_S1 15
C114_S1 15
C115_S1 15
C116_S1 15
C117_S1 15
C118_S1 15
C119_S1 15
C11_S1 15
C120_S1 15
C121_S1 15
C121_S2 15
C122_S1 15
C122_S2 15
C123_S1 15
C124_S1 15
C125_S1 15
C125_S2 15
C126_S1 15
C126_S2 15
C127_S1 15
C127_S2 15
C128_S1 15
C128_S2 15
C129_S1 15
C12_S1 15
C130_S1 15
C131_S1 15
C132_S1 15
C133_S1 15
C134_S1 15
C135_S1 15
C136_S1 15
C137_S1 15
C138_S1 15
C139_S1 15
C13_S1 15
C140_S1 15
C141_S1 15
C142_S1 15
C143_S1 15
C144_S1 15
C145_S1 15
C146_S1 15
C147_S1 15
C147_S2 15
C148_S1 15
C148_S2 15
C149_S1 15
C149_S2 15
C14_S1 15
C150_S1 15
C150_S2 15
C151_S1 15
C151_S2 15
C152_S1 15
C152_S2 15
C153_S1 15
C154_S1 15
C155_S1 15
C156_S1 15
C157_S1 15
C158_S1 15
C159_S1 15
C15_S

Training:

In [7]:
# -----------------------------
# TRAINING
# -----------------------------
from keras.callbacks import EarlyStopping

# Stop training once the training loss stops improving.
# NOTE(review): with patience=5 and EPOCHS = 4 (parameters cell) this callback
# looks unable to trigger; raise EPOCHS or lower patience — confirm.
early_stop = EarlyStopping(
    monitor='loss',        # or 'val_loss' if using validation
    patience=5,            # stop after 5 epochs with no improvement
    restore_best_weights=True
)

# Build the two-input distance model and train it with the contrastive loss.
model = create_siamese_model()
model.compile(loss=contrastive_loss,
              optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
              metrics=[siamese_accuracy])
model.summary()

# The pair generator behind train_ds never ends, so the length of an epoch is
# set explicitly through steps_per_epoch.
model.fit(
    train_ds,
    steps_per_epoch=STEPS_PER_EPOCH,
    epochs=EPOCHS,
    callbacks=[early_stop]
)




Epoch 1/4
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 790ms/step - loss: 0.1675 - siamese_accuracy: 0.7763
Epoch 2/4
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 778ms/step - loss: 0.1076 - siamese_accuracy: 0.8863
Epoch 3/4
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 780ms/step - loss: 0.0988 - siamese_accuracy: 0.9013
Epoch 4/4
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 771ms/step - loss: 0.0907 - siamese_accuracy: 0.9081


<keras.src.callbacks.history.History at 0x1fc57cd6270>

Save: Saves model to siamese_eye_model.keras

In [8]:
# -----------------------------
# SAVE MODEL
# -----------------------------
def save_siamese_model(model, save_path="siamese_eye_model"):
    """
    Saves the entire model (structure + weights + optimizer state).

    Args:
        model: object exposing a Keras-style ``save(path)`` method.
        save_path: destination file. When it does not already end in a
            supported extension (.keras, .h5, .hdf5), ".keras" is appended so
            the model is written in the recommended .keras format. This
            covers the extension-less default, which current Keras versions
            reject.

    Prints the path that was actually written.
    """
    save_path = os.fspath(save_path)
    if not save_path.endswith((".keras", ".h5", ".hdf5")):
        save_path += ".keras"
    model.save(save_path)
    print(f"Model saved to: {save_path}")

# Call it after training; `model` is the network fitted in the training cell.
save_siamese_model(model, "siamese_eye_model.keras")  # NOTE(review): a bare folder name may not be accepted by every Keras version — confirm


Model saved to: siamese_eye_model.keras


Load: Loads siamese_eye_model.keras.

Note: Must define siamese network functions euclidean_distance() and contrastive_loss(). (Run model definition section)

In [9]:
# -----------------------------
# LOAD MODEL
# -----------------------------
from keras.models import load_model

def euclidean_distance(vects):
    """
    Euclidean distance between 2 feature tensors (summed over axis 1, kept
    as a size-1 axis). Same computation as the function used to build the
    model.
    """
    x, y = vects
    sum_square = tf.reduce_sum(tf.square(x - y), axis=1, keepdims=True)
    # sqrt has an unbounded gradient at 0; clamp so identical feature vectors
    # cannot produce NaN gradients.
    return tf.sqrt(tf.maximum(sum_square, tf.keras.backend.epsilon()))

def contrastive_loss(y_true, y_pred, margin=1.0):
    """
    Contrastive loss: distance^2 for same-person pairs (label 1) and
    max(margin - distance, 0)^2 for different-person pairs (label 0),
    averaged over all pairs. Passed to load_model as a custom object.
    """
    # Match the labels to the prediction shape so rank-1 labels cannot
    # broadcast against (batch, 1) predictions into a (batch, batch) matrix.
    y_true = tf.reshape(tf.cast(y_true, y_pred.dtype), tf.shape(y_pred))
    square_pred = tf.square(y_pred)
    margin_square = tf.square(tf.maximum(margin - y_pred, 0))
    return tf.reduce_mean(y_true * square_pred + (1 - y_true) * margin_square)


def load_siamese_model(save_path="siamese_eye_model.keras"):
    """
    Load a saved Siamese model from save_path.

    contrastive_loss is supplied through custom_objects so the saved model's
    custom loss can be resolved while loading.
    """
    custom_objects = {"contrastive_loss": contrastive_loss}
    return load_model(save_path, custom_objects=custom_objects)

# Example:
# loaded_model = load_siamese_model("siamese_eye_model.keras")
# print("Model loaded successfully!")


Eye Identification Definition:

load_gallery_embeddings(model, gallery_root, img_size=IMG_SIZE) - Returns a dictionary containing each person and a list of paths to their images.

identify_eye(model, query_img_path, gallery_dict, margin=1.0, threshold=70.0) - Preprocesses query image, then runs it through the model to compare similarity with each image in the gallery, then computes similarity based on distance output by model.

In [10]:

# Same value as IMG_SIZE in the parameters cell.
# NOTE(review): presumably repeated so this cell can run on its own; it must
# match the size the model was trained with — confirm.
IMG_SIZE = (105, 105)

def load_gallery_embeddings(model, gallery_root, img_size=IMG_SIZE):
    """
    Index the gallery folder by identity.

    Every sub-directory of gallery_root is treated as one identity, and the
    PNG/JPEG files directly inside it become that identity's images. Only
    paths are collected: no image is opened here, and the model and img_size
    arguments are accepted but not used.

    Returns a dict: {identity: [image_paths]}
    """
    image_suffixes = (".png", ".jpg", ".jpeg")
    gallery_dict = {}
    for identity in os.listdir(gallery_root):
        identity_path = os.path.join(gallery_root, identity)
        if not os.path.isdir(identity_path):
            continue  # stray files next to the identity folders are ignored
        gallery_dict[identity] = [
            os.path.join(identity_path, file_name)
            for file_name in os.listdir(identity_path)
            if file_name.lower().endswith(image_suffixes)
        ]
    return gallery_dict

def _load_eye_image(path):
    """Load one image resized to IMG_SIZE as an array divided by 255.0."""
    return image.img_to_array(image.load_img(path, target_size=IMG_SIZE)) / 255.0


def identify_eye(model, query_img_path, gallery_dict, margin=1.0, threshold=70.0):
    """
    Identify the identity of a query eye image.

    Args:
        model: trained Siamese network; given [query_batch, gallery_batch] it
            must return one distance per pair with shape (n, 1).
        query_img_path: path to the query image.
        gallery_dict: {identity: [list of image paths]}.
        margin: positive scale applied to the distance before it is turned
            into a similarity: similarity = (1 - tanh(distance / margin)) * 100.
        threshold: minimum similarity (%) to accept as a known identity.

    Returns:
        (identity, best_score). identity is "Unknown" when the best score is
        below threshold, and ("No gallery images found", 0.0) is returned
        when no gallery image could be loaded.

    Raises:
        ValueError: if margin is not positive.
    """
    # A zero margin used to surface as a swallowed ZeroDivisionError per image
    # followed by a misleading "No gallery images found" result.
    if margin <= 0:
        raise ValueError(f"margin must be positive, got {margin}")

    query_img = np.expand_dims(_load_eye_image(query_img_path), axis=0)

    identity_scores = {}
    for identity, img_paths in gallery_dict.items():
        gallery_imgs = []
        for g_path in img_paths:
            # Best effort: skip unreadable gallery files. Deliberately broad
            # because image decoders raise assorted exception types, but only
            # loading is guarded so genuine model errors are not hidden.
            try:
                gallery_imgs.append(_load_eye_image(g_path))
            except Exception as e:
                print(f"Skipping {g_path}: {e}")
        if not gallery_imgs:
            continue

        # One batched predict call per identity instead of one per image.
        gallery_batch = np.stack(gallery_imgs)
        query_batch = np.repeat(query_img, len(gallery_imgs), axis=0)
        distances = np.asarray(
            model.predict([query_batch, gallery_batch], verbose=0),
            dtype=np.float64,
        )[:, 0]
        similarities = (1 - np.tanh(distances / margin)) * 100
        # Take maximum similarity among images for this identity
        identity_scores[identity] = similarities.max()

    if not identity_scores:
        return "No gallery images found", 0.0

    # Determine the best match
    best_identity = max(identity_scores, key=identity_scores.get)
    best_score = identity_scores[best_identity]

    if best_score >= threshold:
        return best_identity, best_score
    return "Unknown", best_score


Compare Eyes: Uses identify_eye() to compare query_image with the images in the gallery and outputs the best-matching identity together with its similarity score.

------------------------------------------------------------------
IMPORTANT:

Run previous definition cells.

Gallery should contain subfolders for each person containing images of their eyes.

Each subfolder name should be the person's name or identifier.

------------------------------------------------------------------

In [20]:
# Reload the trained model from disk (requires the definition cells to have been run).
model = load_siamese_model("siamese_eye_model.keras")

# Gallery root: one sub-folder per person, named after that person.
GALLERY_PATH = r"C:\Users\Chase\OneDrive\Desktop\Courses\AI\Project\Eyes"

# Maps each identity (sub-folder name) to the list of its image paths.
gallery_dict = load_gallery_embeddings(model, GALLERY_PATH)

# Image to identify.
query_image = r"C:\Users\Chase\OneDrive\Desktop\Courses\AI\Project\Test3.jpg"


# Stricter threshold (80%) than identify_eye's default of 70%.
identity, confidence = identify_eye(model, query_image, gallery_dict, margin=1.0, threshold=80.0)

print(f"Identified as: {identity} with confidence {confidence:.2f}%")


Identified as: Chase with confidence 86.18%
