In [None]:
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout
from keras.optimizers import Adam
from keras.callbacks import TensorBoard

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import accuracy_score
import itertools

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import load_img, img_to_array
import tarfile


# Extract the LFW dataset if it has not been extracted yet.
# The images are loaded from 'lfw_funneled' further down, so that is the
# directory whose presence marks a completed extraction. The previous check for
# 'lfw' never matched that directory and forced a full re-extraction on each run.
if not os.path.exists('lfw_funneled'):
    with tarfile.open('/content/lfw-funneled.tgz', 'r:gz') as tar:
        # NOTE(review): extractall() trusts member paths inside the archive;
        # only use it on archives from a trusted source.
        tar.extractall()

# Function to load images and labels from LFW dataset
def load_lfw_dataset(data_dir, target_size=(112, 92)):
    """Load every image below ``data_dir`` as a normalized grayscale array.

    ``data_dir`` is expected to contain one sub-directory per person; every
    entry inside a person's directory is loaded as one sample of that person.

    Args:
        data_dir: Root directory holding the per-person folders.
        target_size: Size forwarded to ``load_img`` for resizing. Defaults to
            ``(112, 92)``, the size this notebook's network is built for.

    Returns:
        A tuple ``(images, labels)`` of NumPy arrays. ``images`` holds the
        pixel arrays divided by 255; ``labels`` holds one integer per image,
        the index of the person's folder in sorted order.
    """
    images = []
    labels = []

    # Only directories are identities. Stray files in data_dir must not
    # consume label ids, otherwise the label range contains empty classes.
    person_folders = sorted(
        entry for entry in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, entry))
    )
    label_map = {person: idx for idx, person in enumerate(person_folders)}

    for person in person_folders:
        person_dir = os.path.join(data_dir, person)
        # os.listdir order is arbitrary; sorting keeps the sample order (and
        # therefore any seeded split made from it) reproducible.
        for image_file in sorted(os.listdir(person_dir)):
            image_path = os.path.join(person_dir, image_file)
            image = load_img(image_path, color_mode='grayscale', target_size=target_size)
            image = img_to_array(image) / 255.0  # Normalize pixel values
            images.append(image)
            labels.append(label_map[person])

    return np.array(images), np.array(labels)

# Read the whole LFW dataset into memory
data_dir = 'lfw_funneled'
images, labels = load_lfw_dataset(data_dir)

# Hold out 20% of the samples for testing, then carve 10% of the remaining
# samples off as a validation set. Both splits use the same fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    images, labels, test_size=0.2, random_state=42
)
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train, y_train, test_size=0.1, random_state=42
)

# Report the shape of every split as a sanity check
for caption, split_array in (
    ('x_train shape:', x_train),
    ('y_train shape:', y_train),
    ('x_valid shape:', x_valid),
    ('y_valid shape:', y_valid),
    ('x_test shape:', x_test),
    ('y_test shape:', y_test),
):
    print(caption, split_array.shape)


x_train shape: (9527, 112, 92, 1)
y_train shape: (9527,)
x_valid shape: (1059, 112, 92, 1)
y_valid shape: (1059,)
x_test shape: (2647, 112, 92, 1)
y_test shape: (2647,)


In [None]:
def create_triplets(x, y, num_triplets=10000, seed=None):
    """Sample (anchor, positive, negative) triplets for triplet-loss training.

    For each triplet an anchor class is drawn uniformly from the classes that
    have at least two samples, so the positive is always a *different* sample
    of the same class. The negative is drawn from any other class present in
    ``y``. Exactly ``num_triplets`` triplets are returned; classes that do not
    occur in ``y`` are never drawn, so no draw is wasted.

    Args:
        x: Array of samples; the first axis indexes samples.
        y: Array of class labels, one per sample in ``x``.
        num_triplets: Number of triplets to generate.
        seed: Optional seed for a private ``np.random.RandomState``. When
            ``None`` the global ``np.random`` state is used.

    Returns:
        A tuple ``(triplets, labels)``. ``triplets`` has shape
        ``(num_triplets, 3) + x.shape[1:]`` with anchor, positive and negative
        along axis 1. ``labels`` is an all-zero ``(num_triplets, 1)`` integer
        array; it is a dummy target that the triplet loss ignores.

    Raises:
        ValueError: If ``y`` holds fewer than two classes, or no class has at
            least two samples, so no valid triplet can be formed.
    """
    x = np.asarray(x)
    y = np.asarray(y)
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Group sample indices by the classes that actually occur in y.
    order = np.argsort(y, kind='stable')
    _, group_starts = np.unique(y[order], return_index=True)
    class_indices = np.split(order, group_starts[1:])
    n_classes = len(class_indices)

    # An anchor needs a second sample of its own class to act as the positive.
    anchor_classes = [
        cls for cls, members in enumerate(class_indices) if len(members) >= 2
    ]
    if n_classes < 2 or not anchor_classes:
        raise ValueError(
            'create_triplets needs at least two classes and at least one '
            'class with two or more samples'
        )

    # Preallocate instead of growing a Python list of arrays.
    triplets = np.empty((num_triplets, 3) + x.shape[1:], dtype=x.dtype)

    for slot in range(num_triplets):
        # Select anchor and a distinct positive from the same class
        anchor_class = anchor_classes[rng.randint(len(anchor_classes))]
        anchor_idx, positive_idx = rng.choice(
            class_indices[anchor_class], size=2, replace=False
        )

        # Select negative: a non-zero offset modulo n_classes never lands on
        # the anchor class itself.
        negative_class = (anchor_class + rng.randint(1, n_classes)) % n_classes
        negative_idx = rng.choice(class_indices[negative_class])

        triplets[slot] = x[[anchor_idx, positive_idx, negative_idx]]

    # Dummy labels, not used by the triplet loss
    labels = np.zeros((num_triplets, 1), dtype=int)
    return triplets, labels


In [None]:
from keras.layers import Input
from keras.models import Model
from keras import backend as K

# Input image size (rows, cols); together with a single channel this forms the
# network's input_shape below.
im_rows, im_cols = 112, 92

from keras.layers import concatenate

# Define base network
def build_base_network(input_shape):
    """Build the convolutional encoder used for every member of a triplet.

    The stack is four ReLU convolutions (64 filters 10x10, 128 filters 7x7,
    128 filters 4x4, 256 filters 4x4), with default max pooling after each of
    the first three, followed by a flatten and a 4096-unit sigmoid dense layer.

    Args:
        input_shape: Shape of one input image, passed to the first layer.

    Returns:
        The assembled ``Sequential`` model.
    """
    network = Sequential()

    # Convolution + pooling stages
    network.add(Conv2D(64, (10, 10), activation='relu', input_shape=input_shape))
    network.add(MaxPooling2D())
    network.add(Conv2D(128, (7, 7), activation='relu'))
    network.add(MaxPooling2D())
    network.add(Conv2D(128, (4, 4), activation='relu'))
    network.add(MaxPooling2D())
    network.add(Conv2D(256, (4, 4), activation='relu'))

    # Embedding head
    network.add(Flatten())
    network.add(Dense(4096, activation='sigmoid'))

    return network

# Assemble the triplet Siamese model around a single base network
input_shape = (im_rows, im_cols, 1)
base_network = build_base_network(input_shape)

# One input tensor per triplet member, created in anchor/positive/negative order
input_anchor, input_positive, input_negative = (
    Input(shape=input_shape) for _ in range(3)
)

# The same base_network instance is called on each of the three inputs
processed_anchor, processed_positive, processed_negative = (
    base_network(branch_input)
    for branch_input in (input_anchor, input_positive, input_negative)
)

# Join the three outputs along the last axis so the loss receives one tensor
triplet_inputs = [input_anchor, input_positive, input_negative]
triplet_outputs = [processed_anchor, processed_positive, processed_negative]
concatenated = concatenate(triplet_outputs, axis=-1)

# Build model
model = Model(inputs=triplet_inputs, outputs=concatenated)

# Triplet loss function
def triplet_loss(y_true, y_pred, alpha=0.2):
    """Triplet margin loss over concatenated anchor/positive/negative outputs.

    ``y_pred`` is split along its last axis into three equally wide parts
    (anchor, positive, negative). For every row the loss term is
    ``max(0, d(anchor, positive) - d(anchor, negative) + alpha)`` where ``d``
    is the squared Euclidean distance; the terms are summed over the batch.

    Args:
        y_true: Ignored. Present only because Keras calls losses as
            ``loss(y_true, y_pred)``.
        y_pred: 2-D tensor whose last axis holds the three embeddings
            concatenated in anchor, positive, negative order.
        alpha: Margin required between the positive and negative distances.

    Returns:
        Scalar tensor with the summed loss of the batch.
    """
    # Derive the embedding width from the prediction itself so the slices stay
    # correct if the base network's output size changes. Fall back to the
    # original 4096 when the static shape is unknown.
    total_dim = y_pred.shape[-1]
    embedding_dim = total_dim // 3 if total_dim else 4096

    anchor = y_pred[:, :embedding_dim]
    positive = y_pred[:, embedding_dim:2 * embedding_dim]
    negative = y_pred[:, 2 * embedding_dim:3 * embedding_dim]

    positive_distance = K.sum(K.square(anchor - positive), axis=-1)
    negative_distance = K.sum(K.square(anchor - negative), axis=-1)
    return K.sum(K.maximum(0.0, positive_distance - negative_distance + alpha))

# Compile with Adam (learning rate 1e-4) and the custom triplet loss, then
# print the layer summary
optimizer = Adam(learning_rate=0.0001)
model.compile(loss=triplet_loss, optimizer=optimizer)

model.summary()


Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_7 (InputLayer)        [(None, 112, 92, 1)]         0         []                            
                                                                                                  
 input_8 (InputLayer)        [(None, 112, 92, 1)]         0         []                            
                                                                                                  
 input_9 (InputLayer)        [(None, 112, 92, 1)]         0         []                            
                                                                                                  
 sequential_4 (Sequential)   (None, 4096)                 2636473   ['input_7[0][0]',             
                                                          6          'input_8[0][0]',       

In [None]:
# Sample triplets from the training split first, then from the validation split
(train_triplets, train_labels), (valid_triplets, valid_labels) = (
    create_triplets(split_x, split_y)
    for split_x, split_y in ((x_train, y_train), (x_valid, y_valid))
)

# Show the resulting shapes as a sanity check
for caption, triplet_array in (
    ('train_triplets shape:', train_triplets),
    ('valid_triplets shape:', valid_triplets),
):
    print(caption, triplet_array.shape)

train_triplets shape: (6128, 3, 112, 92, 1)
valid_triplets shape: (191, 3, 112, 92, 1)


In [None]:
# Axis 1 of a triplet array holds anchor, positive and negative; the model
# takes them as three separate inputs in that order.
train_inputs = [train_triplets[:, member] for member in range(3)]
valid_inputs = [valid_triplets[:, member] for member in range(3)]

history = model.fit(
    x=train_inputs,
    y=train_labels,
    batch_size=128,
    epochs=50,
    validation_data=(valid_inputs, valid_labels),
)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50

In [None]:
# Evaluate the model on test set.
# The model takes three inputs (anchor, positive, negative), so the test split
# is turned into triplets with create_triplets before it is evaluated.
test_triplets, test_labels = create_triplets(x_test, y_test)
score = model.evaluate(
    [test_triplets[:, 0], test_triplets[:, 1], test_triplets[:, 2]], test_labels
)

# evaluate() returns a bare scalar when the model is compiled without metrics
# (as it is here) and a list [loss, metric, ...] otherwise; handle both.
if isinstance(score, (list, tuple)):
    print('Test loss:', score[0])
    if len(score) > 1:
        print('Test accuracy:', score[1])
else:
    print('Test loss:', score)

# Plot training history. Only the series that were actually recorded are
# drawn: without compiled metrics the history contains just the loss curves.
for metric, title, ylabel in (
    ('accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'Model loss', 'Loss'),
):
    if metric not in history.history:
        continue
    legend_entries = ['Train']
    plt.plot(history.history[metric])
    if 'val_' + metric in history.history:
        plt.plot(history.history['val_' + metric])
        legend_entries.append('Validation')
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(legend_entries, loc='upper left')
    plt.show()



In [None]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

def evaluate_triplet_model(model, test_triplets, y_true=None):
    """Score a triplet model by checking the positive is closer than the negative.

    The model output is treated as three equally wide embeddings concatenated
    in anchor, positive, negative order. A triplet is predicted as ``1`` when
    the Euclidean anchor-positive distance is strictly smaller than the
    anchor-negative distance, and ``0`` otherwise.

    Args:
        model: Object whose ``predict`` accepts the list
            ``[anchors, positives, negatives]`` and returns a 2-D array.
        test_triplets: Array with anchor, positive and negative along axis 1.
        y_true: Expected outcome (0 or 1) per triplet. Defaults to all ones,
            i.e. the positive is expected to be the closer sample in every
            triplet.

    Returns:
        A tuple ``(accuracy, cm, class_report)``: the accuracy score, the 2x2
        confusion matrix over labels ``[0, 1]``, and the text report.

    Raises:
        ValueError: If ``y_true`` does not have one entry per triplet.
    """
    # Embed all three members of every triplet in one forward pass
    embeddings = np.asarray(
        model.predict([test_triplets[:, 0], test_triplets[:, 1], test_triplets[:, 2]])
    )

    # Split the concatenated output back into its three embeddings
    embedding_dim = embeddings.shape[1] // 3
    anchor = embeddings[:, :embedding_dim]
    positive = embeddings[:, embedding_dim:2 * embedding_dim]
    negative = embeddings[:, 2 * embedding_dim:3 * embedding_dim]

    # Distances are measured between embeddings, not on a single embedding
    anchor_positive_dist = np.linalg.norm(anchor - positive, axis=1)
    anchor_negative_dist = np.linalg.norm(anchor - negative, axis=1)

    # Predictions based on distances
    y_pred = (anchor_positive_dist < anchor_negative_dist).astype(int)

    if y_true is None:
        y_true = np.ones(len(y_pred), dtype=int)
    else:
        y_true = np.asarray(y_true)
        if len(y_true) != len(y_pred):
            raise ValueError(
                f'y_true has {len(y_true)} entries but there are {len(y_pred)} '
                'triplets; pass one 0/1 label per triplet'
            )

    # Calculate metrics
    accuracy = accuracy_score(y_true, y_pred)
    print(f'Test Accuracy: {accuracy:.4f}')

    # Confusion matrix; fixing the labels keeps it 2x2 even when only one
    # class occurs in y_true or y_pred.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    print(f'Confusion Matrix:\n{cm}')

    # Classification report; zero_division=0 avoids warnings for a class with
    # no samples.
    class_report = classification_report(y_true, y_pred, labels=[0, 1], zero_division=0)
    print(f'Classification Report:\n{class_report}')

    return accuracy, cm, class_report


In [None]:
# Evaluate the model on test data.
# x_test / y_test hold single images and identity ids, so they are first turned
# into triplets. In every triplet the positive shares the anchor's identity,
# which makes the expected outcome 1 ('Same' is closer) for all of them.
test_triplets, test_labels = create_triplets(x_test, y_test)
triplet_truth = np.ones(len(test_triplets), dtype=int)
accuracy, conf_matrix, class_report = evaluate_triplet_model(model, test_triplets, triplet_truth)

# Plot the row-normalized confusion matrix. Rows without samples stay at zero
# instead of dividing by zero.
conf_matrix = np.asarray(conf_matrix)
row_totals = conf_matrix.sum(axis=1, keepdims=True)
normalized = np.divide(
    conf_matrix,
    row_totals,
    out=np.zeros(conf_matrix.shape, dtype=float),
    where=row_totals > 0,
)

classes = ['Not Same', 'Same']
fig, ax = plt.subplots(figsize=(8, 6))
heatmap = ax.imshow(normalized, interpolation='nearest', cmap=plt.cm.Blues, vmin=0.0, vmax=1.0)
fig.colorbar(heatmap, ax=ax)
ax.set_title('Normalized Confusion Matrix')
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label')
# Name the ticks only when the matrix really has one row/column per class
if normalized.shape == (len(classes), len(classes)):
    ax.set_xticks(range(len(classes)))
    ax.set_xticklabels(classes)
    ax.set_yticks(range(len(classes)))
    ax.set_yticklabels(classes)
for row, col in np.ndindex(*normalized.shape):
    ax.text(
        col, row, f'{normalized[row, col]:.2f}',
        ha='center', va='center',
        color='white' if normalized[row, col] > 0.5 else 'black',
    )
plt.show()

# The classification report is already printed by evaluate_triplet_model, so
# it is not printed a second time here.
