# Siamese networks using triplet loss

Demonstration of implementing Siamese networks with triplet loss on the Fashion-MNIST dataset.

The network has an unusual architecture - we don't need to pass ground-truth labels to it because of how the data are structured (each sample is an anchor/positive/negative triplet).

In [None]:
from tensorflow.keras.datasets import fashion_mnist
import numpy as np
import random
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Input
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
import pandas as pd

## Loading the data
Load the data.

In [None]:
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

Normalize pixel values to be between 0 and 1.

In [None]:
train_images = train_images/255.0
test_images = test_images/255.0

Set predictions class names.

In [None]:
class_names = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
               'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']

## Sort data by classes

Sort data by classes for easier triplets generation.

In [None]:
def sort_labels_by_classes(labels):
    """Group sample positions by class id.

    Returns a list with one entry per name in ``class_names``; entry ``k`` is
    the array of positions in ``labels`` whose value equals ``k``.
    """
    # np.where(condition)[0] gives the positions along the first axis where the condition holds
    return [np.where(labels == class_id)[0] for class_id in range(len(class_names))]

In [None]:
train_classes = sort_labels_by_classes(train_labels)
test_classes = sort_labels_by_classes(test_labels)
print('Train classes:')
display(train_classes)

## Create triplets

In [None]:
def create_triplets(data, labels):
    """Build (anchor, positive, negative) triplets from class-grouped indices.

    Parameters:
        data: indexable collection of samples (e.g. an array of images).
        labels: sequence with one entry per class; entry ``k`` holds the
            indices into ``data`` of the samples belonging to class ``k``.

    Returns:
        ``np.array`` of triplets; each triplet is
        ``[data[anchor], data[positive], data[negative]]``.

    For every class, each pair of consecutive indices forms the
    anchor/positive pair, and the negative is a random sample drawn from a
    randomly chosen *different* class. At least two classes are needed as soon
    as any class contributes a pair (``random.randrange`` raises otherwise).
    """
    triplets_data = []
    # The class count comes from `labels` itself, because it is used below to
    # index back into `labels`; relying on a separate global list of names
    # would break as soon as the two lengths differ.
    class_count = len(labels)
    for class_index, class_indices in enumerate(labels):
        # consecutive items of the same class: n-th and (n+1)-th sample
        for idx1, idx2 in zip(class_indices[:-1], class_indices[1:]):
            # an increment in 1..class_count-1 followed by the modulo always
            # lands on a class other than the current one
            inc = random.randrange(1, class_count)
            negative_label_index = (class_index + inc) % class_count
            # random sample of that other class is the negative
            negative_sample = random.choice(labels[negative_label_index])
            triplets_data.append([data[idx1], data[idx2], data[negative_sample]])
    # numpy arrays are easier to slice per branch later on
    return np.array(triplets_data)

Create triplets for training.

In [None]:
X_train = create_triplets(train_images, train_classes)

Create triplets for test.

In [None]:
X_test = create_triplets(test_images, test_classes)

Plot triplets.

In [None]:
def show_image(image):
    """Display a single image in its own figure, with a colorbar and no grid.

    image: whatever ``plt.imshow`` accepts; the notebook passes single images
    taken from the triplet array.
    """
    # start a new figure so that consecutive calls produce separate plots
    plt.figure()
    plt.imshow(image, cmap=plt.cm.binary)
    # colorbar shows the value scale of the plotted image
    plt.colorbar()
    plt.grid(False)
    plt.show()

In [None]:
# index of the training triplet to inspect
triplet = 6001
# show the three images stored at this index; create_triplets stores them
# in the order anchor, positive (same class), negative (other class)
show_image(X_train[triplet][0])
show_image(X_train[triplet][1])
show_image(X_train[triplet][2])

## Setting up network architecture

### Backbone network for Siamese network

Create backbone network that will be shared by all three branches.

In [None]:
def initialize_base_network(input_shape=(28, 28), units=128):
    """Create the backbone (embedding) model used by every Siamese branch.

    Parameters:
        input_shape: shape of one input sample without the batch dimension;
            defaults to the 28x28 images used in this notebook.
        units: width of each of the two dense ReLU layers; this is also the
            size of the produced feature vector.

    Returns:
        A Keras ``Model`` mapping an input image to its feature vector.
    """
    # named `inputs` rather than `input` so the builtin is not shadowed
    inputs = Input(shape=input_shape)
    x = Flatten()(inputs)
    x = Dense(units, activation='relu')(x)
    x = Dense(units, activation='relu')(x)
    return Model(inputs=inputs, outputs=x)

In [None]:
embedding = initialize_base_network()
tf.keras.utils.plot_model(embedding, show_shapes=True)

### Setting the Siamese network in custom layer

We set up the Siamese network inside a custom layer, which is a different approach to building it.

It is also easier to work with 🙂

In [None]:
class SiameseNet(tf.keras.layers.Layer):
    """Layer that applies one backbone model to an anchor/positive/negative triple."""

    def __init__(self, model):
        super().__init__()
        # the same model object is called for all three inputs in `call`
        self.model = model

    def call(self, feat):
        """Embed the three inputs and stack the results.

        feat: indexable holding the anchor at position 0, the positive image
        at position 1 and the negative image at position 2.

        Returns the three feature tensors combined by ``tf.stack`` in the
        order anchor, positive, negative.
        """
        anchor_in, positive_in, negative_in = feat[0], feat[1], feat[2]
        embedded = [
            self.model(anchor_in),
            self.model(positive_in),
            self.model(negative_in),
        ]
        return tf.stack(embedded)

Define triplet loss in separate layer.

In [None]:
class TripletLoss(tf.keras.layers.Layer):
    """Layer computing the per-sample triplet loss from stacked embeddings.

    The output is ``max(d(anchor, positive) - d(anchor, negative) + margin, 0)``
    where ``d`` is the Euclidean distance implemented in ``distance``.
    """

    def __init__(self, margin, **kwargs):
        """Store the margin hyperparameter.

        margin: how much farther the negative has to be from the anchor than
            the positive before the loss reaches zero.
        **kwargs: forwarded to the Keras ``Layer`` constructor (e.g. ``name``).
        """
        # initialise the Keras base layer first, then assign attributes,
        # matching SiameseNet and the usual layer-subclassing convention
        super().__init__(**kwargs)
        self.margin = margin

    def distance(self, x, y):
        """Return the Euclidean distance between ``x`` and ``y`` along axis 1.

        The reduced axis is kept (``keepdims=True``).
        """
        sum_square = tf.reduce_sum(tf.square(x - y), axis=1, keepdims=True)
        # clamp to at least epsilon so the square root never receives zero
        return tf.sqrt(tf.maximum(sum_square, K.epsilon()))

    def call(self, features):
        """Compute the triplet loss.

        features: indexable with the anchor embeddings at position 0, the
        positive embeddings at position 1 and the negative ones at position 2.
        """
        # anchor-positive distance
        pos = self.distance(features[0], features[1])
        # anchor-negative distance
        neg = self.distance(features[0], features[2])
        # difference between anchor-positive and anchor-negative distances
        loss = pos - neg
        # hinge: triplets already separated by the margin contribute zero
        return tf.maximum(loss + self.margin, 0.0)
The triplet loss is already computed in the output layer, so the loss function just averages the network outputs and ignores the true labels.

In [None]:
def identity_loss(y_true, y_pred):
    """Return the mean of ``y_pred``; ``y_true`` is accepted but not used."""
    mean_output = tf.reduce_mean(y_pred)
    return mean_output

### Create Siamese network
Putting the network together.

In [None]:
# Three inputs, one per element of a triplet.
# anchor branch
image_input = Input(shape=(28,28), name='image_input')
# positive image branch
positive_input = Input(shape=(28,28), name='positive_input')
# negative image branch
negative_input = Input(shape=(28,28), name='negative_input')

# one SiameseNet layer wrapping the single `embedding` model embeds all three inputs
siamese = SiameseNet(embedding)([image_input, positive_input, negative_input])
# the model output is the triplet loss itself (margin fixed at 1.0)
loss = TripletLoss(margin=1.0)(siamese)
model = Model(inputs=[image_input, positive_input, negative_input], outputs=loss)
# identity_loss just averages the model output, so the optimizer minimises the triplet loss directly
model.compile(optimizer = tf.keras.optimizers.Adam(), loss = identity_loss)
tf.keras.utils.plot_model(model, show_shapes=True)

### Training the model

In [None]:
# The targets are dummies: the triplet loss is computed inside the model and
# identity_loss ignores y_true, so an array of ones of matching length is passed.
history = model.fit(
    [X_train[:, 0], X_train[:, 1], X_train[:, 2]],
    np.ones(len(X_train)),
    batch_size=128,
    verbose=1,
    validation_data=(
        [X_test[:, 0], X_test[:, 1], X_test[:, 2]],
        np.ones(len(X_test)),
    ),
    epochs=20,
)

In [None]:
def plot_metrics(metric_name, title, ylim=5):
    """Plot a training metric and its validation counterpart.

    Reads the curves from the module-level ``history`` object returned by
    ``model.fit``.

    Parameters:
        metric_name: key in ``history.history``; the validation curve is
            looked up under ``'val_' + metric_name``.
        title: plot title.
        ylim: upper limit of the y axis (the lower limit is 0).
    """
    plt.title(title)
    plt.ylim(0, ylim)
    plt.plot(history.history[metric_name], color='blue', label=metric_name)
    plt.plot(history.history['val_' + metric_name], color='green', label='val_' + metric_name)
    plt.grid()
    # without a legend the label= arguments above are never shown
    plt.legend()

In [None]:
plot_metrics(metric_name='loss', title="Loss", ylim=0.2)

## Predictions

First we create pairs for evaluating the model.

In [None]:
def create_pairs(data, labels):
    """Build alternating positive/negative sample pairs with their labels.

    Parameters:
        data: indexable collection of samples (e.g. an array of images).
        labels: sequence with one entry per class; entry ``k`` holds the
            indices into ``data`` of the samples belonging to class ``k``.

    Returns:
        ``(pairs, pair_labels)`` as numpy arrays. Pairs are appended two at a
        time: a positive pair (label 1.0) at each even position, followed by a
        negative pair with the same first element (label 0.0) at the next odd
        position.

    At least two classes are needed as soon as any class contributes a pair
    (``random.randrange`` raises otherwise).
    """
    pairs_data = []
    pairs_labels = []
    # The class count comes from `labels` itself, because it is used below to
    # index back into `labels`; relying on a separate global list of names
    # would break as soon as the two lengths differ.
    class_count = len(labels)
    for class_index, class_indices in enumerate(labels):
        # consecutive items of the same class: n-th and (n+1)-th sample
        for idx1, idx2 in zip(class_indices[:-1], class_indices[1:]):
            # positive pair, label 1
            pairs_data.append([data[idx1], data[idx2]])
            pairs_labels.append(1.0)

            # an increment in 1..class_count-1 followed by the modulo always
            # lands on a class other than the current one
            inc = random.randrange(1, class_count)
            negative_label_index = (class_index + inc) % class_count
            # random sample of that other class completes the negative pair
            negative_sample = random.choice(labels[negative_label_index])
            # negative pair, label 0
            pairs_data.append([data[idx1], data[negative_sample]])
            pairs_labels.append(0.0)
    # numpy arrays are easier to slice per branch later on
    return np.array(pairs_data), np.array(pairs_labels)

Create pairs for evaluation: positive pairs are stored at even indices and negative pairs at odd indices.

In [None]:
# NOTE: this rebinds X_test - from here on it holds the evaluation pairs,
# not the test triplets built earlier with create_triplets
X_test, Y_test = create_pairs(test_images, test_classes)

Predict feature vectors.

In [None]:
left_pair = X_test[:,0]
left_pair_pred = embedding.predict(left_pair)

In [None]:
right_pair = X_test[:,1]
right_pair_pred = embedding.predict(right_pair)

### Positive pair distance descriptive statistics

In [None]:
# create_pairs appends a positive pair first and a negative pair second,
# so the even positions (0, 2, 4, ...) are the positive pairs
positive_left_pred = left_pair_pred[0::2]
positive_right_pred = right_pair_pred[0::2]
# Euclidean distance between the two feature vectors of each positive pair
positive_distances = np.linalg.norm(positive_left_pred - positive_right_pred, axis=1)

In [None]:
pd.Series(positive_distances).describe()

### Negative pair distance descriptive statistics

In [None]:
# create_pairs appends a positive pair first and a negative pair second,
# so the odd positions (1, 3, 5, ...) are the negative pairs
negative_left_pred = left_pair_pred[1::2]
negative_right_pred = right_pair_pred[1::2]
# Euclidean distance between the two feature vectors of each negative pair
negative_distances = np.linalg.norm(negative_left_pred - negative_right_pred, axis=1)

In [None]:
pd.Series(negative_distances).describe()

Visualize using box plots.

In [None]:
fig = plt.figure()
# axes rectangle given as [left, bottom, width, height]
ax = fig.add_axes([0,0,1, 1])
# one box per group: positive-pair distances first, negative-pair distances second
ax.boxplot([positive_distances, negative_distances])
# name the two boxes drawn at x positions 1 and 2
plt.xticks([1, 2], ['Positive', 'Negative'])
ax.grid()
plt.show()

## Calculate model accuracy

In [None]:
def compute_accuracy(left_pred, right_pred, y_true, threshold=7.0):
    """Return the fraction of pairs classified correctly by a distance threshold.

    Parameters:
        left_pred: feature vectors of the first element of each pair, one row
            per pair.
        right_pred: feature vectors of the second element of each pair, same
            layout as ``left_pred``.
        y_true: expected label per pair, 1 for "same class" and 0 for
            "different class".
        threshold: pairs whose Euclidean distance is strictly below this value
            are predicted as "same class".

    Returns:
        Mean agreement between the thresholded predictions and ``y_true``.
    """
    # use the arguments that were passed in, not the notebook-level
    # left_pair_pred / right_pair_pred variables
    distances = np.linalg.norm(np.asarray(left_pred) - np.asarray(right_pred), axis=1)
    # True (== 1) for "same", False (== 0) for "different"
    pred = distances < threshold
    return np.mean(pred == y_true)

In [None]:
# embed both sides of every evaluation pair and score them against the pair labels
test_accuracy = compute_accuracy(embedding.predict(X_test[:,0]), embedding.predict(X_test[:,1]), Y_test)
print(f'Test accuracy: {test_accuracy*100:.2f}%')