# Siamese Network - MNIST Dataset

In [36]:
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Input, optimizers, initializers
from keras.datasets import mnist
from keras.models import Model, Sequential
import numpy as np
from sklearn.model_selection import train_test_split
from matplotlib import pyplot as plt
import keras.backend as K


In [37]:
(xTrain, yTrain), (xTest, yTest) = mnist.load_data()

In [38]:
xTrain=np.concatenate((xTrain,xTest))
yTrain=np.concatenate((yTrain,yTest))

In [72]:
xTrain=xTrain.astype('float32')/255

In [73]:
def make_pairs(labelList, imageArr, numLabels):
  pairs0=[]
  pairs1=[]
  pairLabels=[]
  for i in range(numLabels):
    curList=labelList[i]
    for idx in range(len(curList)):
      anchorImg=imageArr[curList[idx]]

      posIdx=curList[(idx+1)%len(curList)]
      posImg=imageArr[idx]

      pairs0.append(anchorImg)
      pairs1.append(posImg)      
      pairLabels.append(1)

      negLabel=np.random.choice(list(range(numLabels)), 2)
      negLabel=negLabel[0] if negLabel[0]!=idx else negLabel[1]
      negIdx=np.random.choice(labelList[negLabel], 1)
      negImg=imageArr[negIdx[0]]

      pairs0.append(anchorImg)
      pairs1.append(negImg)      
      pairLabels.append(0)

  pairs0=np.array(pairs0, dtype='float32')
  pairs1=np.array(pairs1, dtype='float32')
  pairLabels=np.array(pairLabels)

  return pairs0, pairs1, pairLabels

In [74]:
#Divide images by labels
numLabels=len(np.unique(yTrain))
labelList=[]
labelList=[np.where(yTrain==i)[0] for i in range(numLabels)]
pairs0, pairs1, pairLabels = make_pairs(labelList, xTrain, numLabels)

In [118]:
def tripletLoss(x, alpha = 0.3):
    anchor = x[0]
    positive = x[1]
    negative = x[2]
    pos_dist = K.abs(anchor-positive)
    neg_dist = K.abs(anchor-negative)
    loss = pos_dist-neg_dist+alpha
    loss = K.maximum(loss,0.0)
    return loss

def embeddingModel(inputShape):
  model=Sequential()
  model.add(Input(shape=inputShape))

  model.add(layers.Conv2D(32, (3, 3), activation='relu'))
  model.add(layers.BatchNormalization())
  model.add(layers.Activation(activation='relu'))

  model.add(layers.Conv2D(64, (3, 3), activation='relu'))
  model.add(layers.BatchNormalization())
  model.add(layers.Activation(activation='relu'))
  model.add(layers.MaxPooling2D())

  model.add(layers.Flatten())
  model.add(layers.Dense(128))
  model.add(layers.BatchNormalization())
  model.add(layers.Activation(activation='relu'))

  return model

def twoInputHead(inputShape):
  inp1 = Input(shape=inputShape)
  inp2 = Input(shape=inputShape)

  layer = layers.Lambda(lambda x: K.abs(x[0]-x[1]))
  # layer = layers.Lambda(lambda x: K.sum(K.square(x[0]-x[1]),axis=1, keepdims=True))
  dist=layer([inp1,inp2])
  out = layers.Dense(1,activation='sigmoid')(dist)

  return Model([inp1, inp2], out)

def threeInputHead(inputShape):
  inp1 = Input(shape=inputShape)  #anchor
  inp2 = Input(shape=inputShape)  #pos
  inp3 = Input(shape=inputShape)  #neg

  # layer = layers.Lambda(lambda x: K.sum(K.square(x[0]-x[1])))
  # layer = layers.Lambda(lambda x: K.abs(x[0]-x[1]))
  # posDist=layer([inp1,inp2])
  # negDist=layer([inp1,inp3])

  out=layers.Lambda(tripletLoss)([inp1, inp2, inp3])
  return Model(inputs = [inp1, inp2, inp3], outputs = out)

def pair_siamese(inputShape):

  baseModel=embeddingModel(inputShape)
  print(baseModel.summary())

  newShape=baseModel.output_shape[1]

  headModel=twoInputHead((newShape))
  print(headModel.summary())

  input1 = Input(shape=inputShape)
  input2 = Input(shape=inputShape)

  embedding1 = baseModel(input1)
  embedding2 = baseModel(input2)

  out = headModel([embedding1, embedding2])

  siameseNetwork = Model([input1, input2], out)
  return siameseNetwork

def triplet_siamese(inputShape):

  baseModel=embeddingModel(inputShape)
  print(baseModel.summary())

  newShape=baseModel.output_shape[1]

  headModel=threeInputHead((newShape))
  print(headModel.summary())

  input1 = Input(shape=inputShape)
  input2 = Input(shape=inputShape)
  input3 = Input(shape=inputShape)

  embedding1 = baseModel(input1)
  embedding2 = baseModel(input2)
  embedding3 = baseModel(input3)

  out = headModel([embedding1, embedding2, embedding3])
  # out = layers.Lambda(tripletLoss)(out)
 
  siameseNetwork = Model([input1, input2, input3], out)
  return siameseNetwork

In [82]:
K.clear_session()
siamese_net = pair_siamese((28,28,1))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization (BatchNo (None, 26, 26, 32)        128       
_________________________________________________________________
activation (Activation)      (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
batch_normalization_1 (Batch (None, 24, 24, 64)        256       
_________________________________________________________________
activation_1 (Activation)    (None, 24, 24, 64)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 64)        0

## Contrastive Loss

In [83]:
def contrastiveLoss(trueLabel, predVal):
  margin=1
  trueLabel = tf.cast(trueLabel, predVal.dtype)
  predValSq = K.square(predVal)
  class1PredVal=K.square(margin-predValSq)
  zero=tf.constant(0, predVal.dtype)
  class1Loss=K.maximum(zero, class1PredVal)
  loss = K.mean(trueLabel * class1Loss + (1 - trueLabel) * predValSq)
  return 0.5*loss

### Adam

In [None]:
siamese_net.compile(loss=contrastiveLoss,optimizer=keras.optimizers.Adam(learning_rate=0.001) ,metrics = ['accuracy'])

In [None]:
K.clear_session()
siamese_net.fit([pairs0,pairs1], pairLabels,
          batch_size=64,
          epochs=10,
          validation_split=0.3)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f2940045dd0>

### RMSProp

In [46]:
siamese_net.compile(loss=contrastiveLoss,optimizer=keras.optimizers.RMSprop(learning_rate=0.001) ,metrics = ['accuracy'])

In [47]:
K.clear_session()
siamese_net.fit([pairs0,pairs1], pairLabels,
          batch_size=64,
          epochs=10,
          validation_split=0.3)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f1b3e0b8f10>

### MiniBatch Gradient Descent

In [86]:
K.clear_session()
siamese_net.compile(loss=contrastiveLoss,optimizer=keras.optimizers.SGD(learning_rate=0.1) ,metrics = ['accuracy'])

In [87]:
siamese_net.fit([pairs0,pairs1], pairLabels,
          batch_size=64,
          epochs=10,
          validation_split=0.3)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f1b23164e90>

## Binary cross entropy

### Adam

In [55]:
siamese_net.compile(loss='binary_crossentropy',optimizer=keras.optimizers.Adam(learning_rate=0.001) ,metrics = ['accuracy'])

In [56]:
K.clear_session()
siamese_net.fit([pairs0,pairs1], pairLabels,
          batch_size=64,
          epochs=10,
          validation_split=0.3)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f1b601c5a50>

### RMSProp

In [68]:
siamese_net.compile(loss='binary_crossentropy',optimizer=keras.optimizers.RMSprop(learning_rate=0.001) ,metrics = ['accuracy'])

In [69]:
K.clear_session()
siamese_net.fit([pairs0,pairs1], pairLabels,
          batch_size=64,
          epochs=10,
          validation_split=0.3)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f1b252283d0>

### Mini Batch Gradient Descent

In [79]:
K.clear_session()
siamese_net.compile(loss='binary_crossentropy',optimizer=keras.optimizers.SGD(learning_rate=0.1) ,metrics = ['accuracy'])

In [80]:
siamese_net.fit([pairs0,pairs1], pairLabels,
          batch_size=64,
          epochs=10,
          validation_split=0.3)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f1b2384a190>

# Triplet Loss

In [119]:
def make_triplets(labelList, imageArr, numLabels):
  pairs0=[]
  pairs1=[]
  pairs2=[]
  # pairLabels=[]
  for i in range(numLabels):
    curList=labelList[i]
    for idx in range(len(curList)):
      anchorImg=imageArr[curList[idx]]

      posIdx=curList[(idx+1)%len(curList)]
      posImg=imageArr[idx]

      pairs0.append(anchorImg)
      pairs1.append(posImg)      
      # pairLabels.append(1)

      negLabel=np.random.choice(list(range(numLabels)), 2)
      negLabel=negLabel[0] if negLabel[0]!=idx else negLabel[1]
      negIdx=np.random.choice(labelList[negLabel], 1)
      negImg=imageArr[negIdx[0]]

      pairs2.append(negImg)      
      # pairLabels.append(0)

  pairs0=np.array(pairs0, dtype='float32')
  pairs1=np.array(pairs1, dtype='float32')
  pairs2=np.array(pairs2, dtype='float32')
  # pairLabels=np.array(pairLabels)

  return pairs0, pairs1, pairs2


def identity_loss(y_true, y_pred):
    return K.mean(y_pred)

# def tripletLoss(trueLabel, dist):
#   alpha = 0.2
#   print('1')
#   print(tf.keras.backend.get_value(dist))
#   print(tf.shape(trueLabel))
#   posDist=dist[0]
#   negDist=dist[1]
#   print(tf.shape(posDist))
#   print('2')
#   diff = posDist-negDist+alpha
#   print('3')
#   zero=tf.constant(0, diff.dtype)
#   loss = K.maximum(diff,zero)
#   return loss


In [120]:
#Divide images by labels
numLabels=len(np.unique(yTrain))
labelList=[]
labelList=[np.where(yTrain==i)[0] for i in range(numLabels)]
pairs0, pairs1, pairs2 = make_triplets(labelList, xTrain, numLabels)

In [121]:
K.clear_session()
siamese_net = triplet_siamese((28,28,1))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
batch_normalization (BatchNo (None, 26, 26, 32)        128       
_________________________________________________________________
activation (Activation)      (None, 26, 26, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 24, 24, 64)        18496     
_________________________________________________________________
batch_normalization_1 (Batch (None, 24, 24, 64)        256       
_________________________________________________________________
activation_1 (Activation)    (None, 24, 24, 64)        0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 12, 12, 64)        0

### Adam

In [108]:
K.clear_session()
siamese_net.compile(loss=identity_loss, optimizer=keras.optimizers.Adam(learning_rate=0.0001) ,metrics = ['accuracy'])

In [109]:
labels=np.ones((np.shape(pairs0)[0]))
siamese_net.fit([pairs0, pairs1, pairs2], labels,
          batch_size=64,
          epochs=10,
          verbose=2,
          validation_split=0.3)

Epoch 1/10
766/766 - 12s - loss: 0.3018 - accuracy: 0.0066 - val_loss: 0.3017 - val_accuracy: 0.0040
Epoch 2/10
766/766 - 10s - loss: 0.2962 - accuracy: 0.0065 - val_loss: 0.3009 - val_accuracy: 0.0078
Epoch 3/10
766/766 - 10s - loss: 0.2941 - accuracy: 0.0091 - val_loss: 0.3034 - val_accuracy: 0.0100
Epoch 4/10
766/766 - 10s - loss: 0.2919 - accuracy: 0.0125 - val_loss: 0.3017 - val_accuracy: 0.0167
Epoch 5/10
766/766 - 10s - loss: 0.2892 - accuracy: 0.0147 - val_loss: 0.3020 - val_accuracy: 0.0039
Epoch 6/10
766/766 - 10s - loss: 0.2870 - accuracy: 0.0152 - val_loss: 0.3014 - val_accuracy: 0.0151
Epoch 7/10
766/766 - 10s - loss: 0.2842 - accuracy: 0.0167 - val_loss: 0.3002 - val_accuracy: 0.0113
Epoch 8/10
766/766 - 10s - loss: 0.2824 - accuracy: 0.0156 - val_loss: 0.3015 - val_accuracy: 0.0114
Epoch 9/10
766/766 - 10s - loss: 0.2801 - accuracy: 0.0158 - val_loss: 0.2999 - val_accuracy: 0.0069
Epoch 10/10
766/766 - 10s - loss: 0.2781 - accuracy: 0.0167 - val_loss: 0.3011 - val_accura

<tensorflow.python.keras.callbacks.History at 0x7f1b22e13d10>

### RMSProp

In [122]:
K.clear_session()
siamese_net.compile(loss='binary_crossentropy',optimizer=keras.optimizers.RMSprop(learning_rate=0.001) ,metrics = ['accuracy'])

In [123]:
labels=np.ones((np.shape(pairs0)[0]))
siamese_net.fit([pairs0, pairs1, pairs2], labels,
          batch_size=64,
          epochs=10,
          verbose=2,
          validation_split=0.3)

Epoch 1/10
766/766 - 13s - loss: 3.6506 - accuracy: 0.0014 - val_loss: 5.4333 - val_accuracy: 3.8095e-04
Epoch 2/10
766/766 - 11s - loss: 1.6086 - accuracy: 0.0017 - val_loss: 1.2040 - val_accuracy: 0.0000e+00
Epoch 3/10
766/766 - 11s - loss: 1.2040 - accuracy: 0.0000e+00 - val_loss: 1.2040 - val_accuracy: 0.0000e+00
Epoch 4/10
766/766 - 11s - loss: 1.2040 - accuracy: 0.0000e+00 - val_loss: 1.2040 - val_accuracy: 0.0000e+00
Epoch 5/10
766/766 - 11s - loss: 1.2040 - accuracy: 0.0000e+00 - val_loss: 1.2040 - val_accuracy: 0.0000e+00
Epoch 6/10
766/766 - 11s - loss: 1.2040 - accuracy: 0.0000e+00 - val_loss: 1.2040 - val_accuracy: 0.0000e+00
Epoch 7/10
766/766 - 11s - loss: 1.2040 - accuracy: 0.0000e+00 - val_loss: 1.2040 - val_accuracy: 0.0000e+00
Epoch 8/10
766/766 - 11s - loss: 1.2040 - accuracy: 0.0000e+00 - val_loss: 1.2040 - val_accuracy: 0.0000e+00
Epoch 9/10
766/766 - 11s - loss: 1.2040 - accuracy: 0.0000e+00 - val_loss: 1.2040 - val_accuracy: 0.0000e+00
Epoch 10/10
766/766 - 11s -

<tensorflow.python.keras.callbacks.History at 0x7f1b0c69e390>

### Mini Batch Gradient Descent

In [33]:
siamese_net.compile(loss=identity_loss,optimizer=keras.optimizers.SGD(learning_rate=0.001) ,metrics = ['accuracy'])

In [34]:
K.clear_session()
labels=np.ones((np.shape(pairs0)[0]))
siamese_net.fit([pairs0, pairs1, pairs2], labels,
          batch_size=64,
          epochs=10,
          verbose=2,
          validation_split=0.3)

Epoch 1/10
766/766 - 12s - loss: 0.4930 - accuracy: 0.0038 - val_loss: 0.5037 - val_accuracy: 0.0026
Epoch 2/10
766/766 - 10s - loss: 0.4931 - accuracy: 0.0037 - val_loss: 0.5035 - val_accuracy: 0.0028
Epoch 3/10
766/766 - 10s - loss: 0.4932 - accuracy: 0.0037 - val_loss: 0.5035 - val_accuracy: 0.0024
Epoch 4/10
766/766 - 10s - loss: 0.4927 - accuracy: 0.0038 - val_loss: 0.5034 - val_accuracy: 0.0027
Epoch 5/10
766/766 - 10s - loss: 0.4927 - accuracy: 0.0037 - val_loss: 0.5034 - val_accuracy: 0.0026
Epoch 6/10
766/766 - 10s - loss: 0.4926 - accuracy: 0.0037 - val_loss: 0.5034 - val_accuracy: 0.0027
Epoch 7/10
766/766 - 10s - loss: 0.4927 - accuracy: 0.0036 - val_loss: 0.5032 - val_accuracy: 0.0029
Epoch 8/10
766/766 - 10s - loss: 0.4926 - accuracy: 0.0039 - val_loss: 0.5033 - val_accuracy: 0.0028
Epoch 9/10
766/766 - 10s - loss: 0.4925 - accuracy: 0.0035 - val_loss: 0.5032 - val_accuracy: 0.0029
Epoch 10/10
766/766 - 10s - loss: 0.4924 - accuracy: 0.0037 - val_loss: 0.5031 - val_accura

<tensorflow.python.keras.callbacks.History at 0x7f1b6024ced0>

### Accuracies
---
|                      | Adam     | RMSProp  | Mini Batch GD |
| :---                 | :----:   | :----:   | ---:  |
| **Contrastive Loss** | 69.72  | 67.60  | 64.42 |
| **Cross Entropy**    | 69.03  | 68.85  | 64.76 |
| **Triplet Loss**     | 1.67   | 0      | 0.29  |




> **Best Optimizer**: Adam




### Pros and Cons


---


### Pros of Siamese Network
- **Robust to class imbalance**: Few images per class is sufficient to train the network
- **Learns with semantic similarity**: Places similar classes close

### Cons of Siamese Network
- More training time
- Doesn't give the probabilities of prediction; just the distance between the classes