In [None]:
# Visualizing 1 Forgery Signature pair
# Entries 1 and 2 of the first item in forgery_imgs are preprocessed, then each
# image gets a leading axis of size 1 before being passed to the embedding model.
img1, img2 = (preprocess_image(path+forgery_imgs[0][col]) for col in (1, 2))
embed1, embed2 = (
    signature_embeddings.predict(np.expand_dims(img, axis=0))
    for img in (img1, img2)
)
forgery_results = [embed1, embed2]

In [None]:
# Turn the list holding the two embeddings into a single NumPy array
forgery_results = np.array(forgery_results)

In [None]:
# Inspect the array shape before it is flattened for export
forgery_results.shape

In [None]:
# Save embeddings for visualization in projector
# (flattened to two rows, written as tab-separated values)
np.savetxt(
    fname="vecs_forgery.tsv",
    X=forgery_results.reshape(2, -1),
    delimiter='\t',
)

<img src="https://drive.google.com/uc?id=119ggTHKoBIB6rTq3c0_qMQcnpkbPF48_" width=600>

Similar signatures have a smaller Euclidean distance between their embeddings, while different signatures have a comparatively larger Euclidean distance.

Now let's split the data into two sets:

- The first set is used to compute the threshold and the accuracy it achieves
- The second (test) set is then evaluated with that same threshold to find the test accuracy

In [None]:
# Labels are read by position (column index 3) while distances are read by name.
# NOTE(review): presumably column 3 holds 1 = same-writer pair, 0 = forged pair,
# which is what the threshold functions compare against; confirm against pairs_new.
labels = pairs_new.iloc[:,3].values
dist = pairs_new['distance'].values

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the pairs; the fixed seed keeps the split reproducible.
(dist_train, dist_test,
 labels_train, labels_test) = train_test_split(
    dist, labels, random_state=42, test_size=0.2
)

In [None]:
def compute_accuracy_thresh(predictions, labels, step=0.01):
    """Sweep distance thresholds and return the best accuracy and its threshold.

    A pair is predicted as 1 when its distance is ``<= threshold`` and as 0
    otherwise. Candidate thresholds run from the smallest to the largest value
    in ``predictions`` in increments of ``step``.

    Parameters
    ----------
    predictions : array-like
        Distance for each pair.
    labels : array-like
        Ground-truth label for each pair (1 or 0), same shape as
        ``predictions``. Entries with any other value are ignored when the
        accuracy is computed.
    step : float, optional
        Spacing between candidate thresholds. Defaults to 0.01.

    Returns
    -------
    max_acc : float
        Highest accuracy found (0 if no threshold scores above zero).
    best_thresh : float
        Smallest candidate threshold that reaches ``max_acc`` (-1 if no
        threshold scores above zero).

    Raises
    ------
    ValueError
        If the inputs are empty, have different shapes, or ``step`` is not
        positive.
    """
    # Accept plain lists too: `list == 1` would otherwise be a scalar False.
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)

    if predictions.size == 0:
        raise ValueError("predictions must not be empty")
    if predictions.shape != labels.shape:
        raise ValueError(
            "predictions and labels must have the same shape, got "
            f"{predictions.shape} and {labels.shape}"
        )
    if step <= 0:
        raise ValueError("step must be positive")

    dmax = np.max(predictions)
    dmin = np.min(predictions)

    # The ground-truth masks do not depend on the threshold, so build them once.
    is_same = labels == 1
    is_diff = labels == 0
    # Equals tp + tn + fp + fn for every threshold, since each 0/1-labelled
    # pair is always predicted as either 0 or 1.
    total = np.sum(is_same) + np.sum(is_diff)

    max_acc = 0
    best_thresh = -1
    if total == 0:
        # No 0/1 labels to score against; avoid a 0/0 division warning.
        return max_acc, best_thresh

    for d in np.arange(dmin, dmax + step, step):
        predicted_same = predictions <= d
        tp = np.sum(predicted_same & is_same)
        tn = np.sum(np.logical_not(predicted_same) & is_diff)
        accuracy = (tp + tn) / total

        # Strict comparison keeps the smallest threshold among ties.
        if accuracy > max_acc:
            max_acc = accuracy
            best_thresh = d

    return max_acc, best_thresh

In [None]:
# Find the best distance threshold (and its accuracy) on the training split
acc, thresh = compute_accuracy_thresh(predictions=dist_train, labels=labels_train)

In [None]:
# Accuracy: best accuracy found on the training split
acc

In [None]:
# Threshold: distance cut-off that achieved the best training accuracy
thresh

Now let's use the same threshold to find the accuracy on the test set.

In [None]:
# Computing Test Accuracy with the train threshold
def compute_accuracy_thresh_test(predictions, labels, d):
    """Return the accuracy obtained by thresholding distances at ``d``.

    Entries of ``predictions`` that are ``<= d`` are predicted as 1 and all
    others as 0; the result is compared element-wise with ``labels``. Only
    entries whose label is 0 or 1 contribute to the accuracy.
    """
    guess = (predictions <= d).astype(int)

    # Count every (predicted, actual) combination of the confusion matrix.
    cell = {
        (pred, actual): np.sum((guess == pred) & (labels == actual))
        for pred in (1, 0)
        for actual in (1, 0)
    }
    correct = cell[(1, 1)] + cell[(0, 0)]
    wrong = cell[(1, 0)] + cell[(0, 1)]

    return correct / (correct + wrong)

In [None]:
# Test Accuracy: score the held-out split with the threshold chosen on the train split
test_acc = compute_accuracy_thresh_test(
    predictions=np.array(dist_test),
    labels=np.array(labels_test),
    d=thresh,
)
test_acc

In [None]:
# Train loss vs validation loss
plt.plot(history.history['loss'],label='Train Loss')
plt.plot(history.history['val_loss'],label='Validation Loss')
# Label the axes and add a title so the figure can be read on its own.
# NOTE(review): assumes `history` holds one loss value per epoch (as a Keras
# History object does); confirm against the training cell.
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Train vs Validation Loss')
plt.legend()
plt.show()

### Contrastive Loss Vs Triplet Loss

<table>
  <tr>
    <th>Loss</th>
    <th>Train Accuracy</th>
    <th>Test Accuracy</th>
  </tr>
  <tr>
    <td>Contrastive Loss</td>
    <td>92.56</td>
    <td>63.81</td>
  </tr>
  <tr>
    <td>Triplet Loss</td>
    <td>82.66</td>
    <td>82.48</td>
  </tr>
</table>

> **Note:** Due to insufficient memory and GPU resources in Colab, the model with triplet loss has been trained using only 20,000 of the 44,000 triplets. The performance of the model can be improved by training with more samples.

## Summary

- A **Siamese neural network** is an artificial neural network that contains two or more identical subnetworks; it is also known as a twin neural network or sister network.

- A Siamese network takes two different inputs and passes them through two identical subnetworks that share the same architecture, parameters, and weights.

- The Siamese network’s objective is to classify whether the two inputs are the same or different using a similarity score.

- The similarity score is a distance between the two embeddings; the network learns these embeddings with a **contrastive loss** or a **triplet loss**, both of which are techniques from the general distance metric learning approach.

- Since training a Siamese network involves pairwise learning, the usual cross-entropy loss cannot be used in this case. Instead, two loss functions are mainly used:
 - **Contrastive loss**
 - **Triplet loss**

- **Contrastive loss** is a metric learning objective function where we learn from training data examples structured as pairs:
 - Positive pairs (examples that belong to the same class)
 - Negative pairs (examples that belong to different classes).

- The contrastive loss function is set up such that we minimize the distance between embeddings for positive pairs, and maximize the distance between embeddings for negative pairs.

- The **triplet loss** function is an alternative to the contrastive loss function. It has convergence advantages over the contrastive loss function.

- In triplet loss, the distance between the baseline (anchor) input and the positive input is reduced to a minimum, while the distance between the baseline input and the negative input is increased.

- The triplet loss function aims to learn a distance between representations such that the anchor-to-positive distance is less than the anchor-to-negative distance.