# Generate embeddings from training images

In [1]:
import tensorflow as tf


def _pairwise_distances(embeddings, squared=False):
    """Compute the distance from every image embedding to the batch's anchor.

    Despite its name, this function does NOT build a full pairwise matrix.
    Every row of `embeddings` is split down the middle: the first half of the
    columns is used as the image embedding and the second half as the anchor
    embedding. Only the anchor found in the first row is kept, and each image
    embedding is compared against that single anchor.

    Args:
        embeddings: array or tensor of shape (batch_size, 2 * embed_dim) holding
                    the image embedding and the anchor embedding side by side.
        squared: Boolean. If true, output is the squared euclidean distance.
                 If false, output is the euclidean distance.

    Returns:
        distances: tensor of shape (batch_size, 1), one distance per image.
    """
    # Column index separating the image embedding from the anchor embedding.
    half = embeddings.shape[1] // 2
    im_embeddings = embeddings[:, :half]
    anchor_emb = embeddings[:, half:]
    # Keep only the anchor of the first row, as a (1, embed_dim) row vector.
    anchor_emb = tf.expand_dims(anchor_emb[0], axis=0)

    # <image, anchor> for every image, shape (batch_size, 1)
    dot_product = tf.matmul(im_embeddings, tf.transpose(anchor_emb))

    # Squared L2 norms: shape (batch_size, 1) for the images, (1, 1) for the anchor.
    square_norm_a = tf.reduce_sum(tf.square(im_embeddings), axis=1, keepdims=True)
    square_norm_b = tf.reduce_sum(tf.square(anchor_emb), axis=1, keepdims=True)

    # ||a - b||^2 = ||a||^2  - 2 <a, b> + ||b||^2
    # shape (batch_size, 1) after broadcasting the anchor norm
    distances = tf.add(square_norm_a, square_norm_b - 2.0*dot_product)

    # Because of computation errors, some distances might be negative so we put everything >= 0.0
    distances = tf.maximum(distances, 0.0)

    if not squared:
        # Because the gradient of sqrt is infinite when distances == 0.0
        # we need to add a small epsilon where distances == 0.0.
        # The mask follows the dtype of `distances` so float64 inputs also work.
        mask = tf.cast(tf.equal(distances, 0.0), distances.dtype)
        distances = distances + mask * 1e-16

        distances = tf.sqrt(distances)

        # Correct the epsilon added: set the distances on the mask to be exactly 0.0
        distances = distances * (1.0 - mask)
    return distances


def _get_anchor_positive_triplet_mask(labels):
    """Tile `labels` along its second axis and convert the result to booleans.

    NOTE: this is not the classic "a != p and label(a) == label(p)" mask. The
    labels are simply repeated `batch_size` times along axis 1, and an entry is
    True wherever the repeated label is non-zero.

    Args:
        labels: integer `Tensor` of rank 2 (`tf.tile` is called with two
                multiples). Presumably of shape [batch_size, 1] -- confirm
                against the caller.

    Returns:
        mask: tf.bool `Tensor` of shape [labels.shape[0], labels.shape[1] * batch_size]
    """
    # Build the multiples as a tensor so the function also works in graph mode
    # (no eager-only `.numpy()` round trip).
    multiples = tf.stack([1, tf.shape(labels)[0]])
    positive = tf.tile(labels, multiples)
    return tf.cast(positive, tf.bool)


def _get_anchor_negative_triplet_mask(labels):
    """Tile `labels` along its second axis and return where the label is zero.

    NOTE: this is not the classic "label(a) != label(n)" mask. The labels are
    repeated `batch_size` times along axis 1, converted to booleans, and then
    negated, so an entry is True wherever the repeated label equals zero.

    Args:
        labels: integer `Tensor` of rank 2 (`tf.tile` is called with two
                multiples). Presumably of shape [batch_size, 1] -- confirm
                against the caller.

    Returns:
        mask: tf.bool `Tensor` of shape [labels.shape[0], labels.shape[1] * batch_size]
    """
    # Build the multiples as a tensor so the function also works in graph mode
    # (no eager-only `.numpy()` round trip).
    multiples = tf.stack([1, tf.shape(labels)[0]])
    positive = tf.cast(tf.tile(labels, multiples), tf.bool)
    return tf.logical_not(positive)


def _get_triplet_mask(labels):
    """Build the 3D boolean mask of valid (anchor, positive, negative) triplets.

    mask[a, p, n] is True only when both conditions hold:
        - the three indices a, p and n are pairwise different
        - labels[a] == labels[p] and labels[a] != labels[n]

    Args:
        labels: tf.int32 `Tensor` with shape [batch_size]
    """
    batch_size = tf.shape(labels)[0]

    # --- Index condition: a, p and n must all differ -----------------------
    # True everywhere except on the diagonal of the (batch, batch) grid.
    off_diagonal = ~tf.cast(tf.eye(batch_size), tf.bool)
    a_differs_from_p = tf.expand_dims(off_diagonal, 2)
    a_differs_from_n = tf.expand_dims(off_diagonal, 1)
    p_differs_from_n = tf.expand_dims(off_diagonal, 0)
    all_distinct = (a_differs_from_p & a_differs_from_n) & p_differs_from_n

    # --- Label condition: same label as p, different label from n ----------
    same_label = tf.equal(tf.expand_dims(labels, 0), tf.expand_dims(labels, 1))
    a_matches_p = tf.expand_dims(same_label, 2)
    a_matches_n = tf.expand_dims(same_label, 1)
    labels_ok = a_matches_p & ~a_matches_n

    # A triplet is valid only if it satisfies both conditions.
    return all_distinct & labels_ok



def batch_hard_triplet_loss(y_true, y_pred):
    """Build the batch-hard triplet loss with a soft (softplus) margin.

    The distances between every image embedding and the batch anchor are
    computed with `_pairwise_distances`. The hardest positive (largest
    distance among positives) and the hardest negative (smallest non-zero
    distance among negatives) are combined into a single triplet.

    Args:
        y_true: labels of the batch. They are cast to int32 and used as a
                multiplicative weight, so this assumes 1 marks a positive and
                0 a negative -- confirm against the data pipeline.
        y_pred: tensor of shape (batch_size, 2 * embed_dim) holding the image
                embedding followed by the anchor embedding.

    Returns:
        triplet_loss: tensor containing max(D + softplus(D), 0), where
                      D = hardest_positive_dist - hardest_negative_dist.
                      It is not averaged by this function.
    """
    labels = tf.cast(y_true, dtype='int32')
    embeddings = y_pred

    # Distance of every image embedding to the anchor (euclidean, not squared).
    pairwise_dist = _pairwise_distances(embeddings, squared=False)
    positive_weight = tf.cast(labels, pairwise_dist.dtype)

    # Distances between the anchor and the positives; negatives are zeroed out.
    anchor_positive_dist = tf.multiply(positive_weight, pairwise_dist)
    # Hardest positive = the one FARTHEST from the anchor => largest distance.
    hardest_positive_dist = tf.reduce_max(anchor_positive_dist, axis=0)

    tf.summary.scalar("hardest_positive_dist", tf.reduce_mean(hardest_positive_dist))

    # Distances between the anchor and the negatives; positives are zeroed out.
    anchor_negative_dist = tf.multiply(1 - positive_weight, pairwise_dist)
    # Drop the zeros so the zeroed-out positives cannot win the minimum.
    non_zero = tf.not_equal(anchor_negative_dist, 0.0)
    # Hardest negative = the one CLOSEST to the anchor => smallest distance.
    hardest_negative_dist = tf.reduce_min(
        tf.boolean_mask(anchor_negative_dist, non_zero), axis=0)

    # Gap between the hardest positive and the hardest negative.
    D = hardest_positive_dist - hardest_negative_dist
    # Soft margin log(1 + exp(D)); softplus computes it without overflowing
    # for large D.
    margin = tf.math.softplus(D)

    tf.summary.scalar("hardest_negative_dist", tf.reduce_mean(hardest_negative_dist))

    # Combine biggest d(a, p) and smallest d(a, n) into final triplet loss
    triplet_loss = tf.maximum(D + margin, 0.0)

    return triplet_loss

# Find the Accuracy based on the hardest positive and hardest negative
def compute_accuracy_hard(y_true, y_pred):
    """Fraction of positives that beat the hardest negative by the soft margin.

    A positive counts as correct when its distance to the anchor is at most
    `hardest_negative_dist - margin`, i.e. it sits closer to the anchor than
    the closest negative by at least the margin. The metric is the share of
    positives (with non-zero distance) that satisfy this.

    Args:
        y_true: labels of the batch. They are cast to int32 and used as a
                multiplicative weight, so this assumes 1 marks a positive and
                0 a negative -- confirm against the data pipeline.
        y_pred: tensor of shape (batch_size, 2 * embed_dim) holding the image
                embedding followed by the anchor embedding.

    Returns:
        accuracy: scalar tensor with the mean of the 0/1 indicators. It is NaN
                  when the batch has no positive with a non-zero distance.
    """
    labels = tf.cast(y_true, dtype='int32')
    embeddings = y_pred

    pairwise_dist = _pairwise_distances(embeddings, squared=False)
    positive_weight = tf.cast(labels, pairwise_dist.dtype)

    # Hardest positive: the positive farthest from the anchor.
    anchor_positive_dist = tf.multiply(positive_weight, pairwise_dist)
    hardest_positive_dist = tf.reduce_max(anchor_positive_dist, axis=0)
    # Keep only the positives whose distance is not 0 (drops zeroed negatives).
    positive_non_zero = tf.boolean_mask(
        anchor_positive_dist, tf.not_equal(anchor_positive_dist, 0.0))

    # Hardest negative: the closest negative, ignoring the zeroed-out positives.
    anchor_negative_dist = tf.multiply(1 - positive_weight, pairwise_dist)
    negative_non_zero = tf.boolean_mask(
        anchor_negative_dist, tf.not_equal(anchor_negative_dist, 0.0))
    hardest_negative_dist = tf.reduce_min(negative_non_zero, axis=0)

    # Soft margin log(1 + exp(D)); softplus computes it without overflowing
    # for large D.
    D = hardest_positive_dist - hardest_negative_dist
    margin = tf.math.softplus(D)

    # 1.0 for each positive that is closer than the hardest negative minus margin.
    positive_less_negative = tf.less_equal(positive_non_zero, hardest_negative_dist - margin)
    positive_less_negative = tf.cast(positive_less_negative, tf.float32)

    accuracy = tf.reduce_mean(positive_less_negative)
    return accuracy

In [2]:
import PIL
from PIL import Image
import numpy as np

def background_thumbnail(template, modality, thumbnail_size=(200,200)):
    """Fit an image inside a white canvas of fixed size and centre it.

    Args:
        template: image as an array accepted by `Image.fromarray`.
        modality: PIL mode used for both the image and the canvas (e.g. 'L').
        thumbnail_size: size of the output canvas, passed to PIL as given.

    Returns:
        The composed canvas as a numpy array.
    """
    canvas_w, canvas_h = thumbnail_size[0], thumbnail_size[1]

    picture = Image.fromarray(template).convert(modality)
    # `thumbnail` resizes in place so the picture fits inside the canvas.
    picture.thumbnail(thumbnail_size)
    pic_w, pic_h = picture.size

    canvas = Image.new(modality, thumbnail_size, "white")
    # Offset that leaves equal margins on both sides of the picture.
    offset = (int((canvas_w - pic_w) / 2), int((canvas_h - pic_h) / 2))
    canvas.paste(picture, offset)

    return np.array(canvas)

In [3]:
def find_score(file_name):
    """Derive the binary label of an image from its file name.

    The name is split on underscores and the second field is inspected:
    it maps to 0 when it equals '1.png' (case-insensitive), otherwise to 1.
    A name without any underscore raises IndexError.
    """
    second_field = file_name.split('_')[1]
    return 0 if second_field.lower() == '1.png' else 1

In [1]:
# Names of the patterns handled by the export loop below. Each name selects the
# template image (<name>.png) and the trained model file, while the position of
# the name in this list selects the input folder and the output CSV number.
patterns = ['cross', 'face', 'rail', 'rombo', 'rett_diag', 'rect', 'cross_vert']

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Model, load_model
import os
import cv2
import pandas as pd 
import json

root = '../'
input_shape = (100, 100, 3)
models_folder = root + 'first_model_old_3'

template_dir = os.path.join(root, 'templates')
results_dir = os.path.join(root, 'results')


def _load_network_input(path, shape):
    """Read a grayscale image and turn it into a float32 array of 3 equal channels.

    The image is centred on a white canvas of size (shape[0], shape[1]),
    scaled to the [0, 1] range, and its single channel is repeated 3 times.
    """
    gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise FileNotFoundError('Could not read image: ' + path)
    padded = background_thumbnail(gray, 'L', (shape[0], shape[1]))
    padded = padded.astype('float32')
    padded /= 255.
    return np.repeat(padded[..., np.newaxis], 3, -1)


# Make sure the CSV files have somewhere to go.
os.makedirs(results_dir, exist_ok=True)

for p, pattern in enumerate(patterns):
    # The template of the pattern is the anchor shared by every image.
    im_template = _load_network_input(os.path.join(template_dir, pattern + ".png"), input_shape)

    model = load_model(os.path.join(models_folder, 'best_model_triplet_' + pattern + '_transfer.hdf5'),
                       custom_objects={'batch_hard_triplet_loss': batch_hard_triplet_loss,
                                       'compute_accuracy_hard': compute_accuracy_hard})

    # Images of pattern number p live in the folder named after that number.
    image_dir = os.path.join(root, 'cropped_completi', str(p))
    images = os.listdir(image_dir)

    images_dataset = np.array([_load_network_input(os.path.join(image_dir, img), input_shape)
                               for img in images])
    anchors_dataset = np.array([im_template] * len(images))
    labels_dataset = [find_score(img) for img in images]

    # Each output row holds the image embedding followed by the anchor embedding.
    embeddings = model.predict([images_dataset, anchors_dataset], batch_size=32)

    # One image-to-anchor distance per image, flattened to 1-D.
    distances = _pairwise_distances(embeddings, squared=False).numpy().reshape(-1)

    embeddings_dataframe = pd.DataFrame({
        'name': [img.split('_')[0] for img in images],
        'scores': distances,
        'labels': labels_dataset,
        # Embeddings are stored as JSON strings so they fit in one CSV cell.
        'embeddings': [json.dumps(e.tolist()) for e in embeddings],
    })

    embeddings_dataframe.to_csv(os.path.join(results_dir, 'embeddings_' + str(p) + '.csv'),
                                header=True, index=False)



# model.