Training for a similarity model using triplet loss.

Based on this tutorial: https://keras.io/examples/vision/siamese_network/

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import os
import random
import tensorflow as tf
from pathlib import Path

2021-09-17 00:08:41.237082: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-17 00:08:41.237112: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
from tensorflow.keras import applications
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import optimizers
from tensorflow.keras import metrics
from tensorflow.keras import Model
from tensorflow.keras.applications import resnet

In [3]:
import tensorflow

In [4]:
# Show the TensorFlow version this notebook is running against.
tensorflow.__version__

'2.6.0'

In [5]:
# Spatial size every image is resized to in preprocess_image; it also defines
# the input shape of the ResNet backbone and the siamese inputs below.
# The commented values are alternatives that were tried.
# target_shape = (28, 28)
target_shape = (32,32)
# target_shape = (56, 56)

In [6]:
def preprocess_image(filename):
    """
    Read a JPEG file from disk and return it as a float32 image tensor
    resized to ``target_shape``.

    Args:
        filename: path of the JPEG file to load.

    Returns:
        A 3-channel float32 tensor whose spatial size is ``target_shape``.
    """
    raw_bytes = tf.io.read_file(filename)
    # Force three channels so every image ends up with the same depth.
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    return tf.image.resize(as_float, target_shape)


def preprocess_triplets(anchor, positive, negative):
    """
    Load and preprocess the three images of one triplet.

    Args:
        anchor, positive, negative: file names of the three images.

    Returns:
        A 3-tuple ``(anchor_image, positive_image, negative_image)`` where each
        element is the result of ``preprocess_image`` on the matching file.
    """
    anchor_image = preprocess_image(anchor)
    positive_image = preprocess_image(positive)
    negative_image = preprocess_image(negative)
    return (anchor_image, positive_image, negative_image)

def visualize(anchor, positive, negative, num_samples=3):
    """
    Visualize a few triplets from the supplied batches.

    Draws a grid with one row per triplet and three columns
    (anchor, positive, negative).

    Args:
        anchor, positive, negative: indexable batches of images; the first
            ``num_samples`` entries of each are shown.
        num_samples: number of triplets (rows) to draw.
    """

    def show(ax, image):
        ax.imshow(image)
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    fig = plt.figure(figsize=(9, 3 * num_samples))

    # squeeze=False keeps the axes array 2-D even for a single row; without it
    # num_samples == 1 yields a 1-D array and the axs[i, col] lookups fail.
    axs = fig.subplots(num_samples, 3, squeeze=False)
    for i in range(num_samples):
        show(axs[i, 0], anchor[i])
        show(axs[i, 1], positive[i])
        show(axs[i, 2], negative[i])

In [7]:
# Root folder of the dataset; each class is expected in its own sub-folder
# (see the glob below). NOTE(review): this is a machine-specific absolute
# path and must be adapted before running elsewhere.
image_folder = '/mnt/osn3/caceres/classes/geological/geological_similarity'

# Class names; each is used as a sub-folder name under image_folder.
classes = [ 'andesite', 'gneiss', 'marble', 'quartzite', 'rhyolite', 'schist']

In [8]:
import glob

In [9]:
# Map class index -> list of the .jpg paths found in that class's sub-folder.
filenames = {}
for i, label in enumerate(classes):
    image_list = glob.glob(image_folder + '/' + label + '/*.jpg')
    filenames[i] = list(image_list)

In [10]:
# Number of image files collected for each class index.
[len(v) for k,v in filenames.items()]

[5000, 5000, 4998, 5000, 5000, 5000]

In [11]:
# Fraction of every class that is held out for the test set.
test_perc = 0.2

train_set, test_set = [], []
for class_files in filenames.values():
    # Shuffle in place, then cut: the first `test_count` files of the class go
    # to the test set and the remainder to the training set, so the split is
    # stratified per class.
    random.shuffle(class_files)
    test_count = int(test_perc * len(class_files))
    test_set.extend(class_files[:test_count])
    train_set.extend(class_files[test_count:])

In [12]:
# Sizes of the resulting training and test file lists.
len(train_set), len(test_set)

(23999, 5999)

# Prepare data

In [13]:
def train_gen(image_list, batch_size=32):
    """
    Endlessly yield batches of (anchor, positive, negative) image triplets.

    The class of an image is the name of its parent directory. For every
    anchor, the positive is a random image of the same class with a different
    file name, and the negative is a random image of any other class.

    Args:
        image_list: sequence of image paths laid out as ``.../<class>/<file>``.
        batch_size: number of triplets per yielded batch.

    Yields:
        A list ``[anchors, positives, negatives]`` of three tensors, each the
        stack (along axis 0) of ``batch_size`` preprocessed images.

    Raises:
        ValueError: if ``image_list`` holds fewer than two classes, or a class
            without two differently named images. Sampling a valid positive or
            negative would otherwise never terminate. ``random.sample`` also
            raises ValueError when ``batch_size`` exceeds ``len(image_list)``.
    """

    def class_of(path):
        # Class label = name of the directory that contains the file.
        return os.path.basename(os.path.dirname(path))

    # Index the paths by class once, so positives are drawn directly from the
    # anchor's class instead of by rejection sampling over the whole list.
    by_class = {}
    for path in image_list:
        by_class.setdefault(class_of(path), []).append(path)

    if len(by_class) < 2:
        raise ValueError(
            "train_gen needs images from at least two classes to pick negatives"
        )
    for class_name, paths in by_class.items():
        if len({os.path.basename(p) for p in paths}) < 2:
            raise ValueError(
                "class {!r} needs at least two differently named images "
                "to pick positives".format(class_name)
            )

    while True:
        # Choose a random sample of anchors for this batch.
        anchors = random.sample(image_list, batch_size)
        positives = []
        negatives = []

        for anchor in anchors:
            class_name = class_of(anchor)
            image_name = os.path.basename(anchor)

            # Positive: same class, different file name. Guaranteed to
            # terminate by the validation above.
            same_class = by_class[class_name]
            positive = random.choice(same_class)
            while os.path.basename(positive) == image_name:
                positive = random.choice(same_class)
            positives.append(positive)

            # Negative: uniform over all images that belong to another class.
            negative = random.choice(image_list)
            while class_of(negative) == class_name:
                negative = random.choice(image_list)
            negatives.append(negative)

        tuple_triplets = [
            preprocess_triplets(anchor, pos, neg)
            for anchor, pos, neg in zip(anchors, positives, negatives)
        ]

        # Regroup the per-triplet tuples into three per-role batches.
        anchor_images, positive_images, negative_images = zip(*tuple_triplets)
        dataset = [
            tensorflow.stack(anchor_images, 0),
            tensorflow.stack(positive_images, 0),
            tensorflow.stack(negative_images, 0),
        ]

        yield dataset

In [14]:
# Smoke test: pull a single batch of 4 triplets from the generator.
o = next(train_gen(test_set, 4))

2021-09-17 00:08:43.098781: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-09-17 00:08:43.099368: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-09-17 00:08:43.099412: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublas.so.11'; dlerror: libcublas.so.11: cannot open shared object file: No such file or directory
2021-09-17 00:08:43.099449: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcublasLt.so.11'; dlerror: libcublasLt.so.11: cannot open shared object file: No such file or directory
2021-09-17 00:08:43.101024: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Co

In [None]:
# Draw one batch of 8 triplets from the test files and show the first 5.
visualize(*list(next(train_gen(test_set, batch_size=8))),num_samples=5)

In [None]:
# Backbone: ResNet50 with ImageNet weights and without its classification
# head, built for inputs of shape target_shape + (3,).
base_cnn = resnet.ResNet50(
    weights="imagenet", input_shape=target_shape + (3,), include_top=False
)

# Embedding head: flatten the backbone output, apply two Dense (ReLU) +
# BatchNormalization stages (512 and 256 units), then a final linear Dense
# layer that outputs the 256-dimensional embedding.
flatten = layers.Flatten()(base_cnn.output)
dense1 = layers.Dense(512, activation="relu")(flatten)
dense1 = layers.BatchNormalization()(dense1)
dense2 = layers.Dense(256, activation="relu")(dense1)
dense2 = layers.BatchNormalization()(dense2)
output = layers.Dense(256)(dense2)

embedding = Model(base_cnn.input, output, name="Embedding")

# Freeze every backbone layer that precedes "conv5_block1_out" in
# base_cnn.layers; that layer and all later backbone layers stay trainable.
# The head layers above are not in base_cnn.layers, so the loop leaves their
# trainable flag untouched.
trainable = False
for layer in base_cnn.layers:
    if layer.name == "conv5_block1_out":
        trainable = True
    layer.trainable = trainable

In [None]:
class DistanceLayer(layers.Layer):
    """
    Layer that turns three embeddings into two squared distances:
    anchor-to-positive and anchor-to-negative.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def call(self, anchor, positive, negative):
        """Return ``(ap_distance, an_distance)``, each summed over the last axis."""

        def squared_distance(left, right):
            # Sum of squared element-wise differences along the last axis.
            return tf.reduce_sum(tf.square(left - right), -1)

        return (
            squared_distance(anchor, positive),
            squared_distance(anchor, negative),
        )


# One input per triplet role, all with the same image shape.
anchor_input = layers.Input(name="anchor", shape=target_shape + (3,))
positive_input = layers.Input(name="positive", shape=target_shape + (3,))
negative_input = layers.Input(name="negative", shape=target_shape + (3,))

# The same `embedding` model (shared weights) encodes all three inputs; the
# DistanceLayer then produces the (anchor-positive, anchor-negative) distances.
# NOTE(review): preprocess_image converts images with
# tf.image.convert_image_dtype(..., tf.float32), while resnet.preprocess_input
# is documented for pixel values in the 0-255 range — confirm this scaling is
# intended.
distances = DistanceLayer()(
    embedding(resnet.preprocess_input(anchor_input)),
    embedding(resnet.preprocess_input(positive_input)),
    embedding(resnet.preprocess_input(negative_input)),
)

# Model mapping the three images to the pair of distances.
siamese_network = Model(
    inputs=[anchor_input, positive_input, negative_input], outputs=distances
)

In [None]:
class SiameseModel(Model):
    """Wrapper model that trains and evaluates a siamese network with triplet loss.

    The wrapped network returns the anchor-positive and anchor-negative
    distances; the loss for a triplet is:
       L(A, P, N) = max(‖f(A) - f(P)‖² - ‖f(A) - f(N)‖² + margin, 0)

    Args:
        siamese_network: model mapping a triplet batch to
            ``(ap_distance, an_distance)``.
        margin: margin added to the distance difference before clamping at 0.
    """

    def __init__(self, siamese_network, margin=0.5):
        super().__init__()
        self.siamese_network = siamese_network
        self.margin = margin
        # Running mean of the loss, reported as "loss" by both step functions.
        self.loss_tracker = metrics.Mean(name="loss")

    def call(self, inputs):
        """Forward the triplet batch to the wrapped siamese network."""
        return self.siamese_network(inputs)

    def train_step(self, data):
        """Run one optimisation step on a triplet batch and report the mean loss."""
        weights = self.siamese_network.trainable_weights

        # Record the forward pass so the loss can be differentiated with
        # respect to the network's trainable weights.
        with tf.GradientTape() as tape:
            loss = self._compute_loss(data)
        grads = tape.gradient(loss, weights)

        # Apply the update with the optimizer given to `compile()`.
        self.optimizer.apply_gradients(zip(grads, weights))

        self.loss_tracker.update_state(loss)
        return {"loss": self.loss_tracker.result()}

    def test_step(self, data):
        """Evaluate a triplet batch (no weight update) and report the mean loss."""
        self.loss_tracker.update_state(self._compute_loss(data))
        return {"loss": self.loss_tracker.result()}

    def _compute_loss(self, data):
        """Return the per-triplet loss max(ap - an + margin, 0)."""
        ap_distance, an_distance = self.siamese_network(data)
        # Clamp at zero: triplets already separated by the margin contribute
        # no loss.
        return tf.maximum(ap_distance - an_distance + self.margin, 0.0)

    @property
    def metrics(self):
        # Listing the tracker here lets Keras reset it automatically.
        return [self.loss_tracker]

In [None]:
# train_gen never terminates, so the length of an epoch is set explicitly:
# one pass is counted as len(set) // batch_size batches.
batch_size = 32
steps_per_epoch = len(train_set) // batch_size

validation_steps = len(test_set) // batch_size

steps_per_epoch,validation_steps

In [None]:
# Wrap the siamese network with the triplet-loss training logic and compile it
# with Adam at learning rate 1e-4. No loss is passed to compile() because
# SiameseModel computes the loss inside its own train/test steps.
siamese_model = SiameseModel(siamese_network, margin=10)
siamese_model.compile(optimizer=optimizers.Adam(0.0001))

# Multiply the learning rate by 0.1 after 3 epochs without val_loss improvement.
lr_steps_callback = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.1, patience=3, verbose=0,
    mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)

# Stop after 10 epochs without improvement of the *training* loss.
# NOTE(review): this monitors 'loss' while the LR schedule monitors 'val_loss'
# — confirm that is intended.
es_callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)

# siamese_model.fit(train_dataset, epochs=50, validation_data=val_dataset)

# Both training and validation batches come from train_gen, so validation
# triplets are randomly re-sampled from test_set on every evaluation.
history = siamese_model.fit(train_gen(train_set, batch_size=batch_size), 
                  epochs=50, 
                  validation_data=train_gen(test_set, batch_size=batch_size),
                  steps_per_epoch=steps_per_epoch,
                  validation_steps=validation_steps, 
                  callbacks=[lr_steps_callback, es_callback])

# siamese_model.fit(input_fn(train_set, batch_size=32), 
#                   epochs=50, 
#                   validation_data=input_fn(test_set, batch_size=32))

In [None]:
# Draw one fresh batch of training triplets and display a few of them.
# sample = next(iter(train_dataset))
sample = next(train_gen(train_set, batch_size=batch_size))
visualize(*sample)

# Embed each role of the batch with the embedding model, applying the same
# resnet.preprocess_input step that the siamese network applies to its inputs.
anchor, positive, negative = sample
anchor_embedding, positive_embedding, negative_embedding = (
    embedding(resnet.preprocess_input(anchor)),
    embedding(resnet.preprocess_input(positive)),
    embedding(resnet.preprocess_input(negative)),
)

In [None]:
# Keras metrics are stateful: calling the same metric object twice returns the
# running mean over BOTH calls. A fresh metric is therefore created for each
# measurement so the negative similarity is not averaged with the positive one.
cosine_similarity = metrics.CosineSimilarity()
positive_similarity = cosine_similarity(anchor_embedding, positive_embedding)
print("Positive similarity:", positive_similarity.numpy())

cosine_similarity = metrics.CosineSimilarity()
negative_similarity = cosine_similarity(anchor_embedding, negative_embedding)
print("Negative similarity", negative_similarity.numpy())

# Values noted from an earlier run (presumably taken while a single metric
# instance was shared between the two calls):
# Positive similarity: 0.99999315
# Negative similarity 0.9999791

In [None]:
# Presumably a deliberate stop: `breakhere` is not defined anywhere in this
# notebook, so evaluating it raises NameError and halts "Run All" before the
# save/export cells below.
breakhere

# Save model

In [None]:
# Destination directory for the SavedModel export.
# NOTE(review): user-specific absolute path; adapt before running elsewhere.
save_path = '/home/ccaceresgarcia/Documents/Projects/image_search/ImageSearch/tripletExtractor/'

In [None]:
# Export only the embedding model (not the siamese wrapper) in SavedModel format.
tensorflow.saved_model.save(embedding, save_path)

In [None]:
# Additionally save the embedding model as an HDF5 file in the working directory.
embedding.save('TripletResnet50_v1.h5')

In [None]:
# Plot the flattened embedding of the first anchor image in the sample batch.
plt.plot(list(embedding(resnet.preprocess_input(anchor))[0,...].numpy().reshape(-1)))

In [None]:
# Display the raw anchor batch tensor (prints the whole batch).
anchor

In [None]:
# First anchor image of the batch as a NumPy array.
im = anchor[0,...].numpy()

In [None]:
# Shape of that single image array.
im.shape

In [None]:
# Render the image as it is fed to the pipeline (before resnet.preprocess_input).
plt.imshow(im)

# Get similar images

In [None]:

# Embed every image of every class. The three lists are filled in lock-step,
# so position n of `labels`, `filenames` and `features` all describe the same
# image. Note that `filenames` is rebound here from the earlier per-class dict
# to a flat list of paths.
labels, filenames, features = [], [], []
for label in classes:
    for filename in glob.glob(image_folder + '/' + label + '/*.jpg'):
        # Add a leading batch axis of size 1 before calling the model.
        img = tensorflow.expand_dims(preprocess_image(filename), 0)

        # Flatten the model output to a plain list of values.
        feats = list(embedding(resnet.preprocess_input(img)).numpy().reshape(-1))
        features.append(feats)
        labels.append(label)
        filenames.append(filename)

# Array view of all embeddings, one row per image.
features2 = np.array(features)
print(features2.shape)

## Write vectors to file for s3 upload

In [None]:
# String form of every embedding vector (the str() of its list of values);
# these strings are what the JSON exports below store.
f3 = [str(list(x)) for x in features]

In [None]:
import csv

# Write one embedding per row with one numeric column per dimension.
# `writerows` expects each row to be a sequence of fields; handing it the
# stringified vectors in `f3` made csv iterate every string character by
# character and emit one column per character.
with open("resnet_triplet_vectors.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerows(features)

In [None]:
# Group the exported vectors by class: class index -> list of
# (stringified vector, file name) pairs for the images carrying that label.
label_features = {
    i: [(x, name) for name, x, la in zip(filenames, f3, labels) if la == label]
    for i, label in enumerate(classes)
}

In [None]:
# Number of exported vectors per class index.
[len(x) for i, x in label_features.items()]

### Per Class files

In [None]:
import json

In [None]:
# Write one JSON file per class containing the class label, the file names and
# the matching vectors. The vectors come from `f3`, so they are stored as
# strings rather than as JSON arrays.
for i, label in enumerate(classes):
    # Split the (vector, name) pairs back into two parallel tuples.
    f4, lab_names = zip(*label_features[i])
    
    feature_data = {'labels':label, 
                'filenames':lab_names,
                'features':f4}
    
    with open('resnet_triplet_vectors_{}.json'.format(label), 'w') as f:
        json.dump(feature_data, f)

In [None]:
# Combined export for all classes: parallel lists of labels, file names and
# stringified vectors (position n of each list describes the same image).
feature_data = {'labels':labels, 
                'filenames':filenames,
                'features':f3}

In [None]:
import json
# Persist the combined export as a single JSON file in the working directory.
with open('resnet_triplet_vectors.json', 'w') as f:
    json.dump(feature_data, f)

## Make KNN object

In [None]:
# for i, label in enumerate(classes):
#     image_list = glob.glob(image_folder + '/' + label + '/*.jpg')
#     print(i,label, len(image_list))
    
# len(labels), len(filenames), len(features)

In [None]:
from sklearn.neighbors import NearestNeighbors

In [None]:
# Brute-force Euclidean nearest-neighbour index over all extracted embeddings.
# Queries return 30 neighbours unless a different n_neighbors is requested.
# Row n of the index corresponds to filenames[n] / labels[n].
neighbors = NearestNeighbors(n_neighbors=30, algorithm='brute',metric='euclidean').fit(features)

In [None]:
# import pickle 

# # Its important to use binary mode 
# knnPickle = open('knnpickle_file', 'wb') 

# # source, destination 
# pickle.dump(neighbors, knnPickle)                      

# # load the model from disk
# loaded_model = pickle.load(open('knnpickle_file.pickle', 'rb'))

# with open('filenames.pickle', 'wb') as f:
#     # Pickle the 'data' dictionary using the highest protocol available.
#     pickle.dump(filenames, f, pickle.HIGHEST_PROTOCOL)

# with open('filenames.pickle', 'rb') as f:
#     # The protocol version used is detected automatically, so we do not
#     # have to specify it.
#     loaded_filenames = pickle.load(f)

In [None]:
# boto3
# !pip3 install requests_aws4auth
# !pip3 install elasticsearch=='7.13.4' #https://opensearch.org/docs/clients/index/


In [None]:
from io import BytesIO
import base64
import glob
import os
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
import random

In [None]:
# Number of nearest matches to display per query.
k = 4

# For 10 randomly chosen indexed images, embed the image again, look up its
# nearest neighbours and show the first k matches side by side.
for query in [random.randint(0,len(filenames)-1) for _ in range(10)]:
    
    img = preprocess_image(filenames[query])
    img = tensorflow.expand_dims(img, 0)
    flat_feature = list(embedding(resnet.preprocess_input(img)).numpy().reshape(-1))
    distances, indices = neighbors.kneighbors([flat_feature])
    
    _, axes = plt.subplots(1,k,figsize=(16,4))
    for i in range(k):
        # load the image
        match = indices[0][i]
        image = Image.open(filenames[match])
        # convert image to numpy array
        im = np.asarray(image)
        axes.flat[i].imshow(im)
        # Title: query index-label on the first line, then match index-label-distance.
        axes.flat[i].set_title('{}-{} \n {}-{}-{:.3f}'.format(query, 
                                                              labels[query], 
                                                              match, 
                                                              labels[match], 
                                                              round(distances[0][i],3)))


In [None]:
def get_class(x):
    """Return the name of the directory that directly contains file path ``x``."""
    parent_dir = os.path.dirname(x)
    return os.path.basename(parent_dir)

# Quick check on the first indexed file.
get_class(filenames[0])

In [None]:

def get_features(filename):
    """
    Embed a single image file.

    Args:
        filename: path of the image to embed.

    Returns:
        The flattened output of the embedding model as a list of values.
    """
    # The model is called on a batch, so add a leading axis of size 1.
    batch = tensorflow.expand_dims(preprocess_image(filename), 0)
    vector = embedding(resnet.preprocess_input(batch)).numpy()
    return list(vector.reshape(-1))

def calculate_accuracy(model_func, filenames, k=5):
    """
    Fraction of retrieved neighbours that share the class of their query image.

    Every path in ``filenames`` is embedded with ``model_func`` and looked up
    in the global ``neighbors`` index; each of its ``k`` nearest neighbours
    counts as correct when ``get_class`` gives the same class as the query.

    Args:
        model_func: callable mapping a file path to its feature vector.
        filenames: list of image paths. It is used both as the query set and
            to translate neighbour indices back to paths, so it must be the
            list, in the same order, that ``neighbors`` was fitted on.
        k: number of nearest neighbours to score per query.

    Returns:
        correct / (correct + incorrect) over all queries and neighbours.

    Raises:
        ValueError: if ``k`` is smaller than 1 or ``filenames`` is empty.

    NOTE(review): the queries are themselves part of the index, so the closest
    neighbour is presumably the query image itself, which would inflate the
    score (the recorded k=1 result is exactly 1.0) — confirm.
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    if len(filenames) == 0:
        raise ValueError("filenames must not be empty")

    correct = 0
    incorrect = 0
    for query_path in filenames:
        query_class = get_class(query_path)
        flat_feature = model_func(query_path)
        # Ask for exactly k neighbours instead of slicing the index default
        # (30), so values of k above that default work as well.
        indices = neighbors.kneighbors(
            [flat_feature], n_neighbors=k, return_distance=False
        )
        for match in indices[0]:
            if get_class(filenames[match]) == query_class:
                correct += 1
            else:
                incorrect += 1

    return correct / (correct + incorrect)

# `time` is imported here because the notebook's own `import time` cell comes
# after this one; without it this loop raises NameError on a fresh kernel.
import time

# Retrieval accuracy for k = 1..19, with the wall-clock time of each pass.
for k in range(1,20):
    ts = time.time()
    acc = calculate_accuracy(get_features, filenames, k)
    print(k, ':', acc, 'took: ', time.time() - ts)

In [None]:
import time

In [None]:
# Retrieval accuracy for k = 1..19 with the wall-clock seconds of each pass.
# Every pass re-embeds all images (see calculate_accuracy), which is why each
# recorded pass below took close to an hour.
for k in range(1,20):
    ts = time.time()
    acc = calculate_accuracy(get_features, filenames, k) 
    print(k, ':', acc, 'took: ', time.time() - ts)

# 1 : 1.0 took:  3362.8139638900757
# 2 : 0.9531468764584305 took:  3293.8385870456696
# 3 : 0.9357290486032402 took:  3257.395818710327
# 4 : 0.9269367957863858 took:  3337.58216547966
# 5 : 0.9213880925395026 took:  3263.669928789139
# 6 : 0.917550058892815 took:  3321.3202335834503
# 7 : 0.9145133485089482 took:  3265.5803034305573
# 8 : 0.9123941596106407 took:  3340.8867971897125
# 9 : 0.910668118615315 took:  3289.156872034073
# 10 : 0.9092439495966398 took:  3318.057454586029
# 11 : 0.9080847814096698 took:  3267.5558347702026
