## GPU Init

In [1]:
# Taken from
# https://stackoverflow.com/questions/48750199/google-colaboratory-misleading-information-about-its-gpu-only-5-ram-available
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize
import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()
# Colab only provides one GPU and it is not always guaranteed
gpu = GPUs[0]
def printm():
  process = psutil.Process(os.getpid())
  print("RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
  print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))



In [2]:
printm()

('RAM Free: 12.8 GB', ' | Proc size: 91.7 MB')
GPU RAM Free: 15079MB | Used: 0MB | Util   0% | Total 15079MB


## Initialization

In [3]:
# Clone repo
!git clone https://github.com/MatchLab-Imperial/keras_triplet_descriptor

fatal: destination path 'keras_triplet_descriptor' already exists and is not an empty directory.


In [4]:
# Change directory
%cd /content/keras_triplet_descriptor    


/content/keras_triplet_descriptor


In [5]:
# Download data
!wget -O hpatches_data.zip https://imperialcollegelondon.box.com/shared/static/ah40eq7cxpwq4a6l4f62efzdyt8rm3ha.zip


--2020-08-19 06:45:20--  https://imperialcollegelondon.box.com/shared/static/ah40eq7cxpwq4a6l4f62efzdyt8rm3ha.zip
Resolving imperialcollegelondon.box.com (imperialcollegelondon.box.com)... 185.235.236.197
Connecting to imperialcollegelondon.box.com (imperialcollegelondon.box.com)|185.235.236.197|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /public/static/ah40eq7cxpwq4a6l4f62efzdyt8rm3ha.zip [following]
--2020-08-19 06:45:21--  https://imperialcollegelondon.box.com/public/static/ah40eq7cxpwq4a6l4f62efzdyt8rm3ha.zip
Reusing existing connection to imperialcollegelondon.box.com:443.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://imperialcollegelondon.app.box.com/public/static/ah40eq7cxpwq4a6l4f62efzdyt8rm3ha.zip [following]
--2020-08-19 06:45:21--  https://imperialcollegelondon.app.box.com/public/static/ah40eq7cxpwq4a6l4f62efzdyt8rm3ha.zip
Resolving imperialcollegelondon.app.box.com (imperialcollegelondon.app.bo

In [6]:
# Extract data
!unzip -q ./hpatches_data.zip
!rm ./hpatches_data.zip

replace hpatches/v_man/e3.anisjitter? [y]es, [n]o, [A]ll, [N]one, [r]ename: 

In [7]:
import sys
import json
import os
import glob
import time
import tensorflow as tf
import numpy as np
import cv2
import random
import matplotlib.pyplot as plt

import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Activation, Flatten, Input, Lambda, Reshape
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization 
from keras.layers import Input, UpSampling2D, concatenate  

from read_data import HPatches, DataGeneratorDesc, hpatches_sequence_folder, DenoiseHPatches, tps
from utils import generate_desc_csv, plot_denoise, plot_triplet

Using TensorFlow backend.


In [8]:
def plot_history(history, metric = None):
  # Plots the loss history of training and validation (if existing)
  # and a given metric
  
  if metric != None:
    fig, axes = plt.subplots(2,1)
    axes[0].plot(history.history[metric])
    try:
      axes[0].plot(history.history['val_'+metric])
      axes[0].legend(['Train', 'Val'])
    except:
      pass
    axes[0].set_title('{:s}'.format(metric))
    axes[0].set_ylabel('{:s}'.format(metric))
    axes[0].set_xlabel('Epoch')
    fig.subplots_adjust(hspace=0.5)
    axes[1].plot(history.history['loss'])
    try:
      axes[1].plot(history.history['val_loss'])
      axes[1].legend(['Train', 'Val'])
    except:
      pass
    axes[1].set_title('Model Loss')
    axes[1].set_ylabel('Loss')
    axes[1].set_xlabel('Epoch')
  else:
    plt.plot(history.history['loss'])
    try:
      plt.plot(history.history['val_loss'])
      plt.legend(['Train', 'Val'])
    except:
      pass
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')

In [9]:
random.seed(1234)
np.random.seed(1234)
tf.random.set_seed(1234)

In [10]:
hpatches_dir = './hpatches'
splits_path = './splits.json'

splits_json = json.load(open(splits_path, 'rb'))
split = splits_json['a']

train_fnames = split['train']
test_fnames = split['test']

seqs = glob.glob(hpatches_dir+'/*')
seqs = [os.path.abspath(p) for p in seqs]   
seqs_train = list(filter(lambda x: x.split('/')[-1] in train_fnames, seqs)) 
seqs_test = list(filter(lambda x: x.split('/')[-1] in split['test'], seqs)) 


## Final UNet/L2Net architecture


In [11]:
def final_UNet(shape):
  """
  Returns the denoising model. 
  The input for the function is the size of the patch, which will be *1x32x32*, 
  and it outputs a keras denoising model.

  """
    
  inputs = Input(shape)
  
  ## Encoder
  conv1 = Conv2D(16, 3, activation = 'relu', padding = 'same', kernel_initializer = 'lecun_uniform')(inputs)
  pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
  
  ## Bottleneck
  conv2 = Conv2D(32, 3, activation = 'relu', padding = 'same', kernel_initializer = 'lecun_uniform')(pool1)
  pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)  
  
  conv3 = Conv2D(64, 3, activation = 'relu', padding = 'same', kernel_initializer = 'lecun_uniform')(pool2)
  pool3 = MaxPooling2D(pool_size=(2, 2))(conv3)
  
  ## Decoder
  conv4 = Conv2D(128, 3, activation = 'relu', padding = 'same', kernel_initializer = 'lecun_uniform')(pool3)
  
  up5 = Conv2D(256, 2, activation = 'relu', padding = 'same', kernel_initializer = 'lecun_uniform')(UpSampling2D(size = (2,2))(conv4))
  merge5 = concatenate([conv3,up5], axis = -1)
  conv5 = Conv2D(256, 3, activation = 'relu', padding = 'same', kernel_initializer = 'lecun_uniform')(merge5)

  up6 = Conv2D(512, 2, activation = 'relu', padding = 'same', kernel_initializer = 'lecun_uniform')(UpSampling2D(size = (2,2))(conv5))
  merge6 = concatenate([conv2,up6], axis = -1)
  conv6 = Conv2D(512, 3, activation = 'relu', padding = 'same', kernel_initializer = 'lecun_uniform')(merge6)
    
  up7 = Conv2D(1024, 2, activation = 'relu', padding = 'same', kernel_initializer = 'lecun_uniform')(UpSampling2D(size = (2,2))(conv6))
  merge7 = concatenate([conv1,up7], axis = -1)
  conv7 = Conv2D(1024, 3, activation = 'relu', padding = 'same', kernel_initializer = 'lecun_uniform')(merge7)

  conv8 = Conv2D(1,3,padding='same')(conv7)

  shallow_net = Model(inputs = inputs, outputs = conv8)
  
  return shallow_net

def final_L2Net(shape):
  
  '''
  Builds the descriptor model. The input for the function is the size of the patch, 
  which will be *1x32x32*, and it outputs a keras descriptor model. The model we use as 
  baseline returns a descriptor of dimension *128x1*.
  
  '''
  
  init_weights = keras.initializers.glorot_normal()
  
  descriptor_model = Sequential()
  descriptor_model.add(Conv2D(32, 3, padding='same', input_shape=shape, use_bias = True, kernel_initializer=init_weights))
  descriptor_model.add(BatchNormalization(axis = -1))
  descriptor_model.add(Activation('relu'))

  descriptor_model.add(Conv2D(32, 3, padding='same', use_bias = True, kernel_initializer=init_weights))
  descriptor_model.add(BatchNormalization(axis = -1))
  descriptor_model.add(Activation('relu'))

  descriptor_model.add(Conv2D(64, 3, padding='same', strides=2, use_bias = True, kernel_initializer=init_weights))
  descriptor_model.add(BatchNormalization(axis = -1))
  descriptor_model.add(Activation('relu'))

  descriptor_model.add(Conv2D(64, 3, padding='same', use_bias = True, kernel_initializer=init_weights))
  descriptor_model.add(BatchNormalization(axis = -1))
  descriptor_model.add(Activation('relu'))

  descriptor_model.add(Conv2D(128, 3, padding='same', strides=2,  use_bias = True, kernel_initializer=init_weights))
  descriptor_model.add(BatchNormalization(axis = -1))
  descriptor_model.add(Activation('relu'))

  descriptor_model.add(Conv2D(128, 3, padding='same', use_bias = True, kernel_initializer=init_weights))
  descriptor_model.add(BatchNormalization(axis = -1))
  descriptor_model.add(Activation('relu'))
  descriptor_model.add(Dropout(0.3))

  descriptor_model.add(Conv2D(128, 8, padding='valid', use_bias = True, kernel_initializer=init_weights))
  
  # Final descriptor reshape
  descriptor_model.add(Reshape((128,)))
  
  return descriptor_model
  
  
def triplet_loss(x):
  """
  Defines the loss function which is used to train the descriptor model.

  """
  
  output_dim = 128
  a, p, n = x
  _alpha = 1.0
  positive_distance = K.mean(K.square(a - p), axis=-1)
  negative_distance = K.mean(K.square(a - n), axis=-1)
  
  return K.expand_dims(K.maximum(0.0, positive_distance - negative_distance + _alpha), axis = 1)

## Denoising Image Patches


We use the *DenoiseHPatches* class implemented in the read_data.py file, which takes as input the list of sequences to load and the size of batches. 

*DenoiseHPatches* outputs batches where the input data is the noisy image and the label is the clean image, so we can use a mean absolute error (MAE) metric as loss function. 

Afterward, a subset of training and validation sequences by using *random.sample* (3 sequences for training and 1 for validation data). Remove the random.sample function to give the generator all the training data.


In [12]:
denoise_generator = DenoiseHPatches(random.sample(seqs_train, 3), batch_size=32)
denoise_generator_val = DenoiseHPatches(random.sample(seqs_test, 1), batch_size=32)

# Uncomment following lines for using all the data to train the denoising model
# denoise_generator = DenoiseHPatches(seqs_train, batch_size=32)
# denoise_generator_val = DenoiseHPatches(seqs_test, batch_size=32)

100%|██████████| 3/3 [00:03<00:00,  1.18s/it]
100%|██████████| 1/1 [00:00<00:00,  3.79it/s]


In [13]:
shape = (32, 32, 1)
denoise_model = final_UNet(shape)

In [None]:
nadam = keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, schedule_decay=0.004)
denoise_model.compile(loss='mean_absolute_error', optimizer=nadam, metrics=['accuracy'])
epochs = 1
### Use a loop to save for each epoch the weights in an external website in
### case colab stops. Every time you call fit/fit_generator the weigths are NOT
### reset, so e.g. calling 5 times fit(epochs=1) behave as fit(epochs=5)
for e in range(epochs):
  denoise_history = denoise_model.fit_generator(generator=denoise_generator, 
                                                epochs=50, verbose=1, 
                                                validation_data=denoise_generator_val)
  # denoise_model.save('denoise.h5') 


Epoch 1/50
   4/2370 [..............................] - ETA: 1:23:34 - loss: 519849.2561 - accuracy: 0.0030    

In [None]:
# Plot learning curves
plot_history(denoise_history)
plot_history(denoise_history,'val_acc')

In [None]:
# Visualization of denoised images
plot_denoise(denoise_model)

## Training a Descriptor Network
In the last section we trained a model that given a noisy patch, outputs a denoised version of it. We hoped that by doing so, we will improve the performance of the second part, which is training a network that outputs the descriptor. As we mentioned, a descriptor is a numerical vector that represents the small images we have. The dataset consists of a large number of small images, which are cropped patches from other larger images. Hence, they represent some local part of a scene. That is why there are no objects represented, only corners or textures. Each of these patches is related to a subset of other patches of the dataset by some kind of geometric transformation (e.g. rotation).  For a given patch, we want the network to output a vector that is close to the vectors of the patches that represent the same local part of a scene, while being far from patches do not represent that local part of a scene.

To do so, we will build a convolutional neural network that takes the input of $32\times32$ and outputs a descriptor of size $128$. For the loss, we use the triplet loss, which takes an anchor patch, a negative patch and a positive patch. The idea is to train the network so the descriptors from the anchor and positive patch have a low distance between them, and the negative and anchor patch have a large distance between them. 

In this cell we generate a triplet network, which is a network formed by three copies of the same network. That means that the descriptor model will compute the descriptor for the input `'a'` (anchor), the same descriptor model (with the same weights) will compute the descriptor for the input `'p'` (positive), and again the same model will compute the descriptor for the input `'n'` (negative). 

**Updated explanation**: Due to the way Keras handles the compile method, it needs a loss as an argument in that compile method. However, our loss is computed in the lambda layer, so we want to minimize the output of that layer. As we want to minimize the output of the Lambda function (in this case the triplet loss), we output as the label in the training_generator a vector of zeros and we compute the mean absolute error of the triplet loss and this vector of zeros. What we aim to minimize is therefore:
$$  |\text{triplet_loss} - 0| =  |\text{triplet_loss}| = \text{triplet_loss} $$



In [None]:
from keras.layers import Lambda
shape = (32, 32, 1)
xa = Input(shape=shape, name='a')
xp = Input(shape=shape, name='p')
xn = Input(shape=shape, name='n')
descriptor_model = final_L2Net(shape)
ea = descriptor_model(xa)
ep = descriptor_model(xp)
en = descriptor_model(xn)

loss = Lambda(triplet_loss)([ea, ep, en])

descriptor_model_trip = Model(inputs=[xa, xp, xn], outputs=loss)
adamax=keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)
descriptor_model_trip.compile(loss='mean_absolute_error', optimizer=adamax, metrics=['accuracy'])

In [None]:
### Descriptor loading and training
# Loading images
hPatches = HPatches(train_fnames=train_fnames, test_fnames=test_fnames,
                    denoise_model=denoise_model, use_clean=False)
# Creating training generator
training_generator = DataGeneratorDesc(*hPatches.read_image_file(hpatches_dir, train=1), num_triplets=1000000)
# Creating validation generator
val_generator = DataGeneratorDesc(*hPatches.read_image_file(hpatches_dir, train=0), num_triplets=100000)


In [None]:
plot_triplet(training_generator)

In [None]:
# Train Descriptor L2Net

epochs = 1
### As with the denoising model, we use a loop to save for each epoch 
## #the weights in an external website in case colab stops. 
### reset, so e.g. calling 5 times fit(epochs=1) behave as fit(epochs=5)

### If you have a model saved from a previous training session
### Load it in the next line
# descriptor_model_trip.set_weights(keras.models.load_model('./descriptor.h5').get_weights())
# descriptor_model_trip.optimizer = keras.models.load_model('./descriptor.h5').optimizer

for e in range(epochs):
  
  descriptor_history = descriptor_model_trip.fit_generator(generator=training_generator, epochs=50, verbose=1, validation_data=val_generator)
  
  #descriptor_model_trip.save('descriptor.h5') 

In [None]:
# Plot Learning Curves
plot_history(descriptor_history)
plot_history(descriptor_history,'val_acc')

# Descriptor Evaluation
To evaluate the performance of the final model, we will use the HPatches benchmark. HPatches benchmark takes as input the descriptors for the test data in a CSV form. 

This function generates those files by passing it a descriptor model and a denoising model. It performs a first step of denoising the patches, and a second one of computing the descriptor of the denoised patch. If no denoising model is given (variable set to `None`), the descriptor is computed directly in the noisy patch.

Similarly to the loading data part, you have the denoise_model variable and `use_clean` variable. If `use_clean` is set to True, the CSV generated will be those of the clean patches, even if a denoising model is given. If set to False, then depends on the variable `denoise_model`. If there is no denoise model (`denoise_model=None`), then it will use the noisy patches. 

In [None]:
 generate_desc_csv(descriptor_model, seqs_test, denoise_model=denoise_model, use_clean=False)


Now we will perform the evaluation of three different tasks (Verification, Matching and Evaluation) using the descriptor CSV files. The definition of the three different tasks is taken from the [HPatches paper](https://arxiv.org/pdf/1704.05939.pdf).





In [None]:
# Verification: 
# Patch verification measures the ability of a descriptor to classify whether two patches are extracted 
# from the same measurement.

!python ./hpatches-benchmark/hpatches_eval.py --descr-name=custom --descr-dir=/content/keras_triplet_descriptor/out/ --task=verification --delimiter=";"
!python ./hpatches-benchmark/hpatches_results.py --descr=custom --results-dir=./hpatches-benchmark/results/ --task=verification

# Matching:
# Image matching, tests to what extent a descriptor can correctly identify correspondences in two images.
!python ./hpatches-benchmark/hpatches_eval.py --descr-name=custom --descr-dir=/content/keras_triplet_descriptor/out/ --task=matching --delimiter=";"
!python ./hpatches-benchmark/hpatches_results.py --descr=custom --results-dir=./hpatches-benchmark/results/ --task=matching

# Retrieval
# Retrieval tests how well a descriptor can match a query patch to a pool of patches extracted from many images.
!python ./hpatches-benchmark/hpatches_eval.py --descr-name=custom --descr-dir=/content/keras_triplet_descriptor/out/ --task=retrieval --delimiter=";"
!python ./hpatches-benchmark/hpatches_results.py --descr=custom --results-dir=./hpatches-benchmark/results/ --task=retrieval


# 5 UNet epochs with all patches

In [None]:
denoise_generator = DenoiseHPatches(seqs_train, batch_size=32)
denoise_generator_val = DenoiseHPatches(seqs_test, batch_size=32)

shape = (32, 32, 1)
denoise_model = final_UNet(shape)

nadam = keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, schedule_decay=0.004)
denoise_model.compile(loss='mean_absolute_error', optimizer=nadam, metrics=['accuracy'])
epochs = 1
### Use a loop to save for each epoch the weights in an external website in
### case colab stops. Every time you call fit/fit_generator the weigths are NOT
### reset, so e.g. calling 5 times fit(epochs=1) behave as fit(epochs=5)
for e in range(epochs):
  denoise_history = denoise_model.fit_generator(generator=denoise_generator, 
                                                epochs=5, verbose=1, 
                                                validation_data=denoise_generator_val)
  # denoise_model.save('denoise.h5') 

In [None]:
# Plot learning curves
plot_history(denoise_history)
plot_history(denoise_history,'val_acc')

In [None]:
from keras.layers import Lambda
shape = (32, 32, 1)
xa = Input(shape=shape, name='a')
xp = Input(shape=shape, name='p')
xn = Input(shape=shape, name='n')
descriptor_model = final_L2Net(shape)
ea = descriptor_model(xa)
ep = descriptor_model(xp)
en = descriptor_model(xn)

loss = Lambda(triplet_loss)([ea, ep, en])

descriptor_model_trip = Model(inputs=[xa, xp, xn], outputs=loss)
adamax=keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)
descriptor_model_trip.compile(loss='mean_absolute_error', optimizer=adamax, metrics=['accuracy'])

In [None]:
### Descriptor loading and training
# Loading images
hPatches = HPatches(train_fnames=train_fnames, test_fnames=test_fnames,
                    denoise_model=denoise_model, use_clean=False)
# Creating training generator
training_generator = DataGeneratorDesc(*hPatches.read_image_file(hpatches_dir, train=1), num_triplets=1000000)
# Creating validation generator
val_generator = DataGeneratorDesc(*hPatches.read_image_file(hpatches_dir, train=0), num_triplets=100000)


In [None]:
plot_triplet(training_generator)

In [None]:
# Train Descriptor L2Net

epochs = 1
### As with the denoising model, we use a loop to save for each epoch 
## #the weights in an external website in case colab stops. 
### reset, so e.g. calling 5 times fit(epochs=1) behave as fit(epochs=5)

### If you have a model saved from a previous training session
### Load it in the next line
# descriptor_model_trip.set_weights(keras.models.load_model('./descriptor.h5').get_weights())
# descriptor_model_trip.optimizer = keras.models.load_model('./descriptor.h5').optimizer

for e in range(epochs):
  
  descriptor_history = descriptor_model_trip.fit_generator(generator=training_generator, epochs=50, verbose=1, validation_data=val_generator)
  
  #descriptor_model_trip.save('descriptor.h5') 

In [None]:
# Plot Learning Curves
plot_history(descriptor_history)
plot_history(descriptor_history,'val_acc')

In [None]:
 generate_desc_csv(descriptor_model, seqs_test, denoise_model=denoise_model, use_clean=False)

In [None]:
# Verification: 
# Patch verification measures the ability of a descriptor to classify whether two patches are extracted 
# from the same measurement.

!python ./hpatches-benchmark/hpatches_eval.py --descr-name=custom --descr-dir=/content/keras_triplet_descriptor/out/ --task=verification --delimiter=";"
!python ./hpatches-benchmark/hpatches_results.py --descr=custom --results-dir=./hpatches-benchmark/results/ --task=verification

# Matching:
# Image matching, tests to what extent a descriptor can correctly identify correspondences in two images.
!python ./hpatches-benchmark/hpatches_eval.py --descr-name=custom --descr-dir=/content/keras_triplet_descriptor/out/ --task=matching --delimiter=";"
!python ./hpatches-benchmark/hpatches_results.py --descr=custom --results-dir=./hpatches-benchmark/results/ --task=matching

# Retrieval
# Retrieval tests how well a descriptor can match a query patch to a pool of patches extracted from many images.
!python ./hpatches-benchmark/hpatches_eval.py --descr-name=custom --descr-dir=/content/keras_triplet_descriptor/out/ --task=retrieval --delimiter=";"
!python ./hpatches-benchmark/hpatches_results.py --descr=custom --results-dir=./hpatches-benchmark/results/ --task=retrieval
