# SETUP

In [0]:
# Directory inside the Colab runtime where Google Drive gets mounted.
GDRIVE_ROOT = '/content/gdrive'
# NOTE(review): not referenced anywhere else in this notebook; presumably matches the
# per-person sub-folder used in the checkpoint path ('.../model/Vinh/...') — confirm.
trainer = 'Vinh'

In [3]:
from google.colab import drive
# Mount Google Drive at GDRIVE_ROOT; force_remount=True is passed so the mount is redone on every run.
drive.mount(GDRIVE_ROOT, force_remount=True)

Mounted at /content/gdrive


In [4]:
!rm -r triplet-cnn-cbir
!git clone https://github.com/VictorNM/triplet-cnn-cbir.git

Cloning into 'triplet-cnn-cbir'...
remote: Enumerating objects: 196, done.[K
remote: Counting objects: 100% (196/196), done.[K
remote: Compressing objects: 100% (112/112), done.[K
remote: Total 730 (delta 120), reused 156 (delta 82), pack-reused 534[K
Receiving objects: 100% (730/730), 27.38 MiB | 3.97 MiB/s, done.
Resolving deltas: 100% (411/411), done.


In [5]:
import sys
sys.path.append('triplet-cnn-cbir')

import os
import numpy as np

import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.models import load_model
from keras.optimizers import SGD
from keras.utils import Sequence

from src import data, models, visualization, utils, callbacks, experiment

Using TensorFlow backend.


# RUN

## Define functions and classes

In [0]:
def pairwise_distances(features, square=False):
  """Compute the matrix of pairwise Euclidean distances between feature rows.

  Uses the identity ||a - b||^2 = ||a||^2 - 2 * <a, b> + ||b||^2 so that the
  whole matrix comes out of a single matrix product.

  Args:
    features: array-like of shape (n, d), one feature vector per row.
    square: if True, return squared distances; otherwise return the
      (non-squared) Euclidean distances.

  Returns:
    numpy array of shape (n, n) where entry [i, j] is the distance between
    row i and row j.
  """
  features = np.asarray(features)
  dot_product = np.matmul(features, np.transpose(features))
  # The diagonal of the Gram matrix holds the squared norm of every row.
  square_norm = np.diag(dot_product)
  distances = np.expand_dims(square_norm, 0) - 2.0 * dot_product + np.expand_dims(square_norm, 1)
  # Rounding errors can produce tiny negative values; clamp them to zero.
  distances = np.maximum(distances, 0.0)
  if not square:
    # Plain NumPy has no gradient to protect, so sqrt(0) == 0 is safe here.
    distances = np.sqrt(distances)

  return distances

In [0]:
def _get_anchor_positive_triplet_mask(labels):
  indices_equal = np.cast(np.eye(np.shape(labels)[0]), np.bool)
  indices_not_equal = np.logical_not(indices_equal)
  
  labels_equal = np.equal(np.expand_dims(labels, 0), np.expand_dims(labels, 1))
  mask = np.logical_and(indices_not_equal, labels_equal)
  
  return mask

def _get_anchor_negative_triplet_mask(labels):
  labels_equal = np.equal(np.expand_dims(labels, 0), np.expand_dims(labels, 1))
  
  mask = np.logical_not(labels_equal)

  return mask

In [0]:
def triplet_loss(y_true, y_pred):
  """Triplet margin loss with a fixed margin of 0.2.

  Computes mean(max(d(a, p) - d(a, n) + margin, 0)) where d is the Euclidean
  distance over the last axis.

  Args:
    y_true: unused; present only to satisfy the Keras loss signature.
    y_pred: the anchor, positive and negative embeddings, read as
      y_pred[0], y_pred[1] and y_pred[2].
      NOTE(review): this layout is an assumption — confirm against the model
      that would use this loss.

  Returns:
    Scalar backend tensor with the mean loss over the batch.

  Note:
    `K` (keras.backend) is imported in a later cell of this notebook, so this
    function can only be called after that import has run.
  """
  margin = 0.2
  a, p, n = y_pred[0], y_pred[1], y_pred[2]

  p_dist = K.sqrt(K.sum(K.square(a - p), axis=-1))
  n_dist = K.sqrt(K.sum(K.square(a - n), axis=-1))

  loss = K.mean(K.maximum(p_dist - n_dist + margin, 0), axis=0)

  return loss

In [10]:
# Location of the saved CNN classifier (.h5) on the mounted Google Drive.
cnn_clf_path = '/content/gdrive/My Drive/bku-dissertation/model/Vinh/2019-05-17 00:19:54.350018.h5'
# Restore the Keras model from that file.
cnn_classifier = load_model(cnn_clf_path )

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


In [0]:
cnn_extractor = models.build_cnn_extractor(cnn_classifier, features_layer='fc2')

# Freeze everything except the two layers named 'fc1' and 'fc2'.
trainable_layer_names = ('fc1', 'fc2')
for layer in cnn_extractor.layers:
  if layer.name not in trainable_layer_names:
    layer.trainable = False

In [16]:
# Retrieval score of the extractor before the triplet training cell further down is run;
# the validation set serves as both the database and the query set.
# NOTE: x_valid / y_valid are only created in a later cell (the one calling
# data.convert_generator_to_data), so that cell must be executed before this one.
mAP_normal = experiment.mAP_normal(
    extractor=cnn_extractor,
    db=(x_valid, y_valid),
    queries=(x_valid, y_valid)
)
print(mAP_normal)

0.7672082483805169


In [17]:
# Same evaluation as the mAP_normal cell, but through experiment.mAP_kmeans
# (presumably a k-means based retrieval variant — confirm in src/experiment).
# NOTE: x_valid / y_valid are only created in a later cell (the one calling
# data.convert_generator_to_data), so that cell must be executed before this one.
mAP_kmeans = experiment.mAP_kmeans(
    extractor=cnn_extractor,
    db=(x_valid, y_valid),
    queries=(x_valid, y_valid)
)

print(mAP_kmeans)

0.7714395649592363


In [0]:
from keras.layers import Layer, Input
import keras.backend as K

class TripletLossLayer(Layer):
    """Keras layer that computes a triplet margin loss and registers it on the model.

    The layer takes a list of three embedding tensors [anchor, positive,
    negative], computes mean(max(d(a, p) - d(a, n) + alpha, 0)) and attaches
    the result with `add_loss`, so the model can be compiled with `loss=None`.

    Args:
        alpha: margin added to d(a, p) - d(a, n) before clamping at zero.
        **kwargs: forwarded to `keras.layers.Layer` (e.g. `name`).
    """

    def __init__(self, alpha, **kwargs):
        self.alpha = alpha
        super(TripletLossLayer, self).__init__(**kwargs)

    @staticmethod
    def _euclidean(u, v):
        """Euclidean distance over the last axis, kept away from exactly zero."""
        squared = K.sum(K.square(u - v), axis=-1)
        # The gradient of sqrt is unbounded at 0; flooring the argument at
        # K.epsilon() keeps the backward pass finite when two embeddings coincide.
        return K.sqrt(K.maximum(squared, K.epsilon()))

    def triplet_loss(self, inputs):
        """Return the scalar triplet loss for inputs = [anchor, positive, negative]."""
        a, p, n = inputs
        p_dist = self._euclidean(a, p)
        n_dist = self._euclidean(a, n)
        return K.mean(K.maximum(p_dist - n_dist + self.alpha, 0), axis=0)

    def call(self, inputs):
        """Compute the loss, register it via add_loss and return it as the layer output."""
        loss = self.triplet_loss(inputs)
        self.add_loss(loss)
        return loss

    def get_config(self):
        """Include `alpha` in the config so the layer can be re-created from it."""
        config = super(TripletLossLayer, self).get_config()
        config['alpha'] = self.alpha
        return config

In [19]:
cnn_extractor.name = 'cnn_extractor'

# Three image inputs (anchor, positive, negative) that all go through the same extractor.
a, p, n = [Input(shape=(224, 224, 3)) for _ in range(3)]
a_cnn, p_cnn, n_cnn = [cnn_extractor(branch_input) for branch_input in (a, p, n)]

# The layer output is the loss tensor itself; it is used as the model output.
loss_layer = TripletLossLayer(alpha=0.2, name='triplet_loss_layer')
triplet_loss_layer = loss_layer([a_cnn, p_cnn, n_cnn])

triplet_model = Model(inputs=[a, p, n], outputs=triplet_loss_layer)
triplet_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
cnn_extractor (Model)           (None, 4096)         134260544   input_1[0][0]                    
                                                                 input_2[0][0]                    
          

In [0]:
import time
from math import inf
def get_triplet_index_hard(features, labels, margin=0.2):
    """Pick, for every sample, its hardest positive and hardest negative.

    For each anchor i the farthest sample with the same label (hardest
    positive) and the closest sample with a different label (hardest
    negative) are searched with Euclidean distance. The triplet is kept only
    when it still violates the margin, i.e. d(a, p) + margin >= d(a, n).

    Args:
        features: indexable collection of feature vectors, one per sample.
        labels: indexable collection of labels, aligned with `features`.
        margin: margin used in the violation test. Defaults to 0.2.

    Returns:
        List of [anchor_idx, positive_idx, negative_idx]. An index can be None
        when no candidate was selected for it (for example when the anchor is
        the only sample of its label), so callers must handle None entries.
    """
    triplets = []

    for i in range(len(labels)):
        max_pos_d = 0
        max_pos_idx = None
        min_neg_d = inf
        min_neg_idx = None

        for j in range(len(labels)):
            if j == i:  # ignore the same image
                continue

            distance = np.sqrt(np.sum(np.square(features[i] - features[j])))

            if labels[j] == labels[i]:  # positive: keep the farthest one
                if distance > max_pos_d:
                    max_pos_d = distance
                    max_pos_idx = j

            else:  # negative: keep the closest one
                if distance < min_neg_d:
                    min_neg_d = distance
                    min_neg_idx = j

        # Only triplets that still violate the margin are kept.
        if max_pos_d + margin >= min_neg_d:
            triplets.append([i, max_pos_idx, min_neg_idx])

    print('Len triplets {} / {}'.format(len(triplets), len(labels)))
    return triplets

In [21]:
input_size = (224, 224)

train_path = '/content/gdrive/My Drive/bku-dissertation/data/raw/_2_fan_kettle_600_200_200/train'

train_datagen = ImageDataGenerator(rescale=1./255)

# One generator per class so that every training batch can mix both classes evenly.
train_generator_0 = train_datagen.flow_from_directory(
    directory=train_path,
    target_size=input_size,
    batch_size=16,
    classes=['fan'],
)

train_generator_1 = train_datagen.flow_from_directory(
    directory=train_path,
    target_size=input_size,
    batch_size=16,
    classes=['kettle'],
)

Found 600 images belonging to 1 classes.
Found 600 images belonging to 1 classes.


In [0]:
# Bare expression: only displays the generator object's repr; it assigns nothing.
train_generator_1

In [0]:
def get_triplets_images(triplets_index, x):
  """Gather the anchor / positive / negative images for a list of index triplets.

  Triplets whose positive or negative index is None are skipped.

  Args:
    triplets_index: iterable of (anchor_idx, positive_idx, negative_idx).
    x: numpy array of images, indexed along its first axis.

  Returns:
    Tuple (anchors, positives, negatives) of arrays with shape
    (k,) + x.shape[1:], where k is the number of complete triplets. With no
    complete triplet the three arrays are empty (k == 0).
  """
  image_shape = x.shape[1:]
  complete = [
      (ai, pi, ni)
      for ai, pi, ni in triplets_index
      if pi is not None and ni is not None
  ]

  if not complete:
    return tuple(np.empty(shape=(0,) + image_shape) for _ in range(3))

  # Appending to a float64 buffer used to promote the images; keep that dtype.
  out_dtype = np.result_type(np.float64, x.dtype)
  anchor_idx, positive_idx, negative_idx = (list(column) for column in zip(*complete))

  # A single fancy-index gather per role instead of re-copying the growing
  # arrays on every iteration.
  anchors = x[anchor_idx].astype(out_dtype, copy=False)
  positives = x[positive_idx].astype(out_dtype, copy=False)
  negatives = x[negative_idx].astype(out_dtype, copy=False)

  return anchors, positives, negatives

In [0]:
class TripletGenerator(Sequence):
  """Keras Sequence that yields hard triplets mined from two single-class generators.

  Every batch is built from one batch of `gen0` and one batch of `gen1`. The
  images are embedded with `extractor`, hard triplets are selected with
  `get_triplet_index_hard`, and the matching images are returned as
  ([anchors, positives, negatives], None).
  """

  def __init__(self, extractor, gen0, gen1):
    self.gen0 = gen0
    self.gen1 = gen1
    self.extractor = extractor

    self.gen0.reset()
    self.gen1.reset()
    # Build the predict function up front, before batches are requested.
    self.extractor._make_predict_function()

  def __len__(self):
    # Batches are consumed pairwise, so the shorter generator sets the length.
    return min(len(self.gen0), len(self.gen1))

  def __getitem__(self, idx):
    image_parts = []
    label_parts = []
    # gen0 samples get label 0 and gen1 samples label 1: each generator holds
    # one class, so its own argmax is shifted by the generator position.
    for class_offset, generator in enumerate((self.gen0, self.gen1)):
      images, one_hot = generator[idx]
      image_parts.append(images)
      label_parts.append(np.argmax(one_hot, axis=1) + class_offset)

    x_batch = np.concatenate(image_parts)
    y_batch = np.concatenate(label_parts)

    embeddings = self.extractor.predict(x_batch)

    hard_triplets = get_triplet_index_hard(embeddings, y_batch)
    anchors, positives, negatives = get_triplets_images(hard_triplets, x_batch)

    return [anchors, positives, negatives], None

  def on_epoch_end(self):
    for generator in (self.gen0, self.gen1):
      generator.reset()
  
# The extractor is taken from triplet_model itself (layer 'cnn_extractor'), so triplets
# are mined with the same layer object that the triplet model trains.
train_triplet_generator = TripletGenerator(
    triplet_model.get_layer('cnn_extractor'),
    train_generator_0,
    train_generator_1
)

In [0]:
# Number of batches per epoch.
len(train_triplet_generator)

38

In [0]:
# Compile the triplet model with plain SGD.
optimizer_params = {
    "lr": 0.001
}

opt = SGD(**optimizer_params)

# loss=None: the objective is the loss that TripletLossLayer registers through add_loss.
triplet_model.compile(loss=None, optimizer=opt)

In [25]:
import gc
# Run a garbage-collection pass before training; the displayed value is gc.collect()'s return value.
gc.collect()

4

In [27]:
# Train the triplet model for 5 epochs on batches from train_triplet_generator.
triplet_model.fit_generator(train_triplet_generator, epochs=5) 

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x7f43901a9208>

In [13]:
# Same input resolution as used for the training generators.
input_size = (224, 224)

valid_path = '/content/gdrive/My Drive/bku-dissertation/data/raw/_2_fan_kettle_600_200_200/valid'

# Validation images are only rescaled to [0, 1].
valid_datagen = ImageDataGenerator(rescale=1./255)
# input_size is passed positionally as the second argument of flow_from_directory.
valid_generator = valid_datagen.flow_from_directory(valid_path, input_size)

Found 400 images belonging to 2 classes.


In [0]:
# Turn the validation generator into in-memory arrays (x_valid, y_valid).
# NOTE: the mAP cells placed earlier in this notebook also read x_valid / y_valid.
x_valid, y_valid = data.convert_generator_to_data(valid_generator)

In [15]:
# Evaluate the classifier on the validation set with a batch size of 2.
y_valid_one_hot = keras.utils.to_categorical(y_valid)
cnn_classifier.evaluate(x_valid, y_valid_one_hot, batch_size=2)



[0.31250551495264517, 0.89]

In [28]:
# Same evaluation as the earlier mAP_normal cell, repeated after the fit_generator cell.
# This overwrites the earlier value of mAP_normal.
mAP_normal = experiment.mAP_normal(
    extractor=cnn_extractor,
    db=(x_valid, y_valid),
    queries=(x_valid, y_valid)
)
print(mAP_normal)

0.8136425008059011


In [29]:
# Same evaluation as the earlier mAP_kmeans cell, repeated after the fit_generator cell.
# This overwrites the earlier value of mAP_kmeans.
mAP_kmeans = experiment.mAP_kmeans(
    extractor=cnn_extractor,
    db=(x_valid, y_valid),
    queries=(x_valid, y_valid)
)

print(mAP_kmeans)

0.8681053548786035
