# SETUP

In [0]:
# Sub-folder name under the Drive 'model' directory where this user's checkpoints live.
trainer = 'Vinh'
# Mount point of Google Drive inside the Colab runtime.
GDRIVE_ROOT = '/content/gdrive'

In [2]:
# Mount Google Drive at GDRIVE_ROOT so the dataset and saved models that later
# cells locate under that root are reachable from this runtime.
from google.colab import drive
drive.mount(GDRIVE_ROOT, force_remount=True)

Mounted at /content/gdrive


In [3]:
!rm -r triplet-cnn-cbir
!git clone https://github.com/VictorNM/triplet-cnn-cbir.git

Cloning into 'triplet-cnn-cbir'...
remote: Enumerating objects: 8, done.[K
remote: Counting objects: 100% (8/8), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 897 (delta 2), reused 4 (delta 2), pack-reused 889[K
Receiving objects: 100% (897/897), 59.16 MiB | 11.54 MiB/s, done.
Resolving deltas: 100% (527/527), done.


In [4]:
import sys
sys.path.append('triplet-cnn-cbir')

import os
import numpy as np

import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model, load_model
from keras.layers import Input
from keras.optimizers import SGD
from keras.utils import Sequence

from src import data, models, visualization, utils, callbacks, experiment, triplet

Using TensorFlow backend.


# RUN

## Define parameters

In [0]:
# Locations on the mounted Drive.
DATASET_NAME = '_2_fan_kettle_600_200_200'
DISSERTATION_ROOT = os.path.join(GDRIVE_ROOT, 'My Drive/bku-dissertation')
DATA_ROOT = os.path.join(DISSERTATION_ROOT, 'data')

# Settings shared by the cells below.
features_layer = 'fc2'     # name of the layer used as the feature output
margin = 0.2               # margin passed to the triplet model and generators
input_size = (224, 224)    # image size requested from the data generator

## Prepare data

In [6]:
# Load the validation split into memory as (x_valid, y_valid) for evaluation.
valid_path = os.path.join(DATA_ROOT, 'raw', DATASET_NAME, 'valid')

valid_datagen = ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_directory(valid_path, target_size=input_size)

x_valid, y_valid = data.convert_generator_to_data(valid_generator)

# Class names ordered by the integer index the generator assigned to them.
classes = sorted(valid_generator.class_indices, key=valid_generator.class_indices.get)

print('Classes:', classes)

Found 400 images belonging to 2 classes.
Classes: ['fan', 'kettle']


## Load CNN

In [7]:
# Classifier checkpoint to start from; checkpoints are stored per trainer on Drive.
cnn_name = '2019-05-24 21:06:53'
cnn_path = os.path.join(DISSERTATION_ROOT, 'model', trainer, '{}.h5'.format(cnn_name))
cnn_classifier = load_model(cnn_path)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.


## Evaluate before training with triplet loss

In [8]:
# Baseline evaluation of the loaded classifier on the validation set.
# The one-hot width is taken from the generator instead of a hard-coded 2, so the
# cell keeps working when DATASET_NAME points at a dataset with more classes.
# NOTE(review): assumes y_valid holds integer class indices - confirm against
# data.convert_generator_to_data.
cnn_classifier.evaluate(
    x_valid,
    keras.utils.to_categorical(y_valid, valid_generator.num_classes)
)



[0.27537485778331755, 0.8975]

In [0]:
# Feature extractor cut from the classifier at the configured layer.
# Uses the shared `features_layer` setting (instead of repeating the literal 'fc2')
# so this baseline and the triplet model always read features from the same layer.
cnn_extractor = models.build_cnn_extractor(cnn_classifier, features_layer=features_layer)

In [0]:
# Baseline mAP score (experiment.mAP_normal) of the CNN extractor on the validation data.
mAP_normal = experiment.mAP_normal(extractor=cnn_extractor, x=x_valid, y=y_valid)
print(mAP_normal)

0.7790436353963364


In [0]:
# Baseline mAP score using the k-means variant (experiment.mAP_kmeans) on the validation data.
mAP_kmeans = experiment.mAP_kmeans(extractor=cnn_extractor, x=x_valid, y=y_valid)
print(mAP_kmeans)

In [11]:
# Baseline mean precision@k for each cut-off; `ks` is reused by the next cell.
ks = [10, 20, 30, 40, 50]
for k in ks:
  mean_precision_at_k_normal = experiment.mean_precision_at_k(
      cnn_extractor, x_valid, y_valid, k
  )
  print(mean_precision_at_k_normal)

0.865
0.850625
0.8435833333333335
0.839625
0.8305500000000001


In [12]:
# Baseline mean precision@k with the k-means variant, over the cut-offs in `ks`
# (defined in the previous cell).
for k in ks:
  mean_precision_at_k_kmeans = experiment.mean_precision_at_k_kmeans(
      cnn_extractor, x_valid, y_valid, k
  )
  print(mean_precision_at_k_kmeans)

0.8634999999999999
0.8504999999999999
0.8426666666666668
0.8383125
0.8294


## Train with triplet loss

In [0]:
# Wrap the classifier in a triplet model; its inner feature network is later
# retrieved with get_layer('extractor').
triplet_extractor = models.build_triplet_extractor(cnn_classifier, margin=margin, features_layer=features_layer)

In [9]:
# Triplet batches drawn from the 'train' split.
train_triplet_generator = triplet.TripletGenerator(
    directory=os.path.join(DATA_ROOT, 'raw', DATASET_NAME, 'train'),
    extractor=triplet_extractor.get_layer('extractor'),
    margin=margin,
    batch_size=32,
)

Found 600 images belonging to 1 classes.
Found 600 images belonging to 1 classes.


In [10]:
# Triplet batches drawn from the 'valid' split; passed as validation_data during fitting.
valid_triplet_generator = triplet.TripletGenerator(
    directory=os.path.join(DATA_ROOT, 'raw', DATASET_NAME, 'valid'),
    extractor=triplet_extractor.get_layer('extractor'),
    margin=margin,
    batch_size=32,
)

Found 200 images belonging to 1 classes.
Found 200 images belonging to 1 classes.


In [0]:
# Compile the triplet model with SGD.
optimizer_params = dict(lr=0.01, momentum=0.9)
opt = SGD(**optimizer_params)

# No loss is passed to compile().
# NOTE(review): presumably the triplet loss is attached inside the model by
# models.build_triplet_extractor - confirm.
triplet_extractor.compile(optimizer=opt, loss=None)

In [12]:
# Fine-tune with the triplet generators for 10 epochs; keep the returned history object.
history = triplet_extractor.fit_generator(
    train_triplet_generator,
    validation_data=valid_triplet_generator,
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


## Evaluate triplet model

In [15]:
# mAP score of the fine-tuned extractor; overwrites the baseline value held in mAP_normal.
mAP_normal = experiment.mAP_normal(extractor=triplet_extractor.get_layer('extractor'), x=x_valid, y=y_valid)
print(mAP_normal)

0.8019843952696243


In [17]:
# k-means-variant mAP score of the fine-tuned extractor; overwrites the baseline value in mAP_kmeans.
mAP_kmeans = experiment.mAP_kmeans(extractor=triplet_extractor.get_layer('extractor'), x=x_valid, y=y_valid)
print(mAP_kmeans)

0.8007959839100355


In [16]:
# Mean precision@k of the fine-tuned extractor for each cut-off in `ks`.
ks = [10, 20, 30, 40, 50]
for k in ks:
  mean_precision_at_k_normal = experiment.mean_precision_at_k(
      triplet_extractor.get_layer('extractor'), x_valid, y_valid, k
  )
  print(mean_precision_at_k_normal)

0.8719999999999999
0.8615
0.85875
0.8542499999999998
0.8486


In [18]:
# Mean precision@k (k-means variant) of the fine-tuned extractor, over the
# cut-offs in `ks` (defined in the previous cell).
for k in ks:
  mean_precision_at_k_kmeans = experiment.mean_precision_at_k_kmeans(
      triplet_extractor.get_layer('extractor'), x_valid, y_valid, k
  )
  print(mean_precision_at_k_kmeans)

0.87075
0.86125
0.8582500000000002
0.8526250000000001
0.8473


# POST TRAINING

In [0]:
# Save the fine-tuned extractor next to the classifier checkpoint it was built from.
final_extractor = triplet_extractor.get_layer('extractor')
# Build the path from DISSERTATION_ROOT / trainer / cnn_name rather than repeating
# them in a literal, so the saved file always matches the checkpoint that was loaded.
# With the current settings this resolves to the same path as before.
triplet_path = os.path.join(DISSERTATION_ROOT, 'model', trainer, 'triplet-' + cnn_name + '.h5')
final_extractor.save(triplet_path)