In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.backend import set_session

# Start from a clean slate so that re-running this cell does not keep piling
# ops onto a graph left over from a previous run: drop Keras' global state and
# replace the default TensorFlow graph.
tf.keras.backend.clear_session()
tf.reset_default_graph()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # dynamically grow the memory used on the GPU
config.log_device_placement = True  # to log device placement (on which device the operation ran)
                                    # (nothing gets printed in Jupyter, only if you run it standalone)
# `session` is the single session shared by every later cell (tf.data
# iterators, training loop, kNN evaluation).
session = tf.InteractiveSession(config=config)
set_session(session)  # set this TensorFlow session as the default session for Keras

In [2]:
# Experiment configuration. Everything below is read by later cells.
classes_number = 10  # width of the one-hot labels and of the classification layer
# NOTE(review): randint / uniform are not used anywhere in the visible part of
# this notebook - confirm whether this import is still needed.
from random import randint, uniform

batch_size = 128  # mini-batch size of all three tf.data pipelines
fc_sizes = [128, 128]  # units of the fc1 / fc2 layers; fc_sizes[-1] is the embedding width
dropouts = [.2, .2]  # dropout rates applied before fc1 and before fc2
learning_rate = 6e-4  # Adam learning rate
margin = .1  # margin passed to the triplet loss
# Which loss is built: 'embeddings' -> triplet loss, 'classification' -> softmax cross-entropy.
method = 'embeddings'
#method = 'classification'

log_dir = "logdir-07"  # root directory of the TensorBoard event files

In [3]:
from tensorflow.keras.datasets import cifar10
import numpy


# Load CIFAR-10 and convert the images to float32 before scaling.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# One-hot encode the labels; later cells recover class ids with argmax.
y_train = tf.keras.utils.to_categorical(y_train, classes_number)
y_test = tf.keras.utils.to_categorical(y_test, classes_number)

# Per-channel statistics, shaped (1, 1, 1, 3) so they broadcast over a
# (N, H, W, 3) image tensor.
# NOTE(review): presumably the CIFAR-10 channel mean/std on a 0..1 scale - confirm.
mean = np.array([0.4914, 0.4822, 0.4465]).reshape(1, 1, 1, 3)
std = np.array([0.247, 0.243, 0.261]).reshape(1, 1, 1, 3)

# Scale to 0..1, then standardise each channel.
x_train = (x_train / 255.0 - mean) / std
x_test = (x_test / 255.0 - mean) / std

# `mean` / `std` are float64, so the arithmetic above promoted the images;
# cast back to float32.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')

# Random train / validation split of the original training set.
# Despite its name, `_validation_percentage` is the fraction that STAYS in the
# training split (0.9 -> 90 % train, 10 % validation). The name is kept
# unchanged in case other cells refer to it.
_validation_percentage = 0.9
_dataset_len = x_train.shape[0]
split_index = int(_dataset_len * _validation_percentage)

# NOTE(review): the permutation is unseeded, so the split differs between runs.
indices = numpy.random.permutation(_dataset_len)

train_indices = indices[:split_index]
validation_indices = indices[split_index:]

# Keep references to the full arrays under `x` / `y` while the split arrays
# take over the `x_train` / `y_train` names (a later cell deletes `x` and `y`).
x, y = x_train, y_train

x_train = x[train_indices]
y_train = y[train_indices]
x_validation = x[validation_indices]
y_validation = y[validation_indices]

# Remember the array shapes; the arrays themselves are deleted in a later cell.
shape_x_train = x_train.shape
shape_x_test = x_test.shape
shape_x_validation = x_validation.shape
shape_y_train = y_train.shape
shape_y_test = y_test.shape
shape_y_validation = y_validation.shape

In [4]:
# Input pipelines for the three splits. Only the training pipeline is
# shuffled (through a 7000-element buffer); every pipeline yields
# (images, one-hot labels) mini-batches of `batch_size` elements.
dataset_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(buffer_size=7000)
    .batch(batch_size)
)

dataset_validation = (
    tf.data.Dataset.from_tensor_slices((x_validation, y_validation))
    .batch(batch_size)
)

dataset_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(batch_size)
)

In [5]:
# One initializable iterator per split. Each is created exactly once: creating
# them a second time only leaves an orphaned set of iterator ops in the graph.
train_iterator = dataset_train.make_initializable_iterator()
validation_iterator = dataset_validation.make_initializable_iterator()
test_iterator = dataset_test.make_initializable_iterator()

# Feedable iterator: the split that `next_element` reads from is chosen at
# session.run time by feeding one of the string handles below into `handle`.
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    handle, dataset_train.output_types, dataset_train.output_shapes)
# next_element[0] is the image batch, next_element[1] the one-hot label batch.
next_element = iterator.get_next()

# Evaluate the handles once; they are plain values fed into `handle` later.
train_handle = session.run(train_iterator.string_handle())
validation_handle = session.run(validation_iterator.string_handle())
test_handle = session.run(test_iterator.string_handle())

Instructions for updating:
Colocations handled automatically by placer.


In [6]:
# Position every iterator at the start of its dataset.
# The string handles are fetched once, in the cell that creates the iterators,
# and keep being used after re-initialisation (the training loop does exactly
# that), so they are not fetched again here: every extra `string_handle()`
# call would only add another op to the graph.
session.run(train_iterator.initializer)
session.run(validation_iterator.initializer)
session.run(test_iterator.initializer)

In [7]:
# Drop the notebook-level references to the NumPy arrays; from here on the
# data is only accessed through the tf.data pipelines.
# NOTE(review): the datasets were built with from_tensor_slices, so a copy of
# the data presumably still lives inside the graph - confirm how much memory
# this actually releases.
del x, y, x_train, x_test, x_validation, y_train, y_validation, y_test

In [8]:
# load pretrained weights
# VGG16 convolutional base with ImageNet weights, wired directly onto the
# image half of the feedable iterator (no separate input placeholder).
# NOTE(review): per the Keras documentation `classes` is only meaningful
# together with include_top=True - confirm it is ignored here.
pretrained = tf.keras.applications.VGG16(weights='imagenet', 
                                         include_top=False, 
                                         classes=classes_number, 
                                         input_tensor=next_element[0])
                                         #input_shape=(32, 32, 3))

# Freeze the whole backbone; only the layers added below are trainable.
for layer in pretrained.layers:
    layer.trainable = False

#last_pretrained_layer = pretrained.get_layer('block4_pool')
last_pretrained_layer = pretrained

fc = last_pretrained_layer.output

# Head: dropout -> flatten -> fc1 -> dropout -> fc2.
# NOTE(review): the Dropout layers are called without an explicit `training`
# argument; in TF1 graph mode they presumably follow
# tf.keras.backend.learning_phase(), which no session.run in this notebook
# feeds - confirm that dropout is really active during training.
fc = tf.keras.layers.Dropout(dropouts[0])(fc)
fc = tf.keras.layers.Flatten(name='flatten')(fc)
fc = tf.keras.layers.Dense(fc_sizes[0], activation='relu', name='fc1')(fc)
fc = tf.keras.layers.Dropout(dropouts[1])(fc)
fc = tf.keras.layers.Dense(fc_sizes[-1], activation='relu', name='fc2')(fc)

# Output of fc2: the vector used by the triplet loss and by the kNN evaluation.
embeddings = fc

# Classification branch (logits, no activation); consumed by the loss only
# when method == 'classification'.
fc = tf.keras.layers.Dropout(0.3)(fc)
y_pred = tf.keras.layers.Dense(classes_number)(fc)

# NOTE(review): the Saver is created before the optimizer is built in a later
# cell, so it presumably covers only the variables that exist at this point
# (no optimizer state) - confirm this is intended.
saver = tf.train.Saver()

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [9]:
# KNN
# Brute-force nearest-neighbour lookup, expressed as a small TF graph.
#   k         - number of neighbours to return per query
#   x_keys    - [num_keys, fc_sizes[-1]] reference embeddings
#   x_queries - [num_queries, fc_sizes[-1]] embeddings to look up
#   y_keys    - [num_keys] integer class id of every reference embedding
k = tf.placeholder(tf.int32)
x_keys = tf.placeholder(tf.float32, [None, fc_sizes[-1]])
x_queries = tf.placeholder(tf.float32, [None, fc_sizes[-1]])
y_keys = tf.placeholder(tf.int32, [None])

# Broadcasting [num_keys, D] against [num_queries, 1, D] gives every
# query/key difference; summing |.| over the last axis yields the L1 distance
# matrix of shape [num_queries, num_keys].
distance = tf.reduce_sum(tf.abs(x_keys - tf.expand_dims(x_queries, 1)), axis=2)

# top_k picks the largest values, so negate to get the k smallest distances.
_, indices = tf.nn.top_k(-distance, k=k)

# Class ids of the neighbours, closest first: [num_queries, k].
nearest = tf.gather(y_keys, indices, axis=0)

In [10]:
from metrics.numpy import mean_average_precision
from IPython.display import display, clear_output


def get_embeddings(session, _embeddings, _handle):
    """Run one full pass over an iterator and collect embeddings with labels.

    Args:
        session: session used to evaluate the tensors.
        _embeddings: tensor evaluated for every batch.
        _handle: string handle fed into the global `handle` placeholder; it
            selects which iterator the batches are drawn from.

    Returns:
        Tuple `(embeddings, labels)`: the per-batch results concatenated along
        the first axis. The labels are whatever `next_element[1]` yields.

    The iterator is read until it raises `tf.errors.OutOfRangeError` and is
    left exhausted; the caller has to re-initialise it before reusing it.
    """
    fetches = [_embeddings, next_element[1]]
    feed = {handle: _handle}

    emb_batches, label_batches = [], []
    while True:
        try:
            batch_embs, batch_labels = session.run(fetches, feed_dict=feed)
        except tf.errors.OutOfRangeError:
            break
        emb_batches.append(batch_embs)
        label_batches.append(batch_labels)

    return np.concatenate(emb_batches), np.concatenate(label_batches)


def knn_classes(embeddings_train, embeddings_target, y_train, items_per_step=20):
    """Rank the training classes by embedding distance for every target item.

    The lookup runs the kNN graph (`nearest`) through the global `session`,
    `items_per_step` queries at a time, and asks for as many neighbours as
    there are training items, i.e. a complete ranking per query.

    Args:
        embeddings_train: [num_train, dim] reference embeddings.
        embeddings_target: [num_target, dim] embeddings to look up.
        y_train: [num_train, num_classes] one-hot labels of the references.
        items_per_step: number of queries evaluated per session.run call.

    Returns:
        Array of shape [num_target, num_train]; row i holds the class ids of
        all training items, ordered from nearest to farthest from target i.
    """
    keys_train = np.argmax(y_train, axis=1)
    num_keys = y_train.shape[0]

    # The number of steps is derived from the queries actually passed in (not
    # from a notebook-level variable), rounded up so that a trailing partial
    # batch is evaluated as well instead of being silently dropped.
    num_queries = embeddings_target.shape[0]
    if num_queries == 0:
        # Nothing to look up; np.concatenate would fail on an empty list.
        return np.empty((0, num_keys), dtype=np.int32)
    max_step = -(-num_queries // items_per_step)

    knn_outs = []
    for i in range(max_step):
        clear_output(wait=True)
        start = i * items_per_step
        res = session.run(nearest, feed_dict={
            x_keys: embeddings_train,
            y_keys: keys_train,
            x_queries: embeddings_target[start:start + items_per_step],
            k: num_keys})
        knn_outs.append(res)
        display('KNN, step[{}/{}]'.format(i + 1, max_step))

    return np.concatenate(knn_outs)

In [11]:
import time
import os

# Each run logs into its own sub-directory, named after the UTC start time and
# the hyper-parameters of the run.
time_str = time.strftime("%m-%d__%H:%M:%S", time.gmtime())
tensorboard_dir = f"{time_str}/adam{learning_rate}_fc{fc_sizes}_dropouts{dropouts}_batch{batch_size}_margin{margin}"

# One event writer per split: <log_dir>/train/<run> and <log_dir>/validate/<run>.
writer_train, writer_validation = (
    tf.summary.FileWriter(logdir=os.path.join(log_dir, split_name, tensorboard_dir))
    for split_name in ("train", "validate")
)

In [12]:
from loss.triplet import (triplet_loss_batch_all,
                          triplet_loss_batch_hard,
                          triplet_loss_batch_hard_negative)

# Build the loss selected by `method`; any other value is rejected up front.
if method not in ('embeddings', 'classification'):
    raise ValueError(f"{method} has incorrect value")

if method == 'classification':
    # Softmax cross-entropy between the one-hot labels and the logits.
    print('cross_entropy')
    labels = next_element[1]
    loss = tf.losses.softmax_cross_entropy(labels, y_pred)
else:
    # Triplet loss on the embeddings; it receives integer class ids, so the
    # one-hot labels are reduced with argmax.
    print('Triplet loss')
    labels = tf.argmax(next_element[1], axis=1)
    loss = triplet_loss_batch_all(embeddings, labels, margin=margin)

tf.summary.scalar('loss', loss)
# Despite its name, `optimizer` is the op returned by minimize(): running it
# performs one Adam update step.
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

Triplet loss
Instructions for updating:
Use tf.cast instead.


In [None]:
# Counters shared with the training cell.
batch = 0
loss_accum = 0
epoch = 0
summary_op = tf.summary.merge_all()

# Initialise every variable EXCEPT those of the pretrained backbone.
# tf.global_variables_initializer() would re-run the initialisers of the VGG16
# variables as well and thereby throw away the ImageNet weights that Keras
# loaded into this session when the model was built.
pretrained_variable_names = {weight.name for weight in pretrained.weights}
session.run(tf.variables_initializer(
    [variable for variable in tf.global_variables()
     if variable.name not in pretrained_variable_names]))

In [None]:
from metrics.numpy import mean_average_precision


def _collect_embeddings(target_iterator, target_handle):
    """Embed the whole training split and one target split.

    Both iterators are rewound before the pass and again afterwards:
    `get_embeddings` reads an iterator until it is exhausted, and an exhausted
    training iterator would make the next training pass end immediately
    without processing a single batch.

    Returns:
        (embeddings_train, y_train, embeddings_target, y_target)
    """
    session.run(train_iterator.initializer)
    session.run(target_iterator.initializer)
    try:
        train_embs, train_ys = get_embeddings(session, embeddings, train_handle)
        target_embs, target_ys = get_embeddings(session, embeddings, target_handle)
    finally:
        session.run(train_iterator.initializer)
        session.run(target_iterator.initializer)
    return train_embs, train_ys, target_embs, target_ys


def _log_scalar(writer, tag, value, step):
    """Write a single scalar summary to `writer` at `step` and flush it."""
    scalar_summary = tf.Summary()
    scalar_summary.value.add(tag=tag, simple_value=value)
    writer.add_summary(scalar_summary, step)
    writer.flush()


last_train_batch = batch
last_mAP = 0.0

while True:
    # train: one pass over the training split, one Adam step per batch.
    batch = last_train_batch
    try:
        while True:
            summary, _, loss_val = session.run(
                [summary_op, optimizer, loss],
                feed_dict={handle: train_handle}
            )
            # Count the step only after the run succeeded, so the run that
            # signals the end of the data does not consume a step number.
            batch += 1
            writer_train.add_summary(summary, batch)
            writer_train.flush()
    except tf.errors.OutOfRangeError:
        session.run(train_iterator.initializer)
        last_train_batch = batch

    # validate: same loss, no update op; the step counter keeps running from
    # the end of the training pass.
    try:
        while True:
            summary, loss_val = session.run(
                [summary_op, loss],
                feed_dict={handle: validation_handle}
            )
            batch += 1
            writer_validation.add_summary(summary, batch)
            writer_validation.flush()
    except tf.errors.OutOfRangeError:
        session.run(validation_iterator.initializer)

    epoch += 1

    # Every 4th epoch: retrieval mAP of the validation split against the
    # training split, used as the stopping criterion.
    if epoch % 4 == 0:
        # The results are kept in notebook-level names, as before, so that
        # anything reading them after the loop still finds them.
        embeddings_train, _y_train, embeddings_target, _y_target = _collect_embeddings(
            validation_iterator, validation_handle)

        knn_res = knn_classes(embeddings_train, embeddings_target, _y_train, items_per_step=10)

        mAP = mean_average_precision(knn_res, np.argmax(_y_target, axis=1))
        _log_scalar(writer_validation, 'validation/mAP', mAP, batch)

        print(mAP)

        # Stop as soon as the validation mAP drops below the previous evaluation.
        if mAP < last_mAP:
            print('Finished')
            break

        last_mAP = mAP


# Final evaluation on the test split, logged next to the validation curves.
embeddings_train, _y_train, embeddings_target, _y_target = _collect_embeddings(
    test_iterator, test_handle)

knn_res = knn_classes(embeddings_train, embeddings_target, _y_train, items_per_step=10)

mAP = mean_average_precision(knn_res, np.argmax(_y_target, axis=1))
print(f"test mAP={mAP}")

_log_scalar(writer_validation, 'test/mAP', mAP, batch)

'KNN, step[287/500]'

In [None]:
# Restore variables from disk.
# saver.restore(session, f"saved_models/{tensorboard_dir}")