In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import os
os.environ['CUDA_VISIBLE_DEVICES']='2'
from sklearn.preprocessing import OneHotEncoder
import time
from sklearn.metrics import silhouette_score

<br>

Load Data

In [2]:
cifar10 = tf.keras.datasets.cifar10
(x_train, y_train),(x_test, y_test) = cifar10.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0
y_train = np.squeeze(y_train)
y_test = np.squeeze(y_test)
ohe = OneHotEncoder()
y_train_ohe = ohe.fit_transform(y_train.reshape(-1,1)).toarray().astype('float32')
y_test_ohe = ohe.transform(y_test.reshape(-1,1)).toarray().astype('float32')
print('x_train:{}, y_train:{}'.format(x_train.shape, y_train.shape))
print('x_test:{}, y_test:{}'.format(x_test.shape, y_test.shape))
print('y_train_ohe:', y_train_ohe.shape)
print('y_test_ohe:', y_test_ohe.shape)

x_train:(50000, 32, 32, 3), y_train:(50000,)
x_test:(10000, 32, 32, 3), y_test:(10000,)
y_train_ohe: (50000, 10)
y_test_ohe: (10000, 10)


In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


<br>

params

In [3]:
params = {}
params['seed'] = 0
params['embedding'] = 256
params['n_classes'] = 10
params['labels'] = np.unique(y_train).astype('int')
params['batch_size'] = 128
params['logits_scale'] = 10
params['logits_margin'] = 0.1
params['feed_limit'] = 10000

<br>
Model = LeNet5

In [4]:
tf.reset_default_graph()
tf.set_random_seed(params['seed'])

x = tf.placeholder(tf.float32, [None, 32, 32, 3])
y_ohe = tf.placeholder(tf.float32, [None, params['n_classes']])
quantized_y = tf.placeholder(tf.float32, [None, params['embedding']])

conv1 = tf.layers.conv2d(inputs=x, filters=6, kernel_size=(5,5), strides=(1,1), padding='valid', name='conv1')
conv1 = tf.nn.relu(conv1)
conv1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=(2,2), strides=(2,2), padding='valid')

conv2 = tf.layers.conv2d(inputs=conv1, filters=16, kernel_size=(5,5), strides=(1,1), padding='valid', name='conv2')
conv2 = tf.nn.relu(conv2)
conv2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=(2,2), strides=(2,2), padding='valid')

flatten = tf.layers.flatten(conv2)

fc1 = tf.layers.dense(flatten, units=120, use_bias=True, activation=tf.nn.relu, name='fc1')
fc2 = tf.layers.dense(fc1, units=84, use_bias=True, activation=tf.nn.relu, name='fc2')
embedding_layer = tf.layers.dense(fc2, units=params['embedding'], use_bias=True, activation=tf.nn.sigmoid)

clf_layer = tf.layers.dense(embedding_layer, units=params['n_classes'], activation=None, use_bias=False, name='clf_layer')
softmax_layer = tf.nn.softmax(clf_layer)

W0830 21:35:57.111392 140074959161088 deprecation.py:323] From <ipython-input-4-b715e0b7b635>:8: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
W0830 21:35:57.121677 140074959161088 deprecation.py:506] From /home/kim1/anaconda3/envs/rok/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0830 21:35:57.443397 140074959161088 deprecation.py:323] From <ipython-input-4-b715e0b7b635>:10: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.MaxPooling2D instead.
W0830 21:35:57.644582 140074959161088 dep

In [5]:
print('conv1:{}'.format(conv1.shape))
print('conv2:{}'.format(conv2.shape))
print('fc1:{}'.format(fc1.shape))
print('fc2:{}'.format(fc2.shape))
print('embedding_layer:{}'.format(embedding_layer.shape))
print('clf_layer:{}'.format(clf_layer.shape))

conv1:(?, 14, 14, 6)
conv2:(?, 5, 5, 16)
fc1:(?, 120)
fc2:(?, 84)
embedding_layer:(?, 256)
clf_layer:(?, 10)


In [6]:
def tf_dhcq_loss(embedding_layer, clf_layer, y, quantized_y, params, gamma=1e-3):
    cross_entropy = tf.reduce_sum(-y*tf.log(clf_layer) - (1-y)*tf.log(1-clf_layer)) / params['batch_size']
    binary_diff_matrix = quantized_y - embedding_layer
    quantization_loss = tf.trace(tf.matmul(tf.transpose(binary_diff_matrix), binary_diff_matrix)) * gamma
    total_loss = cross_entropy + quantization_loss
    return total_loss 

In [7]:
eta = 1e-4
epsilon = 1e-5
loss = tf_dhcq_loss(embedding_layer=embedding_layer, clf_layer=softmax_layer, y=y_ohe, quantized_y=quantized_y, params=params, gamma=1e-5)
loss += tf.losses.get_regularization_loss()
train_model = tf.train.GradientDescentOptimizer(learning_rate=eta).minimize(loss)

In [8]:
def generate_binary_code(x):
    x[x >= 0.5] = 1
    x[x < 0.5] = 0
    return x

<br>
Training

In [9]:
loss_path_train = []
loss_path_test = []

embedding_train = []
embedding_test = []

silhouette_train = []
silhouette_test = []

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
iter_cnt = 50
np.random.seed(params['seed'])
sample_size = 10000 # use 10000 samples only for monitoring

# initial embedding train/test 
embedding_train.append(sess.run(embedding_layer, feed_dict={x:x_train[:sample_size]}))
embedding_test.append(sess.run(embedding_layer, feed_dict={x:x_test[:sample_size]})) 

for epoch in range(iter_cnt):
    start_time = time.time()
    cursor = 0
    step = 1
    
    # random shuffle
    train_idx = np.arange(len(y_train))
    np.random.shuffle(train_idx)
    shuffled_x_train = x_train[train_idx]
    shuffled_y_train_ohe = y_train_ohe[train_idx]

    while cursor < len(y_train): 
        batch_x_train = shuffled_x_train[cursor:cursor+params['batch_size']]
        batch_y_train_ohe = shuffled_y_train_ohe[cursor:cursor+params['batch_size']] 
        sess.run(train_model, feed_dict={x:batch_x_train, y_ohe:batch_y_train_ohe,
                                         quantized_y:generate_binary_code(sess.run(embedding_layer, feed_dict={x:shuffled_x_train[cursor:cursor+params['batch_size']]}))})       
        step += 1
        cursor += params['batch_size']
    
    # embedding train/test
    embedding_train.append(sess.run(embedding_layer, feed_dict={x:x_train[:sample_size]}))
    embedding_test.append(sess.run(embedding_layer, feed_dict={x:x_test[:sample_size]}))
    
    # silhouette train/query
    silhouette_train.append(silhouette_score(embedding_train[-1], y_train[:sample_size]))
    silhouette_test.append(silhouette_score(embedding_test[-1], y_test[:sample_size]))    
    
    # loss train/test
    loss_path_train.append(sess.run(loss, feed_dict={x:x_train[:sample_size], y_ohe:y_train_ohe[:sample_size], quantized_y:generate_binary_code(sess.run(embedding_layer, feed_dict={x:x_train[:sample_size]}))}))
    loss_path_test.append(sess.run(loss, feed_dict={x:x_test[:sample_size], y_ohe:y_test_ohe[:sample_size], quantized_y:generate_binary_code(sess.run(embedding_layer, feed_dict={x:x_test[:sample_size]}))}))
            
    end_time = time.time()
    
    print('epoch:{}    {:.2f}sec \n\
           train(loss:{:.4f}, silhouette:{:.4f}) \n\
           test (loss:{:.4f}, silhouette:{:.4f})'.format(
           epoch+1, end_time - start_time,
           loss_path_train[-1], silhouette_train[-1],
           loss_path_test[-1], silhouette_test[-1]))
    print('')
sess.close()

epoch:1    25.03sec 
           train(loss:279.7110, silhouette:-0.0498) 
           test (loss:278.8896, silhouette:-0.0525)

epoch:2    24.06sec 
           train(loss:273.1373, silhouette:-0.0499) 
           test (loss:272.4274, silhouette:-0.0526)

epoch:3    24.38sec 
           train(loss:268.6442, silhouette:-0.0501) 
           test (loss:268.0502, silhouette:-0.0528)

epoch:4    24.49sec 
           train(loss:265.6192, silhouette:-0.0502) 
           test (loss:265.1340, silhouette:-0.0529)

epoch:5    24.34sec 
           train(loss:263.6215, silhouette:-0.0504) 
           test (loss:263.2330, silhouette:-0.0531)

epoch:6    24.38sec 
           train(loss:262.3283, silhouette:-0.0505) 
           test (loss:262.0219, silhouette:-0.0533)

epoch:7    25.52sec 
           train(loss:261.5038, silhouette:-0.0507) 
           test (loss:261.2646, silhouette:-0.0535)

epoch:8    24.46sec 
           train(loss:260.9841, silhouette:-0.0509) 
           test (loss:260.7986, silho