In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import os
os.environ['CUDA_VISIBLE_DEVICES']='2'
from sklearn.preprocessing import OneHotEncoder
import time
from center_loss import center_loss

<br>

Load Data

In [2]:
cifar10 = tf.keras.datasets.cifar10
(x_train, y_train),(x_test, y_test) = cifar10.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0
y_train = np.squeeze(y_train)
y_test = np.squeeze(y_test)
ohe = OneHotEncoder()
y_train_ohe = ohe.fit_transform(y_train.reshape(-1,1)).toarray().astype('float32')
y_test_ohe = ohe.transform(y_test.reshape(-1,1)).toarray().astype('float32')
print('x_train:{}, y_train:{}'.format(x_train.shape, y_train.shape))
print('x_test:{}, y_test:{}'.format(x_test.shape, y_test.shape))
print('y_train_ohe:', y_train_ohe.shape)
print('y_test_ohe:', y_test_ohe.shape)

x_train:(50000, 32, 32, 3), y_train:(50000,)
x_test:(10000, 32, 32, 3), y_test:(10000,)
y_train_ohe: (50000, 10)
y_test_ohe: (10000, 10)


In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


<br>

params

In [3]:
params = {}
params['seed'] = 0
params['embedding'] = 16
params['n_classes'] = 10
params['labels'] = np.unique(y_train).astype('int')
params['batch_size'] = 128
params['lambda'] = 0.003
params['alpha'] = 0.1
params['feed_limit'] = 10000

<br>

Model = LeNet5

In [4]:
tf.reset_default_graph()
tf.set_random_seed(params['seed'])

x = tf.placeholder(tf.float32, [None, 32, 32, 3])
y = tf.placeholder(tf.float32, [None,])
y_ohe = tf.placeholder(tf.float32, [None, params['n_classes']])
centroids = tf.placeholder(tf.float32, [params['n_classes'], params['embedding']])

conv1 = tf.layers.conv2d(inputs=x, filters=6, kernel_size=(5,5), strides=(1,1), padding='valid', name='conv1')
conv1 = tf.nn.relu(conv1)
conv1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=(2,2), strides=(2,2), padding='valid')

conv2 = tf.layers.conv2d(inputs=conv1, filters=16, kernel_size=(5,5), strides=(1,1), padding='valid', name='conv2')
conv2 = tf.nn.relu(conv2)
conv2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=(2,2), strides=(2,2), padding='valid')

flatten = tf.layers.flatten(conv2)

fc1 = tf.layers.dense(flatten, units=120, use_bias=True, activation=tf.nn.relu, name='fc1')
fc2 = tf.layers.dense(fc1, units=84, use_bias=True, activation=tf.nn.relu, name='fc2')
embedding_layer = tf.layers.dense(fc2, units=params['embedding'], use_bias=True, activation=None)

clf_layer = tf.layers.dense(embedding_layer, units=params['n_classes'], activation=None, use_bias=False, name='clf_layer')
softmax_layer = tf.nn.softmax(clf_layer)

W0830 16:47:11.782063 139932902004480 deprecation.py:323] From <ipython-input-4-5c2c5e769059>:9: conv2d (from tensorflow.python.layers.convolutional) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.keras.layers.Conv2D` instead.
W0830 16:47:11.787720 139932902004480 deprecation.py:506] From /home/kim1/anaconda3/envs/rok/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W0830 16:47:12.011117 139932902004480 deprecation.py:323] From <ipython-input-4-5c2c5e769059>:11: max_pooling2d (from tensorflow.python.layers.pooling) is deprecated and will be removed in a future version.
Instructions for updating:
Use keras.layers.MaxPooling2D instead.
W0830 16:47:12.158684 139932902004480 dep

In [5]:
print('conv1:{}'.format(conv1.shape))
print('conv2:{}'.format(conv2.shape))
print('fc1:{}'.format(fc1.shape))
print('fc2:{}'.format(fc2.shape))
print('embedding_layer:{}'.format(embedding_layer.shape))
print('clf_layer:{}'.format(clf_layer.shape))

conv1:(?, 14, 14, 6)
conv2:(?, 5, 5, 16)
fc1:(?, 120)
fc2:(?, 84)
embedding_layer:(?, 16)
clf_layer:(?, 10)


In [6]:
eta = 1e-3
epsilon = 1e-5
softmax_loss = tf.reduce_sum(tf.maximum(tf.multiply(-tf.log(softmax_layer + epsilon), y_ohe), 0))
loss_add = center_loss(embedding_layer=embedding_layer, centroids=centroids, y=y, params=params)
loss = softmax_loss + loss_add
train_model = tf.train.GradientDescentOptimizer(learning_rate=eta).minimize(loss)

W0830 16:47:12.873792 139932902004480 deprecation.py:323] From /home/kim1/anaconda3/envs/rok/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py:1354: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


<br>

Training

In [7]:
loss_path_train = []
loss_path_test = []

embedding_train = []
embedding_test = []

accuracy_train = []
accuracy_test = []

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
iter_cnt = 50
np.random.seed(params['seed'])
sample_size = 10000 # use 1000 samples only for monitoring

# initial embedding train/test 
embedding_train.append(sess.run(embedding_layer, feed_dict={x:x_train[:sample_size]}))
embedding_test.append(sess.run(embedding_layer, feed_dict={x:x_test[:sample_size]})) 

# initialize centroids
centroids_list = []
train_idx_list = np.arange(len(y_train))
chunk_idx_by_label = {x:train_idx_list[y_train == x] for x in params['labels']} 
for label in params['labels']:
    class_sample_embedding = sess.run(embedding_layer, feed_dict={x:x_train[chunk_idx_by_label[label]]})
    centroids_list.append(np.mean(class_sample_embedding, axis=0))
centroids_list = np.array(centroids_list)

for epoch in range(iter_cnt):
    start_time = time.time()
    cursor = 0
    step = 1
    
    # random shuffle
    train_idx = np.arange(len(y_train))
    np.random.shuffle(train_idx)
    shuffled_x_train = x_train[train_idx]
    shuffled_y_train = y_train[train_idx]
    shuffled_y_train_ohe = y_train_ohe[train_idx]

    while cursor < len(y_train): 
        batch_x_train = shuffled_x_train[cursor:cursor+params['batch_size']]
        batch_y_train = shuffled_y_train[cursor:cursor+params['batch_size']]
        batch_y_train_ohe = shuffled_y_train_ohe[cursor:cursor+params['batch_size']] 
        sess.run(train_model, feed_dict={x:batch_x_train,
                                         y_ohe:batch_y_train_ohe,
                                         y:batch_y_train,
                                         centroids:centroids_list}) 
        
        train_idx_list = np.arange(len(batch_y_train))
        chunk_idx_by_label = {x:train_idx_list[batch_y_train == x] for x in params['labels']} 
        # update centroids
        for label in params['labels']:
            if len(batch_x_train[chunk_idx_by_label[label]]) != 0:
                class_sample_embedding = sess.run(embedding_layer, feed_dict={x:batch_x_train[chunk_idx_by_label[label]]})
                delta = np.sum(centroids_list[label] - class_sample_embedding)
                delta = delta / (1+len(class_sample_embedding))
                centroids_list[label] -= params['alpha'] * delta 
            
        step += 1
        cursor += params['batch_size']
    
    # embedding train/test
    embedding_train.append(sess.run(embedding_layer, feed_dict={x:x_train[:sample_size]}))
    embedding_test.append(sess.run(embedding_layer, feed_dict={x:x_test[:sample_size]}))
    
    # loss train/test
    loss_path_train.append(sess.run(loss, feed_dict={x:x_train[:sample_size], y:y_train[:sample_size], y_ohe:y_train_ohe[:sample_size], centroids:centroids_list}))
    loss_path_test.append(sess.run(loss, feed_dict={x:x_test[:sample_size], y:y_test[:sample_size], y_ohe:y_test_ohe[:sample_size], centroids:centroids_list}))
    
    # loss train/test
    loss_path_train.append(sess.run(loss, feed_dict={x:x_train[:sample_size], y_ohe:y_train_ohe[:sample_size], y:y_train[:sample_size], centroids:centroids_list}))
    loss_path_test.append(sess.run(loss, feed_dict={x:x_test[:sample_size], y_ohe:y_test_ohe[:sample_size], y:y_test[:sample_size], centroids:centroids_list}))
    
    # accuracy train/test
    pred_train = pd.DataFrame(sess.run(softmax_layer, feed_dict={x:x_train[:sample_size]})).idxmax(axis=1)
    accuracy_train.append(np.sum((pred_train == y_train[:sample_size]).astype('int')) / len(y_train[:sample_size]))
    pred_test = pd.DataFrame(sess.run(softmax_layer, feed_dict={x:x_test[:sample_size]})).idxmax(axis=1)
    accuracy_test.append(np.sum((pred_test == y_test[:sample_size]).astype('int')) / len(y_test[:sample_size]))
        
    end_time = time.time()
    
    print('epoch:{}    {:.2f}sec \n\
           train(loss:{:.4f}, accuracy{:.4f}) \n\
           test (loss:{:.4f}, accuracy{:.4f})'.format(
           epoch+1, end_time - start_time,
           loss_path_train[-1], accuracy_train[-1],
           loss_path_test[-1], accuracy_test[-1]))
    print('')
sess.close()

epoch:1    27.92sec 
           train(loss:18411.3555, accuracy0.3591) 
           test (loss:18406.7539, accuracy0.3602)

epoch:2    28.75sec 
           train(loss:15630.8184, accuracy0.4516) 
           test (loss:15783.8838, accuracy0.4469)

epoch:3    27.13sec 
           train(loss:14593.6074, accuracy0.4876) 
           test (loss:14821.1787, accuracy0.4770)

epoch:4    28.23sec 
           train(loss:13614.8281, accuracy0.5208) 
           test (loss:14115.0391, accuracy0.5001)

epoch:5    27.30sec 
           train(loss:12961.9180, accuracy0.5444) 
           test (loss:13578.9365, accuracy0.5166)

epoch:6    28.17sec 
           train(loss:12214.1387, accuracy0.5720) 
           test (loss:13254.2852, accuracy0.5355)

epoch:7    26.92sec 
           train(loss:11577.2344, accuracy0.5969) 
           test (loss:12755.5166, accuracy0.5538)

epoch:8    28.08sec 
           train(loss:11591.2451, accuracy0.5918) 
           test (loss:12890.7031, accuracy0.5402)

epoch:9    27.01