## Fashion data with ResNet ##

**Applied Hyperparameters**

- Learning rate = 0.01

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;: 현재 validation accuracy가 이전 epoch의 accuracy보다 감소했을 때(overfitting 가정) learning rate를 0.5배로 감소

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; ☞ **learning_rate_decay** 함수 참조

- batch size = 64

- L2 regularization ([사용 방법 참조](http://gaussian37.me/221143520556))

&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; : CNN 커널 및 FC의 weight에 사용

In [1]:
#!nvidia-smi

In [2]:
#%env CUDA_DEVICE_ORDER=PCI_BUS_ID
#%env CUDA_VISIBLE_DEVICES=5

In [3]:
import os
import numpy as np
from scipy.misc import imread, imresize
import tensorflow as tf
import matplotlib.pyplot as plt
import random

In [4]:
# --- Per-user setting: edit before running ---------------------------------
# File name of the checkpoint written at the end of the notebook.
save_filename = "jinsolkim.ckpt"
# ---------------------------------------------------------------------------

In [5]:
# Spatial size of one input image, in pixels.
img_height, img_width = 28, 28

# TFRecord inputs: directory name first, then the train / validation file names.
tfrecord_dir = "tfrecords"
tfrecord_train = "fashion_train.tfrecord"
tfrecord_val = "fashion_val.tfrecord"

In [6]:
# hyper parameters
seed = 777
learning_rate = 0.01  # changed from the baseline value
training_epochs = 50
batch_size = 64  # batch size 2^N
n_train = 50000
n_val = 10000
n_class = 10
lambda_reg = 0.01  # added: scale passed to the L2 kernel regularizers
# Seed the graph from `seed` (rather than a second hard-coded 777) so that
# changing the constant above really changes the graph-level seed.
tf.set_random_seed(seed)

In [7]:
def read_and_decode(filename_queue, n_batch):
    """Build the ops that read TFRecord examples and group them into batches.

    Each record is parsed as a byte-string 'image' and an int64 'label'. The
    image bytes are decoded as uint8 and reshaped to
    [img_height, img_width, 1]; the label is one-hot encoded with depth
    n_class (both sizes come from notebook-level constants).

    Args:
        filename_queue: queue of TFRecord file names to read from.
        n_batch: number of examples per returned batch.

    Returns:
        (images, labels): batched image tensor and batched one-hot labels.
    """
    feature_spec = {
        'image': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    }

    _, record = tf.TFRecordReader().read(filename_queue)
    example = tf.parse_single_example(record, features=feature_spec)

    # The image is stored as a scalar string of raw bytes.
    pixels = tf.decode_raw(example['image'], tf.uint8)
    pixels = tf.reshape(pixels, [img_height, img_width, 1])

    class_id = tf.cast(example['label'], tf.int32)
    onehot = tf.one_hot(class_id, depth=n_class)

    images, labels = tf.train.batch(
        [pixels, onehot],
        batch_size=n_batch,
        capacity=10000,
        num_threads=4,
    )
    return images, labels

In [8]:
def preproc(x):
    """Subtract from every example its own mean value.

    Per-example mean subtraction
    (http://ufldl.stanford.edu/wiki/index.php/Data_Preprocessing).

    The mean is taken over every axis except the leading (batch) axis, so the
    function gives one scalar mean per example both for flat
    [batch, features] input and for image batches such as
    [batch, height, width, channels]. For 2-D input this is identical to the
    previous `axis=1` behaviour.

    Args:
        x: tensor whose first axis indexes examples.

    Returns:
        Tensor of the same shape as `x` with the per-example mean removed.
    """
    # x = x*2 - 1.0
    rank = x.get_shape().ndims
    # With an unknown static rank the axes cannot be enumerated; fall back to
    # the original single-axis reduction in that case.
    non_batch_axes = list(range(1, rank)) if rank is not None and rank > 1 else 1
    mean = tf.reduce_mean(x, axis=non_batch_axes, keep_dims=True)
    return x - mean

In [9]:
def conv_bn_activ(name, x, n_filters, kernel_size, strides, training, seed, padding='SAME'):
    """Convolution -> batch normalization -> ReLU, created under scope `name`.

    Args:
        name: variable scope that wraps the three ops.
        x: input tensor.
        n_filters: number of convolution output channels.
        kernel_size: convolution kernel size.
        strides: convolution stride.
        training: flag forwarded to batch normalization's `training` argument.
        seed: seed for the variance-scaling kernel initializer.
        padding: convolution padding mode (default 'SAME').

    Returns:
        The activated output tensor.
    """
    kernel_init = tf.contrib.layers.variance_scaling_initializer(seed=seed)
    with tf.variable_scope(name):
        # The convolution has no bias term; batch normalization follows it directly.
        conv_out = tf.layers.conv2d(
            x, n_filters, kernel_size,
            strides=strides,
            padding=padding,
            use_bias=False,
            kernel_initializer=kernel_init,
        )
        normalized = tf.layers.batch_normalization(conv_out, training=training)
        activated = tf.nn.relu(normalized)
    return activated

In [10]:
def residual_block(name, x, n_filters, training, seed, downsample=False):
    """Residual block: two 3x3 conv + batch-norm layers plus a skip connection.

    Main branch: conv(3x3) -> BN -> ReLU -> conv(3x3) -> BN, both convolutions
    bias-free and L2-regularized with scale `lambda_reg`. The branch output is
    added to the shortcut and passed through a final ReLU.

    Args:
        name: variable scope for the block.
        x: input tensor.
        n_filters: output channels of every convolution in the block.
        training: flag forwarded to batch normalization's `training` argument.
        seed: accepted for signature parity with `conv_bn_activ`; not used by
            this function.
        downsample: when True, the first convolution uses stride 2 and the
            shortcut becomes a stride-2 1x1 convolution to `n_filters`
            channels; otherwise the shortcut is `x` itself.

    Returns:
        The block's output tensor.
    """
    first_stride = 2 if downsample else 1

    with tf.variable_scope(name):
        l2 = tf.contrib.layers.l2_regularizer(scale=lambda_reg)

        with tf.variable_scope("inner_conv1"):
            branch = tf.layers.conv2d(x, n_filters, [3, 3], strides=first_stride, padding='SAME',
                                      use_bias=False, kernel_regularizer=l2)
            branch = tf.layers.batch_normalization(branch, training=training)
            branch = tf.nn.relu(branch)

        with tf.variable_scope("inner_conv2"):
            branch = tf.layers.conv2d(branch, n_filters, [3, 3], strides=1, padding='SAME',
                                      use_bias=False, kernel_regularizer=l2)
            branch = tf.layers.batch_normalization(branch, training=training)

        shortcut = x
        if downsample:
            # Projection shortcut so its shape matches the strided main branch.
            shortcut = tf.layers.conv2d(x, n_filters, [1, 1], strides=2, padding='SAME',
                                        kernel_regularizer=l2)

        return tf.nn.relu(branch + shortcut)

In [11]:
def build_resnet(X_img, layer_n, training, seed):
    """Build the ResNet classifier graph and return its logits.

    Layout: a 3x3 conv/BN/ReLU stem with 16 filters ("conv0"), then four
    stages ("conv1".."conv4") of `layer_n` residual blocks with 16, 32, 64 and
    64 filters. The first block of stages conv2-conv4 downsamples. The result
    is average-pooled, flattened to 64 features and fed to a 10-way dense
    layer whose kernel is L2-regularized with scale `lambda_reg`.

    The output tensor of every block and of the head is printed while the
    graph is built.

    Args:
        X_img: input image batch tensor.
        layer_n: number of residual blocks per stage.
        training: flag forwarded to every batch-normalization layer.
        seed: seed forwarded to the stem initializer and to each block.

    Returns:
        Logits tensor with 10 outputs per example.
    """
    regularizer = tf.contrib.layers.l2_regularizer(scale=lambda_reg)
    net = X_img

    with tf.variable_scope("conv0"):
        # Use the `training` argument here (previously the notebook-global
        # `is_train` was referenced) so the stem follows the same flag as the
        # residual blocks and the function does not depend on a global.
        net = conv_bn_activ(name="pre_conv", x=net, n_filters=16, kernel_size=[3, 3], strides=1,
                            training=training, seed=seed)

    # (variable scope, filters, does the first block of the stage downsample?)
    stages = [
        ("conv1", 16, False),
        ("conv2", 32, True),
        ("conv3", 64, True),
        ("conv4", 64, True),
    ]
    for scope_name, n_filters, first_block_downsamples in stages:
        with tf.variable_scope(scope_name):
            for i in range(layer_n):
                net = residual_block(name="resblk{}".format(i), x=net, n_filters=n_filters,
                                     training=training, seed=seed,
                                     downsample=(first_block_downsamples and i == 0))
                print(net)

    with tf.variable_scope("fc"):
        net = tf.layers.average_pooling2d(name="gap", inputs=net, pool_size=[7, 7],
                                          strides=7, padding='SAME')
        print(net)
        # Flatten to one 64-feature row per example; this relies on the pooled
        # map being 1x1 spatially, as it is for the 28x28 inputs used here.
        net = tf.reshape(net, [-1, 64])
        print(net)
        logits = tf.layers.dense(net, 10, name="logits", kernel_regularizer=regularizer)
        print(logits)
    return logits

In [12]:
# Graph inputs, fed on every session run.
# X: image batch, Y: one-hot labels, is_train: boolean train/eval switch.
X = tf.placeholder(tf.float32, shape=[None, img_height, img_width, 1])
Y = tf.placeholder(tf.float32, shape=[None, n_class])
is_train = tf.placeholder(tf.bool)

In [13]:
# X_pre = preproc(X)
# X_img = tf.reshape(X, [-1, 28, 28, 1], name="X_img")
# X_img = tf.reshape(X_pre, [-1, 28, 28, 1], name="X_img")
#print(X)
#print(X_pre)
#print(X_img)

In [14]:
# Build the network on the image placeholder, with 3 residual blocks per stage.
logits = build_resnet(X_img=X, layer_n=3, training=is_train, seed=seed)

Tensor("conv1/resblk0/Relu:0", shape=(?, 28, 28, 16), dtype=float32)
Tensor("conv1/resblk1/Relu:0", shape=(?, 28, 28, 16), dtype=float32)
Tensor("conv1/resblk2/Relu:0", shape=(?, 28, 28, 16), dtype=float32)
Tensor("conv2/resblk0/Relu:0", shape=(?, 14, 14, 32), dtype=float32)
Tensor("conv2/resblk1/Relu:0", shape=(?, 14, 14, 32), dtype=float32)
Tensor("conv2/resblk2/Relu:0", shape=(?, 14, 14, 32), dtype=float32)
Tensor("conv3/resblk0/Relu:0", shape=(?, 7, 7, 64), dtype=float32)
Tensor("conv3/resblk1/Relu:0", shape=(?, 7, 7, 64), dtype=float32)
Tensor("conv3/resblk2/Relu:0", shape=(?, 7, 7, 64), dtype=float32)
Tensor("conv4/resblk0/Relu:0", shape=(?, 4, 4, 64), dtype=float32)
Tensor("conv4/resblk1/Relu:0", shape=(?, 4, 4, 64), dtype=float32)
Tensor("conv4/resblk2/Relu:0", shape=(?, 4, 4, 64), dtype=float32)
Tensor("fc/gap/AvgPool:0", shape=(?, 1, 1, 64), dtype=float32)
Tensor("fc/Reshape:0", shape=(?, 64), dtype=float32)
Tensor("fc/logits/BiasAdd:0", shape=(?, 10), dtype=float32)


In [15]:
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=Y), name="loss")
#n_batches_per_epoch = int(mnist.train.num_examples / batch_size)
#decay_steps = int(n_batches_per_epoch * num_epochs_per_decay)
#global_step = tf.Variable(0, trainable=False)
#learningRate = tf.train.exponential_decay(learning_rate=learning_rate,
#                                          global_step= global_step,
#                                          decay_steps=decay_steps,
#                                          decay_rate= 0.15,
#                                          staircase=True)

update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss, name="optimizer")  

In [16]:
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(Y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [17]:
cwd = os.getcwd()
train_path = os.path.join(cwd, '..', tfrecord_dir, tfrecord_train)
val_path = os.path.join(cwd, '..', tfrecord_dir, tfrecord_val)

In [18]:
filename_queue = tf.train.string_input_producer([train_path], num_epochs=training_epochs)
image_batch, label_batch = read_and_decode(filename_queue, batch_size)
filename_queue_val = tf.train.string_input_producer([val_path], num_epochs=training_epochs)
image_val, label_val = read_and_decode(filename_queue_val, batch_size)

In [19]:
train_var = [X, Y, is_train, logits, accuracy]
tf.add_to_collection('train_var', train_var[0])
tf.add_to_collection('train_var', train_var[1])
tf.add_to_collection('train_var', train_var[2])
tf.add_to_collection('train_var', train_var[3])
tf.add_to_collection('train_var', train_var[4])
saver = tf.train.Saver()
##saver.export_meta_graph(os.path.join(cur_dir, 'checkpoints', 'mnist_ckpt.meta'), collection_list=['train_var'])

In [20]:
init_op = tf.group(tf.global_variables_initializer(),
                      tf.local_variables_initializer())
sess = tf.Session(config=tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth =True)))
sess.run(init_op)

In [21]:
def learning_rate_decay(learing_rate, weight, pre_val_acc, now_val_acc):
    if pre_val_acc > now_val_acc:
        print('Learning rate decay : %.9f' % (learning_rate * weight))
        return learning_rate * weight
    else:
        return learning_rate   

In [22]:
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord, sess=sess)

# train my model
print('Learning started. It takes sometime.')
pre_avg_val_acc = 0
for epoch in range(training_epochs):
    avg_cost = 0.
    avg_train_acc = 0.
    avg_val_acc = 0.
    total_batch = int(n_train / batch_size)
    total_batch_val = int(n_val / batch_size)    

    for i in range(total_batch):
        batch_xs, batch_ys = sess.run([image_batch, label_batch])
        batch_xs = batch_xs/255.
        feed_dict = {X: batch_xs, Y: batch_ys, is_train: True}
        #feed_dict = {X: batch_xs, Y: batch_ys, keep_prob: 0.7}
        acc, c, _ = sess.run([accuracy, loss, optimizer], feed_dict=feed_dict)
        avg_cost += c / total_batch
        avg_train_acc += acc / total_batch
        
    for i in range(total_batch_val):
        batch_xs, batch_ys = sess.run([image_val, label_val])
        batch_xs = batch_xs/255.
        feed_dict = {X: batch_xs, Y: batch_ys, is_train: False}
        acc = sess.run(accuracy, feed_dict=feed_dict)
        avg_val_acc += acc / total_batch_val
   
    # Learning rate decay when Validation accuracy decrease
    learning_rate = learning_rate_decay(learning_rate, 0.5, pre_val_acc = pre_avg_val_acc, now_val_acc = avg_val_acc)
    pre_avg_val_acc = avg_val_acc

    print('Epoch:', '%04d' % (epoch + 1), 'cost =', '{:.9f}'.format(avg_cost), 'train accuracy = ', 
         '{:.5f}'.format(avg_train_acc), 'validation accuracy = ', '{:.5f}'.format(avg_val_acc))


print('Learning Finished!')

Learning started. It takes sometime.
Epoch: 0001 cost = 0.865521912 train accuracy =  0.69588 validation accuracy =  0.59856
Epoch: 0002 cost = 0.492787328 train accuracy =  0.81826 validation accuracy =  0.82432
Epoch: 0003 cost = 0.448738484 train accuracy =  0.83531 validation accuracy =  0.83163
Learning rate decay : 0.050000000
Epoch: 0004 cost = 0.416369579 train accuracy =  0.84467 validation accuracy =  0.80489
Learning rate decay : 0.025000000
Epoch: 0005 cost = 0.390877555 train accuracy =  0.85383 validation accuracy =  0.71144
Epoch: 0006 cost = 0.375255090 train accuracy =  0.86224 validation accuracy =  0.80399
Epoch: 0007 cost = 0.359008015 train accuracy =  0.86934 validation accuracy =  0.85296
Epoch: 0008 cost = 0.346009486 train accuracy =  0.87368 validation accuracy =  0.85357
Learning rate decay : 0.012500000
Epoch: 0009 cost = 0.330378491 train accuracy =  0.87946 validation accuracy =  0.84265
Epoch: 0010 cost = 0.320127202 train accuracy =  0.88356 validation a

In [23]:
# Make sure the target directory exists before saving; the save fails when the
# checkpoint's parent directory is missing.
checkpoint_dir = os.path.join(cwd, '..', 'checkpoints')
os.makedirs(checkpoint_dir, exist_ok=True)
# Left as the cell's last expression so the saved path is displayed.
saver.save(sess, os.path.join(checkpoint_dir, save_filename))

'/home/ubuntu/FC_Tensorflow_7th/jinsolkim/../checkpoints/jinsolkim.ckpt'

In [24]:
# Ask the input-queue threads to stop, then wait until they have all exited.
coord.request_stop()
coord.join(threads=threads)