- This notebook is largely based on https://github.com/sjchoi86/advanced-tensorflow/blob/master/basic/cnn_mnist_modern.ipynb

In [None]:
import os
import numpy as np
import random
import tensorflow.contrib.slim as slim
import tensorflow as tf
from imgaug import augmenters as iaa
import pickle

In [None]:
# Load pre-trained weights
# `pw` is indexed positionally by vgg16(): pw[0], pw[2], ... are used as conv
# kernels and pw[1], pw[3], ... as the matching biases (indices 0..25).
# NOTE(review): despite the .txt extension the file is opened in binary mode
# and unpickled. pickle.load can execute arbitrary code, so only load a file
# you produced yourself.
with open('vgg16_weights.txt','rb') as fp: # tf.keras.applications.VGG16()
    pw = pickle.load(fp)

In [None]:
# vgg16
# https://github.com/tensorflow/models/blob/master/research/slim/nets/vgg.py
def vgg_arg_scope(weight_decay=0.0005):
    """Build the slim argument scope shared by the VGG layers.

    conv2d and fully_connected get ReLU activation, an L2 weights
    regularizer and a zero bias initializer; conv2d additionally defaults
    to 'SAME' padding.

    Args:
      weight_decay: coefficient handed to ``slim.l2_regularizer``.

    Returns:
      The arg_scope object yielded by the inner ``slim.arg_scope``.
    """
    shared_defaults = dict(
        activation_fn=tf.nn.relu,
        weights_regularizer=slim.l2_regularizer(weight_decay),
        biases_initializer=tf.compat.v1.zeros_initializer(),
    )
    # A two-item ``with`` enters the second scope inside the first one,
    # exactly like two nested ``with`` statements.
    with slim.arg_scope([slim.conv2d, slim.fully_connected], **shared_defaults), \
            slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
        return arg_sc
        
# NOTE(review): 196608 equals 256*256*3, not 224*224*3 (= 150528). The name is
# not referenced by any other code visible in this file.
n_input = 196608 # 256*256*3
n_classes = 16

# Graph inputs: image batch, one-hot label batch and the train/eval switch
# that vgg16() forwards to dropout.
x = tf.placeholder("float", [None,224,224,3])
y = tf.placeholder("float", [None, n_classes]) 
is_training = tf.placeholder(tf.bool)

def vgg16(inputs, is_training=True, dropout_keep_prob=0.5):
    """Build the VGG16-style classifier and return its logits.

    The 13 convolution layers are initialised from the module-level ``pw``
    list: layer k (0-based, in network order) takes ``pw[2*k]`` as kernel and
    ``pw[2*k + 1]`` as bias. All of them stay trainable. The head is a
    7x7 'VALID' convolution with 1024 filters (fc6), dropout, and a 1x1
    convolution with ``n_classes`` outputs (fc8); there is no fc7 layer.

    Args:
      inputs: image batch; the file feeds a [None, 224, 224, 3] placeholder.
      is_training: bool or bool tensor forwarded to ``slim.dropout``.
      dropout_keep_prob: keep probability of the dropout layer.

    Returns:
      Logits tensor of shape [batch, n_classes].
    """
    def pretrained_conv(net, depth, w_idx, scope):
        # 3x3 convolution whose kernel/bias start from pw[w_idx] / pw[w_idx + 1].
        return slim.conv2d(net, depth, [3, 3], stride=1,
                           weights_initializer=tf.constant_initializer(pw[w_idx]),
                           biases_initializer=tf.constant_initializer(pw[w_idx + 1]),
                           trainable=True,
                           scope=scope)

    # (block number, output depth, number of conv layers in the block).
    # Spatial size after each block's pooling: 112, 56, 28, 14, 7.
    conv_blocks = [(1, 64, 2), (2, 128, 2), (3, 256, 3), (4, 512, 3), (5, 512, 3)]

    with slim.arg_scope(vgg_arg_scope()):
        net = inputs
        w_idx = 0
        for block, depth, n_layers in conv_blocks:
            with tf.variable_scope('conv%d' % block):
                for layer in range(1, n_layers + 1):
                    net = pretrained_conv(net, depth, w_idx,
                                          'conv%d_%d' % (block, layer))
                    w_idx += 2
            net = slim.max_pool2d(net, [2, 2], scope='pool%d' % block)

        # fc
        net = slim.conv2d(net, 1024, [7, 7], padding='VALID', scope='fc6') # cf.4096
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout6')
        # output
        net = slim.conv2d(net, n_classes, [1, 1], activation_fn=None, normalizer_fn=None,
                          scope='fc8')
        # spatial squeeze: drop only the two 1x1 spatial axes. Squeezing every
        # size-1 axis would also remove the batch axis for a one-sample batch
        # and break downstream ops that index axis 1 (e.g. tf.argmax(pred, 1)).
        out = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
    return out

In [None]:
# Prediction
# Logits of the network for the `x` placeholder; dropout follows `is_training`.
pred = vgg16(x, is_training)

# Loss & Optimizer
# Mean softmax cross-entropy between the one-hot labels `y` and the logits.
# NOTE(review): vgg_arg_scope() attaches an L2 weights_regularizer to every
# layer, but `cost` contains only the cross-entropy term, so the weight decay
# does not appear to influence training -- confirm whether that is intended.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=pred))

# With decay
# global_step is incremented by minimize(); the learning rate is multiplied
# by 0.96 every 8050 steps (staircase schedule).
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.00001
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           8050, 0.96, staircase=True)
optm = tf.train.AdamOptimizer(learning_rate).minimize(cost,global_step=global_step) 

# Batch accuracy: fraction of samples whose arg-max logit matches the label.
corr = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accr = tf.reduce_mean(tf.cast(corr, "float"))

# Initializer
# The session is created with on-demand GPU memory growth and all variables
# are initialised immediately.
init = tf.global_variables_initializer()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(init)
print ("FUNCTIONS READY")

# Load images
- Load training and validation set

In [None]:
# Open the archive once and pull out the train/validation images (X) and
# labels (y) of both data sources.
l = np.load('Train_Val.npz')
mnst_tr_X, mnst_tr_y = l['mnst_tr_X'], l['mnst_tr_y']
mnst_val_X, mnst_val_y = l['mnst_val_X'], l['mnst_val_y']
imgnet_tr_X, imgnet_tr_y = l['imgnet_tr_X'], l['imgnet_tr_y']
imgnet_val_X, imgnet_val_y = l['imgnet_val_X'], l['imgnet_val_y']

# Preprocessing and augmentation

In [None]:
# Training set preprocessing and augmentation
# Per-channel mean, shaped (1, 1, 3) so it broadcasts over height and width.
vgg_mean = np.asarray([[[123.68, 116.779, 103.939]]], dtype=np.float32)
# Number of training images in each source.
ntrain_mnst = len(mnst_tr_X)
ntrain_imgnet = len(imgnet_tr_X)

# mnst
def aug_mnst(xs_):
    """Augment a batch of MNIST-derived images and convert it to VGG input.

    Each image gets a random affine transform (scale 1.0-1.08, translation
    up to +/-8 %, rotation up to +/-15 degrees); then ``vgg_mean`` is
    subtracted and the channel axis is reversed.

    Args:
      xs_: array that can be reshaped to (N, 224, 224, 3). The batch size is
        taken from the input itself rather than from a module-level count.

    Returns:
      A new contiguous float32 array of shape (N, 224, 224, 3). The input
      array is left untouched.
    """
    # astype() returns a fresh float32 copy, so the caller's data is never modified.
    xs_ = np.asarray(xs_).reshape(-1, 224, 224, 3).astype(np.float32)
    mnst_aug = iaa.Affine(scale=(1.0, 1.08), translate_percent=(-0.08, 0.08), rotate=(-15, 15))
    xs_ = np.asarray(mnst_aug.augment_images(xs_), dtype=np.float32)
    # Mean subtraction broadcasts over the whole batch; no per-image loop needed.
    xs_ -= vgg_mean
    # Reverse the channel axis and materialise the view as a regular array.
    return np.ascontiguousarray(xs_[..., ::-1])

# imgnet
def _crop_then_flip(position):
    """Return a 224x224 crop anchored at `position`, followed by a 50 % horizontal flip."""
    return iaa.Sequential([iaa.CropToFixedSize(width=224, height=224, position=position),
                           iaa.Fliplr(0.5)])

# Nine anchored crops (3x3 grid of positions) ...
c9 = _crop_then_flip('left-top')
c10 = _crop_then_flip('center-top')
c11 = _crop_then_flip('right-top')
c12 = _crop_then_flip('left-center')
c13 = _crop_then_flip('center')
c14 = _crop_then_flip('right-center')
c15 = _crop_then_flip('left-bottom')
c16 = _crop_then_flip('center-bottom')
c17 = _crop_then_flip('right-bottom')
# ... plus a plain resize of the whole image to 224x224, also with a random flip.
c18 = iaa.Sequential([iaa.Resize({"height": 224, "width": 224}),
                      iaa.Fliplr(0.5)])
# One of the ten variants is applied per augmentation.
aug = iaa.OneOf([c9, c10, c11, c12, c13, c14, c15, c16, c17, c18])

def fancy_pca(img, alpha_std=0.1):
    """Apply a per-image PCA colour shift ("fancy PCA") to an RGB image.

    The 3x3 covariance of the image's own pixels (scaled to [0, 1]) is
    eigen-decomposed, and the same offset ``sum_i alpha * lambda_i * p_i`` is
    added to every pixel, where ``lambda_i`` / ``p_i`` are the eigenvalues /
    eigenvectors and ``alpha`` is one draw from N(0, alpha_std).

    Two defects of the earlier implementation are fixed here:
      * the eigenvector matrix was transposed before the product, so the
        shift was built from its rows instead of the eigenvectors (columns);
      * the shift was computed in [0, 1] units but added to the [0, 255]
        image, making it about 255 times too small.

    Args:
      img: array of shape (..., 3) with values on the 0-255 scale.
      alpha_std: standard deviation of the random strength ``alpha``.

    Returns:
      A new float32 array with the shape of ``img``, clipped to [0, 255].
      ``img`` itself is not modified.
    """
    # https://github.com/pixelatedbrian/fortnight-furniture/blob/master/src/fancy_pca.py
    orig_img = img.astype('float32')  # astype() already returns a copy

    # Channel covariance of the centred pixels, on the [0, 1] scale.
    pixels = (img / 255.0).reshape(-1, 3)
    pixels = pixels - np.mean(pixels, axis=0)
    img_cov = np.cov(pixels, rowvar=False)
    eig_vals, eig_vecs = np.linalg.eigh(img_cov)

    # A single alpha is shared by all components, so the order of the
    # eigenpairs does not matter and no sorting is required.
    alpha = np.random.normal(0, alpha_std)
    # Columns of eig_vecs are the eigenvectors; rescale the shift to 0-255.
    add_vect = 255.0 * np.dot(eig_vecs, alpha * eig_vals)

    orig_img += add_vect.astype(np.float32)  # broadcasts over the channel axis
    return np.clip(orig_img, 0.0, 255.0)

def aug_imgnet(xs_):
    """Augment a batch of ImageNet-derived images and convert it to VGG input.

    Every 256x256 image is passed through the module-level ``aug`` augmenter
    (a 224x224 crop or a resize, each with a random horizontal flip), then
    through ``fancy_pca``; finally ``vgg_mean`` is subtracted and the channel
    axis is reversed.

    Args:
      xs_: array that can be reshaped to (N, 256, 256, 3). The batch size is
        taken from the input itself rather than from a module-level count.

    Returns:
      A new float32 array of shape (N, 224, 224, 3).
    """
    xs_ = np.asarray(xs_).reshape(-1, 256, 256, 3).astype(np.float32)
    augmented = aug.augment_images(xs_)
    xs2 = np.zeros((len(augmented), 224, 224, 3), dtype=np.float32)
    for i_, x_ in enumerate(augmented):
        x_ = fancy_pca(x_) - vgg_mean # fancy_pca & subtract mean
        xs2[i_] = x_[:, :, ::-1] # to bgr
    return xs2

# Validation set preprocessing
nval_mnst = len(mnst_val_X)
nval_imgnet = len(imgnet_val_X)
nval = nval_mnst + nval_imgnet
val_X_proc = np.zeros((nval, 224, 224, 3), dtype=np.float32)


def _vgg_preprocess(img_):
    """Return one image as a 224x224x3 float32 array, mean-subtracted and channel-reversed."""
    centered = img_.reshape(224, 224, 3).astype(np.float32) - vgg_mean
    return centered[:, :, ::-1]


# No augmentation for validation: mnst images fill the first nval_mnst rows,
# imgnet images follow directly after them.
for row_offset, images in ((0, mnst_val_X), (nval_mnst, imgnet_val_X)):
    for k, img_ in enumerate(images):
        val_X_proc[row_offset + k] = _vgg_preprocess(img_)

In [None]:
# save
# Checkpoint settings: `savedir` is used as a plain string prefix by the
# training loop (savedir + "net-<epoch>.ckpt"), so keep the trailing slash.
savedir = "/path/to/save/the/model/" # Please set the save directory
# Keep up to 100 checkpoints on disk.
saver = tf.train.Saver(max_to_keep=100)
# A checkpoint is written every `save_step` epochs.
save_step = 10 # set save step

In [None]:
# Parameters
training_epochs = 56 # set epoch number
batch_size = 32 # set batch size
display_step = 10 # set display step (validation is reported every display_step epochs)
ntrain = ntrain_mnst + ntrain_imgnet
# Number of full mini-batches per epoch; a trailing partial batch is not counted.
total_batch = ntrain // batch_size
# Validation
val_batch_size = 16
total_batch_val = nval // val_batch_size
# y label
# The label width follows n_classes instead of repeating the literal 16, so
# the label arrays always agree with the `y` placeholder.
# Row layout matches the image arrays: mnst rows first, imgnet rows after.
# trn
y_trn = np.zeros((ntrain, n_classes), dtype='float32')
y_trn[:ntrain_mnst] = mnst_tr_y
y_trn[ntrain_mnst:] = imgnet_tr_y
# val
y_val = np.zeros((nval, n_classes), dtype='float32')
y_val[:nval_mnst] = mnst_val_y
y_val[nval_mnst:] = imgnet_val_y

In [None]:
# Optimize
# Every mini-batch is composed of 20 mnst and 12 imgnet samples (32 in total).
mnst_per_batch, imgnet_per_batch = 20, 12
for epoch in range(training_epochs):
    avg_cost, avg_acc = 0., 0.
    val_cost, val_acc = 0., 0.
    # We used random undersampling in the analysis.
    # Augmentation: the whole training set is re-augmented at every epoch.
    x_trn = np.zeros((ntrain,224,224,3),dtype='float32')
    x_trn[:ntrain_mnst] = aug_mnst(mnst_tr_X)
    x_trn[ntrain_mnst:] = aug_imgnet(imgnet_tr_X)
    # Independent shuffles of the mnst rows and of the imgnet rows of x_trn.
    randpermlist_m = np.random.permutation(ntrain_mnst)
    randpermlist_i = np.random.permutation(range(ntrain_mnst,ntrain))
    # Iteration
    for i in range(total_batch):
        # Balanced mini-batch. Slicing already stops at the end of the array;
        # the former explicit bound of n-1 silently dropped the last sample.
        randidx_m = randpermlist_m[i*mnst_per_batch:(i+1)*mnst_per_batch]
        randidx_i = randpermlist_i[i*imgnet_per_batch:(i+1)*imgnet_per_batch]
        randidx = list(randidx_m)+list(randidx_i)
        random.shuffle(randidx)
        batch_xs = x_trn[randidx, :]
        batch_ys = y_trn[randidx, :]
        feeds = {x: batch_xs, y: batch_ys, is_training:True}
        # One run performs the update and reports cost/accuracy of that same
        # step, instead of two extra forward passes with fresh dropout masks.
        _, batch_cost, batch_acc = sess.run([optm, cost, accr], feed_dict=feeds)
        # Acc and cost
        avg_cost += batch_cost
        avg_acc += batch_acc
    avg_cost = avg_cost / total_batch
    avg_acc = avg_acc / total_batch
    print('\nEpoch :', epoch+1)
    print('Train Acc/Cost : %.5f / %.5f'%(avg_acc,avg_cost))
    # Display validation performance
    if (epoch+1) % display_step == 0:
        for i in range(total_batch_val):
            # Full batches only; the former bound of nval-1 dropped the last sample.
            start, stop = i*val_batch_size, (i+1)*val_batch_size
            batch_xs = val_X_proc[start:stop]
            batch_ys = y_val[start:stop]
            feeds = {x: batch_xs, y: batch_ys, is_training:False}
            # Dropout is disabled here, so a single run yields the same values
            # as two separate ones.
            batch_acc, batch_cost = sess.run([accr, cost], feed_dict=feeds)
            val_acc += batch_acc
            val_cost += batch_cost
        val_acc = val_acc / total_batch_val
        val_cost = val_cost / total_batch_val
        print('Val Acc/Cost : %.5f / %.5f'%(val_acc,val_cost))
    # Save
    if (epoch+1) % save_step == 0:
        # The file name carries the 0-based epoch index (e.g. "net-9" after
        # the 10th epoch); kept unchanged so existing checkpoint paths stay valid.
        savename = savedir + "net-" + str(epoch) + ".ckpt"
        saver.save(sess=sess, save_path=savename)
        print("[%s] Saved" % (savename))
print("Optimization finished")