In [1]:
import tensorflow as tf
import os
%matplotlib inline
import matplotlib.pyplot as plt

  return f(*args, **kwds)


### Utilities

In [2]:
def run_tf(x):
    
    config = tf.ConfigProto()
    config.gpu_options.allow_growth=True
    with tf.Session(config=config) as sess: 
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        out = sess.run(x)

        coord.request_stop()
        coord.join(threads)
        
        return out
    

class FLAGS(object):
    pass

FLAGS.batch_size = 128
FLAGS.data_dir = "/home/sankaran/exercise/ML/TF-Exercise/Tutorials/CIFAR/cifar-10-batches-bin"
FLAGS.num_preprocess_threads = 16
FLAGS.num_classes = 10
FLAGS.dtype = tf.float32

def distorted_inputs(data_dir, batch_size, distort=True):
    
    filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % i) for i in range(1, 6)]
    
    # Create a queue that produces the filenames to read.
    filename_queue = tf.train.string_input_producer(filenames,seed=0)
    
    #Create FixedLenthRecord Reader with fixed bytes to read
    record_bytes = 32*32*3+1 #32*32*3 image with 1 byte for label
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    
    key, value = reader.read(filename_queue)
    
    ##Decode
    decoded = tf.decode_raw(value, tf.uint8)
    label = tf.strided_slice(decoded,[0],[1])
    image = tf.strided_slice(decoded,[1],[record_bytes])
    
    
    label = tf.cast(label,tf.int32)
    label = tf.reshape(label,[1])
    image = tf.reshape(image,[3,32,32])
    image = tf.transpose(image,[1,2,0])
    #image = tf.cast(image,tf.float32) ## DESTROYES IMAGE VIS
    
    ##PRE PROCESS
    if(distort):
        image = tf.random_crop(image, [24, 24, 3])
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_brightness(image,max_delta=0.4)
        image = tf.image.random_contrast(image,lower=0.5,upper=1.8)
    
    image = tf.image.convert_image_dtype(image,dtype=FLAGS.dtype)
    # Ensure that the random shuffling has good mixing properties.
    min_fraction_of_examples_in_queue = 0.4
    NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
    min_queue_examples = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN *
                             min_fraction_of_examples_in_queue)
    
    images, label_batch = tf.train.shuffle_batch(
        [image, label],
        batch_size=batch_size,
        num_threads=FLAGS.num_preprocess_threads,
        capacity=min_queue_examples + 3 * batch_size,
        min_after_dequeue=min_queue_examples,
        seed=0)
    
    return [images,label_batch]

In [3]:
def inference(images):
    
    def weight_variable(shape,std=0.1,dtype=FLAGS.dtype):
      initializer = tf.truncated_normal_initializer(stddev=std, dtype=dtype)
      return tf.get_variable("Weights",shape,initializer=initializer,dtype=dtype)

    def bias_variable(shape,const=0.0,dtype=FLAGS.dtype):
      initializer = tf.constant_initializer(const,dtype)
      return tf.get_variable("biases",shape,initializer=initializer,dtype=dtype)
    
    print("input : ", images)
    
    with tf.variable_scope("conv1"):
        W_conv1 = weight_variable([5, 5, 3, 64],std=5e-2)
        b_conv1 = bias_variable([64],const=0.0)

        conv = tf.nn.conv2d(images,W_conv1,strides=[1,1,1,1],padding="SAME")
        h_conv1 = tf.nn.relu(conv + b_conv1)
        
        print("conv 1 : ",h_conv1)
        
    with tf.variable_scope("maxpool1_norm"):
        h_pool1 = tf.nn.max_pool(h_conv1,ksize=[1,3,3,1],strides=[1,2,2,1],padding="SAME")
        h_norm1 = tf.nn.lrn(h_pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')
        
        print("pool_norm 1 : ",h_norm1)
        
    with tf.variable_scope("conv2"):
        W_conv2 = weight_variable([5, 5, 64, 64],std=5e-2)
        b_conv2 = bias_variable([64],const=0.1)

        conv = tf.nn.conv2d(h_norm1,W_conv2,strides=[1,1,1,1],padding="SAME")
        h_conv2 = tf.nn.relu(conv + b_conv2)
        
        print("conv 2 : ",h_conv2)
        
    with tf.variable_scope("norm_maxpool2"):
        h_norm2 = tf.nn.lrn(h_conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')
        h_pool2 = tf.nn.max_pool(h_norm2,ksize=[1,3,3,1],strides=[1,2,2,1],padding="SAME")
        
        print("norm_pool 2 : ",h_pool2)
        
    with tf.variable_scope("Flatten"):
        h_pool2_flat = tf.reshape(h_pool2, [FLAGS.batch_size, -1])
        
        print("flatten : ",h_pool2_flat)
        
    with tf.variable_scope("fc1"):
        W_fc1 = weight_variable([h_pool2_flat.shape[1].value, 384],std=0.04)
        b_fc1 = bias_variable([384],const=0.1)
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        
        print("fc1 : ",h_fc1)
        
    with tf.variable_scope("fc2"):
        W_fc2 = weight_variable([384, 192],std=0.04)
        b_fc2 = bias_variable([192],const=0.1)
        h_fc2 = tf.nn.relu(tf.matmul(h_fc1, W_fc2) + b_fc2)
        
        print("fc2 : ",h_fc2)
        
    with tf.variable_scope("logit"):
        W_fc3 = weight_variable([192, FLAGS.num_classes],std=1/192.0)
        b_fc3 = bias_variable([FLAGS.num_classes],const=0.0)
        logit = tf.nn.relu(tf.matmul(h_fc2, W_fc3) + b_fc3)
        
        print("logit : ",logit)
        
        return logit

In [4]:
with tf.variable_scope("Input-queue"):
    images,labels = distorted_inputs(FLAGS.data_dir,FLAGS.batch_size,distort=False)
    #tf.summary.image('images', images)
    
logit = inference(images)
tf.summary.histogram("Logits",logit)

input :  Tensor("Input-queue/shuffle_batch:0", shape=(128, 32, 32, 3), dtype=float32)
conv 1 :  Tensor("conv1/Relu:0", shape=(128, 32, 32, 64), dtype=float32)
pool_norm 1 :  Tensor("maxpool1_norm/norm1:0", shape=(128, 16, 16, 64), dtype=float32)
conv 2 :  Tensor("conv2/Relu:0", shape=(128, 16, 16, 64), dtype=float32)
norm_pool 2 :  Tensor("norm_maxpool2/MaxPool:0", shape=(128, 8, 8, 64), dtype=float32)
flatten :  Tensor("Flatten/Reshape:0", shape=(128, 4096), dtype=float32)
fc1 :  Tensor("fc1/Relu:0", shape=(128, 384), dtype=float32)
fc2 :  Tensor("fc2/Relu:0", shape=(128, 192), dtype=float32)
logit :  Tensor("logit/Relu:0", shape=(128, 10), dtype=float32)


<tf.Tensor 'Logits:0' shape=() dtype=string>

In [5]:
out = run_tf(logit)
out.shape

(128, 10)

### Loss function

In [6]:
def loss(logit,labels):
    
    with tf.variable_scope("cross-entropy"):
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf.reshape(labels,[FLAGS.batch_size]), logits=logit)
        avg_cross_entropy = tf.reduce_mean(cross_entropy)
        
        return avg_cross_entropy

### Regularizer

Regularizing the weights of fc1 and fc2

In [7]:
def regularizer(wd):
    fc1_w = tf.trainable_variables(scope='fc1/Weights')[0]
    wd_fc1 = tf.multiply(tf.nn.l2_loss(fc1_w), wd, name='fc1/weight_loss')
    tf.summary.scalar("fc1_decay",wd_fc1)

    fc2_w = tf.trainable_variables(scope='fc2/Weights')[0]
    wd_fc2 = tf.multiply(tf.nn.l2_loss(fc2_w), wd, name='fc2/weight_loss')
    tf.summary.scalar("fc2_decay",wd_fc2)
    
    return wd_fc1+wd_fc2

### Training

#### GRADIENT DESCENT

In [8]:
learning_rate = tf.placeholder(FLAGS.dtype)
decay_step = tf.placeholder(tf.int32)
decay_rate = tf.placeholder(FLAGS.dtype)
global_step = tf.train.get_or_create_global_step()

ce_loss = loss(logit,labels)
tf.summary.scalar("mean_cross_entropy",ce_loss)

total_loss = ce_loss + regularizer(0.004)
tf.summary.scalar("total_loss",total_loss)

<tf.Tensor 'total_loss:0' shape=() dtype=string>

In [9]:
lr = tf.train.exponential_decay(
                                learning_rate=learning_rate,
                                global_step=global_step,
                                decay_steps=decay_step,
                                decay_rate=decay_rate,
                                staircase=True)
tf.summary.scalar("learning_rate",lr)
opt = tf.train.GradientDescentOptimizer(lr)
#opt = tf.train.AdamOptimizer(lr)
grads = opt.compute_gradients(total_loss)

grad_to_compute = [[g,v] for g,v in grads if g is not None]
# for grad,var in grad_to_compute:
#     tf.summary.histogram("Params/" + var.name,var)
#     tf.summary.histogram("Gradients/" + var.name,grad)
    
train_op = opt.apply_gradients(grads, global_step=global_step)

#### RUN SESSION

In [10]:
feed_dict = {learning_rate:0.1,
            decay_rate : 0.1,
            decay_step:80000}

max_steps = 1700

In [11]:
merged = tf.summary.merge_all()

config = tf.ConfigProto()
config.gpu_options.allow_growth=True
with tf.Session(config=config) as sess: 
    
    writer = tf.summary.FileWriter("log/Train_init_calc/regularized_lrn/t1_SGD_lr0.1",sess.graph)
    
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    for i in range(max_steps):
        _,loss,out,summary = sess.run([train_op,total_loss,logit,merged],feed_dict=feed_dict)
        writer.add_summary(summary,i)
        if(i%100 == 0):
            print(i, "Loss : ", "{:.2e}".format(loss))

    coord.request_stop()
    coord.join(threads)

0 Loss :  6.37e+00
100 Loss :  6.02e+00
200 Loss :  5.73e+00
300 Loss :  5.40e+00
400 Loss :  5.05e+00
500 Loss :  4.65e+00
600 Loss :  4.53e+00
700 Loss :  4.04e+00
800 Loss :  3.84e+00
900 Loss :  3.61e+00
1000 Loss :  3.41e+00
1100 Loss :  3.12e+00
1200 Loss :  2.98e+00
1300 Loss :  2.77e+00
1400 Loss :  2.76e+00
1500 Loss :  2.26e+00
1600 Loss :  2.49e+00
