# Recurrent Convolutional Neural Network for Object recognition

Implementation of simple rcnn using mnist

In [1]:
import tensorflow as tf

In [2]:
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('../MNIST_data', validation_size=0, one_hot=True)

Extracting ../MNIST_data/train-images-idx3-ubyte.gz
Extracting ../MNIST_data/train-labels-idx1-ubyte.gz
Extracting ../MNIST_data/t10k-images-idx3-ubyte.gz
Extracting ../MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
batch_size = 100

In [4]:
def define_inputs():
    x = tf.placeholder(dtype=tf.float32, shape=[batch_size, 28, 28, 1])
    y_ = tf.placeholder(dtype=tf.float32, shape=[batch_size, 10])
    keep_prob = tf.placeholder(dtype=tf.float32)
    return x, y_, keep_prob

### Recurrent Convolutional Layer (RCL)

In [13]:
def rcl(u, filters, kernel_size, name, timesteps=4):
    
    assert timesteps > 0, "timesteps should be greater than 0"
    
    with tf.variable_scope(name):
        wf_u = tf.layers.conv2d(inputs=u, filters=filters, kernel_size=kernel_size, 
                                padding="same", name="wf_u")
        wr = lambda x, reuse: tf.layers.conv2d(inputs=x, filters=filters, kernel_size=kernel_size, 
                                               padding="same", name="wr_x", reuse=reuse)
#         bias = tf.get_variable(name="bias", shape=wf_u.get_shape())
#         activ = lambda z: tf.nn.relu(tf.layers.batch_normalization(z), name="activ")
        activ = lambda z: tf.nn.relu(z)

        def rcl_step(state, timestep):
            print("rcl_step")
            if timestep == 0:
                print("rcl_building 0")
                state_new = activ(wf_u)
                print("in", state_new)
            elif timestep == 1:
                print("rcl_building 1")
                state_new = activ(wf_u + wr(state, reuse=False))
            else:
                print("rcl_building 2")
                state_new = activ(wf_u + wr(state, reuse=True))
            return state_new

        state_t = None
        print("start loop")
        for t in range(timesteps):
            print("loo", t)
            state_new = rcl_step(state_t, t)
            state_t = state_new
        
        print(state_t)
        return state_t

In [17]:
tf.reset_default_graph()
x, y_, keep_prob = define_inputs()
conv1 = rcl(x, filters=16, kernel_size=3, name="rcl1", timesteps=2)
print(conv1)
# conv1 = tf.layers.conv2d(inputs=x, filters=16, kernel_size=3, padding="same", activation=tf.nn.relu, name="my_conv1")
pool_1 = tf.layers.max_pooling2d(conv1, pool_size=[2, 2], strides=2, name="my_pool1")
conv2 = tf.layers.conv2d(inputs=pool_1, filters=16, kernel_size=3, padding="same", activation=tf.nn.relu, name="my_conv2")
pool = tf.layers.max_pooling2d(conv2, pool_size=[2, 2], strides=2, name="my_pool2")
flat = tf.layers.flatten(pool, name="my_flat")
fc = tf.layers.dense(flat, units=1028, activation=tf.nn.softmax, name="my_fc")
drop = tf.layers.dropout(fc, rate=keep_prob, name="my_drop")
logit = tf.layers.dense(drop, units=10, name="my_logit")

cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=logit, name="sfce"), name="cross")

tf.summary.scalar(name="xentro", tensor=cross_entropy)

train_step = tf.train.AdamOptimizer(0.001, name="my_adam").minimize(cross_entropy, name="adam_min")
correct_prediction = tf.equal(tf.argmax(logit,1), tf.argmax(y_,1), name="correct_pred")
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="my_acc")
train_acc = []

start loop
loo 0
rcl_step
rcl_building 0
in Tensor("rcl1/activ:0", shape=(100, 28, 28, 16), dtype=float32)
loo 1
rcl_step
rcl_building 1
Tensor("rcl1/activ_1:0", shape=(100, 28, 28, 16), dtype=float32)
Tensor("rcl1/activ_1:0", shape=(100, 28, 28, 16), dtype=float32)


In [18]:
def train_model():
    with tf.Session() as sess:
        
        train_writer = tf.summary.FileWriter('./train', sess.graph)
         
        sess.run(tf.global_variables_initializer())
        
        for i in range(3000):
            batch = mnist.train.next_batch(batch_size)
            imgs = batch[0].reshape(batch_size, 28, 28, 1)
            labels = batch[1]
            if i%100 == 0:
                train_accuracy = accuracy.eval(feed_dict={x:imgs, y_: labels, keep_prob: 1})
                train_acc.append(train_accuracy)
                print("step %d, training accuracy %g"%(i, train_accuracy))
            train_step.run(feed_dict={x: imgs, y_: batch[1], keep_prob: 0.5})
        
        test_acc = 0
        for j in range(1000):
            batch_test = mnist.test.next_batch(batch_size)
            imgs_test = batch[0].reshape(-1, 28, 28, 1)
            labels_test = batch[1]
            test_acc += accuracy.eval(
                feed_dict={x: imgs_test, y_: labels_test, keep_prob: 1})
        test_acc = test_acc/1000
        print("test accuracy: {0:.4}".format(test_acc))
    
    return train_acc, test_acc 

In [None]:
train_model()

step 0, training accuracy 0.11
step 100, training accuracy 0.79
step 200, training accuracy 0.85
step 300, training accuracy 0.88
step 400, training accuracy 0.88
step 500, training accuracy 0.91
