<a href="https://colab.research.google.com/github/iamdsc/deep_learning/blob/master/8_cnn_tf_layers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Implementing CNN with tf Layers API

In [0]:
import os
import struct
import numpy as np

### Loading and pre-processing the data

In [0]:
def load_mnist(path, kind='train'):
    """Load MNIST data from path"""
    labels_path=os.path.join(path, '%s-labels.idx1-ubyte' % kind)
    images_path=os.path.join(path, '%s-images.idx3-ubyte' % kind)
    
    with open(labels_path, 'rb') as lbpath:
        magic, n=struct.unpack('>II', lbpath.read(8))
        labels=np.fromfile(lbpath, dtype=np.uint8)
    
    with open(images_path, 'rb') as imgpath:
        magic, num, rows, cols=struct.unpack('>IIII', imgpath.read(16))
        images=np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)
        images=((images/255.0)-0.5)*2 #normalized to -1 to 1 range
    
    return images, labels

In [10]:
#downloading dataset from github
!wget https://github.com/iamdsc/deep_learning/raw/master/data.zip
!mkdir data
!unzip data.zip -d data/


X_data, y_data=load_mnist('data/data/',kind='train')
print('Rows:{},Columns:{}'.format(X_data.shape[0], X_data.shape[1]))
X_test, y_test=load_mnist('data/data/',kind='t10k')
print('Rows:{},Columns:{}'.format(X_test.shape[0], X_data.shape[1]))

X_train, y_train=X_data[:50000,:], y_data[:50000]
X_valid, y_valid=X_data[50000:,:], y_data[50000:]
print('Training: ',X_train.shape, y_train.shape)
print('Validation: ',X_valid.shape, y_valid.shape)
print('Test Set: ',X_test.shape, y_test.shape)

--2019-02-24 13:40:36--  https://github.com/iamdsc/deep_learning/raw/master/data.zip
Resolving github.com (github.com)... 192.30.253.112, 192.30.253.113
Connecting to github.com (github.com)|192.30.253.112|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/iamdsc/deep_learning/master/data.zip [following]
--2019-02-24 13:40:36--  https://raw.githubusercontent.com/iamdsc/deep_learning/master/data.zip
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 11683796 (11M) [application/zip]
Saving to: ‘data.zip.1’


2019-02-24 13:40:36 (83.0 MB/s) - ‘data.zip.1’ saved [11683796/11683796]

mkdir: cannot create directory ‘data’: File exists
Archive:  data.zip
replace data/data/t10k-images.idx3-ubyte? [y]es, [n]o, [A]

In [0]:
def batch_generator(X, y, batch_size=64, shuffle=False, random_seed=None):
    " Function for iterating through mini-batches of data "
    idx=np.arange(y.shape[0])
    
    if shuffle:
        rng=np.random.RandomState(random_seed)
        rng.shuffle(idx)
        X=X[idx]
        y=y[idx]
    
    for i in range(0, X.shape[0], batch_size):
        yield (X[i:i+batch_size,:], y[i:i+batch_size])

In [0]:
# Normalizing the data
mean_vals=np.mean(X_train, axis=0)
std_val=np.std(X_train)

X_train_centered=(X_train-mean_vals)/std_val
X_valid_centered=(X_valid-mean_vals)/std_val
X_test_centered=(X_test-mean_vals)/std_val

In [0]:
# Implementing the model
import tensorflow as tf

class ConvNN(object):
  def __init__(self,batchsize=64,epochs=20,learning_rate=1e-4,dropout_rate=0.5,shuffle=True,random_seed=None):
    np.random.seed(random_seed)
    self.batchsize=batchsize
    self.epochs=epochs
    self.learning_rate=learning_rate
    self.dropout_rate=dropout_rate
    self.shuffle=shuffle
    
    g=tf.Graph()
    with g.as_default():
      # set random-seed
      tf.set_random_seed(random_seed)
      
      # build the network
      self.build()
      
      # initializer
      self.init_op=tf.global_variables_initializer()
      
      # saver
      self.saver=tf.train.Saver()
      
      # create a session
      self.sess=tf.Session(graph=g)
  
  def build(self):
    # Placeholders for X and y:
    tf_x=tf.placeholder(tf.float32,shape=[None,784],name='tf_x')
    tf_y=tf.placeholder(tf.int32,shape=[None],name='tf_y')
    is_train=tf.placeholder(tf.bool,shape=(),name='is_train')
    
    # reshape x to a 4D tensor: [batchsize,width,height,1]
    tf_x_image=tf.reshape(tf_x,shape=[-1,28,28,1],name='input_x_2dimages')
    
    # one-hot encoding
    tf_y_onehot=tf.one_hot(indices=tf_y,depth=10,dtype=tf.float32,name='input_y_onehot')
    
    # 1st layer: Conv_1
    h1=tf.layers.conv2d(tf_x_image,kernel_size=(5,5),filters=32,activation=tf.nn.relu)
    
    # MaxPooling
    h1_pool=tf.layers.max_pooling2d(h1,pool_size=(2,2),strides=(2,2))
    
    # 2nd layer: Conv_2
    h2=tf.layers.conv2d(h1_pool,kernel_size=(5,5),filters=64,activation=tf.nn.relu)
    
    # MaxPooling
    h2_pool=tf.layers.max_pooling2d(h2,pool_size=(2,2),strides=(2,2))
    
    # 3rd layer: Fully Connected
    input_shape=h2_pool.get_shape().as_list()
    n_input_units=np.prod(input_shape[1:])
    h2_pool_flat=tf.reshape(h2_pool,shape=[-1,n_input_units])
    
    h3=tf.layers.dense(h2_pool_flat,1024,activation=tf.nn.relu)
    
    # Dropout
    h3_drop=tf.layers.dropout(h3,rate=self.dropout_rate,training=is_train)
    
    # 4th layer: Fully Connected (linear activation)
    h4=tf.layers.dense(h3_drop,10,activation=None)
    
    # Prediction
    predictions={'probabilites':tf.nn.softmax(h4,name='probabilities'),'labels':tf.cast(tf.argmax(h4,axis=1),tf.int32,name='labels')}
    
    # loss function and optimization
    cross_entropy_loss=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=h4,labels=tf_y_onehot),name='cross_entropy_loss')
    
    # optimizer
    optimizer=tf.train.AdamOptimizer(self.learning_rate)
    optimizer=optimizer.minimize(cross_entropy_loss,name='train_op')
    
    # finding accuracy
    correct_predictions=tf.equal(predictions['labels'],tf_y,name='correct_preds')
    accuracy=tf.reduce_mean(tf.cast(correct_predictions,tf.float32),name='accuracy')
    
  def save(self,epoch,path='./tflayers-model/'):
    if not os.path.isdir(path):
      os.makedirs(path)
    print('Saving model in %s' % path)
    self.saver.save(self.sess,os.path.join(path,'model.ckpt'),global_step=epoch)
    
  def load(self,epoch,path):
    print('Loading model from %s' % path)
    self.saver.restore(self.sess,os.path.join(path,'model.ckpt-%d' % epoch))
  
  def train(self,training_set,validation_set=None,initialize=True):
    # initialize variables
    if initialize:
      self.sess.run(self.init_op)
    
    self.train_cost_=[]
    X_data=np.array(training_set[0])
    y_data=np.array(training_set[1])
    
    for epoch in range(1,self.epochs+1):
      batch_gen=batch_generator(X_data,y_data,shuffle=self.shuffle)
      avg_loss=0.0
      for i,(batch_x,batch_y) in enumerate(batch_gen):
        feed={'tf_x:0':batch_x,'tf_y:0':batch_y,'is_train:0':True}
        loss,_=self.sess.run(['cross_entropy_loss:0','train_op'],feed_dict=feed)
        avg_loss+=loss
      
      print('Epoch %02d: Training Avg. Loss: %7.3f'%(epoch,avg_loss), end=' ')
      
      if validation_set is not None:
        feed={'tf_x:0':batch_x,'tf_y:0':batch_y,'is_train:0':False}
        valid_acc=self.sess.run('accuracy:0',feed_dict=feed)
        print('Validation Acc: %7.3f' % valid_acc)
      else:
        print()
  
  def predict(self,X_test,return_proba=False):
    feed={'tf_x:0':X_test,'is_train:0':False}
    if return_proba:
      return self.sess.run('probabilities:0',feed_dict=feed)
    else:
      return self.sess.run('labels:0',feed_dict=feed)

In [15]:
cnn=ConvNN(random_seed=123)

# training the model
cnn.train(training_set=(X_train_centered,y_train),validation_set=(X_valid_centered,y_valid),initialize=True)
cnn.save(epoch=20)

Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.max_pooling2d instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Epoch 01: Training Avg. Loss: 266.056 Validation Acc:   1.000
Epoch 02: Training Avg. Loss:  72.895 Validation Acc:   1.000
Epoch 03: Training Avg. Loss:  49.570 Validation Acc:   1.000
Epoch 04: Training Avg. Loss:  39.510 Validation Acc:   1.000
Epoch 05: Training Avg. Loss:  31.900 Validation Acc:   1.000
Epoch 06: Training Avg. Loss:  26.443 Validation Acc:   1.000
Epo

In [16]:
# performing prediction
cnn2=ConvNN(random_seed=123)
cnn2.load(epoch=20,path='./tflayers-model/')

print(cnn2.predict(X_test_centered[:10,:]))

Loading model from ./tflayers-model/
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ./tflayers-model/model.ckpt-20
[7 2 1 0 4 1 4 9 5 9]


In [17]:
# Calculating test accuracy
preds=cnn2.predict(X_test_centered)
print('Test Accuracy: %.2f%%' % (100*np.sum(y_test==preds)/len(y_test)))

Test Accuracy: 99.25%
