In [1]:
#import
import os
import imageio
import glob
import matplotlib
import matplotlib.pyplot as plt

In [2]:
import numpy as np
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [3]:
from tensorflow.python.framework import ops
ops.reset_default_graph()

In [4]:
#set seed to get the same output each time we run the code
np.random.seed(42)
tf.set_random_seed(42)

In [5]:
#data import
def rgb2grey(rgb):
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])

In [6]:
s= r'D:/gregg_data/'
s_len = len(s)

#collecting data from all images
images=[]
W= []
H= []
label=[]

#Read image data
for file in glob.glob(r"D:\gregg_data\*.png"):
    f=imageio.imread(file)
    H.append(f.shape[0])
    W.append(f.shape[1])
    g=rgb2grey(f)
    images.append(g)
    l=file[s_len:-4].lower()
    label.append(l)

In [7]:
#calculating the max height and width among all the images
max_H = np.max(H)
max_W = np.max(W)

print(max_H)
print(max_W)

131
214


In [8]:
#padding zeros to translate images to same size
def appendzeros(img,h,w):
    result = np.zeros(shape=(max_H,max_W,1))
    img.shape=(h,w,1)
    result[:h,:w,:] = img
    return result

In [9]:
#dividing the data into train, validation and test set
X_train = []
X_test = []
X_dev = []
Y_train = []
Y_test = []
Y_dev = []

for i in range(len(images)):
    img=appendzeros(images[i],H[i],W[i])
    if i % 20 == 0:
        X_dev.append(img)
        Y_dev.append(label[i])
    elif i % 20 == 1:
        X_test.append(img)
        Y_test.append(label[i])
    else:
        X_train.append(img)
        Y_train.append(label[i])

In [10]:
print(len(images))
print(len(label))
print(len(X_train))
print(len(X_test))
print(len(X_dev))

15709
15709
14137
786
786


In [11]:
#Translation dictionary 
alpha={
    'a':0,
    'b':1,
    'c':2,
    'd':3,
    'e':4,
    'f':5,
    'g':6,
    'h':7,
    'i':8,
    'j':9,
    'k':10,
    'l':11,
    'm':12,
    'n':13,
    'o':14,
    'p':15,
    'q':16,
    'r':17,
    's':18,
    't':19,
    'u':20,
    'v':21,
    'w':22,
    'x':23,
    'y':24,
    'z':25,
    '#':26, #<PAD>
    '+':27  #<GO>
}

In [12]:
#personalized one hot vector function
def one_hot(A):
    Y=[]
    for t in A:
        T=np.zeros(shape=(2,26))
        for i in range(26):
            if t[i]==1:
                T[1][i]=1.0
            else:
                T[0][i]=1.0
        Y.append(T)
    return Y

In [13]:
#prior probabilities in training data
def mplusmat():
    m= np.zeros(shape=(26,2))
    total_characters=0

    for l in Y_train:
        for i in range(len(l)):
            if l[i].isalpha():
                m[alpha[l[i]]][0]= m[alpha[l[i]]][0] + 1
                total_characters=total_characters+1

    for i in range(m.shape[0]):
        m[i][0]=m[i][0]/total_characters
        m[i][1]=1-m[i][0]
    
    return m

In [14]:
#translates the presence of characters into one vector of 26
def label_translate(t):
    t_label=np.zeros(shape=(26),dtype=int)
    for i in range(0,26):
        if i in t:
            t_label[i]=1
    return t_label

In [15]:
#translates all the characters in range 0-25
def alpha_translate(label):
    t=[]
    l=(list(label))
    for i in range(0,len(l)):
        if l[i].isalpha():
            t.append(alpha[l[i]])
    return label_translate(t)

In [16]:
#appending all the translated labels
def datafilter_alphabet():
    tag_train=list()
    tag_dev=list()
    tag_test=list()
        
    for label in Y_train:
        tag_train.append(alpha_translate(label))
        
    for label in Y_dev:
        tag_dev.append(alpha_translate(label))
            
    for label in Y_test:
        tag_test.append(alpha_translate(label))
           
    return np.asarray(tag_train),np.asarray(tag_dev),np.asarray(tag_test)

In [17]:
#squahing function to bring the output values into the range of 0 and 1
def squash(s, axis=-1, epsilon=1e-7, name=None):
    squared_norm = tf.reduce_mean(tf.square(s), axis=axis, keep_dims=True)
    sq_norm_vec = tf.sqrt(squared_norm + epsilon)
    squash_factor = squared_norm / (1. + squared_norm)
    unit_vector = s / sq_norm_vec
    return squash_factor * unit_vector

In [18]:
#routing algorithm
def routing(routing_iterations):
    #initial logit set to zero; b_i,j <- 0
    b_ij = tf.zeros([batch_size, primary_caps, alphacaps, alphacaps_label, 1, 1], dtype=np.float32, name="b_ij")
    for i in range(routing_iterations):
        #c_i <- softmax(b_i,j)
        c_i = tf.nn.softmax(b_ij, dim=2, name="c_i")
        #s_j <- sum_i(c_i,j* sec_caps_predicted)
        s = tf.multiply(c_i, sec_caps_predicted, name="s")
        s_j = tf.reduce_sum(s, axis=1, keep_dims=True, name="s_j")
        #v_j <- squash(s_j)
        v = squash(s_j, axis=-2, name="v")
        #duplicate v by caps1
        v_j = tf.tile(v, [1, primary_caps, 1, 1, 1, 1], name="v_j")
        #agreement
        agreement = tf.matmul(sec_caps_predicted, v_j, transpose_a=True, name="agreement")
        #update b_ij
        b_ij = tf.add(b_ij, agreement)
        #print("routing layer",i+1,"\n")
    
    return v

In [19]:
#calcutaing norm of a vector
def norm_vec(s, axis=-1, epsilon=1e-7, keep_dims=False, name=None):
    squared_norm_1 = tf.reduce_sum(tf.square(s), axis=axis, keep_dims=keep_dims)
    print(s)
    return tf.sqrt(squared_norm_1 + epsilon)

In [20]:
#tuning hyperparameters
n_epochs = 6
n_iterations_per_epoch = 157
b_size=15
n_iter_val_test = 131

In [21]:
#placeholder for input pixels
X=tf.placeholder(shape=[None,max_H,max_W, 1], dtype=tf.float32, name="X")
X

<tf.Tensor 'X:0' shape=(?, 131, 214, 1) dtype=float32>

In [22]:
#number of capsules grids
caps_grid=32
#number of capsules
primary_caps=caps_grid * 7 * 12
#number of dimensions
primary_caps_dims=8

In [23]:
#parameters of convolution
conv1_parameters = {
    "filters": 32,
    "kernel_size": 3,
    "strides": 1,
    "padding": "valid",
    "activation": tf.nn.relu,
}
conv2_parameters = {
    "filters": 64,
    "kernel_size": 3,
    "strides": 2,
    "padding": "valid",
    "activation": tf.nn.relu
}
conv3_parameters = {
    "filters": 128,
    "kernel_size": 3,
    "strides": 2,
    "padding": "valid",
    "activation": tf.nn.relu
}
conv4_parameters = {
    "filters": 256, 
    "kernel_size": 3,
    "strides": 2,
    "padding": "valid",
    "activation": tf.nn.relu
}
conv5_parameters = {
    "filters": caps_grid * primary_caps_dims, 
    "kernel_size": 3,
    "strides": 2,
    "padding": "valid",
    "activation": tf.nn.relu
}

We use five convolutional layers, each layer except for the the last ones is followed by a batch normalization function.

In [24]:
epsilon = 1e-3

In [25]:
conv1 = tf.layers.conv2d(X, name="conv1", **conv1_parameters)
print (conv1)

Tensor("conv1/Relu:0", shape=(?, 129, 212, 32), dtype=float32)


In [26]:
mean1, var1 = tf.nn.moments(conv1,[0])
scale1 = tf.Variable(tf.ones([32]))
beta1 = tf.Variable(tf.zeros([32]))
BN1 = tf.nn.batch_normalization(conv1,mean1,var1,beta1,scale1,epsilon)
print(BN1)

Tensor("batchnorm/add_1:0", shape=(?, 129, 212, 32), dtype=float32)


In [27]:
conv2 = tf.layers.conv2d(BN1, name="conv2", **conv2_parameters)
print (conv2)

Tensor("conv2/Relu:0", shape=(?, 64, 105, 64), dtype=float32)


In [28]:
mean2, var2 = tf.nn.moments(conv2,[0])
scale2 = tf.Variable(tf.ones([64]))
beta2 = tf.Variable(tf.zeros([64]))
BN2 = tf.nn.batch_normalization(conv2,mean2,var2,beta2,scale2,epsilon)
print(BN2)

Tensor("batchnorm_1/add_1:0", shape=(?, 64, 105, 64), dtype=float32)


In [29]:
conv3 = tf.layers.conv2d(BN2, name="conv3", **conv3_parameters)
print (conv3)

Tensor("conv3/Relu:0", shape=(?, 31, 52, 128), dtype=float32)


In [30]:
mean3, var3 = tf.nn.moments(conv3,[0])
scale3 = tf.Variable(tf.ones([128]))
beta3 = tf.Variable(tf.zeros([128]))
BN3 = tf.nn.batch_normalization(conv3,mean3,var3,beta3,scale3,epsilon)
print(BN3)

Tensor("batchnorm_2/add_1:0", shape=(?, 31, 52, 128), dtype=float32)


In [31]:
conv4 = tf.layers.conv2d(BN3, name="conv4", **conv4_parameters)
print (conv3)

Tensor("conv3/Relu:0", shape=(?, 31, 52, 128), dtype=float32)


In [32]:
mean4, var4 = tf.nn.moments(conv4,[0])
scale4 = tf.Variable(tf.ones([256]))
beta4 = tf.Variable(tf.zeros([256]))
BN4 = tf.nn.batch_normalization(conv4,mean4,var4,beta4,scale4,epsilon)
print(BN4)

Tensor("batchnorm_3/add_1:0", shape=(?, 15, 25, 256), dtype=float32)


In [33]:
conv5 = tf.layers.conv2d(BN4, name="conv5", **conv5_parameters)
print (conv5)

Tensor("conv5/Relu:0", shape=(?, 7, 12, 256), dtype=float32)


In [34]:
#reshape to get 8D output vectors from primary capsules
primary_caps_reshaped = tf.reshape(conv5, [-1, primary_caps, primary_caps_dims],name="primary_caps_reshaped")
print(primary_caps_reshaped)

Tensor("primary_caps_reshaped:0", shape=(?, 2688, 8), dtype=float32)


In [35]:
#squashing
primary_caps_output = squash(primary_caps_reshaped, name="primary_caps_output")
print (primary_caps_output)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Tensor("mul:0", shape=(?, 2688, 8), dtype=float32)


In [36]:
#Alphabet Capsule
alphacaps = 26
alphacaps_label = 2
alphacaps_dims = 16

In [37]:
init_sigma = 0.01

#initialize weight matrix for transformation
W_random = tf.random_normal(
    shape=(1, primary_caps, alphacaps, alphacaps_label, alphacaps_dims, primary_caps_dims),name="W_random",
    stddev=init_sigma, dtype=tf.float32)
W = tf.Variable(W_random, name="W")
print(W.shape)

(1, 2688, 26, 2, 16, 8)


In [38]:
#create weight matrix for entire batch size
batch_size = tf.shape(X)[0]
W_tiled = tf.tile(W, [batch_size, 1, 1, 1, 1, 1], name="W_tiled")
W_tiled

<tf.Tensor 'W_tiled:0' shape=(?, 2688, 26, 2, 16, 8) dtype=float32>

In [39]:
#preparing output for multiplication
prim_caps_out_exp = tf.expand_dims(primary_caps_output, -1, name="prim_caps_out_exp")
prim_caps_out_tile = tf.expand_dims(prim_caps_out_exp, 2, name="prim_caps_out_tile")
prim_caps_out_tile2 = tf.expand_dims(prim_caps_out_tile, 3, name="prim_caps_out_tile2")
prim_caps_out_tile3 = tf.tile(prim_caps_out_tile2, [1, 1, alphacaps, alphacaps_label, 1, 1], name="prim_caps_out_tile3")
print(prim_caps_out_exp)
print(prim_caps_out_tile)
print(prim_caps_out_tile2)
print(prim_caps_out_tile3)

Tensor("prim_caps_out_exp:0", shape=(?, 2688, 8, 1), dtype=float32)
Tensor("prim_caps_out_tile:0", shape=(?, 2688, 1, 8, 1), dtype=float32)
Tensor("prim_caps_out_tile2:0", shape=(?, 2688, 1, 1, 8, 1), dtype=float32)
Tensor("prim_caps_out_tile3:0", shape=(?, 2688, 26, 2, 8, 1), dtype=float32)


In [40]:
#transformation of 8D vectors to 16D vectors
sec_caps_predicted = tf.matmul(W_tiled, prim_caps_out_tile3, name="sec_caps_predicted")

In [41]:
#(16X8) matmul by (8X1) => (16X1)
sec_caps_predicted

<tf.Tensor 'sec_caps_predicted:0' shape=(?, 2688, 26, 2, 16, 1) dtype=float32>

In [42]:
#output from secondary capsules via roouting
sec_caps_output = routing(6)

Instructions for updating:
dim is deprecated, use axis instead
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [43]:
#probabilities of each label class
y_proba = norm_vec(sec_caps_output, axis=-2, name="y_proba")
y_proba

Tensor("mul_6:0", shape=(?, 1, 26, 2, 16, 1), dtype=float32)


<tf.Tensor 'Sqrt_7:0' shape=(?, 1, 26, 2, 1) dtype=float32>

In [44]:
y_proba_max=tf.reduce_max(y_proba, axis=3, name="y_proba_max")
y_proba_max

<tf.Tensor 'y_proba_max:0' shape=(?, 1, 26, 1) dtype=float32>

In [45]:
y_proba_argmax = tf.argmax(y_proba, axis=3, name="y_proba")
y_proba_argmax

<tf.Tensor 'y_proba:0' shape=(?, 1, 26, 1) dtype=int64>

In [46]:
#predicted vector
y_pred = tf.squeeze(y_proba_argmax, axis=[1,3], name="y_pred")

In [47]:
y_pred

<tf.Tensor 'y_pred:0' shape=(?, 26) dtype=int64>

In [48]:
#input vector
y = tf.placeholder(shape=[None,26], dtype=tf.int64, name="y")
y

<tf.Tensor 'y:0' shape=(?, 26) dtype=int64>

In [49]:
#one_hot representation of input vector
T= tf.placeholder(shape=[None,2,26], dtype=tf.float32, name="T")
T

<tf.Tensor 'T:0' shape=(?, 2, 26) dtype=float32>

In [50]:
#setting parameters for loss function
m_plus = mplusmat()
m_minus = 1- m_plus
lambda_ = 1.5

In [51]:
sec_output_norm = norm_vec(sec_caps_output, axis=-2, keep_dims=True, name="sec_output_norm")
sec_output_norm

Tensor("mul_6:0", shape=(?, 1, 26, 2, 16, 1), dtype=float32)


<tf.Tensor 'Sqrt_8:0' shape=(?, 1, 26, 2, 1, 1) dtype=float32>

In [52]:
sec_out_norm_reshape= tf.reshape(sec_output_norm, shape=(-1, alphacaps, alphacaps_label))

In [53]:
present_error = tf.square(tf.maximum(0., m_plus - sec_out_norm_reshape), name="present_error")
print(present_error)

Tensor("present_error:0", shape=(?, 26, 2), dtype=float32)


In [54]:
absent_error = tf.square(tf.maximum(0., sec_out_norm_reshape- m_minus), name="absent_error")
print(absent_error)

Tensor("absent_error:0", shape=(?, 26, 2), dtype=float32)


In [55]:
L = tf.add(tf.matmul(present_error,T), lambda_ * tf.matmul(absent_error,(1.0 - T)), name="L")
L

<tf.Tensor 'L:0' shape=(?, 26, 26) dtype=float32>

In [56]:
#Loss
margin_loss = tf.reduce_mean(tf.reduce_sum(L, axis=2), name="margin_loss")
margin_loss

<tf.Tensor 'margin_loss:0' shape=() dtype=float32>

In [57]:
#existence prediction accuracy
accuracy = tf.reduce_mean(tf.cast(tf.equal(y, y_pred), tf.float32), name="accuracy")

In [58]:
def next_batch(A,B,i,size):
    x= (i-1)*size
    y= (i)*size  
            
    x_batch= A[x:y]
    y_batch= B[x:y]
    y_vec= one_hot(y_batch)
    
    return x_batch,y_batch,y_vec

In [59]:
optimizer = tf.train.AdamOptimizer()
training_op = optimizer.minimize(margin_loss, name="training_op")

In [60]:
init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [62]:
#training the capsule network
restore_checkpoint = True
best_loss_val = np.infty
checkpoint_path = "./my_gregg_capsule_network26_labelvec_batchnorm4"

with tf.Session() as sess:
    if restore_checkpoint and tf.train.checkpoint_exists(checkpoint_path):
        saver.restore(sess, checkpoint_path)
    else:
        init.run()

    train_tag, dev_tag, test_tag = datafilter_alphabet()
    
    for epoch in range(n_epochs):
        for iteration in range(1, n_iterations_per_epoch + 1):
            train_X,train_Y,train_Y_vec=next_batch(X_train,train_tag,iteration,b_size)
                        
            _, loss_train = sess.run([training_op, margin_loss],feed_dict={X: train_X,y: train_Y, T: train_Y_vec})
            print("\rIteration: {}/{} {:.1f}%  Loss: {:.5f}".format(iteration, n_iterations_per_epoch,
                     iteration * 100 / n_iterations_per_epoch,loss_train), end="")
            
        
        dev_X,dev_Y,dev_Y_vec=next_batch(X_dev,dev_tag,epoch+1,n_iter_val_test)
        loss_val, acc_val = sess.run([margin_loss, accuracy],feed_dict={X: dev_X, y: dev_Y, T: dev_Y_vec})
        
        print("\rEvaluating the model ",epoch+1,":")
        print("\rVal accuracy: {:.4f}%  Loss: {:.6f}".format(acc_val * 100, loss_val))

        if loss_val < best_loss_val:
            save_path = saver.save(sess, checkpoint_path)
            best_loss_val = loss_val

Evaluating the model  1 :  Loss: 3.97591
Val accuracy: 43.0417%  Loss: 4.615107
Evaluating the model  2 :  Loss: 4.06781
Val accuracy: 57.8391%  Loss: 4.981878
Evaluating the model  3 :  Loss: 3.91798
Val accuracy: 37.0816%  Loss: 4.593509
Evaluating the model  4 :  Loss: 3.93308
Val accuracy: 63.4469%  Loss: 4.963714
Evaluating the model  5 :  Loss: 3.92564
Val accuracy: 43.1298%  Loss: 4.589741
Evaluating the model  6 :  Loss: 4.17181
Val accuracy: 34.1163%  Loss: 4.616359


In [64]:
#Evaluation

with tf.Session() as sess:
    saver.restore(sess, checkpoint_path)

    loss_tests = []
    acc_tests = []
    for epoch in range(n_epochs):
        
        test_X,test_Y,test_Y_vec=next_batch(X_test,test_tag,epoch+1,n_iter_val_test)
        loss_test, acc_test = sess.run([margin_loss, accuracy],feed_dict={X: test_X, y: test_Y, T: test_Y_vec})
        
        print("\rEvaluating the model ",epoch+1,":")
        print("\rVal accuracy: {:.4f}%  Loss: {:.6f}".format(acc_test * 100, loss_test))

        loss_tests.append(loss_test)
        acc_tests.append(acc_test)

    loss_test = np.mean(loss_tests)
    acc_test = np.mean(acc_tests)
    print("\rFinal test accuracy: {:.4f}%  Loss: {:.6f}".format(acc_test * 100, loss_test))
   

INFO:tensorflow:Restoring parameters from ./my_gregg_capsule_network26_labelvec_batchnorm4
Evaluating the model  1 :
Val accuracy: 35.2613%  Loss: 4.531071
Evaluating the model  2 :
Val accuracy: 42.5426%  Loss: 5.210499
Evaluating the model  3 :
Val accuracy: 45.0969%  Loss: 4.867248
Evaluating the model  4 :
Val accuracy: 36.8174%  Loss: 4.619494
Evaluating the model  5 :
Val accuracy: 46.8585%  Loss: 4.853778
Evaluating the model  6 :
Val accuracy: 41.9260%  Loss: 4.457479
Final test accuracy: 41.4171%  Loss: 4.756595
