# In-Graph Replication (Asynchronous)

In [2]:
import numpy as np
import tensorflow as tf
from keras.datasets import mnist

Using TensorFlow backend.


In [3]:
# Display the installed TensorFlow version; the cells below use the 1.x
# graph/session API (tf.placeholder, tf.layers, tf.InteractiveSession).
tf.__version__

'1.11.0'

In [4]:
# Load MNIST through Keras as (images, labels) pairs for the train and
# test splits; the shapes are printed in the following cells.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [5]:
# Report the image array shapes of both splits in a single write.
print("\n".join([
    "Train Set: {}".format(x_train.shape),
    "Test Set: {}".format(x_test.shape),
]))

Train Set: (60000, 28, 28)
Test Set: (10000, 28, 28)


In [6]:
# Report the raw label array shapes of both splits in a single write.
print("\n".join([
    "Train Set: {}".format(y_train.shape),
    "Test Set: {}".format(y_test.shape),
]))

Train Set: (60000,)
Test Set: (10000,)


In [7]:
# Cluster layout: one parameter-server task and four worker tasks, each
# identified by a "host:port" address. Task indices follow list order, so
# "/job:worker/task:0" refers to the first worker address below.
cluster = tf.train.ClusterSpec({
    "ps": ["172.17.0.1:2222"],
    "worker": [
        "172.17.0.2:2223",
        "172.17.0.3:2224",
        "172.17.0.4:2225",
        "172.17.0.5:2226",
    ],
})

In [6]:
# Scale pixel values by 1/255 (assuming raw intensities span 0-255, this
# maps them into [0, 1]).
# NOTE: the names are rebound in place, so re-running this cell divides by
# 255 a second time; restart from the load cell before re-executing it.
x_train, x_test = x_train / 255.0, x_test / 255.0

In [7]:
# One-hot encode the labels: indexing the 10x10 identity matrix with a
# label picks the row whose single 1 sits at that class index.
y_train, y_test = (np.eye(10)[labels] for labels in (y_train, y_test))

# If the labels were not a flat vector (e.g. an (N, 1) column), the lookup
# above leaves an extra axis; keep only its first entry so the result is
# (N, 10). The check looks at y_train only and is applied to both splits.
if y_train.shape[1] != 10:
    y_train, y_test = y_train[:, 0], y_test[:, 0]

In [8]:
# Report the one-hot label array shapes of both splits in a single write.
print("\n".join([
    "Train Set: {}".format(y_train.shape),
    "Test Set: {}".format(y_test.shape),
]))

Train Set: (60000, 10)
Test Set: (10000, 10)


In [9]:
# Graph inputs, fed on every sess.run call:
#   x - a batch of 28x28 images (batch dimension left open)
#   y - the matching one-hot labels over 10 classes
x = tf.placeholder(dtype=tf.float32, shape=(None, 28, 28), name="x")
y = tf.placeholder(dtype=tf.int8, shape=(None, 10), name="y")

## Sub Graph on Parameter Server 1

In [10]:
# Dropout switch. tf.layers.dropout only drops units when `training` is
# true, and its default is False, so without this switch the layer below
# is always a pass-through. The placeholder defaults to False, so any
# sess.run call that does not feed it behaves as before; feed
# `{is_training: True}` on training steps to actually apply dropout.
is_training = tf.placeholder_with_default(False, shape=[], name="is_training")

# Build the CNN (conv -> pool -> conv -> pool -> dense -> dropout -> logits)
# with every op and variable pinned to parameter-server task 0.
with tf.device("/job:ps/task:0"):
    # Input Layer: add a trailing channel axis -> [batch, 28, 28, 1]
    input_layer = tf.reshape(x, [-1, 28, 28, 1])

    # Convolutional Layer #1: 32 5x5 filters, "same" padding keeps 28x28
    conv1 = tf.layers.conv2d(
        inputs=input_layer,
        filters=32,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu
    )

    # Pooling Layer #1: 2x2 max-pool with stride 2 -> 14x14
    pool1 = tf.layers.max_pooling2d(
        inputs=conv1,
        pool_size=[2, 2],
        strides=2
    )

    # Convolutional Layer #2: 64 5x5 filters, "same" padding keeps 14x14
    conv2 = tf.layers.conv2d(
        inputs=pool1,
        filters=64,
        kernel_size=[5, 5],
        padding="same",
        activation=tf.nn.relu
    )

    # Pooling Layer #2: 2x2 max-pool with stride 2 -> 7x7
    pool2 = tf.layers.max_pooling2d(
        inputs=conv2,
        pool_size=[2, 2],
        strides=2
    )

    # Dense Layer: flatten the 7x7x64 feature maps per example
    pool2_flat = tf.reshape(
        tensor=pool2,
        shape=[-1, 7 * 7 * 64]
    )

    dense = tf.layers.dense(
        inputs=pool2_flat,
        units=1024,
        activation=tf.nn.relu
    )

    # Drops 40% of the dense activations, but only while is_training is fed
    # as True; otherwise the input passes through unchanged.
    dropout = tf.layers.dropout(
        inputs=dense,
        rate=0.4,
        training=is_training
    )

    # Logits Layer: one unnormalised score per class
    logits = tf.layers.dense(
        inputs=dropout,
        units=10
    )

In [11]:
# Derive the worker count from the cluster definition so the per-worker
# sub-graphs always match the addresses listed under the "worker" job
# (currently four) instead of a separately maintained constant.
NO_OF_WORKERS = cluster.num_tasks("worker")

## Sub-Graphs on the Worker Nodes

In [12]:
# Per-worker graph handles, keyed by worker task index.
#   OUTPUTS[i]   -> [loss, correct_pred, accuracy]
#   TRAIN_OPS[i] -> the training op placed on worker i
OUTPUTS = {}
TRAIN_OPS = {}

# minimize() only increments a step counter when it is handed one, and
# tf.train.get_global_step() returns None while no global step exists in
# the graph. Create it explicitly, next to the model variables on the
# parameter server, so every worker's update increments the same counter.
with tf.device("/job:ps/task:0"):
    global_step = tf.train.get_or_create_global_step()

for i in range(NO_OF_WORKERS):
    with tf.device("/job:worker/task:{}".format(i)):
        # Class indices recovered from the one-hot labels
        labels = tf.argmax(y, axis=1)

        # Calculate Loss (for both TRAIN and EVAL modes)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

        # Configure the Training Op (for TRAIN mode)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
        train_op = optimizer.minimize(
            loss=loss,
            global_step=global_step
        )

        # Add evaluation metrics (for EVAL mode)
        correct_pred = tf.equal(tf.argmax(logits, 1), labels)
        accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

        OUTPUTS[i] = [loss, correct_pred, accuracy]
        TRAIN_OPS[i] = train_op

# After the loop, `loss`, `train_op`, `correct_pred` and `accuracy` refer to
# the ops of the last worker; the other workers' ops are reachable through
# OUTPUTS and TRAIN_OPS.

In [13]:
# EPOCHS is the number of training-loop iterations (the loop draws one
# mini-batch per iteration); BATCH_SIZE is the number of examples per batch.
EPOCHS, BATCH_SIZE = 6000, 16

In [14]:
# Connect the client session to the first worker's gRPC endpoint. A session
# created without a target runs in-process and only knows local devices, so
# the "/job:ps/..." and "/job:worker/..." placements used by the graph could
# not be resolved and `cluster` would go unused.
# Requires a tf.train.Server to be running for every task listed in `cluster`.
sess = tf.InteractiveSession(target="grpc://" + cluster.task_address("worker", 0))

In [15]:
# Initialise every variable in the graph before the first training step.
sess.run(tf.global_variables_initializer())

for i in range(EPOCHS):
    # Draw a random mini-batch (sampling with replacement) from the training set.
    idx = np.random.randint(0, x_train.shape[0], size=BATCH_SIZE)

    x_batch = x_train[idx]
    y_batch = y_train[idx]

    # One gradient-descent update on this batch.
    sess.run(train_op, feed_dict={ x: x_batch, y: y_batch })

    if i%100 == 0:
        # Loss/accuracy on the batch just trained on, measured after the
        # update. Only evaluated on reporting steps, since the values are
        # not used anywhere else. NOTE: the "Val_Acc" column below is this
        # training-batch accuracy, not a held-out validation score.
        LOSS, ACC = sess.run([loss, accuracy], feed_dict={ x: x_batch, y: y_batch })

        # Test accuracy over the whole test set, evaluated batch by batch.
        # Each batch accuracy is weighted by its size so a shorter final
        # batch cannot skew the overall figure.
        n_test = x_test.shape[0]
        correct = 0.0
        for j in range(0, n_test, BATCH_SIZE):
            end = min(j + BATCH_SIZE, n_test)
            batch_acc = sess.run(accuracy, feed_dict={
                x: x_test[j:end],
                y: y_test[j:end]
            })
            correct += batch_acc * (end - j)

        TEST_ACC = correct / n_test

        print("Epochs: {:4d}   Loss:{:.6f}   Val_Acc:{:.3f}%   Test_Acc:{:.3f}%".format(i, LOSS, ACC*100, 100*TEST_ACC))

Epochs:    0   Loss:2.288923   Val_Acc:12.500%   Test_Acc:10.360%
Epochs:  100   Loss:2.287751   Val_Acc:18.750%   Test_Acc:18.590%
Epochs:  200   Loss:2.254906   Val_Acc:12.500%   Test_Acc:19.790%
Epochs:  300   Loss:2.265726   Val_Acc:31.250%   Test_Acc:32.460%
Epochs:  400   Loss:2.171551   Val_Acc:62.500%   Test_Acc:34.600%
Epochs:  500   Loss:2.251521   Val_Acc:18.750%   Test_Acc:42.720%
Epochs:  600   Loss:2.168793   Val_Acc:37.500%   Test_Acc:45.590%
Epochs:  700   Loss:2.086233   Val_Acc:62.500%   Test_Acc:56.600%
Epochs:  800   Loss:2.075069   Val_Acc:50.000%   Test_Acc:63.570%
Epochs:  900   Loss:1.883248   Val_Acc:62.500%   Test_Acc:66.720%
Epochs: 1000   Loss:1.811375   Val_Acc:75.000%   Test_Acc:72.340%
Epochs: 1100   Loss:1.411565   Val_Acc:93.750%   Test_Acc:72.620%
Epochs: 1200   Loss:1.460059   Val_Acc:62.500%   Test_Acc:73.610%
Epochs: 1300   Loss:1.307693   Val_Acc:75.000%   Test_Acc:77.420%
Epochs: 1400   Loss:1.025935   Val_Acc:87.500%   Test_Acc:78.200%
Epochs: 15