In [1]:
# Imports
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn
import string

  from ._conv import register_converters as _register_converters


In [2]:
# Integer to Binary Generator
def getKary(x, b, padding):
    assert(x >= 0)
    assert(1< b < 37)
    # Edge case
    if x == 0:
        return [0] * (padding)
    else:
        r = ''
        pad = []
        while x > 0:
            r = string.printable[x % b] + r
            x //= b
            converted = list(map(int, list(r)))
            pad = [0] * (padding - len(converted))
            pad.extend(converted)
        return pad
    
# Test case
number = 10
base = 3
padding = 6
print(getKary(number, base, padding))

[0, 0, 0, 1, 0, 1]


In [3]:
# Dataset Creation
def create_data(num_samples, base, padding):
    np.random.seed(1)
    x = np.zeros(num_samples)
    y = np.zeros((num_samples, padding))
    max_val = base ** padding - 1
    for i in range(num_samples):
        number = np.random.randint(0, max_val)
        x[i] = int(number)
        y[i] = getKary(number, base, padding) 
    return x, y

# Test Case
num_samples = 5
base = 3
padding = 6
X, y = create_data(num_samples, base, padding)
for i in range(X.shape[0]):
    print(X[i], '\t ', y[i])

37.0 	  [0. 0. 1. 1. 0. 1.]
235.0 	  [0. 2. 2. 2. 0. 1.]
72.0 	  [0. 0. 2. 2. 0. 0.]
715.0 	  [2. 2. 2. 1. 1. 1.]
645.0 	  [2. 1. 2. 2. 2. 0.]


In [4]:
# TF Model Parameters
padding = 4
base = 3
training_samples = 1000  # Need more samples to train on
testing_samples = 20
# Need to use adaptive LR, as the tr loss starts to diverge going forward if not used
global_step = tf.Variable(0, trainable=False)
starter_learning_rate = 0.01
learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
                                           10000, 0.96, staircase=True)
training_steps = 100000  # Training longer seems to do the trick + More training samples + Multi-LSTM
display_steps = 5000
n_input = 1
n_hidden_units = 32  # 32 BEST # Need more hidden units as compared to Binary to Int model
n_output = padding
timestep = 1

In [5]:
# Generate Training and Testing Data
X_train, y_train = create_data(training_samples, base, padding)
X_test, y_test = create_data(testing_samples, base, padding)

# Print data
display = 5
for i in range(display):
    print(X_train[i], '\t', y_train[i], "\n")

37.0 	 [1. 1. 0. 1.] 

12.0 	 [0. 1. 1. 0.] 

72.0 	 [2. 2. 0. 0.] 

9.0 	 [0. 1. 0. 0.] 

75.0 	 [2. 2. 1. 0.] 



In [6]:
# TF Model and intializations
X = tf.placeholder(tf.float32, [None, timestep, n_input])
y = tf.placeholder(tf.float32, [None, n_output])
W = tf.Variable(tf.random_normal([n_hidden_units, n_output]))
b = tf.Variable(tf.random_normal([n_output]))

def model(X, W, b, timestep, n_hidden_units):
    num_layers = 3  # 3 BEST
    dropout = tf.constant(1)  #0.7 BEST
    X = tf.unstack(X, timestep, 1)
    cell = tf.nn.rnn_cell.LSTMCell(n_hidden_units, state_is_tuple=True)
    cell = tf.nn.rnn_cell.DropoutWrapper(cell, input_keep_prob=dropout, output_keep_prob=dropout)
    cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
    lstm_cell = rnn.MultiRNNCell([rnn.BasicLSTMCell(n_hidden_units),rnn.BasicLSTMCell(n_hidden_units)])  # Additional Layers Required
    # lstm_cell = rnn.BasicLSTMCell(n_hidden_units, forget_bias=1.0)
    outputs, states = rnn.static_rnn(lstm_cell, X, dtype=tf.float32)
    logits = tf.matmul(outputs[-1], W) + b
    return logits

logits = model(X, W, b, timestep, n_hidden_units)
loss = tf.reduce_mean(tf.losses.mean_squared_error(logits, y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)  # Adam works better as compared to RMSProp
training = optimizer.minimize(loss, global_step=global_step)

In [7]:
# Reshape data
X_train = np.reshape(X_train, [-1, timestep, n_input])
y_train = np.reshape(y_train, [-1, n_output])

X_test = np.reshape(X_test, [-1, timestep, n_input])
y_test = np.reshape(y_test, [-1, n_output])

# Print data
display = 5
for i in range(display):
    print(X_train[i], '\t', y_train[i])

[[37.]] 	 [1. 1. 0. 1.]
[[12.]] 	 [0. 1. 1. 0.]
[[72.]] 	 [2. 2. 0. 0.]
[[9.]] 	 [0. 1. 0. 0.]
[[75.]] 	 [2. 2. 1. 0.]


In [10]:
# Run TF
tf.set_random_seed(0)
with tf.Session() as sess:
    tf.global_variables_initializer().run()
    for step in range(training_steps):
        _, loss_out = sess.run([training, loss], feed_dict={X: X_train, y:y_train})
        if step % display_steps == 0:
            print("Loss {} at timestep {}" .format(loss_out, step))
            out = sess.run(logits, feed_dict={X: X_test})

Loss 2.57352161407 at timestep 0
Loss 0.132245913148 at timestep 5000
Loss 0.108583122492 at timestep 10000
Loss 0.12410672754 at timestep 15000
Loss 0.0780641362071 at timestep 20000
Loss 0.0954181924462 at timestep 25000
Loss 0.0461850613356 at timestep 30000
Loss 0.0826843380928 at timestep 35000
Loss 0.205419555306 at timestep 40000
Loss 0.0687571316957 at timestep 45000
Loss 0.0745244473219 at timestep 50000
Loss 0.089118257165 at timestep 55000
Loss 0.0820879340172 at timestep 60000
Loss 0.0714473351836 at timestep 65000
Loss 0.124975688756 at timestep 70000
Loss 0.0915157422423 at timestep 75000
Loss 0.0879760980606 at timestep 80000
Loss 0.0821021646261 at timestep 85000
Loss 0.0857841819525 at timestep 90000
Loss 0.0788920074701 at timestep 95000


In [12]:
# Evaluation Metric
out = np.abs(np.round(out))
plot =  True
if plot is True:
    print("Ground Truth \t Predicted")
    disp = 20
    rdm = np.random.randint(0, y_test.shape[0], disp)
    for i in rdm:
        print(y_test[i], "->", out[i])
acc = out == y_test
acc = acc.sum(axis=1) == padding
acc = acc.sum()/float(len(y_test))
print("Accuracy is {} \n" .format(acc))

Ground Truth 	 Predicted
[2. 2. 1. 1.] -> [2. 2. 1. 1.]
[0. 1. 0. 0.] -> [0. 1. 0. 0.]
[2. 1. 2. 2.] -> [2. 1. 2. 2.]
[1. 0. 0. 1.] -> [1. 0. 0. 1.]
[2. 2. 0. 0.] -> [2. 2. 0. 0.]
[1. 1. 0. 1.] -> [1. 1. 0. 1.]
[1. 1. 0. 1.] -> [1. 1. 0. 1.]
[2. 2. 1. 0.] -> [2. 2. 1. 0.]
[0. 0. 1. 2.] -> [0. 0. 1. 2.]
[2. 2. 2. 1.] -> [2. 2. 2. 1.]
[0. 1. 2. 1.] -> [0. 1. 2. 1.]
[0. 1. 0. 2.] -> [0. 1. 0. 2.]
[0. 2. 0. 2.] -> [0. 2. 0. 2.]
[2. 2. 1. 0.] -> [2. 2. 1. 0.]
[0. 0. 0. 1.] -> [0. 0. 0. 1.]
[2. 2. 1. 1.] -> [2. 2. 1. 1.]
[0. 1. 1. 0.] -> [0. 1. 1. 0.]
[0. 1. 1. 0.] -> [0. 1. 1. 0.]
[2. 1. 0. 1.] -> [2. 1. 0. 1.]
[0. 0. 0. 1.] -> [0. 0. 0. 1.]
Accuracy is 0.95 

