## SIMPLE CHAR-RNN 

In [1]:
from __future__ import print_function
import tensorflow as tf
import numpy as np
from tensorflow.contrib import rnn
tf.set_random_seed(0)  
print ("TENSORFLOW VERSION IS %s" % (tf.__version__))

TENSORFLOW VERSION IS 1.3.0


## DEFINE TRAINING SEQUENCE

In [2]:
quote1 = ("If you want to build a ship, "
          "don't drum up people to collect wood and don't assign them tasks and work,"
          " but rather teach them to long for the endless immensity of the sea.")
quote2 = ("Perfection is achieved, "
          "not when there is nothing more to add, "
          "but when there is nothing left to take away.")
sentence = quote2
print ("FOLLOWING IS OUR TRAINING SEQUENCE:")
print (sentence)

FOLLOWING IS OUR TRAINING SEQUENCE:
Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away.


## DEFINE VOCABULARY AND DICTIONARY

In [3]:
char_set = list(set(sentence))
char_dic = {w: i for i, w in enumerate(char_set)}
print ("VOCABULARY: ")
print (char_set)
print ("DICTIONARY: ")
print (char_dic)

VOCABULARY: 
['c', 'g', 'l', 'w', 'k', 'n', 'y', 'v', 'u', 'b', 'm', '.', ' ', 'f', 't', 'd', 'e', 'i', ',', 'o', 's', 'r', 'h', 'P', 'a']
DICTIONARY: 
{'c': 0, 'g': 1, 'l': 2, 'w': 3, 'k': 4, 'n': 5, 'y': 6, 'v': 7, 'u': 8, 'b': 9, 'm': 10, '.': 11, ' ': 12, 'f': 13, 't': 14, 'd': 15, 'e': 16, 'i': 17, ',': 18, 'o': 19, 's': 20, 'r': 21, 'h': 22, 'P': 23, 'a': 24}


VOCAB: NUMBER => CHAR / DICTIONARY: CHAR => NUMBER

## CONFIGURE NETWORK

In [4]:
data_dim        = len(char_set)
num_classes     = len(char_set)
hidden_size     = 64
sequence_length = 10  # Any arbitrary number 
print ("DATA_DIM IS [%d]" % (data_dim))

DATA_DIM IS [25]


## SET TRAINING BATCHES

In [5]:
def print_np(_name, _x):
    print("TYPE  OF [%s] is [%s]" % (_name, type(_x)))
    print("SHAPE OF [%s] is %s" % (_name, _x.shape,))
def print_list(_name, _x):
    print("TYPE   OF [%s] is [%s]" % (_name, type(_x)))
    print("LENGTH OF [%s] is %s" % (_name, len(_x)))
    print("%s[0] LOOKS LIKE %s" % (_name, _x[0]))

In [6]:
dataX = []
dataY = []
for i in range(0, len(sentence) - sequence_length):
    x_str = sentence[i:i + sequence_length]
    y_str = sentence[i + 1: i + sequence_length + 1]
    x = [char_dic[c] for c in x_str]  # x str to index
    y = [char_dic[c] for c in y_str]  # y str to index
    dataX.append(x)
    dataY.append(y)
    if i < 5:
        print ("[%4d/%4d] [%s]=>[%s]" % (i, len(sentence), x_str, y_str))
        print ("%s%s => %s" % (' '*12, x, y))
print_list('dataX', dataX)
print_list('dataY', dataY)

[   0/ 107] [Perfection]=>[erfection ]
            [23, 16, 21, 13, 16, 0, 14, 17, 19, 5] => [16, 21, 13, 16, 0, 14, 17, 19, 5, 12]
[   1/ 107] [erfection ]=>[rfection i]
            [16, 21, 13, 16, 0, 14, 17, 19, 5, 12] => [21, 13, 16, 0, 14, 17, 19, 5, 12, 17]
[   2/ 107] [rfection i]=>[fection is]
            [21, 13, 16, 0, 14, 17, 19, 5, 12, 17] => [13, 16, 0, 14, 17, 19, 5, 12, 17, 20]
[   3/ 107] [fection is]=>[ection is ]
            [13, 16, 0, 14, 17, 19, 5, 12, 17, 20] => [16, 0, 14, 17, 19, 5, 12, 17, 20, 12]
[   4/ 107] [ection is ]=>[ction is a]
            [16, 0, 14, 17, 19, 5, 12, 17, 20, 12] => [0, 14, 17, 19, 5, 12, 17, 20, 12, 24]
TYPE   OF [dataX] is [<class 'list'>]
LENGTH OF [dataX] is 97
dataX[0] LOOKS LIKE [23, 16, 21, 13, 16, 0, 14, 17, 19, 5]
TYPE   OF [dataY] is [<class 'list'>]
LENGTH OF [dataY] is 97
dataY[0] LOOKS LIKE [16, 21, 13, 16, 0, 14, 17, 19, 5, 12]


In [7]:
ndata      = len(dataX)
batch_size = 512
print ("     'NDATA' IS %d" % (ndata))
print ("'BATCH_SIZE' IS %d" % (batch_size))

     'NDATA' IS 97
'BATCH_SIZE' IS 512


## DEFINE PLACEHOLDERS

In [8]:
X = tf.placeholder(tf.int32, [None, sequence_length])
X_OH = tf.one_hot(X, num_classes)
Y = tf.placeholder(tf.int32, [None, sequence_length])
print ("'sequence_length' IS [%d]" % (sequence_length))
print ("    'num_classes' IS [%d]" % (num_classes))
print("'X' LOOKS LIKE \n   [%s]" % (X))  
print("'X_OH' LOOKS LIKE \n   [%s]" % (X_OH))
print("'Y' LOOKS LIKE \n   [%s]" % (Y))

'sequence_length' IS [10]
    'num_classes' IS [25]
'X' LOOKS LIKE 
   [Tensor("Placeholder:0", shape=(?, 10), dtype=int32)]
'X_OH' LOOKS LIKE 
   [Tensor("one_hot:0", shape=(?, 10, 25), dtype=float32)]
'Y' LOOKS LIKE 
   [Tensor("Placeholder_1:0", shape=(?, 10), dtype=int32)]


## DEFINE MODEL

In [9]:
with tf.variable_scope('CHAR-RNN', reuse=False):
    cell = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True, reuse=False)
    # cell = rnn.MultiRNNCell([cell]*2, state_is_tuple=True) # BUG IN TF1.1..
    # DYNAMIC RNN WITH FULLY CONNECTED LAYER
    _hiddens  = tf.contrib.layers.fully_connected(X_OH, hidden_size, activation_fn=tf.nn.relu)
    _rnnouts, _states = tf.nn.dynamic_rnn(cell, _hiddens, dtype=tf.float32)
    _denseouts = tf.contrib.layers.fully_connected(_rnnouts, num_classes, activation_fn=None)
    # RESHAPE FOR SEQUNCE LOSS
    outputs = tf.reshape(_denseouts, [batch_size, sequence_length, num_classes])
    
print ("_hiddens   LOOKS LIKE [%s]" % (_hiddens))
print ("_rnnouts   LOOKS LIKE [%s]" % (_rnnouts))
print ("_denseouts LOOKS LIKE [%s]" % (_denseouts))
print ("outputs    LOOKS LIKE [%s]" % (outputs))
print ("MODEL DEFINED.")

_hiddens   LOOKS LIKE [Tensor("CHAR-RNN/fully_connected/Relu:0", shape=(?, 10, 64), dtype=float32)]
_rnnouts   LOOKS LIKE [Tensor("CHAR-RNN/rnn/transpose:0", shape=(?, 10, 64), dtype=float32)]
_denseouts LOOKS LIKE [Tensor("CHAR-RNN/fully_connected_1/BiasAdd:0", shape=(?, 10, 25), dtype=float32)]
outputs    LOOKS LIKE [Tensor("CHAR-RNN/Reshape:0", shape=(512, 10, 25), dtype=float32)]
MODEL DEFINED.


## DEFINE TF FUNCTIONS

In [10]:
weights  = tf.ones([batch_size, sequence_length]) # EQUAL WEIGHTS
seq_loss = tf.contrib.seq2seq.sequence_loss(
    logits=outputs, targets=Y, weights=weights) # THIS IS A CLASSIFICATION LOSS
print ("weights LOOKS LIKE [%s]" % (weights))
print ("outputs LOOKS LIKE [%s]" % (outputs))
print ("Y       LOOKS LIKE [%s]" % (Y))

weights LOOKS LIKE [Tensor("ones:0", shape=(512, 10), dtype=float32)]
outputs LOOKS LIKE [Tensor("CHAR-RNN/Reshape:0", shape=(512, 10, 25), dtype=float32)]
Y       LOOKS LIKE [Tensor("Placeholder_1:0", shape=(?, 10), dtype=int32)]


In [11]:
loss  = tf.reduce_mean(seq_loss)
optm  = tf.train.AdamOptimizer(learning_rate=0.01).minimize(loss)
print ("FUNCTIONS DEFINED.")

FUNCTIONS DEFINED.


## OPTIMIZE

In [12]:
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())
MAXITER = 2000
for i in range(MAXITER):
    randidx = np.random.randint(low=0, high=ndata, size=batch_size)
    batchX = [dataX[iii] for iii in randidx]
    batchY = [dataY[iii] for iii in randidx]
    feeds = {X: batchX, Y: batchY}
    _, loss_val, results = sess.run(
        [optm, loss, outputs], feed_dict=feeds)
    if (i%200) == 0:
        print ("[%5d/%d] loss_val: %.5f " % (i, MAXITER, loss_val))

[    0/2000] loss_val: 3.22481 
[  200/2000] loss_val: 0.17731 
[  400/2000] loss_val: 0.17315 
[  600/2000] loss_val: 0.17406 
[  800/2000] loss_val: 0.16724 
[ 1000/2000] loss_val: 0.17626 
[ 1200/2000] loss_val: 0.17269 
[ 1400/2000] loss_val: 0.17289 
[ 1600/2000] loss_val: 0.16823 
[ 1800/2000] loss_val: 0.17249 


#### BATCH LOOKS LIKE

In [13]:
print ("LENGTH OF BATCHX IS %d" % (len(batchX)))
print ("batchX[0] looks like %s" % (batchX[0]))
print ("LENGTH OF BATCHY IS %d" % (len(batchY)))
print ("batchY[0] looks like %s" % (batchY[0]))

LENGTH OF BATCHX IS 512
batchX[0] looks like [17, 5, 1, 12, 10, 19, 21, 16, 12, 14]
LENGTH OF BATCHY IS 512
batchY[0] looks like [5, 1, 12, 10, 19, 21, 16, 12, 14, 19]


## PRINT CHARS

In [14]:
randidx = np.random.randint(low=0, high=ndata, size=batch_size)
batchX = [dataX[iii] for iii in randidx]
batchY = [dataY[iii] for iii in randidx]
feeds = {X: batchX}
results = sess.run(outputs, feed_dict=feeds)
for j, result in enumerate(results):
    index = np.argmax(result, axis=1)
    chars = [char_set[t] for t in index]
    if j < 10:
        print ("OUT OF BATCHX:   %s => %s" % (index, chars))
        print ("BATCHY (TARGET): %s\n" % (batchY[j]))

OUT OF BATCHX:   [20  1 12 10 16 13 14 12 14 19] => ['s', 'g', ' ', 'm', 'e', 'f', 't', ' ', 't', 'o']
BATCHY (TARGET): [5, 1, 12, 2, 16, 13, 14, 12, 14, 19]

OUT OF BATCHX:   [14 22 17  5  1 12 10 19 21 16] => ['t', 'h', 'i', 'n', 'g', ' ', 'm', 'o', 'r', 'e']
BATCHY (TARGET): [14, 22, 17, 5, 1, 12, 10, 19, 21, 16]

OUT OF BATCHX:   [22 16 21 16 12 17 20 12  5 19] => ['h', 'e', 'r', 'e', ' ', 'i', 's', ' ', 'n', 'o']
BATCHY (TARGET): [22, 16, 21, 16, 12, 17, 20, 12, 5, 19]

OUT OF BATCHX:   [14 17 19  5 12 17 20 12 24  0] => ['t', 'i', 'o', 'n', ' ', 'i', 's', ' ', 'a', 'c']
BATCHY (TARGET): [14, 17, 19, 5, 12, 17, 20, 12, 24, 0]

OUT OF BATCHX:   [12 17 20 12  5 19 14 22 17  5] => [' ', 'i', 's', ' ', 'n', 'o', 't', 'h', 'i', 'n']
BATCHY (TARGET): [12, 17, 20, 12, 5, 19, 14, 22, 17, 5]

OUT OF BATCHX:   [14 19 14 22 17  5  1 12 10 19] => ['t', 'o', 't', 'h', 'i', 'n', 'g', ' ', 'm', 'o']
BATCHY (TARGET): [5, 19, 14, 22, 17, 5, 1, 12, 10, 19]

OUT OF BATCHX:   [12  5  8 14 12  3 22 16

### SAMPLING FUNCTION 

In [15]:
LEN = 1 # <= LENGHT IS 1 !!
# XL = tf.placeholder(tf.int32, [None, LEN])
XL     = tf.placeholder(tf.int32, [None, 1])
XL_OH  = tf.one_hot(XL, num_classes)
with tf.variable_scope('CHAR-RNN', reuse=True):
    cell_L = rnn.BasicLSTMCell(hidden_size, state_is_tuple=True, reuse=True)
    # cell_L = rnn.MultiRNNCell([cell_L] * 2, state_is_tuple=True) # BUG IN TF1.1
    istate = cell_L.zero_state(batch_size=1, dtype=tf.float32)
    # DYNAMIC RNN WITH FULLY CONNECTED LAYER
    _hiddens  = tf.contrib.layers.fully_connected(XL_OH, hidden_size, activation_fn=tf.nn.relu)
    _outputs_L, states_L = tf.nn.dynamic_rnn(cell_L, _hiddens
                                , initial_state=istate, dtype=tf.float32)
    _outputs_L  = tf.contrib.layers.fully_connected(
        _outputs_L, num_classes, activation_fn=None)
    # RESHAPE FOR SEQUNCE LOSS
    outputs_L = tf.reshape(_outputs_L, [LEN, 1, num_classes])
print ("XL    LOOKS LIKE %s" % (XL))
print ("XL_OH LOOKS LIKE %s" % (XL_OH))

XL    LOOKS LIKE Tensor("Placeholder_2:0", shape=(?, 1), dtype=int32)
XL_OH LOOKS LIKE Tensor("one_hot_1:0", shape=(?, 1, 25), dtype=float32)


#### HELPER FUNCTION

In [16]:
def weighted_pick(weights):
    t = np.cumsum(weights)
    s = np.sum(weights)
    return(int(np.searchsorted(t, np.random.rand(1)*s)))
def softmax(x):
    alpha = 1
    e_x = np.exp(alpha*(x - np.max(x)))
    return e_x / np.sum(e_x) # only difference

## SAMPLE

### BURNIN

In [17]:
prime = "Perfection is"
istateval = sess.run(cell_L.zero_state(1, tf.float32))
for i, c in enumerate(prime[:-1]):
    index = char_dic[c]
    inval = [[index]]
    outval, stateval = sess.run([outputs_L, states_L]
                        , feed_dict={XL:inval, istate:istateval})
    istateval = stateval # UPDATE STATE MANUALLY!!
    if i < 3:
        print ("[%d] -char:  %s \n    -inval: %s \n    -outval: %s " 
               % (i, c, inval, outval))

[0] -char:  P 
    -inval: [[23]] 
    -outval: [[[ -8.083251    -2.7365427   -0.01721089  -2.7928133   -5.45727015
    -1.01345122  -6.01259851  -7.22651958  -3.13926625  -4.29166126
     0.14808631  -3.7139504   -2.8849721    0.78088862   2.98004508
    -1.75051367  12.91055012   4.83504868   1.53668344   3.67999387
    -2.49500585   2.41470742   0.64288658  -3.25742102  -2.91963959]]] 
[1] -char:  e 
    -inval: [[16]] 
    -outval: [[[  5.01418829  -4.0058651    0.82219779  -3.44058609  -1.45845723
     5.68695116   0.26826409   4.01365089   0.33876163  -2.19759774
    -0.4963997   -1.81084132  -1.04217005   6.27676249   0.88501585
     4.29535723   1.94310606  -4.77040052   1.35450602  -3.10625553
    -3.33001232  13.49173737  -5.49444437  -0.9884336   -5.6234827 ]]] 
[2] -char:  r 
    -inval: [[21]] 
    -outval: [[[  1.61779118  -5.15967894  -2.1306777   -5.99924803  -3.75904703
    -2.36696339  -5.14207602   1.05601883  -0.43575048  -8.57248592
    -2.89001083  -1.14938211   2

### SAMPLE

In [18]:
inval  = [[char_dic[prime[-1]]]]
outval, stateval = sess.run([outputs_L, states_L]
                    , feed_dict={XL:inval, istate:istateval})
istateval = stateval
index = np.argmax(outval)
char  = char_set[index]
chars = char
for i in range(100):
    inval = [[index]]
    outval, stateval = sess.run([outputs_L, states_L]
                        , feed_dict={XL:inval, istate:istateval})
    istateval = stateval
    # index = np.argmax(outval)
    index = weighted_pick(softmax(outval))
    char  = char_set[index]
    chars += char
    if i < 5:
        print ("[%d] \n -inval: %s \n -outval: %s \n -index: %d (char: %s) \n -chars: %s" 
               % (i, inval, outval, index, char, chars))

[0] 
 -inval: [[12]] 
 -outval: [[[  0.67843425  -2.08114338  -0.32877958  -0.71623373  -6.46169662
     7.21603251  -2.14627552  -8.24349689  -4.73580694   3.33558416
     0.40434808  -1.73716736   5.20630836  -9.15944004   1.35718107
    -1.93660891  -5.90542173   5.69831371  -0.3494606   -1.54447627
     1.23852599  -7.73629093   1.7253958   -4.76701975  15.29266071]]] 
 -index: 24 (char: a) 
 -chars:  a
[1] 
 -inval: [[24]] 
 -outval: [[[ 16.70030785  -5.62605572  -2.07418704   2.41097379  -3.50659609
     2.73988485   1.72936344  -5.1719346    0.9869777   -0.10085863
    -2.83843112  -1.2794956    0.10418161  -6.61042643   0.0844142
     6.4254899   -9.78278923  -2.84867382  -1.83677375   1.13077807
     5.24185658  -2.07052469   3.12206078  -0.57869923   4.67526865]]] 
 -index: 0 (char: c) 
 -chars:  ac
[2] 
 -inval: [[0]] 
 -outval: [[[ -0.03909219  -4.75933599  -3.95792174  -3.53075409  -7.28938055
    -1.10193872  -5.83994722 -12.79705906   0.78332967  -0.74953425
    -4.15386

### SAMPLED SENTENCE

In [19]:
print ("<SAMPLED SETENCE> \n %s" % (prime+chars))
print ("\n<ORIGINAL SENTENCE> \n %s" % (sentence))

<SAMPLED SETENCE> 
 Perfection is achieved, not when there is nothing more to add, but when there is nothing more to add, but when the

<ORIGINAL SENTENCE> 
 Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away.


In [None]:
test complete; Gopal