In [33]:
import sys
import random
import numpy as np
import tensorflow as tf

In [34]:
# Data I/O
# open() opens the 'input.txt' in read mode and read() reads the data into 'data'
# set() returns unique characters by eliminating repetitions
# list() returns a list of characters
# sorted() sorts the characters in an alphabetical order
# len() gives the number of characters
# enumerate() associated a number with each character

#file = sys.argv[1]
data = open("input.txt", 'r').read()
chars = sorted(list(set(data)))
data_size, vocab_size = len(data), len(chars)
print('Data has %d characters of which %d are unique.' % (data_size, vocab_size))
# creating dictionaries with key:value as char:number and vice-versa
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}



Data has 1115394 characters of which 65 are unique.


In [35]:
# one-hot representation returns an identity matrix of dimension v x vocab_size
# first array is the dimension of the identity matrix, Second array selects the one-hot row corresponding to each label

def one_hot(v):
    return np.eye(vocab_size)[v]

In [36]:
# Hyper-parameters
# hidden_size set to 100 specifying number of nodes in hidden layer
# seq_length specifies the windoe length while traversing the data from left to right
# window length specifies that 'short term memory' for the RNN
# Learning rate is the gradient step and is usually kept as 1e-1

hidden_size = 100  
seq_length = 25   
learning_rate = 1e-1

In [37]:
# Seed value 
# seed value is set so that random values are generated starting from the seed value everytime the model is run(to replicate)

seed_value = 42
tf.set_random_seed(seed_value)
random.seed(seed_value)


In [38]:
# Tensor placeholders
# tf.placeholder() takes the arguments shape, datatype to create tensor type placeholders for all data to be passed in the model 
# data is fed into the placeholders later
# an object called initializer is created
# tf.random_normal_initializer(stddev=0.1) generated random normalized data with stdev 0.1

inputs = tf.placeholder(shape=[None, vocab_size],dtype=tf.float32, name="inputs")
targets = tf.placeholder(shape=[None, vocab_size], dtype=tf.float32, name="targets")
init_state = tf.placeholder(shape=[1, hidden_size], dtype=tf.float32, name="state")
initializer = tf.random_normal_initializer(stddev=0.1)

In [39]:
# weights and biases initialization
# tf.variable_scope() defines the scope for the variables defined inside it
# hs_t is the hidden state and a placeholder is created
# ys is list that stores the activations of the output layer
# get_variable() retrives a variable from outside of scope to inside of scope
# weights and biases are initialzed to random normal values with stdev of 0.1
# xh-input ot hidden hh-prev hidden layer to next hy-hidden layer to output layer
# assign prev sample hidden state value as to a copy of h_prev
# sess.run() for samples
with tf.variable_scope("RNN") as scope:
    hs_t = init_state
    ys = []
    for t, xs_t in enumerate(tf.split(inputs, seq_length, axis=0)):
        if t > 0:
            scope.reuse_variables()  # Reuse variables
        Wxh = tf.get_variable("Wxh", [vocab_size, hidden_size], initializer=initializer)
        Whh = tf.get_variable("Whh", [hidden_size, hidden_size], initializer=initializer)
        Why = tf.get_variable("Why", [hidden_size, vocab_size], initializer=initializer)
        bh = tf.get_variable("bh", [hidden_size], initializer=initializer)
        by = tf.get_variable("by", [vocab_size], initializer=initializer)

        hs_t = tf.tanh(tf.matmul(xs_t, Wxh) + tf.matmul(hs_t, Whh) + bh)
        ys_t = tf.matmul(hs_t, Why) + by
        ys.append(ys_t)
        
#softmax, loss function, and optimizer
hprev = hs_t
output_softmax = tf.nn.softmax(ys[-1])  # Get softmax for sampling

outputs = tf.concat(ys, axis=0)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=outputs))

# Minimizer
minimizer = tf.train.AdamOptimizer()
grads_and_vars = minimizer.compute_gradients(loss)
print(grads_and_vars)

#Gradient Clipping
grad_clipping = tf.constant(5.0, name="grad_clipping")
clipped_grads_and_vars = []
for grad, var in grads_and_vars:
    clipped_grad = tf.clip_by_value(grad, -grad_clipping, grad_clipping)
    clipped_grads_and_vars.append((clipped_grad, var))

# Gradient updates
updates = minimizer.apply_gradients(clipped_grads_and_vars)

# Session
sess = tf.Session()
init = tf.global_variables_initializer()
writer=tf.summary.FileWriter("aish",sess.graph)
sess.run(init)
writer.close()

# Initial values
n, p = 0, 0
hprev_val = np.zeros([1, hidden_size])

# moving and sampling
while True:
    # Initialize
    if p + seq_length+1 >= len(data) or n == 0:
        hprev_val = np.zeros([1, hidden_size])
        p = 0  # reset

    # Prepare inputs
    input_vals = [char_to_ix[ch] for ch in data[p:p + seq_length]]
    target_vals = [char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]]

    input_vals = one_hot(input_vals)
    target_vals = one_hot(target_vals)

    hprev_val, loss_val, _ = sess.run([hprev, loss, updates],
                                      feed_dict={inputs: input_vals,targets: target_vals,init_state: hprev_val})
    if n % 500 == 0:
        # Progress
        print('iter: %d, p: %d, loss: %f' % (n, p, loss_val))

        # Do sampling
        sample_length = 200
        start_ix = random.randint(0, len(data) - seq_length)
        sample_seq_ix = [char_to_ix[ch] for ch in data[start_ix:start_ix + seq_length]]
        ixes = []
        sample_prev_state_val = np.copy(hprev_val)

        for t in range(sample_length):
            sample_input_vals = one_hot(sample_seq_ix)
            sample_output_softmax_val, sample_prev_state_val = \
                sess.run([output_softmax, hprev],
                         feed_dict={inputs: sample_input_vals,init_state: sample_prev_state_val})

            ix = np.random.choice(range(vocab_size), p=sample_output_softmax_val.ravel())
            ixes.append(ix)
            sample_seq_ix = sample_seq_ix[1:] + [ix]

        txt = ''.join(ix_to_char[ix] for ix in ixes)
        print('----\n %s \n----\n' % (txt,))

    p += seq_length
    n += 1

ValueError: Variable RNN/Wxh already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope? Originally defined at:

  File "<ipython-input-8-27c3307a23ee>", line 9, in <module>
    "Wxh", [vocab_size, hidden_size], initializer=initializer)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):


IndexError: list index out of range

NameError: name 'p' is not defined