# RNNs in Tensorflow #

In [5]:
# import libraries
import tensorflow as tf
import numpy as np
import pandas as pd
import string

In [108]:
# load the data
df = pd.read_csv('songdata.csv')
df['text'] = 'trats ' + df['text'] + ' dne' # start ("trats") and end ("dne") tokens
data = df['text'].str.cat(sep=' ').lower() # lowercase all strings 
data = ' '.join(word.strip(string.punctuation) for word in data.split()) # remove punctuation

In [109]:
# load a bit of the data
from collections import Counter
n_chars = 102
counts = Counter(data[0:n_chars].split(' ')) # using only the first 1000 characters in the string of lyrics]
count_pairs = sorted(counts.items(), key=lambda x: (-x[1], x[0])) # sort first by incidence, then by alpha numeric key
words, word_count = list(zip(*count_pairs))
word_to_id = dict(zip(words, range(len(words)))) # get an ID for each word
id_to_word = dict(zip(range(len(words)), words)) # get a word for each id

In [113]:
all_data_word = data[0:n_chars].split(' ') # using only the first 1000 characters in the string of lyrics
all_data_id = [word_to_id[word] for word in all_data_word]

In [105]:
# into a sparse matrix
V = len(word_to_id)
N = len(all_data_id)
x_tr = np.zeros((N,V))
x_tr[range(N),all_data_id] = 1

y_tr = np.zeros((N,V)).astype(int)
y_tr[range(N),all_data_id] = 1

# initializing NN parameters
H = 10
h_0 = np.zeros((1,H)).astype(float)

In [107]:
# Tensorflow 
lr = 0.01

X = tf.placeholder("float", [N, V])
Y = tf.placeholder("int32", [N, V])
h_zero = tf.placeholder("float", [1, H])

W_i = tf.Variable(np.random.rand(V, H), dtype=tf.float32)
b_i = tf.Variable(np.zeros((1,H)), dtype=tf.float32)

W_h = tf.Variable(np.random.rand(H,H), dtype=tf.float32)

W_o = tf.Variable(np.random.rand(H, V),dtype=tf.float32)
b_o = tf.Variable(np.zeros((1,V)), dtype=tf.float32)

X_ts = tf.unstack(X, axis = 0)
Y_ts = tf.unstack(Y, axis = 0)
Y_ts = [tf.reshape(y, [1,V]) for y in Y_ts]

# Forward pass
all_h = []
h_prev = h_zero
for x_t in X_ts:
    x_t = tf.reshape(x_t, [1, V])
    h_t = tf.nn.tanh(tf.matmul(x_t,W_i) + b_i + tf.matmul(h_prev,W_h))

    all_h.append(h_t)
    h_prev = h_t

all_scores = [tf.matmul(h_t, W_o) + b_o for h_t in all_h] #Broadcasted addition
all_y_hat = [tf.nn.softmax(scores) for scores in all_scores]

all_losses = [tf.nn.softmax_cross_entropy_with_logits(logits, labels) for logits, labels in zip(all_y_hat,Y_ts)]
total_loss = tf.reduce_mean(all_losses)

GD_step = tf.train.GradientDescentOptimizer(lr).minimize(total_loss)

# everything we have done so far has set up the tensorflow graph, but will not make the neural
# network learn. For learning to take place, we need to initialize a tensorflow session and initialize 
# all the variables
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)

# you can loop over this to train over more than one epoch.
nepochs = 1
for i in xrange(nepochs):
    
    # gradient descent    
    sess.run(GD_step, feed_dict={X: x_tr, Y: y_tr, h_zero: h_0})

# If you want to obtain the accuracy of the network on the training set:
final_loss = sess.run(total_loss, feed_dict={X: x_tr, Y: y_tr, h_zero: h_0})
print "The final training loss is: ", final_loss

sess.close()              

The final training loss is:  2.88558
