# Charecter RNN
This is my implementation of the Charecter RNN using LSTM and GRU.

Example adopted from [TensorFlow Examples](https://github.com/aymericdamien/TensorFlow-Examples/blob/master/notebooks/3_NeuralNetworks/recurrent_network.ipynb)

Training text is a free eBook from [Project Gutenberg](https://www.gutenberg.org/wiki/Main_Page). The book used is [Frankenstein](https://www.gutenberg.org/ebooks/84)


Chen Chen

12/15/2016

In [1]:
# Import TensorFlow Module
import tensorflow as tf

# Import RNN modules
from tensorflow.python.ops import rnn, rnn_cell

# and numpy for math
import numpy as np

### Section 1 - Feature Representation
The choice of how to represent features absolutely critical for the performance. Here I adopted a popular and intuitive choice which simply use a unique integer number to represent a charecter.

Given a numerical charecter mapping: 

<center>$[a, b, c, d, ...] \rightarrow [0, 1, 2, 3, ...]$</center>

any input text block can be represented with a unique sequence of integers:

<center>$["bed"] \rightarrow [1, 4, 3]$</center>

The mapping above is pretty intuitive and easy to implement however it's not really good for LSTM network that's using SoftMax and Sigmoidal functions. Since  what we have is basically mapping data from charecters to its **labels**, a numerical encoding of the label isn't really great because it's **continuous**. Therefore, people usually use the so-called **One Hot Encoding** which project the numerical labels into binary:

*For example, gven charecter 'e' with a numerical value 4 to represent it's label:*

<center>$['e'] \rightarrow [4] \rightarrow [0, 0, 0, 0, 1, 0, ...]'$</center>

where the total length of the feature array **$n$**  is the length of the chosen dictionary

In [2]:
# Load training text
filename = "./Data/Exp_2_2.txt"
rawText = open(filename).read()

# Convert all text to lowercase
rawText = rawText.lower()

# Create a unique mapping from chars to integers
allChars = sorted(list(set(rawText)))
charDict = dict( (char, i) for i, char in enumerate(allChars) )

# Summarize
nText = len(rawText)   # Total number of charecters in the input text
nChars = len(allChars) # Total number of unique charecters in the dictionary

print("Input text has {0} charecters\nDictionary size: {1}".format(nText, nChars))

Input text has 421503 charecters
Dictionary size: 52


In [3]:
# Here is an example of the One Hot Encoding
# the input array is [0,1,2,3,4] with 5 different labels
onehot = tf.one_hot([0,1,2,3,4], 5, 1, 0)

with tf.Session() as sess:
    print(sess.run(onehot))

[[1 0 0 0 0]
 [0 1 0 0 0]
 [0 0 1 0 0]
 [0 0 0 1 0]
 [0 0 0 0 1]]


In [4]:
# Break input text into individual training blocks of fixed length
# Note that this is a sliding window sampling with a window
# width of sampleLen and offset of 1
sampleLen = 200
dataIn  = []
dataOut = []
samplesToTake = nText-sampleLen  # the actual end of the range
for i in range(0, 100, 1):
    sampleIn  = rawText[i : i + sampleLen]
    sampleOut = rawText[i + sampleLen]
    dataIn.append( [charDict[char] for char in sampleIn] )
    dataOut.append( charDict[sampleOut] )

# Total number of text samples
nSamples = len(dataIn)
print("Total number of input text samples: {0}".format(nSamples))

Total number of input text samples: 100


In [5]:
# Convert the text input to One Hot Encoding

# Reshape the sampled text sequences
dataX = np.reshape(dataIn, (nSamples, sampleLen))

# Normalize the numeric labels (0, 1)
dataX = dataX / float(nChars)

# Convert to One Hot Encoding
dataY = tf.one_hot(dataOut, nChars, 1, 0)

In [12]:
# Parameters for RNN

# Control Parameters
maxIter = 1e3
batchSize = 10
printStep = 10

# RNN Parameters
learnRate = 0.001  # Learning rate
nHidden = 64      # Number of Hidden Units
nClasses = nChars  # Total number of classes. We are predicting single digits from 0-9

# TensorFlow Graph Input
x = tf.placeholder(tf.float32, [batchSize, sampleLen], name="netInput")
y = tf.placeholder(tf.float32, [None, nClasses], name="netOutput")

# RNN Weight and Bias Matrix
weight = {
    'out': tf.Variable(tf.random_normal([nHidden, nClasses]))
}
bias = {
    'out': tf.Variable(tf.random_normal([nClasses]))
}

In [15]:
def RNN(x, weight, bias):
    # Preprocess data to tensors
    # Raw data shape: (batchSize, nSamples, sampleLen)
    # Tensor shape  : list of nSamples tensors each with a shape of (batchSize, sampleLen)
    
    # Permuting batchSize and nRows
    #  Variable:   x[batchSize, nSamples, sampleLen]  =>  x[ nSamples, batchSize, sampleLen]
    # Dimension:   x[   0     ,     1   ,     2    ]  =>  x[    1    ,     0    ,     2    ]
    #x = tf.transpose(x, [1, 0, 2])
    # Reshape x to 2D => [batchSize * nSamples, sampleLen]
    #x = tf.reshape(x, [-1, sampleLen])
    # Split the dimension to get a list of nSamples tensors of shape [batchSize, sampleLen]
    x = tf.split(1, sampleLen, x)
    
    # Define a LSTM cell
    lstmCell = rnn_cell.BasicLSTMCell(nHidden, forget_bias=1.0)
    
    # Get LSTM cell output
    outputs, states = rnn.rnn(lstmCell, x, dtype=tf.float32)
    
    # Linear activation function
    return tf.matmul(outputs[-1], weight['out']) + bias['out']

# Use the function defined above to convert data into list of tensors
# And use LSTM RNN to predict the output
predY = RNN(x, weight, bias)

# Define cost function and optimizer
costFun = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(predY, y))
optimizerFun = tf.train.AdamOptimizer(learning_rate=learnRate).minimize(costFun)

# Evaluate Model
predResult = tf.equal(tf.argmax(predY, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(predResult, tf.float32))

# Initializing all variables
init = tf.global_variables_initializer()

In [19]:
# Launch the graph
with tf.Session() as sess:
    sess.run(init)
    step = 1
    # Keep training until reach max iterations
    while step * batchSize < maxIter:
        #
        batchX = dataX[(step-1)*batchSize : step*batchSize, :]
        batchY = dataY[(step-1)*batchSize : step*batchSize, :]
        
        # Run optimization op (backprop)
        sess.run(optimizerFun, feed_dict={x: batchX, y: batchY})
        
        if step % printStep == 0:
            # Calculate batch accuracy
            acc = sess.run(accuracy, feed_dict={x: batchX, y: batchY})
            # Calculate batch loss
            loss = sess.run(costFun, feed_dict={x: batchX, y: batchY})
            print("Iter ", str(step*batchSize), ", Minibatch Loss= ",
                  "{:.6f}".format(loss), ", Training Accuracy= ",
                  "{:.5f}".format(acc))
        step += 1
    print("Optimization Finished!")

    # Calculate accuracy for 128 mnist test images
    test_len = 10
    testData = dataX[:test_len, :]
    testLabel = dataY[:test_len, :]
    print("Testing Accuracy:",
          sess.run(accuracy, feed_dict={x: testData, y: testLabel}))

TypeError: The value of a feed cannot be a tf.Tensor object. Acceptable feed values include Python scalars, strings, lists, or numpy ndarrays.

In [21]:
batchY

<tf.Tensor 'strided_slice_1:0' shape=(10, 52) dtype=int32>