# Tensorflow Text Generator

This notebook shows how to create a recurrent neural network based on LSTM cells to generate random text from the raw text of a novel, in this case the early 17th century novel Don Quixote, by Miguel de Cervantes.

### Basic imports

In [1]:
import numpy as np
import tensorflow as tf
import time
import sys

### Opening the book

In [2]:
#Read the whole book; the context manager closes the file handle as soon as the text is in memory
#NOTE(review): no explicit encoding is given, so decoding follows the interpreter/platform default - confirm
with open('quijote.txt') as textfile:
    a=textfile.read()

#Removing useless characters: line breaks become spaces and carriage returns are dropped
a=a.replace('\n',' ').replace('\r','')
#Collapse runs of spaces of any length into a single space
while '  ' in a:
    a=a.replace('  ',' ')
a=np.array(list(a))

#Finding unique characters to define our dictionary, and at the same time
#replacing each character by its index in that (sorted) dictionary
alphabet,aind=np.unique(a,return_inverse=True)
aind=aind.astype(np.int32)

#Since this will be a recurrent neural network, the output will always be the character following the one used as input
training_inputs=aind[0:-1]
training_outputs=aind[1:]

### Defining the model

In [3]:
#Start an interactive session, more convenient for multicell execution in the notebook
session = tf.InteractiveSession()

#Some parameters
num_hidden=256          #Units in each LSTM layer
num_layers=2            #Number of stacked LSTM layers
data_type=np.float32    #Dtype of the LSTM state and of the softmax weights
max_grad_norm = 5       #Global-norm threshold used to clip the gradients

init_scale = 0.1   #Random initialization scale
vocab_size=len(alphabet) #Number of distinct characters: the length of the one-hot representation


#Placeholders for inputs, outputs, and other config values
input_data = tf.placeholder(tf.int32, [None, None]) #Character indices fed to the network, laid out as (Nbatch, Ntimesteps)
targets = tf.placeholder(tf.int32, [None, None])  #Equivalent placeholder for the targets

batch_size = tf.placeholder(tf.int32)  #Placeholder for the batch size, we will use different values when training and predicting
keep_prob = tf.placeholder(tf.float32) #Placeholder for the keep probability of the dropout technique


#Structure of LSTM cells
cellLSTM = tf.nn.rnn_cell.BasicLSTMCell(num_hidden, forget_bias=0.0, state_is_tuple=True) #Properties of the single LSTM layer
cellLSTM_drop = tf.nn.rnn_cell.DropoutWrapper(cellLSTM, output_keep_prob=keep_prob)
#The same wrapped cell object is repeated num_layers times to build the stack
#NOTE(review): some TensorFlow releases reject reusing one cell object across layers - confirm against the installed version
cellsLSTM = tf.nn.rnn_cell.MultiRNNCell([cellLSTM_drop] * num_layers, state_is_tuple=True)

initial_state = cellsLSTM.zero_state(batch_size, data_type)         #Zero initial state for the cells, for as many batches as required


inputs_cell = tf.one_hot(input_data,vocab_size)    #We convert to 1-hot representation before feeding the values
outputs, state = tf.nn.dynamic_rnn(cellsLSTM, inputs_cell, initial_state=initial_state,time_major = False) #time_major=False, as the first dimension is the batch number

output = tf.reshape(outputs, [-1, num_hidden])       #Join the Nbatch and Ntimesteps axes together, so every row is one prediction

#We apply a final softmax layer, with the corresponding weights, and calculate the cross entropy with the target values
softmax_w = tf.get_variable("softmax_w", [num_hidden, vocab_size], dtype=data_type)
softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type)
logits = tf.matmul(output, softmax_w) + softmax_b
#Keyword arguments make the call independent of the positional order of logits/labels
loss = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf.one_hot( tf.reshape(targets, [-1]),vocab_size))
#Total loss over all predictions, divided by the batch size
cost = tf.reduce_sum(loss) / tf.cast(batch_size,np.float32)

#We store the final state of the cell in a tensor
final_state = state


#And the gradient descent part for training
lr = tf.Variable(0.0, trainable=False)   #Learning rate, kept in the graph so it can be changed between epochs
tvars = tf.trainable_variables()
grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),max_grad_norm)   #Clipped gradients of the cost
optimizer = tf.train.GradientDescentOptimizer(lr)
train_op = optimizer.apply_gradients(zip(grads, tvars))
new_lr = tf.placeholder(tf.float32, name="new_learning_rate") #To add the possibility to change the learning rate
lr_update = tf.assign(lr, new_lr)

### Initialising the variables, and loading if necessary

In [4]:
#We initialize all the variables
#NOTE(review): this initializer object is created but not handed to any variable or scope in this cell,
#so it does not influence the values produced by the initialization below
initializer = tf.random_uniform_initializer(-init_scale,init_scale)
tf.initialize_all_variables().run()
saver=tf.train.Saver(tf.all_variables())
name='rnn-model'


#Epoch counter; it is overwritten with the checkpoint number when a model is restored
imaster=0
#Checkpoint (epoch) number to restore; set iload=None to start from the freshly initialized variables
iload=4896
restoreFile=None if iload is None else '%s-%i'%(name,iload)

#Only restore when a checkpoint number was actually requested
if restoreFile is not None:
    saver.restore(session,restoreFile)
    imaster=iload
    print('Loaded')


Loaded


### Defining a function to generate random text
Starting from an initial seed, it keeps generating characters until it finds a full stop, within certain length limits.

In [15]:
def GenerateText(start='En',minlength=50,maxlength=400,temperature=1):
    """Sample text from the network, one character at a time.

    The characters of `start` are fed first, carrying the LSTM state from one
    step to the next. Once the seed is exhausted, every following character is
    drawn at random, weighted by the softmax of the network output.
    Generation stops at the first full stop found once the text is longer than
    `minlength`, or after `maxlength` characters.

    Uses the notebook globals `alphabet`, `session`, `initial_state`,
    `final_state`, `logits`, `input_data`, `batch_size` and `keep_prob`.

    Parameters
    ----------
    start : str
        Non-empty seed text; every character must belong to `alphabet`.
    minlength : int
        A full stop only ends the text once it is longer than this.
    maxlength : int
        Upper bound on the number of characters produced.
    temperature : float
        Positive divisor applied to the logits before the softmax.

    Returns
    -------
    str
        The generated text, beginning with `start`.

    Raises
    ------
    ValueError
        If `temperature` is not positive, `start` is empty, or `start`
        contains a character that is not in `alphabet`.
    """
    if temperature<=0:
        raise ValueError('temperature must be positive')

    start=np.array(list(start))
    if start.size==0:
        raise ValueError('start must contain at least one character')

    #Translate the seed into alphabet indices, remembering which characters were found
    startind=np.zeros(start.shape,dtype=np.int32)
    known=np.zeros(start.shape,dtype=bool)
    for i in range(len(alphabet)):
        match=(start==alphabet[i])
        startind[match]=i
        known|=match
    if not known.all():
        #Without this check unknown characters would silently become alphabet[0]
        raise ValueError('start contains characters missing from the alphabet: %r'%''.join(start[~known]))

    #Some functions to calculate softmax, and pick a value
    def softmax(probs,t):
        scaled=np.asarray(probs,dtype=np.float64)/t
        #Subtracting the maximum leaves the result unchanged but keeps exp() from overflowing
        scaled=scaled-np.max(scaled,axis=-1,keepdims=True)
        probsout=np.exp(scaled)
        return probsout/np.sum(probsout,axis=-1,keepdims=True)

    def selectRandomHigh(probs):
        return np.random.choice(np.arange(len(probs)), p=probs)

    #We will choose the next character by doing a weighted random sampling of the softmax output
    selectionFunction=selectRandomHigh
    val=startind[0]

    #Initial state of the LSTMs
    state_predict= session.run(initial_state,{batch_size:1})

    string=''
    array=np.zeros((1,1),np.int32)   #One batch, one timestep
    for i in range(maxlength):
        string+=alphabet[val]

        if len(string)>minlength and string[-1]=='.':
            break

        array[0,0]=val
        fetches = [logits,final_state] # Now we need the output and the final_state to forward it to the next iteration
        feed_dict = {batch_size:1,input_data:array,keep_prob:1} #We are now predicting character by character without dropout

        #We need to carry the state of the LSTM towards the next iteration
        for j, (c, h) in enumerate(initial_state):
            feed_dict[c] = state_predict[j].c
            feed_dict[h] = state_predict[j].h

        logits_a,state_predict= session.run(fetches, feed_dict)

        if (i+1)<len(startind):
            #Still consuming the seed: the prediction is ignored
            val=startind[i+1]
        else:
            val=selectionFunction(softmax(logits_a[0,:],temperature))

    return string

In [16]:
#Sample one piece of text seeded with 'La'; the returned string is shown as the cell output
GenerateText('La')

'Las de Caldo, miradosa confis\xf3n que no pudo sufer que estas cruelles de Olacio, y apenas le hab\xeda de pasar esta batalla, dijo: cuya lugar que \xe9l este (que eran carneras a los \xe1rboles, o que le pagaba, solo las diante a este rey y dejando de buena perdici\xf3n y determinaro a poso, no los serv\xedan, le dijo: pues en poseci\xf3n, conquitan los fuercas y caballeros, tan caresan de cuatro coranis del nombre d'

### Training loop

In [19]:
#Whether progress lines are logged while an epoch is running
verbose=True

#Layout of each training step
num_steps_training=70  #Timesteps (characters) fed per training step
batch_size_training=20 #Parallel sequences per training step
keep_prob_training=0.5 #Value fed to keep_prob while training

#Learning-rate schedule
learning_rate = 1.0    #Initial learning rate
lr_decay_cfg = 0.998   #Base of the per-epoch learning-rate decay


#Drop the tail that does not divide evenly by the batch size, then lay the data out as one row per parallel sequence
crop=len(training_inputs)-len(training_inputs)%batch_size_training
training_inputs_batch=np.reshape(training_inputs[:crop],(batch_size_training,-1))
training_outputs_batch=np.reshape(training_outputs[:crop],(batch_size_training,-1))

def LogFile(string,path='log.txt'):
    """Echo a message to the cell output and append it to a log file.

    Parameters
    ----------
    string : str
        Message to record; its repr() is printed and the raw text plus a
        newline is appended to the file.
    path : str
        File that receives the message (created if missing).
    """
    #The context manager closes the file even if printing or writing fails
    with open(path,'a+') as filelog:
        print(repr(string))
        filelog.write('%s\n'%string)

#Resume the epoch counter right after the last completed (or restored) epoch
im=imaster+1

#Number of iterations we will have to run per epoch; it only depends on the data layout, so it is computed once
epoch_size = training_inputs_batch.shape[1]// num_steps_training
if epoch_size<1:
    raise ValueError('Not enough data: every batch row needs at least num_steps_training characters')

#Progress is logged about ten times per epoch, 10 steps into each interval
#(the offset shrinks on short epochs so the condition can still be met)
log_every = max(epoch_size // 10, 1)
log_offset = min(10, log_every - 1)

#In a try-except block, so training can be stopped by interrupting the kernel.
#Only the interrupt is caught: any other error is a real failure and is left to surface
try:
    while True:
        #Update the learning rate based on the decay
        lr_decay = lr_decay_cfg  ** max(im - 4, 0.0)
        session.run(lr_update, feed_dict={new_lr: learning_rate * lr_decay})

        LogFile("Epoch: %d Learning rate: %.3f" % (im , session.run(lr)))

        start_time = time.time()
        costs = 0.0
        iters = 0

        #We need an initial state
        state = session.run(initial_state,{batch_size:batch_size_training})
        for step in range(epoch_size):
            #We need:
                #cost: to check that it goes down
                #final_state, to forward to the next iteration
                #train_op: A training operation
            fetches = [cost, final_state, train_op]
            feed_dict = {}

            #Feeding the batch size and the dropout keep probability
            feed_dict[batch_size]=batch_size_training
            feed_dict[keep_prob] = keep_prob_training

            #Feeding the right inputs and outputs
            ind1=step*(num_steps_training)
            ind2=(step+1)*(num_steps_training)
            feed_dict[input_data] = training_inputs_batch[:,ind1:ind2]
            feed_dict[targets] = training_outputs_batch[:,ind1:ind2]

            #Feeding the state left by the previous slice as the initial state
            for i, (c, h) in enumerate(initial_state):
                feed_dict[c] = state[i].c
                feed_dict[h] = state[i].h

            #Running the session
            cost_a, state, _ = session.run(fetches, feed_dict)

            #cost_a is the loss summed over the slice and divided by the batch size,
            #so costs / iters is the mean loss per character
            costs += cost_a
            iters += num_steps_training

            if verbose and step % log_every == log_offset:
                LogFile("%.3f perplexity: %.3f speed: %.0f cps" %(step * 1.0 / epoch_size, np.exp(costs / iters),iters * batch_size_training / (time.time() - start_time)))

        train_perplexity=np.exp(costs / iters)
        imaster=im

        #Checkpoint after every epoch, tagged with the epoch number
        saver.save(session, name, global_step=imaster)

        LogFile("%.3f perplexity: %.3f speed: %.0f cps" %(step * 1.0 / epoch_size, np.exp(costs / iters),iters * batch_size_training / (time.time() - start_time)))

        text=GenerateText()
        LogFile("Sample text: %s" %text)
        im+=1
except KeyboardInterrupt:
    LogFile("Training interrupted during epoch %d" % im)
    

'Epoch: 4898 Learning rate: 0.000'
'0.045 perplexity: 2.047 speed: 1507 cps'
'0.145 perplexity: 2.064 speed: 1871 cps'
'0.245 perplexity: 2.058 speed: 1964 cps'
'0.345 perplexity: 2.043 speed: 2002 cps'
'0.445 perplexity: 2.040 speed: 2029 cps'
'0.545 perplexity: 2.035 speed: 2046 cps'
'0.645 perplexity: 2.034 speed: 2056 cps'
'0.745 perplexity: 2.037 speed: 2064 cps'
'0.845 perplexity: 2.033 speed: 2071 cps'
'0.945 perplexity: 2.032 speed: 2072 cps'
'0.995 perplexity: 2.030 speed: 2068 cps'
'Sample text: En mi provecho; y le di\xf3 tres delos caballeros pensaba decar esto que su amo nos ha piese y que para con otra cosa que no se ha de poner grande y pierra en el cabrero.'
'Epoch: 4899 Learning rate: 0.000'
'0.045 perplexity: 2.036 speed: 1289 cps'
'0.145 perplexity: 2.060 speed: 1709 cps'
'0.245 perplexity: 2.071 speed: 1837 cps'
