In [0]:
import pandas as pd
import string
import numpy as np
import tensorflow as tf


In [2]:
from keras.utils import np_utils

Using TensorFlow backend.


In [3]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [0]:
#Loading the file location
text = open(path_to_file, encoding ='utf').read()
text = text[:1000000]

In [24]:
# Getting sorted listof char in text
chars = sorted(list(set(text)))
print('total chars: ', len(chars))

char_indices = dict((c, i) for i, c in enumerate(chars))
indices_char = dict((i, c) for i, c in enumerate(chars))

# Spitting text into sentences
maxlen = 100
step = 3
sentences = []
next_chars = []
for i in range(0, len(text) - maxlen, step):
    sentences.append(text[i: i + maxlen])
    next_chars.append(text[i + maxlen])
print('number of sequences:', len(sentences))

# Character level One - hot Encoding
x_train = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool)
y_train = np.zeros((len(sentences), len(chars)), dtype=np.bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x_train[i, t, char_indices[char]] = 1
    y_train[i, char_indices[next_chars[i]]] = 1

total chars:  65
number of sequences: 333300


In [0]:
def next_batch(x, y, batch_size):
    N = x.shape[0]
    batch_indices = np.random.permutation(N)[:batch_size]
    x_batch = x[batch_indices]
    y_batch = y[batch_indices]
    return x_batch, y_batch

In [0]:
# Data Dimensions
input_dim = len(chars)       # input dimension
seq_max_len = maxlen         # sequence maximum length
out_dim = len(chars)         # output dimension 

# Hyper-Parameters
learning_rate = 0.01    # The optimization initial learning rate
training_steps = 1000  # Total number of training steps
batch_size = 256        # batch size
display_freq = 100    # Frequency of displaying the training results
num_hidden_units = 1024   # number of hidden units 

In [0]:
tf.reset_default_graph()

In [0]:
# weight and bais wrappers
def weight_variable(shape):
    initer = tf.truncated_normal_initializer(stddev=0.01)
    return tf.get_variable('W',
                           dtype=tf.float64,
                           shape=shape,
                           initializer=initer)

def bias_variable(shape):
    initial = tf.constant(0., shape=shape, dtype=tf.float64)
    return tf.get_variable('b',
                           dtype=tf.float64,
                           initializer=initial)

with tf.name_scope('Input'):
    x = tf.placeholder(tf.float64, shape=[None, seq_max_len, input_dim], name='X')
    y = tf.placeholder(tf.float64, shape=[None, out_dim], name='Y')
    keep_prob = tf.placeholder(tf.float32)

In [0]:
def LSTM(x, num_hidden, out_dim, name, use_activation=True, keep_prob = None):
    with tf.variable_scope(name):
        # create weight matrix initialized randomely from N~(0, 0.01)
        weights = weight_variable(shape=[num_hidden_units, out_dim])

        # create bias vector initialized as zero
        biases = bias_variable(shape=[out_dim])

        cell = tf.nn.rnn_cell.BasicLSTMCell(num_hidden)
        outputs, states = tf.nn.dynamic_rnn(cell, x, dtype=tf.float64)
        out = tf.matmul(outputs[:, -1, :], weights) + biases
        
        layer = tf.nn.dropout(out, keep_prob)
        
        layer = tf.layers.dense(layer,units=out_dim)
        
        layer = tf.nn.softmax(layer)
        
    
        return layer

In [30]:
logits_out = LSTM(x, num_hidden_units, out_dim=out_dim, name = 'lstm',use_activation=True, keep_prob = 0.2)

W0702 19:19:08.798438 140049630615424 nn_ops.py:4224] Large dropout rate: 0.8 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.


In [0]:
loss = tf.reduce_mean(tf.keras.losses.categorical_crossentropy(y, logits_out))
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

In [0]:
# Creating the op for initializing all variables
init = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init)
for i in range(training_steps):
    x_batch, y_batch = next_batch(x_train, y_train, batch_size)
    _, batch_loss = sess.run([train_op, loss], feed_dict={x: x_batch, y: y_batch})
    if i % display_freq == 0:
        print('Step {}, Loss={}'.format(i, batch_loss))

Step 0, Loss=4.174720242098493


In [0]:
# Testing
def sample(preds, temperature):
    # helper function to sample an index from a probability array
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)
  
import random

def generate_text(length, diversity):
    # Get random starting text
    start_index = 0 #random.randint(0, len(text) - maxlen - 1)
    generated = ''
    sentence = text[start_index: start_index + maxlen]
    generated += sentence
    for i in range(length):
            x_pred = np.zeros((1, maxlen, len(chars)))
            for t, char in enumerate(sentence):
                x_pred[0, t, char_indices[char]] = 1.

            preds = sess.run(tf.squeeze(sess.run(logits_out, feed_dict = {x: x_pred})))
            next_index = sample(preds, diversity)
            next_char = indices_char[next_index]
            generated += next_char
            sentence = sentence[1:] + next_char
    return generated

In [23]:
for diversity in [0.2, 0.5, 1.0, 1.2]:
  print(diversity," - diversity----------------------------------------")
  print(generate_text(300, diversity))
  print('******************************************************')

0.2  - diversity----------------------------------------
mortal passado! the punto reverso! the
hai!

BENVOLIO:
The what?

MERCUTIO:
The pox of such antic, lont the mesd on hout to the thaut
The the the the the the tent or ther hand the I thou he found tho that the thow the hee the ther thhe thout a I the thound here the here the soud ne the thar the ntote hor have eand thas to erond the the the thor the the thet thes wish
the the hore the the deann the
******************************************************
0.5  - diversity----------------------------------------
 winter's pale.
The white sheet bleaching on the hedge,
With heigh! the sweet birds, O, how they sin erares and the tole sirse an yar plenes vean ane manthes as fars hefs or anm me thor heen the e anshars oush weurke thanLnentenathes preed bne anbee thile couss theur poukeg PimeQs hin ure
I touth ecp and me to the undes sarthe soon?

EGnIM, I Gole but mane lestet sot kertheg werth aot thee thou tC
*****************************