In [2]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import tensorflow as tf

In [3]:
structure = [['Correct Data Structure'],
             ['Vocabulary'],
             ['char ---> ind AND ind ---> char'], 
             ['Use indexes to ENCODE the text'],
             ['TensorSlicesDataset'],
             ['Sequences'],
             ['Shuffle & Batch'],
             ['Model'],
             ]

In [6]:
path_to_file = "shakespeare.txt"
text = open(path_to_file, 'r').read()

In [7]:
print(text[150:1000])

ecease,
  His tender heir might bear his memory:
  But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bud buriest thy content,
  And tender churl mak'st waste in niggarding:
    Pity the world, or else this glutton be,
    To eat the world's due, by the grave and thee.


                     2
  When forty winters shall besiege thy brow,
  And dig deep trenches in thy beauty's field,
  Thy youth's proud livery so gazed on now,
  Will be a tattered weed of small worth held:  
  Then being asked, where all thy beauty lies,
  Where all the treasure of thy lusty days;
  To say within thine own deep su


In [30]:
vocab = sorted(set(text))
vocab_size = len(vocab) #It is important when designing the final Dense layer

In [11]:
char_to_ind = {char:ind for ind,char in enumerate(vocab)}
char_to_ind['I']

34

In [14]:
ind_to_char =np.array(vocab)
ind_to_char[34]

'I'

In [15]:
encoded_text = np.array([char_to_ind[c] for c in text])
encoded_text.shape

(5445609,)

In [17]:
sequence_length = 120
total_sequences = len(text) // (sequence_length + 1)
total_sequences

45005

In [18]:
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)

In [23]:
sequences = char_dataset.batch(sequence_length+1, drop_remainder=True)

In [24]:
def create_sequence_targets(seq):
    input_txt = seq[:-1] # Hello my nam
    target_txt = seq[1:] # ello my name
    return input_txt,target_txt    

In [25]:
dataset = sequences.map(create_sequence_targets)



In [26]:
for input_txt, target_txt in dataset.take(1):
    print(input_txt.numpy())
    print("".join(ind_to_char[input_txt.numpy()]))
    print('\n')
    print('\n')
    print(target_txt.numpy())
    print("".join(ind_to_char[target_txt.numpy()]))

[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75]

                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But




[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0  1
  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74  1
 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45 63
 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74 60
  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75  1]
                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But 


In [49]:
batch_size = 128
buffer_size = 10000

dataset = dataset.shuffle(buffer_size).batch(batch_size,drop_remainder=True)
dataset

<BatchDataset shapes: ((128, 128, 128, 120), (128, 128, 128, 120)), types: (tf.int32, tf.int32)>

In [70]:
rnn_neurons = 1026
embed_dim = 64

In [50]:
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, GRU, Dense

In [51]:
def sparse_cat_loss(y_true,y_pred):
    return sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)

In [52]:
def create_model(vocab_size,embed_dim,rnn_neurons,batch_size):
    
    model = Sequential()
    
    model.add(Embedding(vocab_size, 64, batch_input_shape=[batch_size,None]))
    
    model.add(GRU(1026, return_sequences=True, stateful=True, recurrent_initializer = 'glorot_uniform')) 
    #One Time step ahead,  Last state of the batch will be used as the initial state of the next batch, glorot_uniform has better result than orthogonal    
    
    model.add(Dense(vocab_size))
    
    model.compile(optimizer = 'adam', loss= sparse_cat_loss)
    
    return model

In [57]:
model = create_model(vocab_size=vocab_size, embed_dim=64, rnn_neurons=1026, batch_size=batch_size)

In [58]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_2 (Embedding)      (128, None, 64)           5376      
_________________________________________________________________
gru_2 (GRU)                  (128, None, 1026)         3361176   
_________________________________________________________________
dense_2 (Dense)              (128, None, 84)           86268     
Total params: 3,452,820
Trainable params: 3,452,820
Non-trainable params: 0
_________________________________________________________________


In [60]:
for input_example_batch, target_example_batch in dataset.take(1):       
    example_batch_predictions = model(input_example_batch) 

In [66]:
sampled_indices = tf.random.categorical(example_batch_predictions[0],num_samples=1)
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()

In [67]:
epochs = 30

In [68]:
model.fit(dataset,epochs=epochs)

In [69]:
from tensorflow.keras.models import load_model

In [71]:
model = create_model(vocab_size,embed_dim,rnn_neurons,batch_size=1)
model.load_weights('shakespeare_gen.h5')
model.build(tf.TensorShape([1,None]))

In [72]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (1, None, 64)             5376      
_________________________________________________________________
gru_3 (GRU)                  (1, None, 1026)           3361176   
_________________________________________________________________
dense_3 (Dense)              (1, None, 84)             86268     
Total params: 3,452,820
Trainable params: 3,452,820
Non-trainable params: 0
_________________________________________________________________


In [73]:
def generate_text(model, start_seed, gen_size, temp = 1.0):
  # Evaluation step (generating text using the learned model)

  # Number of characters to generate
    num_generate = gen_size

  # Converting our start string to numbers (vectorizing)
    input_eval = [char_to_ind[s] for s in start_seed]
    input_eval = tf.expand_dims(input_eval, 0)

  # Empty string to store our results
    text_generated = []

  # Low temperatures results in more predictable text.
  # Higher temperatures results in more surprising text.
  # Experiment to find the best setting.
    temperature = temp

  # Here batch size == 1
    model.reset_states()
    for i in range(num_generate):
        predictions = model(input_eval)
      # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

      # using a categorical distribution to predict the character returned by the model
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

      # We pass the predicted character as the next input to the model
      # along with the previous hidden state
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(ind_to_char[predicted_id])

    return (start_seed + ''.join(text_generated))

In [74]:
print(generate_text(model,"JULIET", gen_size=1000))

JULIETO.
[Kents and Gift, they with brawlfrings of the field]

                        Enter CLIFFORD,
  Food brown the child nor out, youd will; and the which looks,
    Than twice faded
    when monstrous Lavio is misled on his hands on his age, thy old
    house, and will avoid down shepherd.
  PATROCLUS. Yes, for your lies, the wiD
  BASSANIO. Even for him govern you that made me not.
    Thus money comes; love not thee.
  AJAX. I warrant that, is something roar brother horrible
    Where French words, Catesby; you are winds. Farewell.
    I hope he is betrey where he was contented,
    Cannot ruter in their Tibet's, but there was not
    o'ernoward, thy lips and stop that villary follows madness write;
    Follow, burn! I will fetch me this.
  LLON. Mistress Margaret, the Queen tres but at once.
  Prince. What, shall ourse.
  Glou. Away, you!-chay'd no tongue, my lord.
  OTHELLO. O prison! eet, so
    I have no need of thee thy love,
    And now I hope. I think the odds is spotles