<a href="https://colab.research.google.com/github/hugueds/tensorflow-course/blob/master/90_NLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## NLP: Natural Language Processing

In [0]:
%tensorflow_version 2.x
import matplotlib.pyplot as plt
import numpy as np 
import pandas as pd
import tensorflow as tf

In [0]:
# Read the whole corpus into memory. The context manager closes the file
# handle as soon as the read finishes instead of leaving it open.
with open('shakespeare.txt', 'r') as corpus_file:
  text = corpus_file.read()

In [0]:
# Vocabulary: every distinct character in the corpus, in sorted order.
vocab = sorted(set(text))

In [0]:
# Number of distinct characters; later passed to create_model as vocab_size.
vocab_size = len(vocab)

In [0]:
# Lookup table from each vocabulary character to its integer id, where the id
# is the character's position in `vocab`.
char_to_num = dict(zip(vocab, range(len(vocab))))

In [38]:
# Spot-check the mapping: integer id assigned to 'Z'.
char_to_num['Z']

51

In [0]:
# Inverse lookup: num_to_char[i] is the character whose id is i, because ids
# are positions in `vocab`. A NumPy array allows indexing with an array of ids.
num_to_char = np.array(vocab)

In [40]:
# Display the id -> character table.
num_to_char

array(['\n', ' ', '!', '"', '&', "'", '(', ')', ',', '-', '.', '0', '1',
       '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '>', '?',
       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
       'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
       '[', ']', '_', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i',
       'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v',
       'w', 'x', 'y', 'z', '|', '}'], dtype='<U1')

In [0]:
# Encode the corpus as a NumPy array of character ids, one id per character.
encoded_text = np.array(list(map(char_to_num.__getitem__, text)))

In [42]:
# Peek at the first 100 encoded ids.
print(encoded_text[:100])

[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64]


In [43]:
# Show a slice of the raw corpus to see what the text looks like.
print(text[50:500])
# Four lines copied from the corpus; their combined length is measured below
# to get a feel for how many characters a few lines of verse occupy.
# NOTE(review): presumably this informed the choice of seq_len — confirm.
lines = '''
But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
'''
len(lines)

e desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir might bear his memory:
  But thou contracted to thine own bright eyes,
  Feed'st thy light's flame with self-substantial fuel,
  Making a famine where abundance lies,
  Thy self thy foe, to thy sweet self too cruel:
  Thou that art now the world's fresh ornament,
  And only herald to the gaudy spring,
  Within thine own bu


192

In [0]:
# Number of input characters per training example. Each chunk cut from the
# corpus holds seq_len + 1 characters so input and target can be offset by one.
seq_len = 180

In [45]:
# Number of whole (seq_len + 1)-character chunks that fit in the corpus.
total_sequences = len(text) // (seq_len + 1)
total_sequences

30086

In [0]:
# Stream of individual character ids: one dataset element per character.
char_dataset = tf.data.Dataset.from_tensor_slices(encoded_text)

In [0]:
# for item in char_dataset.take(500):
#   print(num_to_char[item.numpy()])

In [0]:
# Group consecutive ids into chunks of seq_len + 1 elements;
# drop_remainder=True discards the final chunk if it is shorter than that.
sequences = char_dataset.batch(seq_len+1, drop_remainder=True)

In [0]:
def create_seq_targets(seq):
  """Split one chunk into an (input, target) pair offset by one step.

  The input is every element except the last and the target is every element
  except the first, so target[i] is the element that follows input[i].
  """
  return seq[:-1], seq[1:]

In [0]:
# Turn every chunk into an (input, target) pair via create_seq_targets.
dataset = sequences.map(create_seq_targets)

In [51]:
# Inspect the first (input, target) pair: the raw input ids, the input decoded
# back to characters, then the raw target ids.
# NOTE(review): the target is never decoded; the final print('') only emits a
# blank line — confirm whether a decoded target was intended.
for input_text, target_text in dataset.take(1):
  print(input_text.numpy())
  print("".join(num_to_char[input_text.numpy()]))
  print('\n')
  print(target_text.numpy())
  print('')

[ 0  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0
  1  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74
  1 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45
 63 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 80  5 74  1 73 70 74
 60  1 68 64 62 63 75  1 69 60 77 60 73  1 59 64 60  8  0  1  1 27 76 75
  1 56 74  1 75 63 60  1 73 64 71 60 73  1 74 63 70 76 67 59  1 57 80  1
 75 64 68 60  1 59 60 58 60 56 74 60  8  0  1  1 33 64 74  1 75 60 69 59
 60 73  1 63 60 64 73  1 68 64 62 63]

                     1
  From fairest creatures we desire increase,
  That thereby beauty's rose might never die,
  But as the riper should by time decease,
  His tender heir migh


[ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1 12  0  1
  1 31 73 70 68  1 61 56 64 73 60 74 75  1 58 73 60 56 75 76 73 60 74  1
 78 60  1 59 60 74 64 73 60  1 64 69 58 73 60 56 74 60  8  0  1  1 45 63
 56 75  1 75 63 60 73 60 57 80  1 57 60 56 76 75 

In [0]:
batch_size = 200
buffer_size = 10000

# Build the training pipeline from `sequences` rather than from `dataset`
# itself, so re-running this cell does not batch an already-batched dataset.
# shuffle() mixes the (input, target) pairs using a buffer of `buffer_size`
# elements; drop_remainder=True discards a final short batch so every batch
# holds exactly `batch_size` examples.
dataset = (
    sequences
    .map(create_seq_targets)
    .shuffle(buffer_size)
    .batch(batch_size, drop_remainder=True)
)

In [0]:
# Size of each character embedding vector.
embed_dim = 64

# Number of units in the GRU layer.
# NOTE(review): 1026 may be a typo for 1024 — confirm before changing, since
# any saved weights depend on this value.
rnn_neurons = 1026

In [0]:
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.layers import Embedding, GRU, Dense
from tensorflow.keras.models import Sequential

In [0]:
def sparse_cat_loss(y_true, y_pred):
  """Sparse categorical cross-entropy computed on raw logits.

  `from_logits=True` tells Keras that `y_pred` has not been passed through a
  softmax.
  """
  loss = sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
  return loss

In [0]:
def create_model(vocab_size, embed_dim, rnn_neurons, batch_size):
  """Build and compile the character-level model.

  The stack is Embedding -> GRU -> Dense:
    * Embedding maps each of `vocab_size` ids to an `embed_dim` vector and
      fixes the batch dimension to `batch_size` (sequence length is left open).
    * GRU has `rnn_neurons` units, returns an output for every time step, and
      is stateful.
    * Dense has `vocab_size` units and no activation, so the model outputs
      logits; the loss is sparse_cat_loss, which is computed from logits.

  Returns the compiled Sequential model (Adam optimizer).
  """
  layers = [
      Embedding(vocab_size, embed_dim, batch_input_shape=[batch_size, None]),
      GRU(
          rnn_neurons,
          return_sequences=True,
          stateful=True,
          recurrent_initializer='glorot_uniform',
      ),
      Dense(vocab_size),
  ]
  network = Sequential(layers)
  network.compile(optimizer='adam', loss=sparse_cat_loss)
  return network

In [0]:
# Training model, built for batches of exactly `batch_size` sequences.
model = create_model(vocab_size, embed_dim, rnn_neurons, batch_size)

In [58]:
# Print layer output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (200, None, 64)           5376      
_________________________________________________________________
gru_1 (GRU)                  (200, None, 1026)         3361176   
_________________________________________________________________
dense_1 (Dense)              (200, None, 84)           86268     
Total params: 3,452,820
Trainable params: 3,452,820
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Run a forward pass on a single batch, before any training, and keep the
# model output for inspection.
for input_example_batch, target_example_batch in dataset.take(1):
  ex_batch_predictions = model(input_example_batch)

In [0]:
# Sample one id per time step from the logits of the first sequence in the
# batch.
sample_indexes = tf.random.categorical(ex_batch_predictions[0], num_samples=1)

In [0]:
# Drop the trailing num_samples axis and convert to a NumPy array.
# NOTE: this rebinds sample_indexes, so re-run the sampling cell first if this
# cell ever needs to be executed again.
sample_indexes = tf.squeeze(sample_indexes, axis=-1).numpy()

In [71]:
# Display the sampled ids.
sample_indexes

array([ 0, 40, 40, 81, 76, 17,  1, 14,  1, 35, 11, 29, 50, 68, 51, 78, 19,
       45, 32, 71,  8, 35, 46, 34, 59, 73, 38, 19, 54,  8, 73, 10, 36, 40,
       76, 63, 23, 77, 37, 29, 76, 48, 68, 75, 55, 21,  4, 59, 30, 22, 22,
       78,  9, 15, 43, 83, 71, 41, 42, 64, 58,  2, 60, 52, 25, 74, 44, 28,
       10, 83, 62, 52, 48, 75, 82, 31, 26,  5, 72, 25,  1, 41, 26, 65, 65,
       56, 65, 70, 45, 75, 71, 76, 26, 69, 31, 43, 32, 30, 27, 65, 24, 10,
       12, 59, 22, 75, 48, 62, 82, 77, 27,  6, 53, 28, 14, 30, 20, 42, 22,
       19, 54, 36, 56,  7, 56, 26, 70, 29, 38, 42, 68, 43, 17,  3,  5, 83,
        1, 79, 41,  4,  7, 45, 53, 66, 53, 75, 65, 75, 17, 61, 52, 24, 10,
       63, 38, 19, 57, 77,  1, 80, 29, 58,  4, 66, 22, 53, 70, 36, 33, 61,
       66, 36, 29, 78, 22, 54, 15, 46, 81, 42])

In [72]:
# Decode the sampled ids back to characters. The model has not been trained at
# this point, so the result is expected to look random.
num_to_char[sample_indexes]

array(['\n', 'O', 'O', 'z', 'u', '6', ' ', '3', ' ', 'J', '0', 'D', 'Y',
       'm', 'Z', 'w', '8', 'T', 'G', 'p', ',', 'J', 'U', 'I', 'd', 'r',
       'M', '8', '_', ',', 'r', '.', 'K', 'O', 'u', 'h', '<', 'v', 'L',
       'D', 'u', 'W', 'm', 't', '`', ':', '&', 'd', 'E', ';', ';', 'w',
       '-', '4', 'R', '}', 'p', 'P', 'Q', 'i', 'c', '!', 'e', '[', '?',
       's', 'S', 'C', '.', '}', 'g', '[', 'W', 't', '|', 'F', 'A', "'",
       'q', '?', ' ', 'P', 'A', 'j', 'j', 'a', 'j', 'o', 'T', 't', 'p',
       'u', 'A', 'n', 'F', 'R', 'G', 'E', 'B', 'j', '>', '.', '1', 'd',
       ';', 't', 'W', 'g', '|', 'v', 'B', '(', ']', 'C', '3', 'E', '9',
       'Q', ';', '8', '_', 'K', 'a', ')', 'a', 'A', 'o', 'D', 'M', 'Q',
       'm', 'R', '6', '"', "'", '}', ' ', 'x', 'P', '&', ')', 'T', ']',
       'k', ']', 't', 'j', 't', '6', 'f', '[', '>', '.', 'h', 'M', '8',
       'b', 'v', ' ', 'y', 'D', 'c', '&', 'k', ';', ']', 'o', 'K', 'H',
       'f', 'k', 'K', 'D', 'w', ';', '_', '4', 'U', 'z', 'Q'], 

In [74]:
# Train for `epochs` passes over the shuffled, batched dataset.
epochs = 30
model.fit(dataset, epochs=epochs)

Train for 150 steps
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7f3373d7ed68>

In [0]:
# Save the trained model to an HDF5 file; its weights are loaded back into a
# batch-size-1 copy of the model in the next cell.
model.save('test.h5')

In [0]:
# Rebuild the same architecture with batch_size=1 so text can be generated one
# sequence at a time, then load the trained weights into it.
model_2 = create_model(vocab_size, embed_dim, rnn_neurons, batch_size=1)
model_2.load_weights('test.h5')
# Build with a batch dimension of 1 and an unspecified sequence length.
model_2.build(tf.TensorShape([1, None]))

In [0]:
def gen_text(model, start_seed, gen_size=500, temp=1.0):
  """Generate text one character at a time, starting from `start_seed`.

  Args:
    model: stateful character model that accepts a batch of one id sequence
      and returns per-step logits over the vocabulary.
    start_seed: non-empty string used to prime the model. Every character
      must be a key of `char_to_num`; an unknown character raises KeyError.
    gen_size: number of characters to generate.
    temp: sampling temperature. Logits are divided by it before sampling, so
      values below 1 sharpen the distribution and values above 1 flatten it.

  Returns:
    `start_seed` followed by the `gen_size` generated characters.

  Raises:
    ValueError: if `start_seed` is empty or `temp` is not positive.
  """
  # Fail early with a clear message: an empty seed would otherwise surface as
  # an indexing error on an empty tensor, and temp == 0 would divide the
  # logits by zero.
  if not start_seed:
    raise ValueError('start_seed must contain at least one character')
  if temp <= 0:
    raise ValueError('temp must be a positive number')

  # Encode the seed and add a leading batch axis of size 1.
  input_eval = [char_to_num[s] for s in start_seed]
  input_eval = tf.expand_dims(input_eval, 0)
  text_gen = []

  # Start from a clean recurrent state so earlier calls do not leak in.
  model.reset_states()

  for _ in range(gen_size):
    pred = model(input_eval)
    # Remove the batch axis, leaving one row of logits per time step.
    pred = tf.squeeze(pred, 0)
    pred = pred / temp
    # Sample an id for every time step and keep the one for the last step.
    pred_id = tf.random.categorical(pred, num_samples=1)[-1, 0].numpy()

    # Feed only the new character back in; the stateful GRU carries the
    # context from the previous steps.
    input_eval = tf.expand_dims([pred_id], 0)

    text_gen.append(num_to_char[pred_id])

  return start_seed + ''.join(text_gen)



In [91]:
# Generate 1500 characters seeded with 'test', using the batch-size-1 model.
print(gen_text(model_2, 'test', gen_size=1500))

test,  
    Win of it edgels, pound him in his blest,
    The noble marnelly in Withol I never believ'd this, sir;
    And to the  lord you    Her yet appears in herbs have seen your faght hostessators
   For she for't. That, his father heard it fist,  
    To hang him out. Alas, I seem to hook;
    I know but not prophetical posies
    That any minute all other more kissing- but doth make pause approach'd?
  FERDINAND. The snd!
  GLOUCESTER. Then you are cordial; Morton. Who knows not you strong?
    Why do you know. But to this careful damn- a man of this.
    Truitor the beats of hell!
  KATHARINE. Did the  'Yea, dumb shall dry?
  OTHELLO. Even here I sent the knave to offer.
  Jul. Go, get thee gone, sir.
  EGABELLA. Yes, Claudio.
  Pedro. God hence! If she did possess, Chidial, sir, gratingly
    That he that parte the twain! No hope forget how to hear.
                                                               Exit FABIAN

  MARIA. Hans of day in him; he's a thousand coining 