In [1]:
%tensorflow_version 2.x
import tensorflow as tf
from tensorflow import keras

import numpy as np
import os
import time


Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [2]:
from google.colab import files  # not needed for the download below; kept for optional manual uploads

# NOTE: the original cell contained a bare `files.upload` expression. Without
# call parentheses it only references the function and uploads nothing, so the
# dead statement is removed. The corpus is obtained from the URL instead.
# get_file downloads the file (unless it is already cached) and returns the
# local path of the cached copy.
path_to_fileDl = tf.keras.utils.get_file(
    'Shakespear.txt',
    'https://cs.stanford.edu/people/karpathy/char-rnn/shakespear.txt')

In [3]:
# Read the downloaded corpus as raw bytes and decode it explicitly as UTF-8.
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until garbage collection.
with open(path_to_fileDl, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding = 'utf-8')
print('Longitud del texto: {} caracteres'.format(len(text)))

# Vocabulary: sorted list of the distinct characters occurring in the corpus.
vocab = sorted(set(text))

print('El texto esta compuesto de estos {} caracteres: '.format(len(vocab)))
print(vocab)

Longitud del texto: 99993 caracteres
El texto esta compuesto de estos 62 caracteres: 
['\n', ' ', '!', "'", ',', '-', '.', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


In [4]:
# Character -> integer id, where the id is the character's position in `vocab`.
char2idx = {symbol: position for position, symbol in enumerate(vocab)}
# Inverse lookup: indexing this array with an id (or an array of ids) yields
# the corresponding character(s).
idx2char = np.array(vocab)

In [5]:
# Print the character -> id table, one entry per line.
# Iterating `char2idx.items()` visits every entry exactly once. The original
# zipped the dict with `range(len(vocab))` only to discard the counter into
# `_`; at notebook top level that assignment shadows IPython's last-output
# variable `_` for the rest of the session.
for char, char_id in char2idx.items():
  print('  {:4s}: {:3d},'.format(repr(char), char_id))

  '\n':   0,
  ' ' :   1,
  '!' :   2,
  "'" :   3,
  ',' :   4,
  '-' :   5,
  '.' :   6,
  ':' :   7,
  ';' :   8,
  '?' :   9,
  'A' :  10,
  'B' :  11,
  'C' :  12,
  'D' :  13,
  'E' :  14,
  'F' :  15,
  'G' :  16,
  'H' :  17,
  'I' :  18,
  'J' :  19,
  'K' :  20,
  'L' :  21,
  'M' :  22,
  'N' :  23,
  'O' :  24,
  'P' :  25,
  'Q' :  26,
  'R' :  27,
  'S' :  28,
  'T' :  29,
  'U' :  30,
  'V' :  31,
  'W' :  32,
  'X' :  33,
  'Y' :  34,
  'Z' :  35,
  'a' :  36,
  'b' :  37,
  'c' :  38,
  'd' :  39,
  'e' :  40,
  'f' :  41,
  'g' :  42,
  'h' :  43,
  'i' :  44,
  'j' :  45,
  'k' :  46,
  'l' :  47,
  'm' :  48,
  'n' :  49,
  'o' :  50,
  'p' :  51,
  'q' :  52,
  'r' :  53,
  's' :  54,
  't' :  55,
  'u' :  56,
  'v' :  57,
  'w' :  58,
  'x' :  59,
  'y' :  60,
  'z' :  61,


In [6]:
# Encode the whole corpus as a 1-D NumPy array holding one character id per
# character of `text`.
text_as_int = np.array(list(map(char2idx.__getitem__, text)))

In [7]:
# Sanity check: show the first 50 characters of the corpus followed by the
# first 50 entries of its integer encoding.
print('texto: {}'.format(repr(text[:50])))
print('{}'.format(repr(text_as_int[:50])))

texto: "That, poor contempt, or claim'd thou slept so fait"
array([29, 43, 36, 55,  4,  1, 51, 50, 50, 53,  1, 38, 50, 49, 55, 40, 48,
       51, 55,  4,  1, 50, 53,  1, 38, 47, 36, 44, 48,  3, 39,  1, 55, 43,
       50, 56,  1, 54, 47, 40, 51, 55,  1, 54, 50,  1, 41, 36, 44, 55])


In [8]:
# Number of characters in each model input; chunks hold one extra character
# so that an input and a one-step-shifted target can be cut from each chunk.
seq_length = 100

# Dataset with one element per character id of the encoded corpus.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Group consecutive ids into chunks of seq_length + 1; a final chunk shorter
# than that is dropped.
sequence = char_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)

In [9]:
# Decode the first 10 chunks back to text and print them to check the chunking.
for chunk in sequence.take(10):
  chunk_chars = idx2char[chunk.numpy()]
  print(''.join(chunk_chars))

That, poor contempt, or claim'd thou slept so faithful,
I may contrive our father; and, in their defe
ated queen,
Her flesh broke me and puttance of expedition house,
And in that same that ever I lament 
this stomach,
And he, nor Butly and my fury, knowing everything
Grew daily ever, his great strength a
nd thought
The bright buds of mine own.

BIONDELLO:
Marry, that it may not pray their patience.'

KIN
G LEAR:
The instant common maid, as we may less be
a brave gentleman and joiner: he that finds us wit
h wax
And owe so full of presence and our fooder at our
staves. It is remorsed the bridal's man his g
race
for every business in my tongue, but I was thinking
that he contends, he hath respected thee.

B
IRON:
She left thee on, I'll die to blessed and most reasonable
Nature in this honour, and her bosom 
is safe, some
others from his speedy-birth, a bill and as
Forestem with Richard in your heart
Be ques
tion'd on, nor that I was enough:
Which of a partier forth the obsers d'punish'd t

In [10]:
def split_input_target(chunk):
  """Split one chunk into an (input, target) pair shifted by one position.

  The input is every element of `chunk` except the last and the target is
  every element except the first, so target[i] is the element that follows
  input[i] in the chunk.

  Args:
    chunk: any sliceable sequence (the notebook maps this over dataset chunks).

  Returns:
    A tuple (input_text, target_text) of two slices of `chunk`.
  """
  return chunk[:-1], chunk[1:]

# Turn every chunk of `sequence` into an (input, target) pair.
dataset = sequence.map(split_input_target)

In [11]:
# Decode and print the first (input, target) pair; the target is the input
# shifted one character to the left.
for input_example, target_example in dataset.take(1):
  print('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
  print('Target data: ', repr(''.join(idx2char[target_example.numpy()])))

Input data:  "That, poor contempt, or claim'd thou slept so faithful,\nI may contrive our father; and, in their def"
Target data:  "hat, poor contempt, or claim'd thou slept so faithful,\nI may contrive our father; and, in their defe"


In [12]:
# Show the dataset's element spec (shapes and dtypes of input and target).
print(dataset)

<_MapDataset element_spec=(TensorSpec(shape=(100,), dtype=tf.int64, name=None), TensorSpec(shape=(100,), dtype=tf.int64, name=None))>


In [13]:

# Number of (input, target) pairs per training batch.
BATCH_SIZE = 64

# Size of the shuffle buffer, counted in dataset elements.
BUFFER_SIZE = 10000

# Build the training pipeline from `sequence` instead of re-wrapping `dataset`.
# The original `dataset = dataset.shuffle(...).batch(...)` batches the already
# batched dataset again whenever this cell is re-run; with drop_remainder=True
# that can even leave it empty. Starting from `sequence` makes the cell
# idempotent while producing the same pipeline on a top-to-bottom run.
dataset = (
    sequence
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
)

print(dataset)

<_BatchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>


In [14]:

# Number of distinct characters; build_model uses it as the Embedding input
# dimension and as the number of units of the final Dense layer.
vocab_size = len(vocab)
# Length of each character embedding vector.
embedding_dim = 256
# Number of units in the LSTM layer.
rnn_units = 1024


In [15]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Build the character-level network: Embedding -> stateful LSTM -> Dense.

  Args:
    vocab_size: number of distinct character ids; used as the Embedding input
      dimension and as the number of units of the output Dense layer.
    embedding_dim: length of each embedding vector.
    rnn_units: number of LSTM units.
    batch_size: fixed batch dimension of the input shape; the sequence length
      is left as None.

  Returns:
    An uncompiled Sequential model. The Dense layer has no activation, so the
    model outputs raw scores rather than probabilities.
  """
  embedding = Embedding(input_dim=vocab_size,
                        output_dim=embedding_dim,
                        batch_input_shape=[batch_size, None])
  recurrent = LSTM(rnn_units,
                   return_sequences=True,
                   stateful=True,
                   recurrent_initializer='glorot_uniform')
  projection = Dense(vocab_size)
  return Sequential([embedding, recurrent, projection])

In [16]:

# Instantiate the network used for training, with the training batch size
# fixed in its input shape.
model = build_model(len(vocab), embedding_dim, rnn_units, batch_size=BATCH_SIZE)

In [17]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           15872     
                                                                 
 lstm (LSTM)                 (64, None, 1024)          5246976   
                                                                 
 dense (Dense)               (64, None, 62)            63550     
                                                                 
Total params: 5326398 (20.32 MB)
Trainable params: 5326398 (20.32 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [18]:
# Take one batch from the training dataset and print the input/target shapes.
for input_example_batch, target_example_batch in dataset.take(1):
  print("Input:", input_example_batch.shape, "# (batch_size, sequence_length)")
  print("Target:", target_example_batch.shape, "# (batch_size, sequence_length)")

Input: (64, 100) # (batch_size, sequence_length)
Target: (64, 100) # (batch_size, sequence_length)


In [19]:
# Run the model (not yet trained at this point of the notebook) on one batch
# to check the shape of its output; the predictions are kept in
# `example_batch_predictions` for the sampling cell that follows.
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print('Predictions:',example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

Predictions: (64, 100, 62) # (batch_size, sequence_length, vocab_size)


In [20]:
# Sample one character id per time step for the first sequence of the example
# batch, treating the model outputs as unnormalized log-probabilities.
sampled_indices = tf.random.categorical(logits=example_batch_predictions[0], num_samples=1)

# Drop the trailing num_samples axis and convert to a NumPy array.
# NOTE: despite its name, this variable holds character ids, not characters.
sampled_indices_characters = tf.squeeze(sampled_indices, axis=-1).numpy()

In [21]:
# Show the sampled character ids (one per time step of the first sequence).
print(sampled_indices_characters)

[26  0 34 55 37 49 29  0 40 39 50 44 37 14 49 21  7 13 50 28  0 55  1 44
  1 58 42 19 12 47 59  5  0 34 59 45 23 46 10 24 23 23 26 52 31 20 61  4
 44 17  7 38 14 55 58 44 12 51 14 13 56 18 41 26 55 50  0 14 25  9 47 32
 12  0 54 48 12  1 41 49 43 20 48 35  2 30 39 49 19 12  7 59 12  0 45 59
 53 58 45 47]


In [22]:
def loss(labels, logits):
  """Sparse categorical cross-entropy between integer labels and raw scores.

  Args:
    labels: integer class ids (here: target character ids).
    logits: unnormalized model outputs; `from_logits=True` tells Keras they
      are not probabilities.

  Returns:
    The result of tf.keras.losses.sparse_categorical_crossentropy.
  """
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels, y_pred=logits, from_logits=True)

In [23]:
# Configure training: Adam optimizer and the notebook's `loss` function.
model.compile(optimizer = 'adam', loss = loss)

In [24]:
 # Directory where the training checkpoints are written
checkpoint_dir = './training_checkpoints'
# Checkpoint file name template; "{epoch}" is a placeholder in the path
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback handed to model.fit; configured to save only the model weights.
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [25]:
# Number of passes over the training dataset.
EPOCHS=50
# Train the model; checkpoint_callback writes weight checkpoints during
# training and the object returned by fit is kept in `history`.
history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [26]:
!ls training_chekpoints

ckpt_1	 ckpt_14  ckpt_19  ckpt_23  ckpt_28  ckpt_32  ckpt_37  ckpt_41	ckpt_46  ckpt_50
ckpt_10  ckpt_15  ckpt_2   ckpt_24  ckpt_29  ckpt_33  ckpt_38  ckpt_42	ckpt_47  ckpt_6
ckpt_11  ckpt_16  ckpt_20  ckpt_25  ckpt_3   ckpt_34  ckpt_39  ckpt_43	ckpt_48  ckpt_7
ckpt_12  ckpt_17  ckpt_21  ckpt_26  ckpt_30  ckpt_35  ckpt_4   ckpt_44	ckpt_49  ckpt_8
ckpt_13  ckpt_18  ckpt_22  ckpt_27  ckpt_31  ckpt_36  ckpt_40  ckpt_45	ckpt_5	 ckpt_9


In [28]:
# The training model was built with batch_size=BATCH_SIZE; text generation
# feeds one sequence at a time, so the same architecture is rebuilt with
# batch_size=1 before the trained weights are restored.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# tf.train.latest_checkpoint returns None when the directory holds no
# checkpoint; fail with a clear message instead of passing None to load_weights.
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
if latest_checkpoint is None:
  raise FileNotFoundError(
      'No checkpoint found in {!r}; run the training cell first.'.format(checkpoint_dir))
model.load_weights(latest_checkpoint)

model.build(tf.TensorShape([1, None]))

In [29]:
def generate_text(model, start_string, num_generate=10000, temperature=0.5):
  """Generate text one character at a time with the trained model.

  Args:
    model: model that accepts a batch containing a single sequence of
      character ids; its recurrent states are reset before generation starts.
    start_string: non-empty seed text. Every character must be a key of the
      notebook-level `char2idx` mapping.
    num_generate: number of characters to generate after the seed
      (default 10000, the value previously hard-coded).
    temperature: positive value the logits are divided by before sampling
      (default 0.5, the value previously hard-coded). Values below 1 sharpen
      the sampling distribution, values above 1 flatten it.

  Returns:
    `start_string` followed by the generated characters, as one string.

  Raises:
    ValueError: if `start_string` is empty, `num_generate` is negative or
      `temperature` is not positive.
    KeyError: if `start_string` contains a character missing from `char2idx`.
  """
  # Validate up front so bad arguments fail with a clear message instead of
  # an obscure tensor-shape or indexing error inside the loop.
  if not start_string:
    raise ValueError('start_string must contain at least one character')
  if num_generate < 0:
    raise ValueError('num_generate must be non-negative')
  if temperature <= 0:
    raise ValueError('temperature must be positive')

  # Encode the seed and add a leading batch axis.
  input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)
  text_generated = []

  model.reset_states()
  for _ in range(num_generate):
    predictions = model(input_eval)

    # Remove the batch axis, leaving one row of logits per time step.
    predictions = tf.squeeze(predictions, 0)

    # Scale the logits, then sample; only the sample for the last time step
    # is used as the next character.
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # Feed just the sampled character back in; the model is reset only once
    # before the loop, so its recurrent state persists between iterations.
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))


In [30]:
# Generate text seeded with the single character "A" and print the result.
print(generate_text(model, start_string=u"A"))

ANLILESY VILESSYINESIUSSBKENINCHEM:
MILIANUSSVISISUSIUSUNKCHESLILE:
ESSINIUS:
I was a warrant she tor and not spirit, and thing
the moth of Brutus?

CORIOLANUS:
Let him wear it: brother!

LEONATO:
Wither him, I run.

PETER:
As I shall say I hove to would not strend the dear,
When I was be is and best and strain's back,
And preschiles the kingdom all ourselford;
My my to ore another, and all their martial brows did walk upon our officer;
being one not speak'd lord, if bawd,
That all this holy boy I have they hearth to mert.

KING PHILIP:
A fourther armond.

LOCANDAND:
Your heart, if with mear and in eyes the bother's shadew,
Of the conrection that you had not speak.

Second Lord:
Present we shall be rather than I see the worth the throce your shing wo would rang,
And therefore dave between the store of thee, if he wonder
And in a glace do heaven op the caurt
As I think that who may no lest that with his ways, and this with his ranes,
Who is but seen a mischeet of my son, I would not kno