In [2]:
import tensorflow as tf

import numpy as np
import os
import time

In [3]:
tf.config.run_functions_eagerly(True)

In [4]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')


Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [5]:
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')


Length of text: 1115394 characters


In [6]:
vocab = set(text)

In [7]:
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)

In [8]:
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)


In [9]:
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids


<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([45, 60, 55, ...,  4,  7,  9])>

In [10]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [11]:
window_size = 100 # may be a hyperparam to optimize

In [12]:
sequences = ids_dataset.batch(window_size+1, drop_remainder=True)
sequences

<BatchDataset element_spec=TensorSpec(shape=(101,), dtype=tf.int64, name=None)>

In [13]:
def create_df(sequence):
    input_seq = sequence[:-1]
    label_seq = sequence[1:]
    return input_seq, label_seq
create_df("test")

('tes', 'est')

In [14]:
dataset = sequences.map(create_df)
dataset

  "Even though the `tf.config.experimental_run_functions_eagerly` "


<MapDataset element_spec=(TensorSpec(shape=(100,), dtype=tf.int64, name=None), TensorSpec(shape=(100,), dtype=tf.int64, name=None))>

In [15]:
for input_example, target_example in dataset.take(1):
    print("Input :", input_example)
    print("Target:", target_example)


Input : tf.Tensor(
[45 60 55 13 33 21 12 60 33 60 61 30 56 24  9 35 30 57  6 55 30 21 65 30
 21 51 55  6  3 30 30 23 21 37 56 46 21 57 62 55 33 42 30 55  2 21 42 30
 37 55 21 20 30 21 13 51 30 37 38  7  9  9 22 47 47 24  9 29 51 30 37 38
  2 21 13 51 30 37 38  7  9  9 45 60 55 13 33 21 12 60 33 60 61 30 56 24
  9 58  6 62], shape=(100,), dtype=int64)
Target: tf.Tensor(
[60 55 13 33 21 12 60 33 60 61 30 56 24  9 35 30 57  6 55 30 21 65 30 21
 51 55  6  3 30 30 23 21 37 56 46 21 57 62 55 33 42 30 55  2 21 42 30 37
 55 21 20 30 21 13 51 30 37 38  7  9  9 22 47 47 24  9 29 51 30 37 38  2
 21 13 51 30 37 38  7  9  9 45 60 55 13 33 21 12 60 33 60 61 30 56 24  9
 58  6 62 21], shape=(100,), dtype=int64)


In [16]:
BATCH_SIZE = 64
BUFFER_SIZE = 1000 # tf.data shuffles the data in a buffer instead of the memory so we have to give the buffer size (it does so cause
# it could shuffle infinte size of data)

In [17]:
dataset = (dataset
        .shuffle(BUFFER_SIZE)
        .batch(BATCH_SIZE, drop_remainder=True)
        .prefetch(tf.data.experimental.AUTOTUNE)# overlaps the training and the preprocessing when executing step s the pipeline prepares data for step s+1
          )
dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [18]:
vocab_size = len(ids_from_chars.get_vocabulary())

embedding = 256

lstm_units = 1024

In [19]:
class mySuperModel(tf.keras.Model):
    def __init__(self, vocab_size, dim_embedding, lstm_units):
        super().__init__(self)
        
        self.embedding = tf.keras.layers.Embedding(vocab_size, dim_embedding)
        self.lstm_1 = tf.keras.layers.LSTM(lstm_units,
                                        return_sequences=True,
                                        return_state=True)
        self.lstm_2 = tf.keras.layers.LSTM(lstm_units,
                                        return_sequences=True,
                                        return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)
    
    def call(self, inputs, states=None, return_state=False, training=False):
        x = inputs
        x = self.embedding(x, training=training)
        
        if states is None:
            state_1_h, state_1_c = self.lstm_1.get_initial_state(x)
            states = [state_1_h, state_1_c]
        x, state_1_h, state_1_c = self.lstm_1(x, initial_state=states, training=training)
        states = [state_1_h, state_1_c]
        x, state_2_h, state_2_c = self.lstm_2(x, initial_state=states, training=training)
        states = [state_2_h, state_2_c]
        x = self.dense(x, training=training)
        
        if  return_state:
            return x, states
        else:
            return x
        

In [20]:
my_model = mySuperModel(vocab_size, embedding, lstm_units)

In [21]:
for input_seq, target in dataset.take(2):
    prediction = my_model(input_seq)

In [22]:
my_model.summary()

Model: "my_super_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  16896     
                                                                 
 lstm (LSTM)                 multiple                  5246976   
                                                                 
 lstm_1 (LSTM)               multiple                  8392704   
                                                                 
 dense (Dense)               multiple                  67650     
                                                                 
Total params: 13,724,226
Trainable params: 13,724,226
Non-trainable params: 0
_________________________________________________________________


In [23]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
# why from_logits=True: https://datascience.stackexchange.com/
#questions/73093/what-does-from-logits-true-do-in-sparsecategoricalcrossentropy-loss-function

#https://stats.stackexchange.com/questions/326065/cross-entropy-vs-sparse-cross-entropy-when-to-use-one-over-the-other



In [24]:
example_batch_mean_loss = loss(target, prediction)
print("Prediction shape: ", prediction.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)


Prediction shape:  (64, 100, 66)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.189484, shape=(), dtype=float32)


In [25]:
def text_from_ids(ids):
    return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)


In [25]:
my_model.compile(optimizer='adam', loss=loss)

In [28]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)


In [26]:
EPOCHS = 50


In [30]:
hist_training = my_model.fit(dataset, epochs=EPOCHS)#, callbacks=[checkpoint_callback])


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [31]:
for input_seq, target in dataset.take(1):
    prediction = my_model(input_seq)

In [32]:
def text_from_ids(ids):
    return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)

In [None]:
text_from_ids(my_model.predict(input_seq))

In [34]:
class OneStep(tf.keras.Model):
    def __init__(self, model, chars_from_ids, ids_from_char):
        super().__init__()
        
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_char
        
        skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
        sparse_mask = tf.SparseTensor(
            # Put a -inf at each bad index.
            values=[-float('inf')]*len(skip_ids),
            indices=skip_ids,
            # Match the shape to the vocabulary
            dense_shape=[len(ids_from_chars.get_vocabulary())])
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)
        
    @tf.function
    def generate_the_next_step(self, inputs, states=None, run_eagerly=True):
        
        inputs_ids = self.ids_from_chars(tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()
        predicted_logits, states = self.model(inputs_ids, states, return_state=True)
        predicted_logits = predicted_logits[:, -1, :]
        predicted_logits = predicted_logits + self.prediction_mask
        
        #https://stackoverflow.com/questions/55063120/
        #can-anyone-give-a-tiny-example-to-explain-the-params-of-tf-random-categorical
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)
        
        predicted_chars = self.chars_from_ids(predicted_ids)
        return predicted_chars, states

        

In [35]:
one_step_model = OneStep(my_model, chars_from_ids, ids_from_chars)

In [88]:
start = time.time()
states = None
next_char = tf.constant(['if I had received them for the hire'])
result = [next_char]

for n in range(1000):
    next_char, states = one_step_model.generate_the_next_step(next_char, states=states)
    #states = [state_h, state_c]
    result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)


if I had received them for the hire more; than broore, me to hororouse, than youredoure, than elous me thave eachie, he or thiar havith he bane bre, thilal hay the he oneste than bracar haince an ar have eacle an incle elace he on Clar he onoo hoone bracar ved ed yo be horedous:
Bince an Ks houres yourge me me we me, thilar than e wit houre, an eare elovot me we me, thilar, she tha, bre and ve me, din the me me, me, not me thave more, me, te me we bele Cle ele an ince exe me thacle thored JUSESe an inglat he bre; me, noth thilan thial have me thably thile brorest me, he brouchie hoonour imoust tha thedowour'd the than JAndothat tha, he bres he onoo; me, de thavilachive, me we me bes me, thal he blachie he bro; swes me, house, thar inche blorere moouse, than Pre, me, car we more, cle an ince an ince wesoure, me me, dorery me thar the blachied he blores thiall Je havile de an ince ewele horestoure, thap thithoouth than O that me tar he on mo ve he onesto, than Cle an ince an ince ewe, th

In [87]:
text_from_ids(input_seq)

<tf.Tensor: shape=(64,), dtype=string, numpy=
array([b"uld hide,\nAs if I had received them for the hire\nOf their breath only!\n\nMENENIUS:\nDo not stand upon'",
       b'till cupboarding the viand, never bearing\nLike labour with the rest, where the other instruments\nDid',
       b' Cominius\nWith thee awhile: determine on some course,\nMore than a wild exposture to each chance\nThat',
       b"US:\nI heard him swear,\nWere he to stand for consul, never would he\nAppear i' the market-place nor on",
       b' honour; and so, I pray, go with us.\n\nVIRGILIA:\nGive me excuse, good madam; I will obey you in every',
       b" vouch, is more than that he hath,\nBy many an ounce--he dropp'd it for his country;\nAnd what is left",
       b' may be sworn by, both divine and human,\nSeal what I end withal! This double worship,\nWhere one part',
       b"More than thy fame and envy. Fix thy foot.\n\nMARCIUS:\nLet the first budger die the other's slave,\nAnd",
       b'll\npatience; and, in roa

test: 2
