In [1]:
import tensorflow as tf

import numpy as np
import os
import time

In [2]:
tf.config.run_functions_eagerly(True)

In [3]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')


In [42]:
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')


Length of text: 1115394 characters


In [43]:
vocab = set(text)

In [44]:
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)

In [45]:
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)


In [46]:
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids


<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([20, 53,  7, ..., 22, 29, 13], dtype=int64)>

In [47]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [124]:
window_size = 200 # may be a hyperparam to optimize

In [133]:
sequences = ids_dataset.batch(window_size+1, drop_remainder=True)
sequences

<BatchDataset element_spec=TensorSpec(shape=(201,), dtype=tf.int64, name=None)>

In [134]:
def create_df(sequence):
    input_seq = sequence[:-1]
    label_seq = sequence[1:]
    return input_seq, label_seq
create_df("test")

('tes', 'est')

In [135]:
dataset = sequences.map(create_df)
dataset

<MapDataset element_spec=(TensorSpec(shape=(200,), dtype=tf.int64, name=None), TensorSpec(shape=(200,), dtype=tf.int64, name=None))>

In [136]:
for input_example, target_example in dataset.take(1):
    print("Input :", input_example)
    print("Target:", target_example)


Input : tf.Tensor(
[20 53  7  2 32 59 36 53 32 53  8 55 61 17 13 65 55 30 46  7 55 59 42 55
 59  5  7 46  6 55 55 35 59 40 61 60 59 30 15  7 32 18 55  7 31 59 18 55
 40  7 59 23 55 59  2  5 55 40 57 29 13 13 24 54 54 17 13 19  5 55 40 57
 31 59  2  5 55 40 57 29 13 13 20 53  7  2 32 59 36 53 32 53  8 55 61 17
 13 33 46 15 59 40  7 55 59 40 54 54 59  7 55  2 46 54 43 55 35 59  7 40
 32 18 55  7 59 32 46 59 35 53 55 59 32 18 40 61 59 32 46 59 30 40 23 53
  2 18 27 13 13 24 54 54 17 13 39 55  2 46 54 43 55 35 29 59  7 55  2 46
 54 43 55 35 29 13 13 20 53  7  2 32 59 36 53 32 53  8 55 61 17 13 20 53
  7  2 32 31 59 60 46 15], shape=(200,), dtype=int64)
Target: tf.Tensor(
[53  7  2 32 59 36 53 32 53  8 55 61 17 13 65 55 30 46  7 55 59 42 55 59
  5  7 46  6 55 55 35 59 40 61 60 59 30 15  7 32 18 55  7 31 59 18 55 40
  7 59 23 55 59  2  5 55 40 57 29 13 13 24 54 54 17 13 19  5 55 40 57 31
 59  2  5 55 40 57 29 13 13 20 53  7  2 32 59 36 53 32 53  8 55 61 17 13
 33 46 15 59 40  7 55 59 40 54 5

In [137]:
BATCH_SIZE = 64
BUFFER_SIZE = 1000 # tf.data shuffles the data in a buffer instead of the memory so we have to give the buffer size (it does so cause
# it could shuffle infinte size of data)

In [138]:
dataset = (dataset
        .shuffle(BUFFER_SIZE)
        .batch(BATCH_SIZE, drop_remainder=True)
        .prefetch(tf.data.experimental.AUTOTUNE)# overlaps the training and the preprocessing when executing step s the pipeline prepares data for step s+1
          )
dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(64, 200), dtype=tf.int64, name=None), TensorSpec(shape=(64, 200), dtype=tf.int64, name=None))>

In [139]:
class mySuperModel(tf.keras.Model):
    def __init__(self, vocab_size, dim_embedding, lstm_units):
        super().__init__(self)
        
        self.embedding = tf.keras.layers.Embedding(vocab_size, dim_embedding)
        self.lstm = tf.keras.layers.LSTM(lstm_units,
                                        return_sequences=True,
                                        return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)
    
    def call(self, inputs, states=None, return_state=False, training=False):
        x = inputs
        x = self.embedding(x,training=training)
        
        if states is None:
            states = self.lstm.get_initial_state(x)
        x, states, seq = self.lstm(x,initial_state=states, training=training)
        x = self.dense(x, training=training)
        
        if  return_state:
            return x, states
        else:
            return x
        

In [140]:
vocab_size = len(ids_from_chars.get_vocabulary())

embedding = 256

lstm_units = 1024

In [141]:
my_model = mySuperModel(vocab_size, embedding, lstm_units)

In [144]:
for input_seq, target in dataset.take(1):
    prediction = my_model(input_seq)

In [142]:
vocab_size

66

In [145]:
my_model.summary()

Model: "my_super_model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_24 (Embedding)    multiple                  16896     
                                                                 
 lstm_3 (LSTM)               multiple                  5246976   
                                                                 
 dense_3 (Dense)             multiple                  67650     
                                                                 
Total params: 5,331,522
Trainable params: 5,331,522
Non-trainable params: 0
_________________________________________________________________


In [146]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
# why from_logits=True: https://datascience.stackexchange.com/
#questions/73093/what-does-from-logits-true-do-in-sparsecategoricalcrossentropy-loss-function

#https://stats.stackexchange.com/questions/326065/cross-entropy-vs-sparse-cross-entropy-when-to-use-one-over-the-other



In [147]:
example_batch_mean_loss = loss(target, prediction)
print("Prediction shape: ", prediction.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)


Prediction shape:  (64, 200, 66)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.1899667, shape=(), dtype=float32)


In [148]:
def text_from_ids(ids):
    return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)


In [149]:
my_model.compile(optimizer='adam', loss=loss)

In [150]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
    verbose=1)


In [151]:
EPOCHS = 5

5

In [152]:
history_traning = my_model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/5
Epoch 1: saving model to ./training_checkpoints\ckpt_1
Epoch 2/5
Epoch 2: saving model to ./training_checkpoints\ckpt_2
Epoch 3/5

KeyboardInterrupt: 

In [163]:
model = mySuperModel()

NameError: name 'model' is not defined

In [30]:
my_model.save("text_generator_v1")



INFO:tensorflow:Assets written to: text_generator_v1\assets


INFO:tensorflow:Assets written to: text_generator_v1\assets


In [156]:
my_model = tf.keras.models.load_model(checkpoint_prefix)


OSError: No file or directory found at ./training_checkpoints\ckpt_{epoch}

In [29]:
skip_ids = ids_from_chars(['[UNK]'])[:, None]
skip_ids

<tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[0]], dtype=int64)>

In [121]:
class OneStep(tf.keras.Model):
    def __init__(self, model, chars_from_ids, ids_from_char):
        super().__init__()
        
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_char
        
        skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
        sparse_mask = tf.SparseTensor(
            # Put a -inf at each bad index.
            values=[-float('inf')]*len(skip_ids),
            indices=skip_ids,
            # Match the shape to the vocabulary
            dense_shape=[len(ids_from_chars.get_vocabulary())])
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)
    @tf.function
    def generate_the_next_step(self, inputs, states=None, run_eagerly=True):
        
        inputs_ids = self.ids_from_chars(tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()
        print(inputs_ids.numpy())
        predicted_logits, states = self.model(inputs_ids, states, return_state=True)
        print(predicted_logits.shape)
        predicted_logits = predicted_logits + self.prediction_mask

        #https://stackoverflow.com/questions/55063120/
        #can-anyone-give-a-tiny-example-to-explain-the-params-of-tf-random-categorical
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)
        
        predicted_chars = self.chars_from_ids(predicted_ids)
        
        return predicted_chars, states

        

In [122]:
one_step_model = OneStep(my_model, chars_from_ids, ids_from_chars)

In [123]:
start = time.time()
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(1000):
    print(next_char)
    next_char, states = one_step_model.generate_the_next_step(next_char, states=states)
    result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)


tf.Tensor([b'ROMEO:'], shape=(1,), dtype=string)
[[39 48 12  9 48 17]]


ValueError: Exception encountered when calling layer "my_super_model" "                 f"(type mySuperModel).

Could not find matching concrete function to call loaded from the SavedModel. Got:
  Positional arguments (4 total):
    * <tf.Tensor 'inputs:0' shape=(1, 6) dtype=int64>
    * None
    * True
    * False
  Keyword arguments: {}

 Expected these arguments to match one of the following 4 option(s):

Option 1:
  Positional arguments (4 total):
    * TensorSpec(shape=(None, 50), dtype=tf.int64, name='inputs')
    * None
    * False
    * False
  Keyword arguments: {}

Option 2:
  Positional arguments (4 total):
    * TensorSpec(shape=(None, 50), dtype=tf.int64, name='inputs')
    * None
    * False
    * True
  Keyword arguments: {}

Option 3:
  Positional arguments (4 total):
    * TensorSpec(shape=(None, 50), dtype=tf.int64, name='input_1')
    * None
    * False
    * False
  Keyword arguments: {}

Option 4:
  Positional arguments (4 total):
    * TensorSpec(shape=(None, 50), dtype=tf.int64, name='input_1')
    * None
    * False
    * True
  Keyword arguments: {}

Call arguments received by layer "my_super_model" "                 f"(type mySuperModel):
  • args=('tf.Tensor(shape=(1, 6), dtype=int64)', 'None')
  • kwargs={'return_state': 'True', 'training': 'False'}

In [105]:
#generer le tensor de la couche embedding pour voir l'entree de notre reseau

embedding_layer = tf.keras.layers.Embedding(vocab_size, 200)
embedding_layer(dummy_input)
loss(my_model.predict(dummy_input), dummy_target)



ValueError: `labels.shape` must equal `logits.shape` except for the last dimension. Received: labels.shape=(211200,) and logits.shape=(64, 50)

In [107]:
print(dummy_target.shape)
my_model.predict(dummy_input).shape

(64, 50)


(64, 50, 66)

In [169]:
sampled_indices = tf.random.categorical(model_2.predict(dummy_input)[0], num_samples=1)
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()


print("Input:\n", text_from_ids(dummy_input[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())




Input:
 b"e\nWill then cry out of Marcius 'O if he\nHad borne "

Next Char Predictions:
 b"r:gYzdCPBcA!R?P'py'.SAEy lixMPfKZrquid Qw Fx'JYKjU"


In [170]:
my_model.load_weights(checkpoint_dir+"/ckpt_10")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x2709f654280>

In [168]:
model_2 = mySuperModel(vocab_size, embedding, lstm_units)

In [68]:
dummy_input, dummy_target = list(dataset.as_numpy_iterator())[0]