In [1]:
import tensorflow as tf

import numpy as np
import os
import time

In [2]:
tf.config.run_functions_eagerly(True)

In [3]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')


In [4]:
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')


Length of text: 1115394 characters


In [5]:
vocab = set(text)

In [6]:
ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)

In [7]:
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)


In [8]:
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
all_ids


<tf.Tensor: shape=(1115394,), dtype=int64, numpy=array([20, 53,  7, ..., 22, 29, 13], dtype=int64)>

In [9]:
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [10]:
window_size = 50 # may be a hyperparam to optimize

In [11]:
sequences = ids_dataset.batch(window_size+1, drop_remainder=True)
sequences

<BatchDataset element_spec=TensorSpec(shape=(51,), dtype=tf.int64, name=None)>

In [12]:
def create_df(sequence):
    input_seq = sequence[:-1]
    label_seq = sequence[1:]
    return input_seq, label_seq
create_df("test")

('tes', 'est')

In [13]:
dataset = sequences.map(create_df)
dataset



<MapDataset element_spec=(TensorSpec(shape=(50,), dtype=tf.int64, name=None), TensorSpec(shape=(50,), dtype=tf.int64, name=None))>

In [14]:
for input_example, target_example in dataset.take(1):
    print("Input :", input_example)
    print("Target:", target_example)


Input : tf.Tensor(
[20 53  7  2 32 59 36 53 32 53  8 55 61 17 13 65 55 30 46  7 55 59 42 55
 59  5  7 46  6 55 55 35 59 40 61 60 59 30 15  7 32 18 55  7 31 59 18 55
 40  7], shape=(50,), dtype=int64)
Target: tf.Tensor(
[53  7  2 32 59 36 53 32 53  8 55 61 17 13 65 55 30 46  7 55 59 42 55 59
  5  7 46  6 55 55 35 59 40 61 60 59 30 15  7 32 18 55  7 31 59 18 55 40
  7 59], shape=(50,), dtype=int64)


In [15]:
BATCH_SIZE = 64
BUFFER_SIZE = 1000 # tf.data shuffles the data in a buffer instead of the memory so we have to give the buffer size (it does so cause
# it could shuffle infinte size of data)

In [16]:
dataset = (dataset
        .shuffle(BUFFER_SIZE)
        .batch(BATCH_SIZE, drop_remainder=True)
        .prefetch(tf.data.experimental.AUTOTUNE)# overlaps the training and the preprocessing when executing step s the pipeline prepares data for step s+1
          )
dataset

<PrefetchDataset element_spec=(TensorSpec(shape=(64, 50), dtype=tf.int64, name=None), TensorSpec(shape=(64, 50), dtype=tf.int64, name=None))>

In [17]:
class mySuperModel(tf.keras.Model):
    def __init__(self, vocab_size, dim_embedding, lstm_units):
        super().__init__(self)
        
        self.embedding = tf.keras.layers.Embedding(vocab_size, dim_embedding)
        self.lstm = tf.keras.layers.LSTM(lstm_units,
                                        return_sequences=True,
                                        return_state=True)
        self.dense = tf.keras.layers.Dense(vocab_size)
    
    def call(self, inputs, states=None, return_state=False, training=False):
        x = inputs
        x = self.embedding(x,training=training)
        
        if states is None:
            states = self.lstm.get_initial_state(x)
        x, states, seq = self.lstm(x,initial_state=states, training=training)
        x = self.dense(x, training=training)
        
        if  return_state:
            return x, states
        else:
            return x
        

In [18]:
vocab_size = len(ids_from_chars.get_vocabulary())

embedding = 256

lstm_units = 1024

In [19]:
my_model = mySuperModel(vocab_size, embedding, lstm_units)

In [20]:
for input_seq, target in dataset.take(1):
    prediction = my_model(input_seq)

In [21]:
my_model.summary()

Model: "my_super_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  16896     
                                                                 
 lstm (LSTM)                 multiple                  5246976   
                                                                 
 dense (Dense)               multiple                  67650     
                                                                 
Total params: 5,331,522
Trainable params: 5,331,522
Non-trainable params: 0
_________________________________________________________________


In [22]:
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
# why from_logits=True: https://datascience.stackexchange.com/
#questions/73093/what-does-from-logits-true-do-in-sparsecategoricalcrossentropy-loss-function

#https://stats.stackexchange.com/questions/326065/cross-entropy-vs-sparse-cross-entropy-when-to-use-one-over-the-other



In [23]:
example_batch_mean_loss = loss(target, prediction)
print("Prediction shape: ", prediction.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)


Prediction shape:  (64, 50, 66)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.189948, shape=(), dtype=float32)


In [24]:
def text_from_ids(ids):
    return tf.strings.reduce_join(chars_from_ids(ids), axis=-1)


In [25]:
my_model.compile(optimizer='adam', loss=loss)

In [26]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True,
    verbose=1)


In [27]:
EPOCHS = 20

In [28]:
history = my_model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 1: saving model to ./training_checkpoints\ckpt_1
Epoch 2/20
Epoch 2: saving model to ./training_checkpoints\ckpt_2
Epoch 3/20
Epoch 3: saving model to ./training_checkpoints\ckpt_3
Epoch 4/20
Epoch 4: saving model to ./training_checkpoints\ckpt_4
Epoch 5/20
Epoch 5: saving model to ./training_checkpoints\ckpt_5
Epoch 6/20
Epoch 6: saving model to ./training_checkpoints\ckpt_6
Epoch 7/20
Epoch 7: saving model to ./training_checkpoints\ckpt_7
Epoch 8/20
Epoch 8: saving model to ./training_checkpoints\ckpt_8
Epoch 9/20
Epoch 9: saving model to ./training_checkpoints\ckpt_9
Epoch 10/20
Epoch 10: saving model to ./training_checkpoints\ckpt_10
Epoch 11/20
Epoch 11: saving model to ./training_checkpoints\ckpt_11
Epoch 12/20
Epoch 12: saving model to ./training_checkpoints\ckpt_12
Epoch 13/20
Epoch 13: saving model to ./training_checkpoints\ckpt_13
Epoch 14/20
Epoch 14: saving model to ./training_checkpoints\ckpt_14
Epoch 15/20
Epoch 15: saving model to ./training_checkpoints\

In [30]:
my_model.save("text_generator_v1")



INFO:tensorflow:Assets written to: text_generator_v1\assets


INFO:tensorflow:Assets written to: text_generator_v1\assets


In [28]:
my_model = mySuperModel(vocab_size, embedding, lstm_units)
my_model = tf.keras.models.load_model("text_generator_v1")


In [29]:
skip_ids = ids_from_chars(['[UNK]'])[:, None]
skip_ids

<tf.Tensor: shape=(1, 1), dtype=int64, numpy=array([[0]], dtype=int64)>

In [30]:
class OneStep(tf.keras.Model):
    def __init__(self, model, chars_from_ids, ids_from_char):
        super().__init__()
        
        self.model = model
        self.chars_from_ids = chars_from_ids
        self.ids_from_chars = ids_from_char
        
        skip_ids = self.ids_from_chars(['[UNK]'])[:, None]
        sparse_mask = tf.SparseTensor(
            # Put a -inf at each bad index.
            values=[-float('inf')]*len(skip_ids),
            indices=skip_ids,
            # Match the shape to the vocabulary
            dense_shape=[len(ids_from_chars.get_vocabulary())])
        self.prediction_mask = tf.sparse.to_dense(sparse_mask)
    @tf.function
    def generate_the_next_step(self, inputs, states=None, run_eagerly=True):
        
        inputs_ids = self.ids_from_chars(tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()
        print(inputs_ids.numpy())
        predicted_logits, states = self.model(inputs_ids, states, return_state=True)
        print(predicted_logits.shape)
        predicted_logits = predicted_logits + self.prediction_mask

        #https://stackoverflow.com/questions/55063120/
        #can-anyone-give-a-tiny-example-to-explain-the-params-of-tf-random-categorical
        predicted_ids = tf.random.categorical(predicted_logits, num_samples=1)
        predicted_ids = tf.squeeze(predicted_ids, axis=-1)
        
        predicted_chars = self.chars_from_ids(predicted_ids)
        
        return predicted_chars, states

        

In [31]:
one_step_model = OneStep(my_model, chars_from_ids, ids_from_chars)

In [34]:
start = time.time()
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

for n in range(1000):
    print(next_char)
    next_char, states = one_step_model.generate_the_next_step(next_char, states=states)
    result.append(next_char)

result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)


tf.Tensor([b'ROMEO:'], shape=(1,), dtype=string)
[[39 48 12  9 48 17]]


ValueError: Exception encountered when calling layer "my_super_model" "                 f"(type mySuperModel).

Could not find matching concrete function to call loaded from the SavedModel. Got:
  Positional arguments (4 total):
    * <tf.Tensor 'inputs:0' shape=(1, 6) dtype=int64>
    * None
    * True
    * False
  Keyword arguments: {}

 Expected these arguments to match one of the following 4 option(s):

Option 1:
  Positional arguments (4 total):
    * TensorSpec(shape=(None, 50), dtype=tf.int64, name='inputs')
    * None
    * False
    * False
  Keyword arguments: {}

Option 2:
  Positional arguments (4 total):
    * TensorSpec(shape=(None, 50), dtype=tf.int64, name='inputs')
    * None
    * False
    * True
  Keyword arguments: {}

Option 3:
  Positional arguments (4 total):
    * TensorSpec(shape=(None, 50), dtype=tf.int64, name='input_1')
    * None
    * False
    * False
  Keyword arguments: {}

Option 4:
  Positional arguments (4 total):
    * TensorSpec(shape=(None, 50), dtype=tf.int64, name='input_1')
    * None
    * False
    * True
  Keyword arguments: {}

Call arguments received by layer "my_super_model" "                 f"(type mySuperModel):
  • args=('tf.Tensor(shape=(1, 6), dtype=int64)', 'None')
  • kwargs={'return_state': 'True', 'training': 'False'}

In [113]:
#generer le tensor de la couche embedding pour voir l'entree de notre reseau

In [None]:
! pip install tf-nightly 2.1.0.dev20191230
