# **RNN for Baudelaire poem imitation**

In [1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)


%pip install -q -U tensorflow-addons
%pip install -q -U transformers

# Scikit-Learn ≥0.20 is required
import sklearn
assert sklearn.__version__ >= "0.20"

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "2.0"

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)
tf.random.set_seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m611.8/611.8 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
np.random.seed(42)
tf.random.set_seed(42)

# Toy illustration of the windowing pipeline on the integers 0..14:
# windows of n_steps values, shifted by 2, each split into an input
# (all but the last value) and a target (all but the first value).
n_steps = 5
dataset = (
    tf.data.Dataset.from_tensor_slices(tf.range(15))
    .window(n_steps, shift=2, drop_remainder=True)
    .flat_map(lambda win: win.batch(n_steps))
    .shuffle(10)
    .map(lambda seq: (seq[:-1], seq[1:]))
    .batch(3)
    .prefetch(1)
)
for index, (X_batch, Y_batch) in enumerate(dataset):
    print("_" * 20, "Batch", index, "\nX_batch")
    print(X_batch.numpy())
    print("=" * 5, "\nY_batch")
    print(Y_batch.numpy())

____________________ Batch 0 
X_batch
[[6 7 8 9]
 [2 3 4 5]
 [4 5 6 7]]
===== 
Y_batch
[[ 7  8  9 10]
 [ 3  4  5  6]
 [ 5  6  7  8]]
____________________ Batch 1 
X_batch
[[ 0  1  2  3]
 [ 8  9 10 11]
 [10 11 12 13]]
===== 
Y_batch
[[ 1  2  3  4]
 [ 9 10 11 12]
 [11 12 13 14]]


**Data import**

In [None]:
# Read the whole poetry collection into memory as a single string.
with open('Les_fleurs_du_mal_Baudelaire.txt', 'r', encoding='utf-8') as poem_file:
    Baudelaire_text = poem_file.read()

In [None]:
# Peek at the first 148 characters to check that the file was read correctly.
print(Baudelaire_text[:148])

La sottise, l’erreur, le péché, la lésine,
Occupent nos esprits et travaillent nos corps,
Et nous alimentons nos aimables remords,
Comme les mendian


In [None]:
"".join(sorted(set(Baudelaire_text.lower())))

'\n !(),-.:;?abcdefghijklmnopqrstuvwxyz«»àâæçèéêëîïôùûüœ—’…'

**We encode each character**

In [None]:
# char_level=True: every character (not every word) is treated as a token.
tokenizer = keras.preprocessing.text.Tokenizer(char_level=True)
# Build the character -> id vocabulary from the corpus.
tokenizer.fit_on_texts(Baudelaire_text)

In [None]:
# Encode a sample string into character ids (decoded back in the next cell).
test = tokenizer.texts_to_sequences(["Abdelrhman El Masry"])
test

[[4, 22, 12, 2, 10, 5, 24, 13, 4, 6, 1, 2, 10, 1, 13, 4, 3, 5, 33]]

In [None]:
# Decode the ids back to text; the recorded output is lower-cased, with the
# characters separated by spaces.
tokenizer.sequences_to_texts(test)

['a b d e l r h m a n   e l   m a s r y']

In [None]:
# Size of the character vocabulary built by the tokenizer.
max_id = len(tokenizer.word_index) # number of distinct characters
# document_count is the number of items fit_on_texts processed; it was given
# the raw string, so presumably each character counted as one item — verify.
dataset_size = tokenizer.document_count # total number of characters

We will now try to predict the character that follows each sequence of 100 characters.

In [None]:
# Encode the whole corpus as character ids. The tokenizer's ids start at 1,
# so subtract 1 to obtain ids in the range [0, max_id).
encoded = np.array(tokenizer.texts_to_sequences([Baudelaire_text]))[0] - 1
# The first 90% of the characters are used for training, the rest for testing.
train_size = (dataset_size * 90) // 100
dataset = tf.data.Dataset.from_tensor_slices(encoded[:train_size])
test_data = tf.data.Dataset.from_tensor_slices(encoded[train_size:])

In [None]:
# Number of characters in each input sequence.
n_steps = 100
window_length = n_steps + 1 # target = input shifted 1 character ahead
# Slide a window of window_length characters over the text one character at a
# time; drop_remainder=True discards the shorter windows at the end.
dataset = dataset.window(window_length, shift=1, drop_remainder=True)
test_data = test_data.window(window_length, shift=1, drop_remainder=True)

In [None]:
# window() yields nested datasets; batching each one by its full length turns
# it into a single tensor of window_length character ids.
dataset = dataset.flat_map(lambda chars: chars.batch(window_length))
test_data = test_data.flat_map(lambda chars: chars.batch(window_length))

In [None]:
# Re-seed NumPy and TensorFlow — presumably so that the shuffling set up in
# the following cells is reproducible.
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
batch_size = 32
# Shuffle the windows (buffer of 100000) and group them into batches.
dataset = dataset.shuffle(100000).batch(batch_size)
test_data = test_data.shuffle(100000).batch(batch_size)
# Split every batch of windows into inputs (all but the last character) and
# targets (all but the first character, i.e. the inputs shifted by one).
dataset = dataset.map(lambda batch: (batch[:, :-1], batch[:, 1:]))
test_data = test_data.map(lambda batch: (batch[:, :-1], batch[:, 1:]))

In [None]:
# One-hot encode the input character ids; the targets stay as integer ids.
dataset = dataset.map(
    lambda inputs, targets: (tf.one_hot(inputs, depth=max_id), targets))

test_data = test_data.map(
    lambda inputs, targets: (tf.one_hot(inputs, depth=max_id), targets))

In [None]:
# prefetch(1): let tf.data prepare one batch ahead while the current one is
# being consumed.
dataset = dataset.prefetch(1)
test_data = test_data.prefetch(1)

In [None]:
# Sanity check: print the shapes of one batch of inputs and targets.
for X_batch, Y_batch in dataset.take(1):
    print(X_batch.shape, Y_batch.shape)

(32, 100, 57) (32, 100)


# **Model training**

In [None]:
# Character-level language model: two stacked GRU layers followed by a
# per-time-step softmax over the max_id possible characters.
# Only input dropout is applied; recurrent_dropout is left at its default.
model = keras.models.Sequential()
model.add(keras.layers.GRU(128, return_sequences=True, dropout=0.2,
                           input_shape=[None, max_id]))
model.add(keras.layers.GRU(128, return_sequences=True, dropout=0.2))
model.add(keras.layers.TimeDistributed(
    keras.layers.Dense(max_id, activation="softmax")))
# Targets are integer character ids, hence the sparse categorical loss.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam",
              metrics=["accuracy"])
history = model.fit(dataset, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# **Test of the model**

In [None]:
# Evaluate on the held-out windows; evaluate() returns the loss followed by
# the "accuracy" metric requested in compile().
loss, accuracy = model.evaluate(test_data)



In [None]:
def preprocess(texts):
    """Convert a list of strings into one-hot encoded character tensors.

    Each string is mapped to character ids with the notebook's ``tokenizer``,
    the ids are shifted down by one, and the result is one-hot encoded with
    depth ``max_id``.

    NOTE(review): np.array needs all texts to yield the same number of ids to
    form a rectangular array — the callers in this notebook pass one text.
    """
    char_ids = tokenizer.texts_to_sequences(texts)
    zero_based_ids = np.array(char_ids) - 1
    return tf.one_hot(zero_based_ids, max_id)

In [None]:

X_new = preprocess(["fleu"])
# Sequential.predict_classes was removed from Keras in TensorFlow 2.6, so take
# the argmax over the class axis of the predicted probabilities instead.
Y_pred = np.argmax(model(X_new), axis=-1)
# preprocess() shifted the ids down by 1; shift them back before decoding.
tokenizer.sequences_to_texts(Y_pred + 1)[0][-1] # 1st sentence, last char

'r'

In [None]:
tf.random.set_seed(42)

# Demo of tf.random.categorical: it takes unnormalised log-probabilities
# (hence np.log) and draws class indices — here 40 draws over the classes
# 0, 1 and 2 with probabilities 0.5, 0.4 and 0.1.
# numpy is already imported as np in the notebook's setup cell.
tf.random.categorical([[np.log(0.5), np.log(0.4), np.log(0.1)]], num_samples=40).numpy()

array([[0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
        2, 0, 0, 1, 1, 1, 0, 0, 1, 2, 0, 0, 1, 1, 0, 0, 0, 0]])

# **Poem creation using our model**

In [None]:
def next_char(text, temperature=1):
    """Sample the character that follows ``text`` according to the model.

    Args:
        text: Seed string. NOTE(review): presumably it must contain at least
            one character known to the tokenizer — verify.
        temperature: Strictly positive number that divides the predicted
            log-probabilities before sampling. Values below 1 sharpen the
            distribution (more conservative picks); values above 1 flatten it.

    Returns:
        The sampled character, as decoded by ``tokenizer``.

    Raises:
        ValueError: If ``temperature`` is not strictly positive. Zero would
            divide the log-probabilities by zero and a negative value would
            invert the distribution.
    """
    # `not (t > 0)` also rejects NaN, which `t <= 0` would let through.
    if not temperature > 0:
        raise ValueError(
            "temperature must be strictly positive, got {!r}".format(temperature))
    X_new = preprocess([text])
    # Keep only the last time step; the -1: slice preserves that axis so the
    # result is 2-D, as tf.random.categorical expects.
    y_proba = model(X_new)[0, -1:, :]
    rescaled_logits = tf.math.log(y_proba) / temperature
    # +1 undoes the id shift applied in preprocess() before decoding.
    char_id = tf.random.categorical(rescaled_logits, num_samples=1) + 1
    return tokenizer.sequences_to_texts(char_id.numpy())[0]

In [None]:
tf.random.set_seed(42)

# Sample a single character after "étoil" at a low temperature.
next_char("étoil", temperature=0.2)

'e'

In [None]:

def complete_text(text, n_chars=400, temperature=1):
    """Extend ``text`` with ``n_chars`` characters sampled from the model.

    Each new character is drawn by ``next_char`` from everything generated so
    far (seed included), using the given ``temperature``. Returns the seed
    followed by the generated characters.
    """
    generated = text
    for _step in range(n_chars):
        generated = generated + next_char(generated, temperature)
    return generated

In [None]:
tf.random.set_seed(42)

# Generate 400 characters (the default) after the seed at temperature 0.2.
print(complete_text("Les fleurs", temperature=0.2))

Les fleurs et des soleils marches,
des chats pleins de larmes,
et les parfums de la paresse et de la main de l’antique fleurir de l’antique souvent,
et des cœurs mortels et de sourir le cour de la carieuse,
et les cours et des fleurs et des fleurs et des fleurs,
et les charmes sont pleins de la vie et de la vie et de la chair ;
le soleil de la vie et le courage et la bouche infini de la carieuse,
au fond d’


In [None]:
# Same seed text at temperature 0.5. The TensorFlow seed is not reset in this
# cell, so the sample depends on the random state left by earlier cells.
print(complete_text("Les fleurs", temperature=0.5))

Les fleurs en la forme de ton âme se mire ;
et de voir moi, même pour l’adire,
la fond de la volupté de la douleur au siaistre,
je veux par l’air et les pieds d’amour de l’azur et de la race de ces vines
d’un grand passion et l’air de mon cœur sont les ailes riches ;
ainsi qu’une foute du chat on ne secret.
c’est un cammenu par le navire
dans les palmes,
et par le flot de la taille parfumé d’un rayon sonfer


In [None]:
# Same seed text at temperature 0.7. The TensorFlow seed is not reset in this
# cell, so the sample depends on the random state left by earlier cells.
print(complete_text("Les fleurs", temperature=0.7))

Les fleurs, ces bras soir, ton cour sombre et l’azur, marguerait l’œil clair immonde.
je veux te regard minci notre hater
et divent qu’en ma vent
dont l’odeur de la couleur
de dans le ciel, où la pason soit des rêves
de penserr tu de l’hémal
de sa chair léconde où la satité de pitié !
je veux par ses pitiés froisses d’ordre,
et les désarts de noir le rémons)
ainsi pourtur et la dévoté de conterses,
ta tête 
