<a href="https://colab.research.google.com/github/UpasanaGhosh/NLP-Text-Generation-using-Deep-Learning-Models/blob/master/Character_Level_text_generation_using_LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Setting up
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np
import random
import io

In [None]:
# Preparing the data
# Download the Nietzsche corpus via Keras and read it in as one lowercase string.
path = keras.utils.get_file(
    "nietzsche.txt",
    origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt",
)
with io.open(path, encoding="utf-8") as f:
    # Newlines are replaced by spaces so generated samples print on one line.
    text = f.read().lower().replace("\n", " ")
print("Corpus length:", len(text))

# Vocabulary: every distinct character in the corpus, in sorted order, with
# lookup tables in both directions (char -> index and index -> char).
chars = sorted(set(text))
print("Total chars:", len(chars))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = dict(enumerate(chars))

# Slice the corpus into overlapping windows of `maxlen` characters, moving
# `step` characters forward each time. The character immediately following
# each window is recorded as that window's prediction target.
maxlen = 40
step = 3
sentences = [text[k : k + maxlen] for k in range(0, len(text) - maxlen, step)]
next_chars = [text[k + maxlen] for k in range(0, len(text) - maxlen, step)]
print("Number of sequences:", len(sentences))

# One-hot encode the windows and their targets:
#   x[i, t, c] is True when character t of window i has vocabulary index c
#   y[i, c]    is True when the character following window i has index c
# The builtin `bool` is used as the dtype: the `np.bool` alias was deprecated
# in NumPy 1.20 and removed in NumPy 1.24 (AttributeError), whereas `bool`
# is accepted by every NumPy release.
x = np.zeros((len(sentences), maxlen, len(chars)), dtype=bool)
y = np.zeros((len(sentences), len(chars)), dtype=bool)
for i, sentence in enumerate(sentences):
    for t, char in enumerate(sentence):
        x[i, t, char_indices[char]] = 1
    y[i, char_indices[next_chars[i]]] = 1

Downloading data from https://s3.amazonaws.com/text-datasets/nietzsche.txt
Corpus length: 600893
Total chars: 56
Number of sequences: 200285


In [None]:
# Model: a single 128-unit LSTM reading one-hot windows of shape
# (maxlen, len(chars)), followed by a softmax layer with one output per
# vocabulary character.
model = keras.Sequential()
model.add(keras.Input(shape=(maxlen, len(chars))))
model.add(layers.LSTM(128))
model.add(layers.Dense(len(chars), activation="softmax"))

# Targets are one-hot rows, hence categorical cross-entropy.
optimizer = keras.optimizers.RMSprop(learning_rate=0.01)
model.compile(optimizer=optimizer, loss="categorical_crossentropy")

In [None]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature scaling.

    The probabilities are re-weighted as ``softmax(log(preds) / temperature)``
    and a single index is drawn from the resulting distribution with
    ``np.random.multinomial``.

    Args:
        preds: 1-D array-like of probabilities (e.g. one softmax output row).
        temperature: Positive scaling factor. Values below 1 sharpen the
            distribution towards its largest entry, values above 1 flatten it.

    Returns:
        The sampled index, as returned by ``np.argmax``.

    Raises:
        ValueError: If ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive, got %r" % (temperature,))

    # float64 keeps the normalised probabilities within the tolerance that
    # np.random.multinomial enforces on their sum.
    preds = np.asarray(preds).astype("float64")

    # A zero probability legitimately maps to log(0) = -inf (and back to 0
    # after exp), so silence the "divide by zero encountered in log" warning.
    with np.errstate(divide="ignore"):
        logits = np.log(preds) / temperature

    # Shift so the largest logit is 0. This leaves the softmax unchanged but
    # prevents every exp() from underflowing to 0 at small temperatures,
    # which would otherwise turn the normalisation into 0/0 = NaN.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    preds = exp_preds / np.sum(exp_preds)

    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)

In [None]:
epochs = 40
batch_size = 128

# Train one epoch at a time so sample text can be printed after every pass.
for epoch in range(epochs):
    model.fit(x, y, batch_size=batch_size, epochs=1)
    print()
    print("Generating text after epoch: %d" % epoch)

    # One random seed window per epoch, reused for every diversity value.
    start_index = random.randint(0, len(text) - maxlen - 1)
    for diversity in [0.2, 0.5, 1.0, 1.2]:
        print("...Diversity:", diversity)

        sentence = text[start_index : start_index + maxlen]
        print('...Generating with seed: "' + sentence + '"')

        pieces = []
        for _ in range(400):
            # One-hot encode the current window as a batch of size 1.
            x_pred = np.zeros((1, maxlen, len(chars)))
            for pos, ch in enumerate(sentence):
                x_pred[0, pos, char_indices[ch]] = 1.0
            preds = model.predict(x_pred, verbose=0)[0]
            next_char = indices_char[sample(preds, diversity)]
            # Slide the window: drop the oldest character, append the new one.
            sentence = sentence[1:] + next_char
            pieces.append(next_char)
        generated = "".join(pieces)

        print("...Generated: ", generated)
        print()


Generating text after epoch: 0
...Diversity: 0.2
...Generating with seed: "ree spirits who are their heralds and fo"
...Generated:  r the such as the such as the sense and the such as the sense of the sense of the morality and as the sense of the such and the desire the such as the sense of the world and the sense of such as a present of the werpent the such as the world and the such and the still of the sense of the morality of the sense of and the such as the sense of the morality and has the sense of the present the such as

...Diversity: 0.5
...Generating with seed: "ree spirits who are their heralds and fo"
...Generated:  r its deement, the does the reason the unconsciritiom of the something of exten and the prove serfelte the fear present and interpresed are prosents of the does do state of strunged the darneds and alletther as were the man" the relief the prosed the self-one conscient but the have to his consequent and the his called in the spreasis in the strangely for the str

  after removing the cwd from sys.path.


...Generated:   is the sense of the self-desting to the self-desting to the most same state of the self-desting in the self-desting and a man of the most a man and a does not a man and a more a strength of the strength of the self-desting for the self-desire the mankind in the self-desire the self-destince of the most all the such a soul and the philosophical desire the interpreted the state of the sense of the 

...Diversity: 0.5
...Generating with seed: "s aims thousands of years ahead; so that"
...Generated:   it is all in its worthy and the command into the distinction of the following of this mankind be the master of the head of point, the makes the judged of the same will thegers and community of the miscortation of the made the innocence of the soul of such a thing in which now command and pridection and strenction and a soul of the origin the intention to the fact that it is all themselves, and of

...Diversity: 1.0
...Generating with seed: "s aims thousands of years ahead; so 