<img src="../Pics/MLSb-T.png" width="160">
<br><br>
<center><u><H1>Text Generation</H1></u></center>

In [None]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Build a TF1-style session configuration and hand the resulting session to
# Keras so that every Keras op in this notebook runs inside it.
config = tf.ConfigProto()
# Enable the allow_growth GPU option (per the TensorFlow docs, GPU memory is then
# allocated incrementally instead of being reserved up front).
config.gpu_options.allow_growth = True
# Log the device each op is placed on. NOTE(review): this is verbose; consider
# turning it off once GPU placement has been confirmed.
config.log_device_placement = True
sess = tf.Session(config=config)
set_session(sess)

In [None]:
import os
from random import randint

import numpy as np
from keras.models import Sequential, load_model
from keras.layers import Dense, Embedding, CuDNNLSTM, Dropout
from keras.utils import to_categorical

In [None]:
# Read the whole corpus into memory. The context manager closes the file handle
# even if read() raises, which the manual open()/close() pair did not guarantee.
# NOTE(review): no encoding is passed, so the platform default is used; confirm
# the file's encoding and pass it explicitly if it is known.
with open('../data/Ancient_Modern_Physics.txt', 'r') as file:
    text = file.read()

# Preview the first 1000 characters as the cell output.
text[:1000]

## Cleaning the text

In [None]:
# Lower-case the whole corpus. `tokens` is still one string; later cells iterate
# over it character by character, so each character is a token.
tokens = text.lower()
# Show the first 500 characters of the lower-cased text.
print(tokens[:500])

In [None]:
# Corpus length in characters, and the number of distinct characters in it.
n_chars, unique_vocab = len(tokens), len(set(tokens))

# Report both figures, one per line.
print('Total Tokens: %d' % n_chars, 'Unique Tokens: %d' % unique_vocab, sep='\n')

In [None]:
# Sorted list of the distinct characters; sorting makes the character -> id
# assignment built from this list deterministic across runs.
characters = sorted(set(tokens))
n_vocab = len(characters)

# Display the vocabulary size as the cell output.
n_vocab

In [None]:
# id -> character: position of each character in the sorted vocabulary list.
int_to_char = dict(enumerate(characters))
# character -> id: the exact inverse of the table above (characters are unique,
# so no entry is overwritten).
char_to_int = {char: index for index, char in int_to_char.items()}

## Creating datasets:

In [None]:
# Number of characters of context in every training sample.
seq_length = 100

# Encode the corpus to integer ids ONCE. The sliding windows below overlap by
# seq_length - 1 characters, so encoding inside the window loop would repeat
# every dictionary lookup seq_length times.
encoded = [char_to_int[char] for char in tokens]

# One sample per start offset: X[i] holds the ids of characters i .. i+seq_length-1
# and y[i] is the id of the character that immediately follows that window.
# Each X[i] is an independent Python list (slicing copies).
# An empty range (corpus no longer than seq_length) yields empty X and y.
n_windows = n_chars - seq_length
X = [encoded[i:i + seq_length] for i in range(n_windows)]
y = [encoded[i + seq_length] for i in range(n_windows)]

In [None]:
# First input window: a list of seq_length integer character ids.
print(X[0])

In [None]:
# Target for the first window: the id of the character that follows it.
print(y[0])

In [None]:
# Shape the integer windows as (samples, time steps, features); every time step
# carries a single feature, the character id.
X_new = np.reshape(X, (len(X), seq_length, 1))
# Ids run from 0 to n_vocab - 1, so dividing by n_vocab scales them into [0, 1).
X_new = X_new / float(n_vocab)

# One-hot encode the targets with an explicit width. Without num_classes the
# width is inferred as max(y) + 1, which is smaller than the vocabulary whenever
# the highest-id character never occurs as a target (for example, if it appears
# only within the first seq_length characters of the corpus).
y_new = to_categorical(y, num_classes=n_vocab)

In [None]:
# Sanity-check the tensor shapes before building the model:
# X_new was reshaped to (samples, seq_length, 1); y_new has one one-hot row per sample.
print("X_new shape:", X_new.shape)
print("y_new shape:", y_new.shape)

In [None]:
# Display the one-hot vector of the first target character.
y_new[0]

## Creating the model:

In [None]:
# Three stacked 700-unit CuDNN LSTM layers, each followed by 20% dropout, ending
# in a softmax layer with one output per one-hot target column.
# The first two LSTMs return the full sequence so that the next LSTM receives
# one vector per time step; the last LSTM returns only its final output.
model = Sequential([
    CuDNNLSTM(700, input_shape=X_new.shape[1:], return_sequences=True),
    Dropout(0.2),
    CuDNNLSTM(700, return_sequences=True),
    Dropout(0.2),
    CuDNNLSTM(700),
    Dropout(0.2),
    Dense(y_new.shape[1], activation='softmax'),
])

In [None]:
# Print the layer-by-layer summary of the network defined above.
model.summary()

In [None]:
# Categorical cross-entropy pairs with the one-hot targets and the softmax output
# layer; Adam is used with its default settings.
model.compile(loss='categorical_crossentropy', optimizer='adam')

## Training the model:

In [None]:
%%time
# Train for 5 passes over all windows in mini-batches of 64. The %%time cell magic
# must stay on the first line of the cell; it reports how long training took.
model.fit(X_new, y_new, batch_size=64, epochs=5)

## Save the model:

In [None]:
# Make sure the target directory exists before saving: otherwise the save fails
# and the (expensive) training run above is lost with the kernel.
os.makedirs(os.path.dirname('../data/text_generation/text_generation_model.h5'), exist_ok=True)
# Persist the trained model to a single HDF5 file.
model.save('../data/text_generation/text_generation_model.h5')

## Load Model:

In [None]:
# Reload the model from the file written by the save cell. The generation loop
# below predicts with `model_`, not with the in-memory `model`.
model_ = load_model('../data/text_generation/text_generation_model.h5')

## Testing a random sample:

In [None]:
# np.random.randint samples from the half-open interval [low, high), so the upper
# bound must be len(X) for the last window to be eligible as a seed.
ini = np.random.randint(0, len(X))
# Copy the chosen window. The generation loop appends to `token_string`, and
# without a copy that append would modify the dataset row X[ini] in place.
token_string = list(X[ini])

In [None]:
# Decode the seed window back into characters. The result stays a list because
# the generation loop appends each newly predicted character to it.
complete_string = [int_to_char[token_id] for token_id in token_string]

# Show the seed between double quotes (print's default separator puts a space
# between each quote mark and the text).
print("\"", "".join(complete_string), "\"")

In [None]:
# Generate 500 characters one at a time: predict the next character from the
# current window, record it, then slide the window forward by one position.
for i in range(500):
    # Same preprocessing as the training data: (1, time steps, 1), scaled by n_vocab.
    x = np.reshape(token_string, (1, len(token_string), 1))
    x = x / float(n_vocab)

    prediction = model_.predict(x, verbose=0)

    # Greedy decoding: take the most probable class; int() keeps the window a
    # list of plain Python ints.
    id_pred = int(np.argmax(prediction))

    complete_string.append(int_to_char[id_pred])

    # Slide the window by building a NEW list (drop the oldest id, add the
    # prediction). Appending in place would also modify whatever list
    # `token_string` was taken from, i.e. a row of the dataset X.
    token_string = token_string[1:] + [id_pred]

## Showing the generated text:

In [None]:
# Join the seed and the generated characters in one pass instead of concatenating
# inside a loop. The result gets its own name so that `text` keeps holding the
# raw corpus and the earlier cells can be re-run safely.
generated_text = ''.join(complete_string)
print(generated_text)