### Importing The Required Libraries:

In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
import pickle
import numpy as np
import os

In [3]:
#  Dataset: http://www.gutenberg.org/cache/epub/5200/pg5200.txt

# Read the whole text file into a list of lines. The context manager closes
# the file handle as soon as reading is done, even if an error occurs.
with open("metamorphosis_clean.txt", "r", encoding="utf8") as file:
    lines = file.readlines()

print("The First Line: ", lines[0])
print("The Last Line: ", lines[-1])

The First Line:  ﻿One morning, when Gregor Samsa woke from troubled dreams, he found

The Last Line:  first to get up and stretch out her young body.


### Cleaning the data:

In [4]:
# Concatenate all lines into one string, separated by single spaces.
# A single join is enough; repeating it once per line gives the same result.
data = ' '.join(lines)

# Drop line breaks, carriage returns and the byte-order mark left at the start of the file.
data = data.replace('\n', '').replace('\r', '').replace('\ufeff', '')
data[:360]

'One morning, when Gregor Samsa woke from troubled dreams, he found himself transformed in his bed into a horrible vermin.  He lay on his armour-like back, and if he lifted his head a little he could see his brown belly, slightly domed and divided by arches into stiff sections.  The bedding was hardly able to cover it and seemed ready to slide off any moment.'

In [5]:
import string

# Translation table that turns every punctuation character into a single space.
translator = str.maketrans({symbol: ' ' for symbol in string.punctuation})

# NOTE(review): the later cells visible here keep working on `data`, not `new_data`.
new_data = data.translate(translator)

new_data[:500]

'One morning  when Gregor Samsa woke from troubled dreams  he found himself transformed in his bed into a horrible vermin   He lay on his armour like back  and if he lifted his head a little he could see his brown belly  slightly domed and divided by arches into stiff sections   The bedding was hardly able to cover it and seemed ready to slide off any moment   His many legs  pitifully thin compared with the size of the rest of him  waved about helplessly as he looked    What s happened to me   he'

In [6]:
# Keep only the first occurrence of every whitespace-separated token, in order.
# dict.fromkeys preserves insertion order and does the membership check in O(1),
# instead of scanning a growing list for every token.
# NOTE(review): after this step neighbouring tokens are no longer always neighbours
# in the original text — confirm that this is intended for next-word training.
z = list(dict.fromkeys(data.split()))

data = ' '.join(z)
data[:500]

'One morning, when Gregor Samsa woke from troubled dreams, he found himself transformed in his bed into a horrible vermin. He lay on armour-like back, and if lifted head little could see brown belly, slightly domed divided by arches stiff sections. The bedding was hardly able to cover it seemed ready slide off any moment. His many legs, pitifully thin compared with the size of rest him, waved about helplessly as looked. "What\'s happened me?" thought. It wasn\'t dream. room, proper human room altho'

### Tokenization:

In [7]:
# Build the word -> integer index from the cleaned text.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])

# saving the tokenizer for predict function.
# The context manager makes sure the pickle file is flushed and closed.
with open('tokenizer1.pkl', 'wb') as tokenizer_file:
    pickle.dump(tokenizer, tokenizer_file)

# Encode the text as a flat list of integer token ids.
sequence_data = tokenizer.texts_to_sequences([data])[0]
sequence_data[:10]

[17, 53, 293, 2, 18, 729, 135, 730, 294, 8]

In [8]:
# One more than the number of indexed words; this value sizes the embedding
# input and the softmax output of the model.
# NOTE(review): the extra slot presumably accounts for index 0 being unused by the tokenizer — confirm.
vocab_size = 1 + len(tokenizer.word_index)
print(vocab_size)

2617


In [9]:
# Slide a two-token window over the encoded text: every row is
# [current token, following token].
sequences = [
    sequence_data[start:start + 2]
    for start in range(len(sequence_data) - 1)
]

print("The Length of sequences are: ", len(sequences))
sequences = np.array(sequences)
sequences[:10]

The Length of sequences are:  3889


array([[ 17,  53],
       [ 53, 293],
       [293,   2],
       [  2,  18],
       [ 18, 729],
       [729, 135],
       [135, 730],
       [730, 294],
       [294,   8],
       [  8, 731]])

In [10]:
# Split every pair into the model input (first token) and the target (second token).
X = np.array([pair[0] for pair in sequences])
y = np.array([pair[1] for pair in sequences])

In [11]:
# Show the first five inputs next to their targets: each target is the token
# that follows the corresponding input.
first_inputs = X[:5]
first_targets = y[:5]
print("The Data is: ", first_inputs)
print("The responses are: ", first_targets)

The Data is:  [ 17  53 293   2  18]
The responses are:  [ 53 293   2  18 729]


In [12]:
# One-hot encode the target token ids so they match the softmax output of the model.
# The cell overwrites `y`, so guard on the rank: re-running it must not try to
# one-hot encode the already encoded 2-D matrix again.
if y.ndim == 1:
    y = to_categorical(y, num_classes=vocab_size)
y[:5]

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

### Creating the Model:

In [13]:
# Next-word model: one token id in, a probability for every vocabulary entry out.
model = Sequential([
    # Map the single input token id to a 10-dimensional vector.
    Embedding(vocab_size, 10, input_length=1),
    # Two stacked LSTM layers; the first returns its full sequence so the second can consume it.
    LSTM(1000, return_sequences=True),
    LSTM(1000),
    Dense(1000, activation="relu"),
    # One output unit per vocabulary index.
    Dense(vocab_size, activation="softmax"),
])

In [14]:
# Print the layer-by-layer output shapes and parameter counts of the model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (None, 1, 10)             26170     
_________________________________________________________________
lstm (LSTM)                  (None, 1, 1000)           4044000   
_________________________________________________________________
lstm_1 (LSTM)                (None, 1000)              8004000   
_________________________________________________________________
dense (Dense)                (None, 1000)              1001000   
_________________________________________________________________
dense_1 (Dense)              (None, 2617)              2619617   
Total params: 15,694,787
Trainable params: 15,694,787
Non-trainable params: 0
_________________________________________________________________


### Plot The Model:

In [26]:
from tensorflow import keras
from tensorflow.keras.utils import plot_model

# Render the layer graph to model.png (the captured output shows this needs pydot and graphviz).
plot_model(model, to_file='model.png', show_layer_names=True)

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


### Callbacks:

In [19]:
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.callbacks import TensorBoard

# Write the model to nextword1.h5 only when the training loss improves on its best value so far.
checkpoint = ModelCheckpoint(
    filepath="nextword1.h5",
    monitor='loss',
    verbose=1,
    save_best_only=True,
    mode='auto',
)

### Compile The Model:

In [21]:
# Categorical cross-entropy matches the one-hot targets and the softmax output layer.
# `learning_rate` is the supported argument name; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
model.compile(loss="categorical_crossentropy", optimizer=Adam(learning_rate=0.001), metrics=['accuracy'])

### Fit The Model:

In [22]:
# Train for 150 epochs in batches of 64; the checkpoint callback saves the model
# each time the monitored training loss improves.
model.fit(X, y, epochs=150, batch_size=64, callbacks=[checkpoint])

Epoch 1/150

Epoch 00001: loss improved from inf to 7.87547, saving model to nextword1.h5
Epoch 2/150

Epoch 00002: loss improved from 7.87547 to 7.86013, saving model to nextword1.h5
Epoch 3/150

Epoch 00003: loss improved from 7.86013 to 7.80642, saving model to nextword1.h5
Epoch 4/150

Epoch 00004: loss improved from 7.80642 to 7.61363, saving model to nextword1.h5
Epoch 5/150

Epoch 00005: loss improved from 7.61363 to 7.41077, saving model to nextword1.h5
Epoch 6/150

Epoch 00006: loss improved from 7.41077 to 7.24344, saving model to nextword1.h5
Epoch 7/150

Epoch 00007: loss improved from 7.24344 to 7.12998, saving model to nextword1.h5
Epoch 8/150

Epoch 00008: loss improved from 7.12998 to 7.03157, saving model to nextword1.h5
Epoch 9/150

Epoch 00009: loss improved from 7.03157 to 6.89656, saving model to nextword1.h5
Epoch 10/150

Epoch 00010: loss improved from 6.89656 to 6.70195, saving model to nextword1.h5
Epoch 11/150

Epoch 00011: loss improved from 6.70195 to 6.4603

Epoch 46/150

Epoch 00046: loss improved from 3.53920 to 3.45961, saving model to nextword1.h5
Epoch 47/150

Epoch 00047: loss improved from 3.45961 to 3.39924, saving model to nextword1.h5
Epoch 48/150

Epoch 00048: loss improved from 3.39924 to 3.33659, saving model to nextword1.h5
Epoch 49/150

Epoch 00049: loss improved from 3.33659 to 3.25609, saving model to nextword1.h5
Epoch 50/150

Epoch 00050: loss improved from 3.25609 to 3.20324, saving model to nextword1.h5
Epoch 51/150

Epoch 00051: loss improved from 3.20324 to 3.11143, saving model to nextword1.h5
Epoch 52/150

Epoch 00052: loss improved from 3.11143 to 3.04128, saving model to nextword1.h5
Epoch 53/150

Epoch 00053: loss improved from 3.04128 to 2.95271, saving model to nextword1.h5
Epoch 54/150

Epoch 00054: loss improved from 2.95271 to 2.88410, saving model to nextword1.h5
Epoch 55/150

Epoch 00055: loss improved from 2.88410 to 2.82079, saving model to nextword1.h5
Epoch 56/150

Epoch 00056: loss improved from 2.82

Epoch 91/150

Epoch 00091: loss improved from 1.46567 to 1.43937, saving model to nextword1.h5
Epoch 92/150

Epoch 00092: loss improved from 1.43937 to 1.42806, saving model to nextword1.h5
Epoch 93/150

Epoch 00093: loss improved from 1.42806 to 1.40338, saving model to nextword1.h5
Epoch 94/150

Epoch 00094: loss improved from 1.40338 to 1.37579, saving model to nextword1.h5
Epoch 95/150

Epoch 00095: loss improved from 1.37579 to 1.36530, saving model to nextword1.h5
Epoch 96/150

Epoch 00096: loss improved from 1.36530 to 1.35108, saving model to nextword1.h5
Epoch 97/150

Epoch 00097: loss improved from 1.35108 to 1.34122, saving model to nextword1.h5
Epoch 98/150

Epoch 00098: loss improved from 1.34122 to 1.30447, saving model to nextword1.h5
Epoch 99/150

Epoch 00099: loss improved from 1.30447 to 1.27918, saving model to nextword1.h5
Epoch 100/150

Epoch 00100: loss improved from 1.27918 to 1.22632, saving model to nextword1.h5
Epoch 101/150

Epoch 00101: loss did not improve 

<tensorflow.python.keras.callbacks.History at 0x1d136e14d00>

### Prediction


In [24]:
# Importing the Libraries

from tensorflow.keras.models import load_model
import numpy as np
import pickle

# Load the model and tokenizer

model = load_model('nextword1.h5')

# NOTE: unpickling can execute arbitrary code, so only load a tokenizer file that
# this notebook (or another trusted source) produced.
# The context manager closes the file handle once the tokenizer is loaded.
with open('tokenizer1.pkl', 'rb') as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

def Predict_Next_Words(model, tokenizer, text):
    """Predict the word most likely to follow the last known word of ``text``.

    Args:
        model: Trained model exposing ``predict``. It is called with an integer
            array of shape ``(1, 1)`` holding one token id and is expected to
            return one row of per-class scores.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``word_index``.
        text: Input text. Only its last in-vocabulary token is fed to the model.

    Returns:
        The predicted word, or an empty string when ``text`` contains no word
        known to the tokenizer or the predicted class id maps to no word.
    """
    sequence = tokenizer.texts_to_sequences([text])[0]
    if not sequence:
        # Empty text or only out-of-vocabulary words: nothing to feed the model.
        print("No known word found in the input.")
        return ""

    # The model consumes one token per sample, so keep just the last token and
    # give it an explicit (batch, timesteps) shape.
    model_input = np.array(sequence[-1:]).reshape(1, 1)

    # `predict_classes` no longer exists in current Keras; taking the argmax of
    # the predicted scores yields the same class id.
    preds = np.argmax(model.predict(model_input, verbose=0), axis=-1)
    print(preds)

    # Reverse lookup: find the word whose index equals the predicted class id.
    predicted_index = int(preds[0])
    predicted_word = next(
        (word for word, index in tokenizer.word_index.items() if index == predicted_index),
        "",
    )

    print(predicted_word)
    return predicted_word

In [25]:
# Interactive loop: read a line, predict the word following its last word,
# and stop when the user types "stop the script".
while True:

    text = input("Enter your line: ")

    if text == "stop the script":
        print("Ending The Program.....")
        break

    # split() without arguments ignores repeated and trailing spaces, so the
    # last element is always a real word.
    words = text.split()
    if not words:
        # Blank input: nothing to predict from, ask again.
        continue

    try:
        Predict_Next_Words(model, tokenizer, words[-1])
    except Exception as error:
        # Report the problem instead of hiding it, then keep the loop alive.
        print("Could not predict the next word:", error)

Enter your line: collection of textile




[763]
samples
Enter your line: stop the script
Ending The Program.....
