## Import all Packages

In [1]:
import tensorflow as tf

from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Embedding, LSTM, Dense, Bidirectional
from tensorflow.keras.layers import Dropout

from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import numpy as np 

## Load the text file

In [2]:
# Read the whole corpus; the context manager guarantees the file handle
# is closed instead of being left for the garbage collector.
with open('text_file.txt') as corpus_file:
    data = corpus_file.read()

# Each line of the text file is treated as one training sentence.
corpus = data.split('\n')

## Tokenizing the data

In [3]:
# Fit a word-level vocabulary on every line of the corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

# Mapping word -> integer id produced by the fitted tokenizer.
word_index = tokenizer.word_index

# Vocabulary size plus one extra slot: the padded sequences built later
# use 0 as the fill value, so ids must range over 0..len(word_index).
total_words = 1 + len(word_index)
total_words

2690

## Create n-gram input sequences from each line's token list

In [4]:
# For every corpus line, emit each leading prefix of its token ids that
# has at least two tokens: [t0, t1], [t0, t1, t2], ... up to the full line.
# Lines that encode to fewer than two tokens contribute nothing.
input_seq = []
for line in corpus:
    encoded_line = tokenizer.texts_to_sequences([line])[0]
    input_seq.extend(
        encoded_line[:end] for end in range(2, len(encoded_line) + 1)
    )

## Pad Sequences

In [5]:
# Length of the longest n-gram prefix; all sequences are padded to this.
max_seq_len = max(map(len, input_seq))

# Left-pad ('pre') so the last column always holds the final token of
# each prefix.
padded = pad_sequences(input_seq, maxlen=max_seq_len, padding='pre')
input_seq = np.array(padded)
input_seq

array([[   0,    0,    0, ...,    0,   51,   12],
       [   0,    0,    0, ...,   51,   12,   96],
       [   0,    0,    0, ...,   12,   96, 1217],
       ...,
       [   0,    0,    0, ...,    0,   47,  105],
       [   0,    0,    0, ...,   47,  105,  138],
       [   0,    0,    0, ...,  105,  138,  184]], dtype=int32)

## Create predictors and labels

In [6]:
# Every column except the last is the model input; the last column is
# the token id the model should predict.
xs = input_seq[:, :-1]
labels = input_seq[:, -1]

# One-hot encode the target ids across the full vocabulary so they match
# the softmax output / categorical cross-entropy loss used by the model.
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

## Build and train the model

In [8]:
# Architecture: embedding -> bidirectional LSTM -> softmax over the vocabulary.
model = Sequential()
# Inputs are the padded prefixes minus their final (label) token, hence
# a sequence length of max_seq_len - 1.
model.add(Embedding(total_words, 100, input_length=max_seq_len-1))
model.add(Bidirectional(LSTM(150)))
# One output unit per vocabulary id; softmax yields a next-word distribution.
model.add(Dense(total_words, activation='softmax'))

# `learning_rate` is the supported keyword; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
adam = Adam(learning_rate=0.01)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

# Train on the one-hot encoded next-word targets and keep the history
# object for later inspection.
history = model.fit(xs, ys, epochs=20, verbose=1)
model.summary()


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 15, 100)           269000    
_________________________________________________________________
bidirectional_1 (Bidirection (None, 300)               301200    
_________________________________________________________________
dense_1 (Dense)              (None, 2690)              809690    
Total params: 1,379,890
Trainable params: 1,379,890
Non-trainable params: 0
_________________________________________________________________


## Predicting next word

In [10]:
# Greedy text generation: repeatedly predict the most likely next word
# and append it to the running seed text.
seed_text = " Help me"
next_words = 100

for _ in range(next_words):
    # Encode the text generated so far and left-pad it to the length the
    # model was trained on.
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_seq_len-1, padding='pre')

    # `Sequential.predict_classes` was removed in TensorFlow 2.6; taking
    # the argmax of the softmax output is the documented replacement.
    probabilities = model.predict(token_list, verbose=0)
    predicted = int(np.argmax(probabilities, axis=-1)[0])

    # Direct id -> word lookup instead of scanning the whole vocabulary.
    # An id with no word (e.g. the padding id 0) yields an empty string,
    # matching the previous fallback.
    output_word = tokenizer.index_word.get(predicted, "")
    seed_text += " " + output_word
print(seed_text)

 Help me a smiling was listening to me it makes the heart inside together off gone away your thyme away in your eyes on the tree to drumslieve to the tree again may is soon than ra you took the wooden hand of a mountain hiii my bride by and love the heart you so gone and the land i was gone and the land i was never the sound over old of sweet since love her she gone by the bridle and i gone away and a little ship no longer gone and i love the ground whereon he goes home
