In [1]:
## importing dependencies
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Training text: sixteen lines of verse joined by "\n"; every line after the
# first starts with a single space.
# NOTE(review): the name `input` shadows Python's built-in input() for the rest
# of the notebook; a later cell reads this variable by that name.
input = "In the town of Athy one Jeremy Lanigan\n Battered away til he hadnt a pound.\n His father died and made him a man again\n Left him a farm and ten acres of ground.\n He gave a grand party for friends and relations\n Who didnt forget him when come to the wall,\n And if youll but listen Ill make your eyes glisten\n Of the rows and the ructions of Lanigan’s Ball.\n Myself to be sure got free invitation,\n For all the nice girls and boys I might ask,\n And just in a minute both friends and relations\n Were dancing round merry as bees round a cask.\n Judy ODaly, that nice little milliner,\n She tipped me a wink for to give her a call,\n And I soon arrived with Peggy McGilligan\n Just in time for Lanigans Ball."

In [3]:
# Break the lyrics into lower-cased lines and learn the vocabulary from them.
corpus = input.lower().split("\n")
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)

# One slot more than the number of distinct words: Keras word indices start
# at 1, which leaves index 0 free for padding.
total_words = 1 + len(tokenizer.word_index)
print(total_words)

97


In [4]:
# Turn every line into all of its leading n-grams of length >= 2, so each
# sequence can later be split into "words so far" + "next word".
input_sequences = []
for line in corpus:
  token_ids = tokenizer.texts_to_sequences([line])[0]
  input_sequences.extend(
      token_ids[:end] for end in range(2, len(token_ids) + 1))
print(input_sequences[:5])

[[6, 3], [6, 3, 17], [6, 3, 17, 4], [6, 3, 17, 4, 18], [6, 3, 17, 4, 18, 19]]


In [5]:
# Preview the first ten n-gram sequences once they are zero-padded on the left.
padded_preview = pad_sequences(input_sequences[:10])
print(padded_preview)

[[ 0  0  0  0  0  0  6  3]
 [ 0  0  0  0  0  6  3 17]
 [ 0  0  0  0  6  3 17  4]
 [ 0  0  0  6  3 17  4 18]
 [ 0  0  6  3 17  4 18 19]
 [ 0  6  3 17  4 18 19 20]
 [ 6  3 17  4 18 19 20 21]
 [ 0  0  0  0  0  0 22 23]
 [ 0  0  0  0  0 22 23 24]
 [ 0  0  0  0 22 23 24  9]]


In [6]:
# The longest n-gram sequence sets the common width of the padded matrix.
max_sequence_len = max(len(seq) for seq in input_sequences)

# Left-pad every sequence with zeros up to that width and store the result
# as a NumPy array.
padded_sequences = pad_sequences(
    input_sequences, maxlen=max_sequence_len, padding='pre')
input_sequences = np.array(padded_sequences)
print(input_sequences)

[[ 0  0  0 ...  0  6  3]
 [ 0  0  0 ...  6  3 17]
 [ 0  0  0 ...  3 17  4]
 ...
 [ 0  0  0 ...  6 95  5]
 [ 0  0  0 ... 95  5 96]
 [ 0  0  0 ...  5 96 12]]


In [7]:
## Splitting into features and labels
# Every column except the last is the model input; the last column holds the
# word index to predict.
xs, lables = input_sequences[:, :-1], input_sequences[:, -1]

In [8]:
# One-hot encode the label indices, with one column per vocabulary slot.
ys = tf.keras.utils.to_categorical(
    lables,
    num_classes=total_words,
)
print(ys)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 0.]]


## Creating the model

In [9]:
# Embedding -> bidirectional LSTM -> softmax over the whole vocabulary.
model = Sequential([
    # 8-dimensional embedding for each of the total_words indices.
    tf.keras.layers.Embedding(total_words, 8),
    # The LSTM unit count is set to the width of the feature sequences.
    tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(max_sequence_len - 1)),
    # One output probability per vocabulary slot.
    tf.keras.layers.Dense(total_words, activation='softmax'),
])

In [10]:
# Categorical cross-entropy matches the one-hot encoded targets; accuracy is
# tracked so it can be plotted after training.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train on the padded n-gram prefixes; the returned History object keeps the
# per-epoch metrics.
history = model.fit(
    x=xs,
    y=ys,
    epochs=1500,
    verbose=1,
)

Epoch 1/1500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.0000e+00 - loss: 4.5747
Epoch 2/1500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0464 - loss: 4.5720
Epoch 3/1500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.0461 - loss: 4.5701    
Epoch 4/1500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.0784 - loss: 4.5678
Epoch 5/1500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.0519 - loss: 4.5647
Epoch 6/1500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.0738 - loss: 4.5602
Epoch 7/1500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.0675 - loss: 4.5567
Epoch 8/1500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.0811 - loss: 4.5518
Epoch 9/1500
[1m4/4[0m [32m━━━━━━━━━━

In [None]:
## Creating function to display the graph
def plot_graph(fitted_history, metric='accuracy'):
  """Plot one recorded training metric against the epoch number.

  Args:
    fitted_history: Object exposing a ``history`` dict that maps metric names
      to per-epoch value lists, such as the value returned by ``model.fit``.
    metric: Key of the series to plot. Defaults to ``'accuracy'``, which is
      what this function always plotted before the parameter existed.

  Raises:
    KeyError: If ``metric`` is not one of the recorded series.
  """
  recorded = fitted_history.history
  if metric not in recorded:
    raise KeyError(
        f"metric {metric!r} was not recorded; available: {sorted(recorded)}")
  values = recorded[metric]
  epochs = range(1, len(values) + 1)  # label epochs starting from 1, not 0
  plt.figure(figsize=(10,6))
  plt.plot(epochs, values)
  plt.title(f'Training {metric} per epoch')
  plt.xlabel('Epochs')
  plt.ylabel(metric)
  plt.show()

In [None]:
# Plot the accuracy curve recorded during the first training run.
plot_graph(fitted_history=history)

## Predicting Text

In [None]:
# Seed phrase for a single next-word prediction; it is the lower-cased opening
# five words of the training text.
seed_text = "in the town of athy"

In [None]:
# Encode the seed text as word indices.
token_list = tokenizer.texts_to_sequences([seed_text])[0]
# Left-pad to the width the model was trained on: the training features (xs)
# drop the label column, so they are max_sequence_len - 1 wide, not
# max_sequence_len.
token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
# argmax over the softmax output is the index of the most probable next word;
# the result is a one-element array because the batch holds a single sequence.
predicted = np.argmax(model.predict(token_list), axis=-1)
print(predicted)

In [None]:
# Map the predicted index back to its word with the tokenizer's reverse
# lookup table instead of scanning word_index entry by entry.
# np.ravel accepts both a plain integer and the one-element array that
# np.argmax returns for a single-sequence batch.
predicted_index = int(np.ravel(predicted)[0])
predicted_word = tokenizer.index_word.get(predicted_index)
# Index 0 is the padding slot and has no word; print nothing in that case,
# as the original loop did.
if predicted_word is not None:
  print(predicted_word)

## Compounding predictions to generate text

In [None]:
# Generate text by repeatedly predicting the next word and appending it to the
# seed, so each prediction is conditioned on everything generated so far.
seed_text = "sweet jeremy saw dublin"
next_words = 10
for _ in range(next_words):
  token_list = tokenizer.texts_to_sequences([seed_text])[0]
  # Pad/truncate to the training feature width (max_sequence_len - 1); the
  # model was fitted on xs, which excludes the label column.
  token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
  predicted = np.argmax(model.predict(token_list, verbose=0), axis=-1)
  predicted_index = int(np.ravel(predicted)[0])
  # Reverse lookup via index_word; an index with no word (e.g. the padding
  # slot 0) contributes an empty string, as the original loop did.
  output_word = tokenizer.index_word.get(predicted_index, "")
  seed_text += " " + output_word
print(seed_text)

## Extending the Dataset

In [None]:
!wget https://storage.googleapis.com/tensorflow-1-public/course3/irish-lyrics-eof.txt

In [None]:
# Read the downloaded lyrics. The context manager closes the file handle as
# soon as the text is read, and the encoding is stated explicitly instead of
# relying on the platform default.
# NOTE(review): the absolute /content path presumably targets Google Colab's
# default working directory - confirm before running elsewhere.
with open('/content/irish-lyrics-eof.txt', encoding='utf-8') as lyrics_file:
  data = lyrics_file.read()
# One lower-cased lyric line per corpus entry.
corpus = data.lower().split('\n')

In [None]:
# Show the corpus size and a short preview only; printing every line would
# flood the notebook output.
print(len(corpus))
print(corpus[:10])

In [None]:
# The corpus was replaced above, so the vocabulary, the training matrices and
# the model all have to be rebuilt from it. Fitting on the existing xs/ys would
# only continue training on the original short text and never use the new data.

# Vocabulary for the extended corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(corpus)
total_words = len(tokenizer.word_index) + 1

# All leading n-grams (length >= 2) of every line; lines that encode to fewer
# than two tokens contribute nothing.
input_sequences = []
for line in corpus:
  token_list = tokenizer.texts_to_sequences([line])[0]
  for i in range(1, len(token_list)):
    input_sequences.append(token_list[:i+1])

# Left-pad to a common width, then split into features and one-hot labels.
max_sequence_len = max(len(seq) for seq in input_sequences)
input_sequences = np.array(pad_sequences(
    input_sequences, maxlen=max_sequence_len, padding='pre'))
xs = input_sequences[:,:-1]
ys = tf.keras.utils.to_categorical(input_sequences[:,-1], num_classes=total_words)

# Same architecture as the first model, sized for the new vocabulary.
model = Sequential()
model.add(tf.keras.layers.Embedding(total_words,8))
model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(max_sequence_len-1)))
model.add(tf.keras.layers.Dense(total_words, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

history1 = model.fit(xs, ys, epochs=1000, verbose=1)

In [None]:
# Plot the accuracy curve recorded during the second training run.
plot_graph(fitted_history=history1)