<a href="https://colab.research.google.com/github/harishahamed26/NLP_ReinforcementLearning/blob/main/Main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# New section

In [1]:
# import python libraries
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Embedding, LSTM, Bidirectional, Dropout
from tensorflow.keras.utils import to_categorical
from keras.preprocessing.text import Tokenizer
from random import randint
import re

import keras
import nltk 
from nltk.tokenize import word_tokenize
nltk.download('punkt')

from nltk.translate.bleu_score import sentence_bleu

nltk.download('gutenberg')  # downloads a library that NLTK uses

from nltk.corpus import gutenberg as gut  # downloads the gutenberg dataset


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package gutenberg to /root/nltk_data...
[nltk_data]   Unzipping corpora/gutenberg.zip.


In [None]:
# get the book text
book_text = nltk.corpus.gutenberg.raw('blake-poems.txt')
print(book_text)

In [3]:
# Data preprocessing
def preprocess_text(sen):
    # Remove punctuations and numbers
    sentence = re.sub('[^a-zA-Z]', ' ', sen)

    # Single character removal
    sentence = re.sub(r"\s+[a-zA-Z]\s+", ' ', sentence)

    # Removing multiple spaces
    sentence = re.sub(r'\s+', ' ', sentence)

    return sentence.lower()
book_text = preprocess_text(book_text)

book_text


' poems by william blake songs of innocence and of experience and the book of thel songs of innocence introduction piping down the valleys wild piping songs of pleasant glee on cloud saw child and he laughing said to me pipe song about lamb so piped with merry cheer piper pipe that song again so piped he wept to hear drop thy pipe thy happy pipe sing thy songs of happy cheer so sang the same again while he wept with joy to hear piper sit thee down and write in book that all may read so he vanish from my sight and pluck a hollow reed and made rural pen and stain the water clear and wrote my happy songs every child may joy to hear the shepherd how sweet is the shepherd sweet lot from the morn to the evening he stays he shall follow his sheep all the day and his tongue shall be filled with praise for he hears the lambs innocent call and he hears the ewes tender reply he is watching while they are in peace for they know when their shepherd is nigh the echoing green the sun does arise and m

In [None]:
book_text = book_text.lower()

In [13]:
# convert words to numbers

book_text_words = (word_tokenize(book_text))
n_words = len(book_text_words)
unique_words = len(set(book_text_words))

tokenizer = Tokenizer(num_words=unique_words)
tokenizer.fit_on_texts(book_text_words)

vocab_size = len(tokenizer.word_index) + 1    # word_index is the dictionary. Store the number of unique words in vocab_size variable
word_2_index = tokenizer.word_index           # store the dictionary in the variable called word_2_index

# Create the input sequences
input_sequence_words = []  # input sequences in words (used for metric evaluation later on)
input_sequence = []   # empty list to hold the sequences that will be input into our model
output_words = []     # empty list to hold the output words
input_seq_length = 25  # length of the input sequence
for i in range(0, n_words - input_seq_length , 1):
    in_seq = book_text_words[i:i + input_seq_length]
    input_sequence_words.append(in_seq)
    out_seq = book_text_words[i + input_seq_length]
    input_sequence.append([word_2_index[word] for word in in_seq])
    output_words.append(word_2_index[out_seq])

# reshape the input sequences to be 3-dimensional
X = np.reshape(input_sequence, (len(input_sequence), input_seq_length, 1))

# Normalise the data by dividing by the max number of unique words (the vocab size)
X = X / float(vocab_size)

# one-hot encode the output words so that they can be used by the model (converts the output to 2-dimensions)
y = to_categorical(output_words)

     

In [14]:

# create, compile and fit the model
model = Sequential()
model.add(LSTM(1000, input_shape=(X.shape[1], X.shape[2]), return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(y.shape[1], activation='softmax'))
model.summary()

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X, y, batch_size=100, epochs=50, verbose=1)

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_7 (LSTM)               (None, 1000)              4008000   
                                                                 
 dropout_7 (Dropout)         (None, 1000)              0         
                                                                 
 dense_7 (Dense)             (None, 1506)              1507506   
                                                                 
Total params: 5,515,506
Trainable params: 5,515,506
Non-trainable params: 0
_________________________________________________________________
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch

<keras.callbacks.History at 0x7f0a57b46a00>

In [15]:
seq = [' '.join(w) for w in input_sequence_words]
reference = seq

def generate_poem(PoemLength):
      # Make Predictions
    random_seq_index = np.random.randint(0, PoemLength-1)    # select a random number from within the range of the number of input sequences
    random_seq = input_sequence[random_seq_index]                     # get the input sequence that occurs at the randomly selected index (this is a list of integers)

    index_2_word = dict(map(reversed, word_2_index.items())) # convert the integer sequence to its words
    seed_word_sequence = [index_2_word[value] for value in random_seq]  # get the list of words that correspond to the integers in the randomly picked sequence


    word_sequence = []
    for i in range(PoemLength):
        int_sample = np.reshape(random_seq, (1, len(random_seq), 1))    # reshape to make 3-D input (1 sequence, length of the sequence, 1 because the first LSTM requires another dimension)
        int_sample = int_sample / float(vocab_size)                     # normalise (as we normalised the training data)
        
        predicted_word_index = model.predict(int_sample, verbose=0)     # predict the next word.  An array of the probabilities for each word in the vocab is returned.
        predicted_word_id = np.argmax(predicted_word_index)             # get the index of the maximum value (they are categorical so the max value gives the word in the vocab with the highest probability)
        word_sequence.append(index_2_word[ predicted_word_id])          # get the predicted word by finding the word at the predicted index and add it to our predicted word sequence list

        random_seq.append(predicted_word_id)                            # append the predicted word index to the next seuqence to be input into the model predict method
        random_seq = random_seq[1:len(random_seq)]                      # remove the first element of the sequence so it now has the new word but is the same length.

    # BLEU score
    candidate = ' '.join(word_sequence) # make the list of words into a string
    score = sentence_bleu(reference, candidate)

    print('%s'%(candidate))
    print('\n')
    print('BLEU Score for predicted words: %s'%(score))
    print('\n')


In [16]:
# Predicting the poems

print('Poem 1')
generate_poem(25)
print('\n')

print('Poem 2')
generate_poem(25)
print('\n')

print('Poem 3')
generate_poem(25)
print('\n')

Poem 1
of pleasant glee on cloud saw child and he laughing said to me pipe song about lamb so piped with merry cheer piper pipe that


BLEU Score for predicted words: 1.0




Poem 2
with merry cheer piper pipe that song again so piped he wept to hear drop thy pipe thy happy pipe sing thy songs of happy


BLEU Score for predicted words: 1.0




Poem 3
merry cheer piper pipe that song again so piped he wept to hear drop thy pipe thy happy pipe sing thy songs of happy cheer


BLEU Score for predicted words: 1.0




