# **Implement with TensorFlow/Keras (RNN)**


# **Data from Project Gutenberg**
**"Pride and Prejudice" by Jane Austen**

In [1]:
import requests

# Plain-text edition of "Pride and Prejudice" hosted by Project Gutenberg
url = "https://www.gutenberg.org/files/1342/1342-0.txt"

# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being used as the corpus.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Decode explicitly as UTF-8 instead of relying on the charset requests
# guesses: the guessed charset turns curly apostrophes into mojibake
# (e.g. "Jane's" with a typographic apostrophe is shown as "Janeâ..." in the
# preview). The "-sig" variant also drops a leading byte-order mark if present.
response.encoding = "utf-8-sig"
text_data = response.text

# Preview the first 1000 characters
print(text_data[:1000])

*** START OF THE PROJECT GUTENBERG EBOOK 1342 ***




                            [Illustration:

                             GEORGE ALLEN
                               PUBLISHER

                        156 CHARING CROSS ROAD
                                LONDON

                             RUSKIN HOUSE
                                   ]

                            [Illustration:

               _Reading Janeâs Letters._      _Chap 34._
                                   ]




                                PRIDE.
                                  and
                               PREJUDICE

                                  by
                             Jane Austen,

                           with a Preface by
                           George Saintsbury
                                  and
                           Illustrations by
                             Hugh Thomson

                         [Illustration: 1894]

       

**Prepare Data**

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Build the vocabulary from the whole book and encode the book as one long
# list of integer word ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text_data])
sequences = tokenizer.texts_to_sequences([text_data])[0]

# Slide a fixed-size window over the ids: every run of `sequence_length`
# consecutive words is one training sample, and the word that immediately
# follows the run is its target.
sequence_length = 5
window_ends = range(sequence_length, len(sequences))
sequences_list = [sequences[end - sequence_length:end] for end in window_ends]
labels_list = [sequences[end] for end in window_ends]

# Samples and targets as numpy arrays
X = np.array(sequences_list)
y = np.array(labels_list)

# One-hot encode the targets. The class count is the vocabulary size plus
# one so that id 0, which is not a key of word_index, is covered as well.
vocab_size = len(tokenizer.word_index) + 1
y = tf.keras.utils.to_categorical(y, num_classes=vocab_size)

# Preview the first few samples and their encoded targets
print(X[:5])
print(y[:5])

[[2804    4    2 2805 3520]
 [   4    2 2805 3520 3521]
 [   2 2805 3520 3521 3522]
 [2805 3520 3521 3522    1]
 [3520 3521 3522    1    1]]
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]


**Build Model**

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense

# Next-word classifier: embed each word id of the `sequence_length`-word
# window, summarise the window with a single recurrent layer (only the final
# state is kept), then score every vocabulary entry with a softmax.
vocab_size = len(tokenizer.word_index) + 1
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=50, input_length=sequence_length),
    SimpleRNN(100, return_sequences=False),
    Dense(vocab_size, activation='softmax'),
])

# categorical_crossentropy matches the one-hot encoded targets
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 5, 50)             435450    
                                                                 
 simple_rnn (SimpleRNN)      (None, 100)               15100     
                                                                 
 dense (Dense)               (None, 8709)              879609    
                                                                 
Total params: 1330159 (5.07 MB)
Trainable params: 1330159 (5.07 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


**Training**

In [4]:
# Train on every (window, next word) pair. The call is left as a bare
# expression so the notebook displays the History object it returns.
model.fit(
    X,
    y,
    epochs=20,
    batch_size=128,
    verbose=1,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x7df9ec9357b0>

**The paragraph below is taken from the book for test purposes (to check the prediction of the next word).**

I think, however, though the thought will doubtless seem heretical to
more than one school of critics, that construction is not the highest
merit, the choicest gift, of the novelist. __It sets off his other gifts
and graces most advantageously to the__ critical eye; and the want of it
will sometimes mar those graces--appreciably, though not quite
consciously--to eyes by no means ultra-critical. But a very badly-built
novel which excelled in pathetic or humorous character, or which
displayed consummate command of dialogue--perhaps the rarest of all
faculties--would be an infinitely better thing than a faultless plot
acted and told by puppets with pebbles in their mouths. And despite the
ability which Miss Austen has shown in working out the story, I for one
should put_ Pride and Prejudice _far lower if it did not contain what
seem to me the very masterpieces of Miss Austen’s humour and of her
faculty of character-creation--masterpieces who may indeed admit John
Thorpe, the Eltons, Mrs. Norris, and one or two others to their company,
but who, in one instance certainly, and perhaps in others, are still
superior to them.

**Generate Text**

In [10]:
def predict_next_word(model, tokenizer, text, sequence_length):
    """Return the word the model ranks as most likely to follow ``text``.

    Args:
        model: Trained model whose ``predict`` returns one row of scores per
            input row, with one score per word id.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and the
            ``index_word`` (id -> word) mapping.
        text: Seed text. Only the last ``sequence_length`` token ids produced
            by the tokenizer are fed to the model.
        sequence_length: Window length the model expects as input.

    Returns:
        The predicted word, or ``None`` when the top-scoring id has no entry
        in ``tokenizer.index_word``.
    """
    # Tokenize the input text
    token_ids = tokenizer.texts_to_sequences([text])[0]

    # truncating='pre' drops ids from the front, so the most recent words are
    # kept; pad_sequences fills inputs shorter than the window.
    window = pad_sequences([token_ids], maxlen=sequence_length, truncating='pre')

    # A single row goes in, so take row 0 and reduce it to a plain Python int
    # rather than comparing ids against a one-element array.
    scores = model.predict(window)
    predicted_id = int(np.argmax(scores[0]))

    # Direct id -> word lookup instead of scanning word_index on every call.
    return tokenizer.index_word.get(predicted_id)

# The seed is the emphasised sentence from the test paragraph above; the
# model should continue it with the word that follows it in the book.
seed_text = " It sets off his other gifts and graces most advantageously to the"
next_word = predict_next_word(
    model=model,
    tokenizer=tokenizer,
    text=seed_text,
    sequence_length=sequence_length,
)
print(f"Seed text: {seed_text}", f"Predicted next word: {next_word}", sep="\n")

Seed text:  It sets off his other gifts and graces most advantageously to the
Predicted next word: critical
