# Next Word Prediction based on trained dataset and using SimpleRNN,LSTM and GRU

In [1]:
from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, Dense,Input,LSTM,GRU
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np


In [2]:

# Define the dataset
sentences = [
    "I like cats",
    "Dogs are cool",
    "I enjoy reading books",
    "The sun is shining"
]

# Tokenize the sentences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
sequences = tokenizer.texts_to_sequences(sentences)

# Create input and target sequences
X = []
y = []
for seq in sequences:
    for i in range(len(seq)-1):
        X.append(seq[:i+1])
        y.append(seq[i+1])

# Convert sequences to numpy arrays
X = pad_sequences(X, maxlen=max(map(len, X)))
y = np.array(y)

# Define the vocabulary size and maximum sequence length
vocab_size = len(tokenizer.word_index) + 1
max_len = max(map(len, X))


# SIMPLE RNN MODEL

In [3]:

# Build the RNN model
model_rnn = Sequential()
model_rnn.add(Input(shape=(max_len-1,)))
model_rnn.add(Embedding(vocab_size, 8))
model_rnn.add(SimpleRNN(32))
model_rnn.add(Dense(vocab_size, activation='softmax'))

# Compile and train the model
model_rnn.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_rnn.fit(X, y, epochs=100, verbose=1)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step - accuracy: 0.1000 - loss: 2.6445
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.2000 - loss: 2.6371
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.2000 - loss: 2.6299
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.3000 - loss: 2.6227
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 83ms/step - accuracy: 0.4000 - loss: 2.6156
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.4000 - loss: 2.6086
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step - accuracy: 0.3000 - loss: 2.6016
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.3000 - loss: 2.5945
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x219a56bbb90>

In [4]:
# Generate text using the trained models
def generate_text(seed_text, model, max_sequence_len, num_words):
    for _ in range(num_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        predicted = np.argmax(model.predict(token_list), axis=-1)
        output_word = ""
        for word, index in tokenizer.word_index.items():
            if index == predicted:
                output_word = word
                break
        seed_text += " " + output_word
    return seed_text


In [5]:
generated_text_rnn = generate_text("This is", model_rnn, max_len, num_words=4)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step


In [6]:
print("Generated Text (SimpleRNN):", generated_text_rnn)

Generated Text (SimpleRNN): This is are shining books reading


# LSTM MODEL

In [7]:
# Build the LSTM model
model_lstm = Sequential()
model_lstm.add(Input(shape=(max_len-1,)))
model_lstm.add(Embedding(vocab_size, 8))
model_lstm.add(LSTM(32))  # Replace SimpleRNN with LSTM
model_lstm.add(Dense(vocab_size, activation='softmax'))

# Compile and train the model
model_lstm.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_lstm.fit(X, y, epochs=100, verbose=1)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.1000 - loss: 2.6408
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.1000 - loss: 2.6391
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.1000 - loss: 2.6374
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - accuracy: 0.1000 - loss: 2.6357
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - accuracy: 0.1000 - loss: 2.6339
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 0.1000 - loss: 2.6322
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.1000 - loss: 2.6305
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - accuracy: 0.1000 - loss: 2.6288
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x219a6f71890>

In [8]:
generated_text_lstm = generate_text("This is", model_lstm, max_len, num_words=4)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 150ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step


In [9]:
print("Generated Text (LSTM):", generated_text_lstm)

Generated Text (LSTM): This is sun shining shining is


# GRU MODEL

In [10]:
# Build and train the GRU model
model_gru = Sequential()
model_gru.add(Input(shape=(max_len-1,)))
model_gru.add(Embedding(vocab_size, 8))
model_gru.add(GRU(100))  # Replace LSTM with GRU
model_gru.add(Dense(vocab_size, activation='softmax'))

# Compile and train the model
model_gru.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model_gru.fit(X, y, epochs=100, verbose=1)


Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step - accuracy: 0.1000 - loss: 2.6368
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step - accuracy: 0.3000 - loss: 2.6326
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.3000 - loss: 2.6284
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.3000 - loss: 2.6240
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - accuracy: 0.3000 - loss: 2.6196
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step - accuracy: 0.3000 - loss: 2.6150
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.3000 - loss: 2.6102
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step - accuracy: 0.3000 - loss: 2.6051
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

<keras.src.callbacks.history.History at 0x219ac84e910>

In [11]:
generated_text_gru = generate_text("This is", model_gru, max_len, num_words=4)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 177ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step


In [12]:
print("Generated Text (GRU):", generated_text_gru)

Generated Text (GRU): This is sun shining shining shining


In [1]:
import numpy as np

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, GRU, LSTM, Dense, Embedding, Input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [9]:
# Text data
text_data = ["This is the first document.",
    "This document is the second document.",
    "And this is the third one.",
    "Is this the first document?"]

In [10]:
# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
total_words = len(tokenizer.word_index) + 1

In [11]:
# Create input sequences and labels for training
import tensorflow as tf
input_sequences = []
for line in text_data:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)
        
max_sequence_length = max(len(seq) for seq in input_sequences)
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_length, padding='pre')

X, y = input_sequences[:, :-1], input_sequences[:, -1]
y = tf.keras.utils.to_categorical(y, num_classes=total_words)
print(X.shape)
print(y.shape)

(18, 5)
(18, 10)


In [12]:
# Build and train the SimpleRNN model
model_rnn = Sequential()
model_rnn.add(Input(shape=(max_sequence_length-1,)))  # Add Input layer with correct shape
model_rnn.add(Embedding(total_words, 50))
model_rnn.add(SimpleRNN(100))
model_rnn.add(Dense(total_words, activation='softmax'))

model_rnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_rnn.fit(X, y, epochs=100, verbose=0)

<keras.src.callbacks.history.History at 0x2096c922a10>

In [13]:
# Build and train the GRU model
model_gru = Sequential()
model_rnn.add(Input(shape=(max_sequence_length-1,)))
model_gru.add(Embedding(total_words, 50))
model_gru.add(GRU(100))
model_gru.add(Dense(total_words, activation='softmax'))
model_gru.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model_gru.fit(X, y, epochs=100, verbose=0)

ValueError: Sequential model 'sequential_2' has already been configured to use input shape (None, 5). You cannot add a different Input layer to it.