In [25]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.optimizers import Adam

# Sample text data
text = """To be, or not to be, that is the question:
Whether 'tis nobler in the mind to suffer
The slings and arrows of outrageous fortune,
Or to take arms against a sea of troubles
And by opposing end them."""

# Preprocess the text
# A single translation pass: line breaks become spaces, while the colon,
# comma, apostrophe and full stop are deleted outright.
text = text.lower().translate(str.maketrans('\n', ' ', ":,'."))

# Tokenize the text
# Fit a word-level vocabulary on the single cleaned document.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([text])
# One slot more than the vocabulary size; this value is used below as the
# number of classes for the one-hot labels.
total_words = 1 + len(tokenizer.word_index)

# Convert text to sequences
input_sequence = tokenizer.texts_to_sequences([text])[0]

# Set sequence length
sequence_length = 5

# Create input-output pairs
# Slide a fixed-size window over the token ids: each window of
# `sequence_length` ids is paired with the id that immediately follows it.
sequences = [
    (input_sequence[start:start + sequence_length], input_sequence[start + sequence_length])
    for start in range(len(input_sequence) - sequence_length)
]

# Convert to numpy arrays
X = np.array([window for window, _ in sequences])
# Labels are one-hot encoded over `total_words` classes.
y = tf.keras.utils.to_categorical(
    np.array([target for _, target in sequences]),
    num_classes=total_words,
)

# Print shapes to debug
print(f'X shape: {X.shape}')
print(f'y shape: {y.shape}')
print(f'Example X: {X[0]}')
print(f'Example y: {y[0]}')

# Build the RNN model
# Seed Python, NumPy and TensorFlow in one call so that weight initialisation
# and batch shuffling are repeatable on a fresh "Restart & Run All".
tf.keras.utils.set_random_seed(42)

model = Sequential()
# `input_length` is intentionally not passed: the argument is deprecated in
# Keras 3 (it only triggers a warning) and the layer picks up the sequence
# length from the shape of X when the model is first called.
model.add(Embedding(input_dim=total_words, output_dim=64))
model.add(LSTM(100))
# One softmax output per class, matching the one-hot width of y.
model.add(Dense(total_words, activation='softmax'))

model.compile(optimizer=Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
# Keep the returned History object: it holds the per-epoch loss/accuracy and
# assigning it avoids echoing the object's repr as the cell output.
history = model.fit(X, y, epochs=100, verbose=1)




X shape: (34, 5)
y shape: (34, 31)
Example X: [1 3 4 7 1]
Example y: [0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0.]
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100

<keras.src.callbacks.History at 0x792e852dec50>

In [28]:
# Function to generate text
def generate_text(seed_text, next_words, model, max_sequence_len):
    """Greedily extend ``seed_text`` with up to ``next_words`` predicted words.

    On each step the current text is encoded with the module-level
    ``tokenizer``, left-padded (or cut) to ``max_sequence_len`` ids, and passed
    to ``model.predict``; the highest-scoring index is mapped back to its word
    and appended to the text.

    Args:
        seed_text: Starting text. Words unknown to the tokenizer are ignored
            when encoding.
        next_words: Maximum number of words to append.
        model: Object exposing ``predict(batch, verbose=0)`` that returns one
            row of scores per input row.
        max_sequence_len: Number of token ids fed to the model per step.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Generation stops early if the predicted index has no word in
        the tokenizer's vocabulary.

    Note:
        Relies on the global ``tokenizer`` fitted earlier in the notebook.
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len, padding='pre')
        predicted = model.predict(token_list, verbose=0)
        # Cast to a plain int so the lookup key matches the dict's int keys.
        predicted_index = int(np.argmax(predicted, axis=1)[0])
        # The tokenizer already maintains the reverse (index -> word) mapping,
        # so no scan over word_index is needed.
        output_word = tokenizer.index_word.get(predicted_index, "")
        if not output_word:
            # An index with no word would leave the encoded text unchanged, so
            # every remaining step would repeat the same prediction and only
            # add trailing spaces. Stop here instead.
            break
        seed_text += " " + output_word
    return seed_text

# Generate text
# Start from a single seed word and let the trained model extend it, feeding
# the model windows of the same length it was trained on.
seed_text = "and"
next_words = 10
generated_text = generate_text(
    seed_text=seed_text,
    next_words=next_words,
    model=model,
    max_sequence_len=sequence_length,
)
print(generated_text)

and arrows arrows or or or to take against against a
