In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding
from tensorflow.keras.utils import to_categorical
import requests

In [2]:
# Load the complete works of Shakespeare, downloading them from Project
# Gutenberg the first time the notebook runs without a local copy.
SHAKESPEARE_URL = "https://www.gutenberg.org/files/100/100-0.txt"
SHAKESPEARE_PATH = 'shakespeare.txt'

try:
    with open(SHAKESPEARE_PATH, 'r', encoding='utf-8') as file:
        text = file.read().lower()
except FileNotFoundError:
    # No cached copy yet: fetch it once and save it for later runs.
    response = requests.get(SHAKESPEARE_URL, timeout=60)
    response.raise_for_status()  # fail loudly instead of caching an HTTP error page
    response.encoding = 'utf-8'  # decode as UTF-8 whatever the response headers say
    # newline='' writes the payload's line endings untouched, so any "\r\n"
    # in the download is not expanded to "\r\r\n" on Windows.
    with open(SHAKESPEARE_PATH, 'w', encoding='utf-8', newline='') as file:
        file.write(response.text)
    # Re-read through the same path as the cached case so `text` is built the
    # same way on the first run as on every later run.
    with open(SHAKESPEARE_PATH, 'r', encoding='utf-8') as file:
        text = file.read().lower()


In [5]:
# Keep only the first 10,000 characters: a small sample, chosen because of
# memory issues with the full text.
MAX_CHARS = 10_000
text = text[:MAX_CHARS]

In [6]:
# Build the vocabulary: every distinct character of the text in sorted order,
# with lookup tables in both directions (character <-> integer index).
chars = sorted(set(text))
char_to_int = dict(zip(chars, range(len(chars))))
int_to_char = dict(enumerate(chars))

In [7]:
# Prepare the dataset: slide a window of seq_length characters over the text
# one position at a time. Each window is an input sequence and the character
# immediately after it is the target.
seq_length = 25

encoded_text = [char_to_int[ch] for ch in text]  # encode once, slice below
window_starts = range(len(encoded_text) - seq_length)

sequences = [encoded_text[start:start + seq_length] for start in window_starts]
next_chars = [encoded_text[start + seq_length] for start in window_starts]

In [8]:
# Shape the inputs as (samples, seq_length, 1), i.e. one value per time step,
# and scale the integer character indices into [0, 1).
X = np.reshape(sequences, (len(sequences), seq_length, 1))
X = X / float(len(chars))
# One-hot encode the targets. num_classes is pinned to the vocabulary size:
# left to its default, to_categorical sizes the encoding from the largest
# index present in next_chars, which falls short of the vocabulary whenever
# the highest-indexed character never occurs as a target.
y = to_categorical(next_chars, num_classes=len(chars))

In [9]:
# Build the RNN model: a single LSTM layer reads the character sequence and a
# softmax layer scores every candidate next character.
model = Sequential([
    # Declare the input shape with an explicit Input object. Passing
    # `input_shape=` to the first layer instead makes Keras emit a UserWarning.
    tf.keras.Input(shape=(X.shape[1], X.shape[2])),
    LSTM(256),
    Dense(y.shape[1], activation='softmax'),
])

  super().__init__(**kwargs)


In [10]:
# Configure training: categorical cross-entropy against the one-hot targets,
# optimised with Adam.
model.compile(optimizer='adam', loss='categorical_crossentropy')

# Fit for 20 epochs in mini-batches of 128 samples.
model.fit(x=X, y=y, batch_size=128, epochs=20)

Epoch 1/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 77ms/step - loss: 3.2888
Epoch 2/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 79ms/step - loss: 2.9977
Epoch 3/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 85ms/step - loss: 2.9908
Epoch 4/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 93ms/step - loss: 2.9761
Epoch 5/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 97ms/step - loss: 2.9609
Epoch 6/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 94ms/step - loss: 2.9676
Epoch 7/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 87ms/step - loss: 2.9171
Epoch 8/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 87ms/step - loss: 2.8690
Epoch 9/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 90ms/step - loss: 2.8286
Epoch 10/20
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 90ms/step - loss: 2.7757

<keras.src.callbacks.history.History at 0x1ff418ae210>

In [11]:
# Text Generation
def generate_text(model, seed, length):
    """Extend `seed` by `length` characters using greedy next-character prediction.

    Each step feeds the current window of characters to `model`, appends the
    highest-scoring character to the output, and slides the window forward by
    one position so it always holds the most recent ``len(seed)`` characters.

    Relies on the notebook-level `chars`, `char_to_int` and `int_to_char`.

    Args:
        model: Object whose ``predict(x, verbose=0)`` returns one score per
            vocabulary index for an input of shape ``(1, len(seed), 1)``.
        seed: Starting text. Every character must be a key of `char_to_int`.
            The notebook lowercases its training text before building that
            mapping, so uppercase letters are not in it.
        length: Number of characters to generate. Zero or a negative value
            returns `seed` unchanged.

    Returns:
        `seed` followed by the generated characters.

    Raises:
        ValueError: If `seed` is empty while `length` is positive.
        KeyError: If `seed` contains a character missing from `char_to_int`.
    """
    if length <= 0:
        return seed
    if not seed:
        raise ValueError("seed must contain at least one character")
    unknown = sorted({ch for ch in seed if ch not in char_to_int})
    if unknown:
        # Same exception type as the raw dictionary lookup, but it names every
        # offending character instead of only the first one hit.
        raise KeyError(
            f"seed contains characters outside the training vocabulary: {unknown!r}"
        )

    vocab_size = float(len(chars))  # loop-invariant normaliser
    # Encode the seed once and slide a window of integer indices, rather than
    # rebuilding a string and re-encoding all of it on every step.
    window = [char_to_int[ch] for ch in seed]
    generated = []
    for _ in range(length):
        x = np.reshape(window, (1, len(window), 1)) / vocab_size
        prediction = model.predict(x, verbose=0)
        index = int(np.argmax(prediction))  # greedy: take the top-scoring class
        generated.append(int_to_char[index])
        window = window[1:] + [index]
    return seed + ''.join(generated)


In [22]:
# Example usage: extend a short prompt by 20 generated characters
seed_text = "hello, my name is"
generated_text = generate_text(model, seed=seed_text, length=20)

In [23]:
# Show the seed followed by the characters the model appended to it
print(generated_text)

hello, my name ise teet 

   
       
