## Realitic example

In [3]:
import tensorflow as tf
import numpy as np
import requests
import os

# Step 1: Download the text
url = 'https://www.gutenberg.org/files/11/11-0.txt'
response = requests.get(url)
text = response.text

print(f"Downloaded text length: {len(text)} characters")

# Step 2: Preprocess the text
# Only keep ASCII characters to avoid weird symbols
text = ''.join(c for c in text if ord(c) < 128)

# Create character to index and index to character mappings
chars = sorted(list(set(text)))
vocab_size = len(chars)
print(f"Unique characters: {vocab_size}")

char_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for i, ch in enumerate(chars)}

# Encode entire text
encoded_text = np.array([char_to_idx[c] for c in text])

# Step 3: Create input-output sequences
seq_length = 100  # each input will be 100 characters
step = 1          # move 1 character forward each time
sequences = []
next_chars = []

for i in range(0, len(encoded_text) - seq_length, step):
    sequences.append(encoded_text[i:i+seq_length])
    next_chars.append(encoded_text[i+seq_length])

print(f"Total sequences: {len(sequences)}")

# Convert to TensorFlow dataset
X = np.array(sequences)
y = np.array(next_chars)

dataset = tf.data.Dataset.from_tensor_slices((X, y))
dataset = dataset.shuffle(10000).batch(64, drop_remainder=True)

# Step 4: Build the model
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, 64, input_length=seq_length),
    tf.keras.layers.LSTM(128, return_sequences=False),
    tf.keras.layers.Dense(vocab_size)
])

model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              optimizer='adam')


Downloaded text length: 148080 characters
Unique characters: 71
Total sequences: 144732
Epoch 1/20




[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m187s[0m 81ms/step - loss: 2.6123
Epoch 2/20
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m176s[0m 78ms/step - loss: 1.9740
Epoch 3/20
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m174s[0m 77ms/step - loss: 1.8007
Epoch 4/20
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 80ms/step - loss: 1.6969
Epoch 5/20
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m181s[0m 80ms/step - loss: 1.6240
Epoch 6/20
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 82ms/step - loss: 1.5638
Epoch 7/20
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m184s[0m 81ms/step - loss: 1.5168
Epoch 8/20
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m185s[0m 82ms/step - loss: 1.4792
Epoch 9/20
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m191s[0m 84ms/step - loss: 1.4448
Epoch 10/20
[1m2261/2261[0m [32m━━━━━━━━━━━━━━━━━━━━[

AttributeError: 'Sequential' object has no attribute 'reset_states'

In [None]:
# Step 5: Train
EPOCHS = 2 # 50

class TimeHistory(tf.keras.callbacks.Callback):
    def on_train_begin(self, logs=None):
        self.times = []
    
    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_time_start = time.time()
    
    def on_epoch_end(self, epoch, logs=None):
        self.times.append(time.time() - self.epoch_time_start)

# Create callback
time_callback = TimeHistory()

# Train model with callback
history = model.fit(dataset, epochs=EPOCHS, callbacks=[time_callback])

In [11]:
# Step 6: Text Generation
def generate_text(model, start_string, gen_length=500):
    input_eval = [char_to_idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    text_generated = []
    temperature = 0.01  # Higher = more random, Lower = more greedy

    for _ in range(gen_length):
        predictions = model(input_eval)
        predictions = predictions / temperature  # No slicing needed!

        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        input_eval = tf.expand_dims([predicted_id], 0)
        text_generated.append(idx_to_char[predicted_id])

    return start_string + ''.join(text_generated)

# Try it!
print("\n--- GENERATED TEXT ---\n")
print(generate_text(model, start_string="Alice", gen_length=500))



--- GENERATED TEXT ---

AlicetZid:_                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
