In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# -----------------------------
# Character-level next-char model with a GRU (complete, runnable)
# -----------------------------
# Tiny training corpus: three newline-terminated sentences in one string.
text = (
    "hello Georgetown from scratch!\n"
    "this is a slightly longer training string so the model has more to learn.\n"
    "gru models usually learn faster than vanilla rnn.\n"
)

# Vocabulary: each distinct character, in sorted order, gets an integer id.
chars = sorted(set(text))
itos = dict(enumerate(chars))                    # id -> character
stoi = {ch: idx for idx, ch in itos.items()}     # character -> id
V = len(chars)                                   # vocabulary size

# The whole corpus as a 1-D int32 array of character ids.
encoded = np.fromiter((stoi[ch] for ch in text), dtype=np.int32, count=len(text))

# -----------------------------
# Dataset: (input_seq -> target_seq), where target is the next char at each step
# -----------------------------
seq_len = 40  # number of characters (time steps) in each training window
batch_size = 32  # number of windows grouped into one training batch

def make_dataset(encoded, seq_len, batch_size):
    """Build a shuffled, batched dataset of next-character training windows.

    Every run of ``seq_len`` consecutive ids in ``encoded`` becomes one input
    row. Its target row is the same window shifted one position to the right,
    so ``y[t]`` is the id that follows ``x[t]`` in the corpus.

    Args:
        encoded: 1-D sequence of integer character ids.
        seq_len: Number of time steps per window.
        batch_size: Number of windows per batch (the last batch may be smaller).

    Returns:
        A ``tf.data.Dataset`` yielding ``(x, y)`` int32 pairs, each of shape
        ``(batch, seq_len)``, shuffled, batched and prefetched.

    Raises:
        ValueError: If ``encoded`` does not contain more than ``seq_len``
            elements, i.e. not even one (input, shifted target) window fits.
    """
    encoded = np.asarray(encoded)
    n_windows = len(encoded) - seq_len
    if n_windows <= 0:
        # Fail early with an actionable message instead of an opaque stacking error.
        raise ValueError(
            f"need more than seq_len={seq_len} encoded ids to build at least "
            f"one training window, got {len(encoded)}"
        )

    # Row i of `idx` holds positions i .. i + seq_len - 1; one fancy-index
    # gather replaces the Python-level slicing loop.
    idx = np.arange(n_windows)[:, None] + np.arange(seq_len)[None, :]
    xs = encoded[idx].astype(np.int32)      # (N, T)
    ys = encoded[idx + 1].astype(np.int32)  # (N, T), shifted by one step

    ds = tf.data.Dataset.from_tensor_slices((xs, ys))
    # Shuffle buffer is capped at 2048 examples; smaller datasets shuffle fully.
    ds = ds.shuffle(min(2048, len(xs))).batch(batch_size).prefetch(tf.data.AUTOTUNE)
    return ds

# Training dataset: shuffled batches of (input window, next-char target window).
ds = make_dataset(encoded, seq_len, batch_size)

# -----------------------------
# Network: token ids -> Embedding -> GRU (all time steps) -> Dense over vocab
# -----------------------------
embed_dim = 64      # width of each character embedding vector
hidden_size = 256   # number of GRU units

# Assemble the stack layer by layer; the final Dense emits raw logits
# (no softmax) for every time step.
model = keras.Sequential()
model.add(layers.Input(shape=(seq_len,), dtype="int32"))
model.add(layers.Embedding(input_dim=V, output_dim=embed_dim))
model.add(layers.GRU(hidden_size, return_sequences=True))
model.add(layers.Dense(V))

# Adam with gradient-norm clipping; the loss consumes integer targets and logits.
optimizer = keras.optimizers.Adam(learning_rate=1e-3, clipnorm=1.0)
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(
    loss=loss_fn,
    optimizer=optimizer,
    metrics=["sparse_categorical_accuracy"],
)
model.summary()

# Fit for 30 epochs, printing one line per epoch.
model.fit(ds, epochs=30, verbose=2)

# -----------------------------
# Sampling / generation
# -----------------------------
@tf.function
def last_step_logits(model, x):
    """Run the model in inference mode and keep only the final time step.

    Args:
        model: Callable returning per-time-step logits of shape (batch, T, V).
        x: int32 tensor of token ids with shape (batch, T); the sampler
            passes a single sequence, i.e. (1, T).

    Returns:
        Logits for the last position only, shape (batch, V).
    """
    per_step_logits = model(x, training=False)
    return per_step_logits[:, -1, :]

def sample(model, start_text="h", n=200, temperature=0.9):
    """Generate text one character at a time from the model.

    The seed is encoded with the module-level vocabulary, placed at the end of
    a fixed-length context window of ``seq_len`` ids, and extended by sampling
    from the softmax of the temperature-scaled last-step logits.

    Args:
        model: Model passed through to ``last_step_logits``.
        start_text: Seed string. Characters not in the vocabulary are dropped;
            if nothing remains, id 0 is used as the seed.
        n: Total length of the returned string, seed included. If the seed is
            already at least ``n`` characters long, no characters are sampled.
        temperature: Divisor applied to the logits before the softmax; must be
            non-zero. Values below 1 sharpen the distribution, values above 1
            flatten it.

    Returns:
        The seed (after dropping unknown characters) followed by the sampled
        characters, as a single string.
    """
    start_ids = [stoi[c] for c in start_text if c in stoi]
    if not start_ids:
        start_ids = [0]

    # Left-pad with the first seed id so the seed occupies the END of the
    # window: the model predicts from the last position, so the most recent
    # characters must sit there. (Padding on the right would push the seed
    # out of the window and leave only padding as context.)
    context = ([start_ids[0]] * seq_len + start_ids)[-seq_len:]  # length = seq_len
    out_ids = list(start_ids)

    for _ in range(n - len(out_ids)):
        x = tf.constant([context], dtype=tf.int32)
        logits = last_step_logits(model, x) / float(temperature)
        probs = tf.nn.softmax(logits, axis=-1).numpy().ravel()
        next_id = int(np.random.choice(V, p=probs))

        out_ids.append(next_id)
        # Slide the window: drop the oldest id, append the newly sampled one.
        context = context[1:] + [next_id]

    return "".join(itos[i] for i in out_ids)

# Print one sample per temperature, each seeded with "hello " and capped at
# 300 characters total (seed included), to compare how temperature affects output.
print("\n--- samples ---")
for temp in (0.6, 0.9, 1.2):
    print(f"\nTemperature={temp}")
    print(sample(model, start_text="hello ", n=300, temperature=temp))