In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# -----------------------------
# Tiny demo: character-level LM (next-char prediction) with Keras
# -----------------------------
# The whole training corpus is this one short string.
data = "hello Georgetown from scratch!\n" # would more data help? why or why not?

# Vocabulary: every distinct character in the corpus, in sorted order, is
# assigned the integer id equal to its position in `chars`.
chars = sorted(set(data))
itos = dict(enumerate(chars))                  # id -> character
stoi = {ch: idx for idx, ch in itos.items()}   # character -> id
V = len(chars)                                 # vocabulary size

# The corpus as a 1-D int32 array of character ids.
encoded = np.fromiter((stoi[ch] for ch in data), dtype=np.int32, count=len(data))

# Number of time steps in each training example.
seq_len = 12 # would shorter/longer sequences help? why or why not?

def make_dataset(encoded, seq_len, batch_size=32, shuffle_buffer=512):
    """Build a shuffled, batched ``tf.data.Dataset`` of next-character pairs.

    Every window of ``seq_len + 1`` consecutive ids in ``encoded`` yields one
    example: the first ``seq_len`` ids are the input and the last ``seq_len``
    ids (the same window shifted right by one) are the per-time-step targets.

    Args:
        encoded: 1-D sequence (list or array) of integer token ids.
        seq_len: Number of time steps per example; must be at least 1.
        batch_size: Number of examples per batch.
        shuffle_buffer: Buffer size handed to ``Dataset.shuffle``.

    Returns:
        A dataset yielding ``(inputs, targets)`` batches; each element of a
        batch has ``seq_len`` time steps.

    Raises:
        ValueError: If ``seq_len`` is less than 1, or if ``encoded`` holds
            fewer than ``seq_len + 1`` ids (no complete window exists).
    """
    encoded = np.asarray(encoded)
    if seq_len < 1:
        raise ValueError(f"seq_len must be >= 1, got {seq_len}")

    num_windows = len(encoded) - seq_len
    if num_windows < 1:
        # Fail early with an actionable message instead of letting an empty
        # stack operation raise an unrelated-looking error.
        raise ValueError(
            f"need at least seq_len + 1 = {seq_len + 1} ids to build one "
            f"example, got {len(encoded)}"
        )

    # Row i of `windows` is encoded[i : i + seq_len + 1]. Broadcasting a
    # column of start positions against a row of offsets gathers every
    # window in a single indexing operation.
    starts = np.arange(num_windows)[:, None]
    offsets = np.arange(seq_len + 1)[None, :]
    windows = encoded[starts + offsets]  # (N, T + 1)

    xs = windows[:, :-1]  # (N, T) inputs
    ys = windows[:, 1:]   # (N, T) targets: the next id at each time step
    ds = tf.data.Dataset.from_tensor_slices((xs, ys))
    return ds.shuffle(shuffle_buffer).batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Training pipeline over the encoded corpus, 16 examples per batch.
ds = make_dataset(encoded, seq_len, batch_size=16)

# -----------------------------
# Model: Embedding -> SimpleRNN -> Dense(vocab)
# -----------------------------
hidden_size = 32  # units in the recurrent layer
embed_dim = 16    # width of each character embedding

# Assemble the stack one layer at a time. The input is a length-`seq_len`
# sequence of int32 character ids.
model = keras.Sequential()
model.add(layers.Input(shape=(seq_len,), dtype="int32"))
model.add(layers.Embedding(input_dim=V, output_dim=embed_dim))
# return_sequences=True keeps the hidden state at every time step, so the
# model emits a prediction per position rather than only at the end.
model.add(layers.SimpleRNN(hidden_size, return_sequences=True, activation="tanh"))
# Unnormalized scores (logits) over the vocabulary at each time step.
model.add(layers.Dense(V))

# The Dense layer applies no softmax, so the loss must be told it receives
# logits; targets are integer character ids.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

model.compile(
    loss=loss_fn,
    optimizer=keras.optimizers.Adam(learning_rate=1e-2),
)

model.summary()
model.fit(ds, epochs=50, verbose=2)

# -----------------------------
# Sampling / generation
# -----------------------------
@tf.function
def step_logits(model, x):
    """Return the model's logits for the final time step of ``x``.

    Args:
        model: Callable model producing per-time-step logits, (1, T, V)
            for an input of shape (1, T).
        x: int32 tensor of character ids, shape (1, T).

    Returns:
        Tensor of shape (1, V): the logits at the last position only.
    """
    per_step = model(x, training=False)  # (1, T, V)
    last_step = per_step[:, -1, :]       # (1, V)
    return last_step

def sample(model, start_char="h", n=60, temperature=1.0):
    """Generate text one character at a time, starting from ``start_char``.

    The context window is initialised with ``start_char`` repeated
    ``seq_len`` times so the model always receives a full-length input.
    After each step the sampled character is appended to the window and the
    oldest one is dropped.

    Args:
        model: Trained model accepted by ``step_logits``.
        start_char: First character of the output; must be in the vocabulary.
        n: Total number of characters to return, including ``start_char``.
            Values of 0 or less return an empty string.
        temperature: Divisor applied to the logits before the softmax.
            Smaller values make sampling more conservative; ``0`` selects
            the highest-scoring character at every step (greedy decoding).

    Returns:
        The generated string, of length ``n`` (or ``""`` when ``n <= 0``).

    Raises:
        ValueError: If ``temperature`` is negative.
        KeyError: If ``start_char`` is not in the vocabulary.
    """
    if temperature < 0:
        raise ValueError(f"temperature must be >= 0, got {temperature}")
    if n <= 0:
        return ""

    # Dividing logits by zero would yield NaN probabilities, so a zero
    # temperature is handled as its limiting case: always take the argmax.
    greedy = temperature == 0

    start_idx = stoi[start_char]
    # start with repeated start char so input is length seq_len
    context = [start_idx] * seq_len
    out = [start_idx]

    for _ in range(n - 1):
        x = tf.constant([context], dtype=tf.int32)  # (1, T)
        logits = step_logits(model, x)              # (1, V)
        if greedy:
            next_idx = int(tf.argmax(logits, axis=-1).numpy()[0])
        else:
            probs = tf.nn.softmax(logits / temperature).numpy().ravel()
            next_idx = int(np.random.choice(V, p=probs))

        out.append(next_idx)
        # Slide the window: drop the oldest id, append the newest.
        context = context[1:] + [next_idx]

    return "".join(itos[i] for i in out)

# Generate 80 characters seeded with "h" at temperature 0.9 and display them.
generated_text = sample(model, start_char="h", n=80, temperature=0.9)
print(generated_text)