In [None]:
import numpy as np
import walnut
from walnut import Tensor

In [None]:
# Load the entire corpus into memory as one string.
with open("data/tinyshakespeare.txt", "r") as f:
    data = f.read()

In [None]:
# Vocabulary: every distinct character of the corpus, in sorted order.
characters = sorted(set(data))
vocab_size = len(characters)

# Lookup tables in both directions: index -> character and character -> index.
i_to_s = dict(enumerate(characters))
s_to_i = {char: idx for idx, char in i_to_s.items()}

# Last expression of the cell: shows the whole vocabulary as a single string.
"".join(characters)

In [None]:
def encode(word):
    """Map every character of ``word`` to its integer index in ``s_to_i``.

    Parameters
    ----------
    word : str
        Characters to encode. Each one must be a key of ``s_to_i``.

    Returns
    -------
    numpy.ndarray
        1-D integer array holding one vocabulary index per character.

    Raises
    ------
    KeyError
        If ``word`` contains a character that is not in ``s_to_i``.
    """
    # Pin the dtype: without it an empty ``word`` produces a float64 array,
    # which cannot be used to index the one-hot lookup table.
    return np.array([s_to_i[s] for s in word], dtype=int)

def decode(vector):
    """Turn a sequence of vocabulary indices back into a string.

    Every element of ``vector`` is looked up in ``i_to_s`` and the resulting
    characters are concatenated in their original order.
    """
    chars = (i_to_s[idx] for idx in vector)
    return "".join(chars)

In [None]:
# Build a training set of (context window, next character) pairs, both
# one-hot encoded, by sampling random positions of the corpus.
num_samples = 10000
block_size = 8

X_array = np.zeros((num_samples, block_size, vocab_size))
Y_array = np.zeros((num_samples, vocab_size))

# Window start positions. randint's upper bound is exclusive, so the label
# position index + block_size is always a valid index into data.
# NOTE: no random seed is set in the visible cells, so every run draws a
# different sample.
rand_indices = np.random.randint(0, len(data) - block_size, (num_samples,))

# Identity matrix built once: row k is the one-hot vector of index k.
# (Previously it was rebuilt twice for every sample.)
one_hot = np.eye(vocab_size)

for i, index in enumerate(rand_indices):
    # get characters
    context = data[index : index + block_size]
    label = data[index + block_size]

    # encode characters to get indices
    context_enc = encode(context)
    label_enc = encode(label)

    # one-hot-encode indices; the label row has shape (1, vocab_size) and is
    # broadcast into the (vocab_size,) slot of Y_array
    X_array[i] = one_hot[context_enc]
    Y_array[i] = one_hot[label_enc]

X = Tensor(X_array, dtype="int")
Y = Tensor(Y_array, dtype="int")

print(f"{X.shape=}")
print(f"{Y.shape=}")

In [None]:
import walnut.nn as nn

# Three-layer sequential model. The first layer receives one one-hot encoded
# context window of shape (block_size, vocab_size); the last layer has one
# softmax output per vocabulary entry.
# NOTE(review): the meaning of Block's first argument (10) is not visible
# here -- confirm against the walnut API.
model = nn.Sequential(
    layers=[
        nn.layers.Block(10, input_shape=(block_size, vocab_size)),
        nn.layers.Linear(100, act="tanh", norm="layer"),
        nn.layers.Linear(vocab_size, act="softmax"),
    ]
)

In [None]:
# Attach the optimizer, the loss function and the metric to the model.
model.compile(
    nn.optimizers.Adam(),
    nn.losses.Crossentropy(),
    nn.metrics.Accuracy(),
)

In [None]:
# Bare expression as the cell's last line: the notebook displays the model's repr.
model

In [None]:
# Best score recorded so far: 0.023509, reached with 10000 epochs
# (~1h training time).
train_hist, val_hist = model.train(
    X,
    Y,
    epochs=5000,
    verbose="reduced",
)

In [None]:
# Histories returned by training, keyed by name, drawn in a single figure.
traces = dict(train_loss=train_hist, val_loss=val_hist)
nn.analysis.plot_curve(
    traces=traces,
    figsize=(20, 4),
    title="loss history",
    x_label="epoch",
    y_label="loss",
)

In [None]:
# Generate text one character at a time, feeding every chosen character back
# into a sliding context window of block_size indices.

# Start from a window filled with " ". The index is looked up in the
# vocabulary instead of being hard-coded, so it stays correct if the corpus
# (and therefore the sorted vocabulary) changes.
context_list = [s_to_i[" "]] * block_size

# Identity matrix built once: row k is the one-hot vector of index k.
one_hot = np.eye(vocab_size)

for _ in range(1000):
    # one-hot encode the current window and add a leading axis of size 1
    context_enc = one_hot[context_list]
    context = walnut.expand_dims(Tensor(context_enc, dtype="int"), 0)

    pred = model(context) # get model prediction for a character
    # choose following character
    # NOTE(review): presumably samples an index from the predicted
    # distribution -- confirm against the walnut API.
    index = walnut.choice(pred)
    print(i_to_s[index], end="")

    # slide the window: drop the oldest index, append the new one
    context_list = np.append(context_list[1:], index)