In [None]:
import compyute as cp

In [None]:
# Run on the GPU when compyute reports one as available, otherwise on the CPU.
if cp.engine.gpu_available():
    device = "cuda"
else:
    device = "cpu"
device

# Example 5.3

### Language Model: LSTM

A dense neural network cannot capture the sequential, order-dependent nature of text. An alternative is the LSTM (long short-term memory) network, which can retain information about earlier tokens while processing a sequence.

### Step 1: Prepare data
Again, the [tinyshakespeare dataset](https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt) is used.

In [None]:
# Read the whole corpus into memory as a single string. The encoding is pinned
# so the text decodes the same way on every platform instead of following the
# locale-dependent default.
with open("../data/tinyshakespeare.txt", "r", encoding="utf-8") as f:
    data = f.read()

### Step 2: Tokenization

In [None]:
from compyute.preprocessing.text import BPETokenizer, save_tokenizer, load_tokenizer

# One-time setup: uncomment the three commented-out statements below to fit a
# BPE tokenizer on the corpus and save it to "nn_tokenizer.cp". With them
# commented out, this cell only loads the previously saved tokenizer file.
# tokenizer = BPETokenizer()
# tokenizer.fit(data, vocab_size=1000)

# save_tokenizer(tokenizer, "nn_tokenizer.cp")
tokenizer = load_tokenizer("nn_tokenizer.cp")

# Display the vocabulary size of the loaded tokenizer.
tokenizer.vocab_size

In [None]:
# Encode the full corpus with the tokenizer and show how many tokens it yields.
data_enc = tokenizer.encode(data)
len(data_enc)

### Step 3: Build dataset

In [None]:
# Number of consecutive tokens in each input/target window built from the corpus.
block_size = 16

In [None]:
# Every window of `block_size` consecutive tokens is one input sample; its
# target is the same window shifted one token to the right, so each position
# is paired with the token that follows it.
# Both tensors are built the same way (cp.stack over the slices) and converted
# to int once, instead of converting the full tensors again for every split.
n_windows = len(data_enc) - block_size
X = cp.stack([data_enc[i : i + block_size] for i in range(n_windows)]).int()
y = cp.stack([data_enc[i + 1 : i + 1 + block_size] for i in range(n_windows)]).int()

# The first 90% of the windows are used for training, the rest for validation.
n = int(len(X) * 0.9)

X_train, X_val = X[:n], X[n:]
y_train, y_val = y[:n], y[n:]

print(f"{X_train.shape=}")
print(f"{y_train.shape=}")
print(f"{X_val.shape=}")
print(f"{y_val.shape=}")

### Step 4: Build the neural network structure

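This time the embedded tokens are passed through an `LSTM` layer, followed by a dense (`Linear`) layer that maps to the vocabulary size. Layer normalization is applied before the LSTM and before the dense layer.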

In [None]:
import compyute.nn as nn

# Second argument of the embedding layer and first argument of the LSTM.
embed_dims = 32
# Second argument of the LSTM (presumably its hidden size — confirm against the
# compyute docs). Named once so the LSTM, the following Layernorm and the
# Linear layer cannot drift apart.
hidden_dims = 256

model = nn.Sequential(
    nn.Embedding(tokenizer.vocab_size, embed_dims),

    nn.Layernorm((block_size, embed_dims)),
    nn.LSTM(embed_dims, hidden_dims),

    nn.Layernorm((block_size, hidden_dims)),
    nn.Linear(hidden_dims, tokenizer.vocab_size)
)

# Move the model to the device selected at the top of the notebook.
model.to_device(device)

In [None]:
# Show a summary of the model for one int32 input sequence of `block_size` tokens.
model.summary(input_shape=(block_size,), input_dtype="int32")

### Step 5: Train the model

In [None]:
from compyute.nn.trainer import optimizers, Trainer
from compyute.nn.trainer.callbacks import AdaptiveLR, EarlyStopping, History, ProgressBar

# Kept in its own variable because the plotting cell indexes it after training.
history = History()

optimizer = optimizers.Adam(lr=3e-4)

# Callbacks handed to the trainer; early stopping and the adaptive learning
# rate both target "val_loss".
callbacks = [
    history,
    EarlyStopping(target="val_loss", patience=5),
    AdaptiveLR(target="val_loss", epoch_range=3),
    ProgressBar(),
]

trainer = Trainer(
    model=model,
    optimizer=optimizer,
    loss="cross_entropy",
    metric="accuracy",
    callbacks=callbacks,
)

In [None]:
# Training settings passed to the trainer below.
epochs = 25
batch_size = 2048

# Train on the training split; the validation split is passed along as val_data.
trainer.train(X_train, y_train, epochs=epochs, batch_size=batch_size, val_data=(X_val, y_val))

In [None]:
import matplotlib.pyplot as plt

def plot_history(t1, t2):
    """Draw two traces recorded in the notebook-level ``history`` in one figure.

    Parameters
    ----------
    t1 :
        Key of the first trace to look up in ``history``.
    t2 :
        Key of the second trace to look up in ``history``.

    The x axis simply numbers the recorded values of each trace, starting at 1.
    """
    # Look both traces up before creating the figure.
    traces = [(key, history[key]) for key in (t1, t2)]

    fig, ax = plt.subplots(figsize=(10, 3))
    for key, values in traces:
        ax.plot(range(1, len(values) + 1), values, linewidth=1, label=key)
    ax.legend()
    ax.grid(color="gray", linestyle="--", linewidth=0.5)

plot_history("loss", "accuracy_score")

### Step 6: Generate text

In [None]:
prompt = "KING HENRY"
n_new_tokens = 250  # number of tokens to sample after the prompt
print(prompt, end="")

# Encode the prompt and keep at most its last `block_size` tokens, so the
# padding width computed below can never become negative for a long prompt.
context = tokenizer.encode(prompt)[-block_size:]
# Pad the token sequence up to `block_size` and add a leading batch axis.
context = cp.reshape(cp.pad(context, padding=(block_size - len(context), 0)), shape=(1, -1))
# NOTE(review): the return value is discarded, so this only takes effect if
# Tensor.to_device works in place — confirm against the installed compyute version.
context.to_device(model.device)

for _step in range(n_new_tokens):
    # softmax returns a pair here; only the first element (the probabilities) is used.
    probs, _unused = cp.nn.functional.softmax(model(context))
    # Sample the next token id from the distribution at the last position.
    index = cp.random.multinomial(x=tokenizer.vocab_size, p=probs[0, -1], shape=(1,))
    print(tokenizer.decode(index), end="")
    # Slide the window: append the sampled token, then drop the oldest one.
    context = cp.append(context, values=cp.reshape(index, shape=(1, 1)), axis=1).int()
    context = context[:, 1:]