In [19]:
import numpy as np

In [20]:
# Sample data: the training text and the character vocabulary derived from it.
text = 'hello'

# Index characters in order of first appearance (dict.fromkeys de-duplicates
# while keeping insertion order), so both lookup tables always agree with
# `text` and with each other instead of being maintained by hand.
char_to_index = {ch: i for i, ch in enumerate(dict.fromkeys(text))}
index_to_char = {i: ch for ch, i in char_to_index.items()}

In [21]:
# Inputs and outputs are both one-hot / per-character scores over the same
# vocabulary, so they share one dimension.
vocab_size = len(char_to_index)

input_size = vocab_size
hidden_size = 8  # width of the recurrent hidden state
output_size = vocab_size

In [22]:
# Seed the global NumPy RNG so that a fresh "Restart & Run All" starts from the
# same initial weights every time.
np.random.seed(42)

# Small random weights: U maps input -> hidden, W maps hidden -> hidden,
# V maps hidden -> output (see how forward_pass multiplies them).
U = np.random.randn(hidden_size, input_size) * 0.1
W = np.random.randn(hidden_size, hidden_size) * 0.1
V = np.random.randn(output_size, hidden_size) * 0.1

b = np.zeros((hidden_size, 1))  # hidden bias (added inside the tanh)
c = np.zeros((output_size, 1))  # output bias (added to the output scores)

In [23]:
def forward_pass(input_index, h_prev):
    """Run one recurrent step for a single one-hot encoded input symbol.

    Reads the module-level parameters ``U``, ``W``, ``V``, ``b``, ``c`` and
    ``input_size``.

    Parameters
    ----------
    input_index : int
        Position set to 1 in the one-hot input column of length ``input_size``.
    h_prev : ndarray
        Hidden state from the previous step; it is multiplied by ``W``
        (callers in this notebook pass a ``(hidden_size, 1)`` column).

    Returns
    -------
    tuple
        ``(y_hat, h, x)``: the softmax of the output scores, the new hidden
        state, and the one-hot input column that was built.
    """
    x = np.zeros((input_size, 1))
    x[input_index] = 1

    h = np.tanh(np.dot(U, x) + np.dot(W, h_prev) + b)
    o = np.dot(V, h) + c

    # Softmax is invariant to shifting all scores by a constant, so subtract
    # the maximum first: the largest exponent becomes exp(0) = 1 and large
    # scores can no longer overflow to inf (which would yield NaN after the
    # division).
    exp_o = np.exp(o - np.max(o))
    y_hat = exp_o / np.sum(exp_o)

    return y_hat, h, x


In [24]:
def cross_entropy_loss(y_hat, target_index, eps=1e-12):
    """Negative log-probability assigned to the target class.

    Parameters
    ----------
    y_hat : ndarray
        Column of predicted probabilities, indexed as ``y_hat[target_index, 0]``.
    target_index : int
        Row of the correct class.
    eps : float, optional
        Lower bound applied to the target probability before taking the log,
        so a probability that underflowed to exactly 0 gives a large finite
        loss instead of ``inf``. Probabilities at or above ``eps`` are
        unaffected.

    Returns
    -------
    float
        ``-log(max(y_hat[target_index, 0], eps))``.
    """
    p_target = y_hat[target_index, 0]
    return -np.log(max(p_target, eps))

In [25]:
def backward_pass(y_hat, target_index, h, h_prev, x):
    """Gradients of a single time step's loss with respect to the parameters.

    Reads the module-level output weights ``V``. ``h_prev`` is used only as a
    constant input to this step: no gradient with respect to it is computed or
    returned, so nothing is propagated to earlier time steps.

    Parameters
    ----------
    y_hat : ndarray
        Predicted probability column for this step (left unmodified; a copy is
        edited).
    target_index : int
        Row of the correct class.
    h : ndarray
        Hidden state produced at this step (the tanh output).
    h_prev : ndarray
        Hidden state that was fed into this step.
    x : ndarray
        Input column that was fed into this step.

    Returns
    -------
    tuple
        ``(dU, dW, dV, db, dc)``.
    """
    # Output-score error: predicted probabilities minus 1 at the target row.
    delta_out = np.copy(y_hat)
    delta_out[target_index] -= 1

    # Push the error back through V, then through tanh (derivative 1 - h^2).
    delta_hidden = V.T.dot(delta_out)
    delta_pre = (1 - h * h) * delta_hidden

    grad_U = delta_pre.dot(x.T)
    grad_W = delta_pre.dot(h_prev.T)
    grad_V = delta_out.dot(h.T)

    # The bias gradients are the error columns themselves.
    return grad_U, grad_W, grad_V, delta_pre, delta_out

In [26]:
# Hyperparameters
learning_rate = 0.1
n_epochs = 100

# Initialize hidden state
h_prev = np.zeros((hidden_size, 1))

for epoch in range(n_epochs):
    # Each epoch starts with zero loss and one zeroed gradient accumulator per
    # trainable array.
    loss = 0
    dU, dW, dV, db, dc = (np.zeros_like(p) for p in (U, W, V, b, c))

    h_prev = np.zeros((hidden_size, 1))  # reset per epoch

    # Walk over consecutive (current, next) character pairs of the text.
    for t, (cur_char, next_char) in enumerate(zip(text, text[1:])):
        input_index = char_to_index[cur_char]
        target_index = char_to_index[next_char]

        # Keep the incoming state: backward_pass needs it next to the new one.
        h_prev_old = h_prev
        y_hat, h_prev, x = forward_pass(input_index, h_prev_old)

        loss += cross_entropy_loss(y_hat, target_index)

        step_grads = backward_pass(y_hat, target_index, h_prev, h_prev_old, x)
        # In-place += adds this step's gradients into the epoch totals.
        for total, step in zip((dU, dW, dV, db, dc), step_grads):
            total += step

    # One gradient-descent update per epoch, applied in place to each array.
    for param, grad in zip((U, W, V, b, c), (dU, dW, dV, db, dc)):
        param -= learning_rate * grad

    print(f"Epoch {epoch+1}, Loss: {loss:.4f}")


Epoch 1, Loss: 5.5774
Epoch 2, Loss: 5.3423
Epoch 3, Loss: 5.1518
Epoch 4, Loss: 4.9939
Epoch 5, Loss: 4.8597
Epoch 6, Loss: 4.7426
Epoch 7, Loss: 4.6373
Epoch 8, Loss: 4.5395
Epoch 9, Loss: 4.4452
Epoch 10, Loss: 4.3512
Epoch 11, Loss: 4.2547
Epoch 12, Loss: 4.1530
Epoch 13, Loss: 4.0440
Epoch 14, Loss: 3.9257
Epoch 15, Loss: 3.7966
Epoch 16, Loss: 3.6555
Epoch 17, Loss: 3.5016
Epoch 18, Loss: 3.3348
Epoch 19, Loss: 3.1553
Epoch 20, Loss: 2.9639
Epoch 21, Loss: 2.7624
Epoch 22, Loss: 2.5532
Epoch 23, Loss: 2.3399
Epoch 24, Loss: 2.1269
Epoch 25, Loss: 1.9189
Epoch 26, Loss: 1.7205
Epoch 27, Loss: 1.5356
Epoch 28, Loss: 1.3669
Epoch 29, Loss: 1.2158
Epoch 30, Loss: 1.0825
Epoch 31, Loss: 0.9662
Epoch 32, Loss: 0.8655
Epoch 33, Loss: 0.7786
Epoch 34, Loss: 0.7038
Epoch 35, Loss: 0.6394
Epoch 36, Loss: 0.5837
Epoch 37, Loss: 0.5353
Epoch 38, Loss: 0.4932
Epoch 39, Loss: 0.4564
Epoch 40, Loss: 0.4240
Epoch 41, Loss: 0.3954
Epoch 42, Loss: 0.3700
Epoch 43, Loss: 0.3473
Epoch 44, Loss: 0.32

In [27]:
# Greedy decoding: start from 'h' with a zero hidden state and repeatedly feed
# the most probable next character back in as the next input.
h_prev = np.zeros((hidden_size, 1))
predicted_text = 'h'
current_char = 'h'

# `_` is deliberately not assigned here: at notebook top level it is IPython's
# "last output" variable, and the original loop also reused it for two
# different throwaway values (the loop counter and the one-hot input).
for step in range(len(text) - 1):
    input_index = char_to_index[current_char]
    # forward_pass also returns the one-hot input; only the first two are needed.
    y_hat, h_prev = forward_pass(input_index, h_prev)[:2]
    predicted_index = int(np.argmax(y_hat))  # plain int key for the lookup
    current_char = index_to_char[predicted_index]
    predicted_text += current_char

print(predicted_text)


hello
