In [None]:
import numpy as np

# Toy corpus: the first three words are the context, the last is the target.
words = ["Liverpool", "is", "wonderful", "club"]

# De-duplicate while keeping first-seen order. Going through set() would make
# the word -> index assignment depend on string hash randomization, so it could
# change from one interpreter run to the next; dict.fromkeys keeps insertion
# order and is therefore reproducible.
vocab = list(dict.fromkeys(words))

# Bidirectional lookup tables between words and their integer ids.
word_to_idx = {w: i for i, w in enumerate(vocab)}
idx_to_word = dict(enumerate(vocab))
vocab_size = len(vocab)

def one_hot(idx, size):
    """Return a float vector of length ``size`` that is 1 at ``idx`` and 0 elsewhere."""
    encoded = np.zeros(size)
    encoded[idx] = 1
    return encoded

# Single training example: the three context words, one-hot encoded in
# sequence order, and the vocabulary index of the word that should follow.
X = [
    one_hot(word_to_idx[token], vocab_size)
    for token in ("Liverpool", "is", "wonderful")
]
Y = word_to_idx["club"]


# Layer sizes: inputs and outputs are both vocabulary-sized vectors.
hidden_size = 8
input_size = output_size = vocab_size

# Weights start as small Gaussian noise (scaled by 0.01); biases start at zero.
# The three randn draws are kept in this order so the random stream is consumed
# exactly as before.
Wxh = 0.01 * np.random.randn(hidden_size, input_size)    # input  -> hidden
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)   # hidden -> hidden
Why = 0.01 * np.random.randn(output_size, hidden_size)   # hidden -> output
bh = np.zeros(hidden_size)
by = np.zeros(output_size)


def softmax(x):
    """Normalise the scores in ``x`` into probabilities that sum to 1.

    The maximum score is subtracted before exponentiating so that large
    inputs do not overflow; this does not change the result.
    """
    shifted = x - np.max(x)
    weights = np.exp(shifted)
    return weights / weights.sum()


# Train on the single (context -> target) example with plain gradient descent.
# All five parameter arrays are updated each epoch: the output layer directly
# from the softmax/cross-entropy gradient, and the recurrent layer via
# backpropagation through time (BPTT).
learning_rate = 0.1
for epoch in range(1000):
    # Forward: keep every hidden state so the backward pass can revisit them.
    # hs[t] is the state before consuming X[t]; hs[t + 1] is the state after.
    hs = [np.zeros(hidden_size)]
    for x in X:
        hs.append(np.tanh(Wxh @ x + Whh @ hs[-1] + bh))
    h = hs[-1]

    y = Why @ h + by
    probs = softmax(y)

    # Cross-entropy loss for the target word.
    loss = -np.log(probs[Y])

    # Backward: gradient of the loss w.r.t. the logits is (probs - one_hot(Y)).
    dy = probs.copy()
    dy[Y] -= 1

    dWhy = np.outer(dy, h)
    dby = dy

    # Gradient flowing into the last hidden state. It must be computed with
    # the current Why, i.e. before Why is updated below.
    dh = Why.T @ dy

    dWxh = np.zeros_like(Wxh)
    dWhh = np.zeros_like(Whh)
    dbh = np.zeros_like(bh)
    for t in reversed(range(len(X))):
        # d tanh(z) / dz = 1 - tanh(z)^2, and hs[t + 1] is tanh(z) at step t.
        dz = (1.0 - hs[t + 1] ** 2) * dh
        dWxh += np.outer(dz, X[t])
        dWhh += np.outer(dz, hs[t])
        dbh += dz
        # Pass the gradient on to the previous time step's hidden state.
        dh = Whh.T @ dz

    # Parameter update (in place, so the module-level arrays are modified).
    Wxh -= learning_rate * dWxh
    Whh -= learning_rate * dWhh
    bh -= learning_rate * dbh
    Why -= learning_rate * dWhy
    by -= learning_rate * dby

    if (epoch + 1) % 100 == 0:
        pred = idx_to_word[np.argmax(probs)]
        print(f"Epoch {epoch+1} | Loss: {loss:.4f} | Prediction: {pred}")


Epoch 100 | Loss: 0.0930 | Prediction: club
Epoch 200 | Loss: 0.0428 | Prediction: club
Epoch 300 | Loss: 0.0276 | Prediction: club
Epoch 400 | Loss: 0.0203 | Prediction: club
Epoch 500 | Loss: 0.0160 | Prediction: club
Epoch 600 | Loss: 0.0132 | Prediction: club
Epoch 700 | Loss: 0.0113 | Prediction: club
Epoch 800 | Loss: 0.0098 | Prediction: club
Epoch 900 | Loss: 0.0087 | Prediction: club
Epoch 1000 | Loss: 0.0078 | Prediction: club
