<a href="https://colab.research.google.com/github/elh10604/NN-by-scratch/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

In [None]:
def softmax(x):
    """Numerically stable softmax normalised along axis 0.

    Each column of ``x`` (or the whole vector, for 1-D input) is turned into
    a probability distribution that sums to 1.

    Parameters
    ----------
    x : array_like
        Scores. A 1-D vector, a column vector of shape (n, 1), or a 2-D array
        whose columns are independent score vectors.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x`` holding the softmax probabilities.
    """
    x = np.asarray(x)
    # Shift by the maximum *along the normalisation axis*. Using the global
    # maximum would let a column that sits far below it underflow to all
    # zeros, producing 0/0 = NaN for that column.
    shifted = x - np.max(x, axis=0, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=0, keepdims=True)

def tanh(x):
    """Element-wise hyperbolic tangent activation (delegates to ``np.tanh``)."""
    activated = np.tanh(x)
    return activated

def tanh_derivative(x):
    """Derivative of tanh evaluated at ``x``.

    ``x`` is the *pre-activation* input: the function applies ``np.tanh``
    itself and returns ``1 - tanh(x)**2``.
    """
    activation = np.tanh(x)
    squared = activation ** 2
    return 1 - squared


In [None]:
def initialize_parameters(input_dim, hidden_dim, output_dim, seed=1, scale=0.01):
    """Create the three weight matrices of the RNN with small random values.

    Parameters
    ----------
    input_dim : int
        Size of each input vector.
    hidden_dim : int
        Size of the hidden state.
    output_dim : int
        Number of output classes.
    seed : int or None, optional
        Value passed to ``np.random.seed`` before drawing, so repeated calls
        give identical weights. Defaults to 1 (the previously hard-coded
        value). Pass ``None`` to leave NumPy's global random state untouched.
    scale : float, optional
        Factor applied to the standard-normal draws. Defaults to 0.01 (the
        previously hard-coded value).

    Returns
    -------
    tuple of numpy.ndarray
        ``(Wx, Wh, Wy)`` with shapes ``(hidden_dim, input_dim)``,
        ``(hidden_dim, hidden_dim)`` and ``(output_dim, hidden_dim)``.
    """
    # NOTE: this reseeds NumPy's *global* generator, exactly as the original
    # code did, so it also affects random draws made after this call.
    if seed is not None:
        np.random.seed(seed)
    # Draw order (Wx, Wh, Wy) is kept so the default output is unchanged.
    Wx = np.random.randn(hidden_dim, input_dim) * scale
    Wh = np.random.randn(hidden_dim, hidden_dim) * scale
    Wy = np.random.randn(output_dim, hidden_dim) * scale
    return Wx, Wh, Wy


In [None]:
# Forward propagation
def forward(x_seq, Wx, Wh, Wy):
    """Run the recurrent network over a sequence, one step per input.

    Starting from an all-zero hidden state, each step computes
    ``h_t = tanh(Wx @ x_t + Wh @ h_{t-1})`` and ``y_t = softmax(Wy @ h_t)``.

    Parameters
    ----------
    x_seq : iterable of numpy.ndarray
        Input vectors; each one is reshaped to a column before use.
    Wx, Wh, Wy : numpy.ndarray
        Input-to-hidden, hidden-to-hidden and hidden-to-output weights.

    Returns
    -------
    (dict, dict)
        ``hs`` and ``ys``: hidden states and output distributions, keyed by
        the integer time step.
    """
    hs, ys = {}, {}
    state = np.zeros((Wh.shape[0], 1))
    for step, x_t in enumerate(x_seq):
        column = x_t.reshape(-1, 1)
        pre_activation = np.dot(Wx, column) + np.dot(Wh, state)
        state = tanh(pre_activation)
        hs[step] = state
        ys[step] = softmax(np.dot(Wy, state))
    return hs, ys

In [None]:
def compute_loss(ys, targets):
    """Mean negative log-likelihood of the target classes.

    Parameters
    ----------
    ys : mapping or sequence
        ``ys[t]`` is the column vector of predicted probabilities at step
        ``t``; the probability of class ``k`` is read from ``ys[t][k, 0]``.
    targets : sequence of int
        Target class index for each time step.

    Returns
    -------
    float
        Sum of ``-log(p_target)`` over all steps, divided by the number of
        targets.
    """
    n_steps = len(targets)
    running = 0
    for step, label in enumerate(targets):
        prob = ys[step][label, 0]
        running = running - np.log(prob)
    return running / n_steps

In [None]:
def backward(x_seq, targets, hs, ys, Wx, Wh, Wy, learning_rate=0.01):
    """Backpropagation through time followed by one gradient-descent step.

    Gradients of the summed softmax cross-entropy loss are accumulated over
    the whole sequence (walking the time steps in reverse), and only then are
    the three weight matrices updated.

    Parameters
    ----------
    x_seq : sequence of numpy.ndarray
        Input vectors; each one is reshaped to a column before use.
    targets : sequence of int
        Target class index for each time step.
    hs, ys : mapping or sequence
        ``hs[t]`` is the hidden state (already passed through tanh) and
        ``ys[t]`` the softmax output at step ``t``.
    Wx, Wh, Wy : numpy.ndarray
        Input-to-hidden, hidden-to-hidden and hidden-to-output weights.
        They are updated IN PLACE.
    learning_rate : float, optional
        Step size of the update. Defaults to 0.01.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Wx, Wh, Wy)`` — the same array objects that were passed in, after
        the update.
    """
    dWx = np.zeros_like(Wx)
    dWh = np.zeros_like(Wh)
    dWy = np.zeros_like(Wy)
    # Gradient flowing into h_t from step t+1; nothing follows the last step.
    dh_next = np.zeros((Wh.shape[0], 1))

    for t in reversed(range(len(x_seq))):
        x = x_seq[t].reshape(-1, 1)
        h = hs[t]

        # Softmax + cross-entropy gradient w.r.t. the logits: y - one_hot.
        dy = ys[t].copy()
        dy[targets[t]] -= 1
        dWy += np.dot(dy, h.T)

        # h_t receives gradient from the output at t and from h_{t+1}.
        dh = np.dot(Wy.T, dy) + dh_next
        # h is already tanh(a), so d tanh(a)/da is 1 - h**2. Applying tanh
        # to h a second time would give the wrong slope.
        da = dh * (1 - h ** 2)

        dWx += np.dot(da, x.T)
        # At t == 0 the previous hidden state is the zero vector, so it
        # contributes nothing to dWh.
        if t > 0:
            dWh += np.dot(da, hs[t - 1].T)

        dh_next = np.dot(Wh.T, da)

    # Apply every update exactly once, after the full gradient is known.
    Wx -= learning_rate * dWx
    Wh -= learning_rate * dWh
    Wy -= learning_rate * dWy

    return Wx, Wh, Wy

In [None]:
def train(x_seq, targets, input_dim, hidden_dim, output_dim, epochs=1000):
    """Train the RNN on a single sequence and return the learned weights.

    Each epoch runs a forward pass, evaluates the loss, and lets ``backward``
    update the weights. The loss is printed every 100 epochs, starting with
    epoch 0.

    Parameters
    ----------
    x_seq : sequence of numpy.ndarray
        Input vectors of the training sequence.
    targets : sequence of int
        Target class index for each time step.
    input_dim, hidden_dim, output_dim : int
        Layer sizes forwarded to ``initialize_parameters``.
    epochs : int, optional
        Number of passes over the sequence. Defaults to 1000.

    Returns
    -------
    tuple of numpy.ndarray
        ``(Wx, Wh, Wy)`` after training.
    """
    weights = initialize_parameters(input_dim, hidden_dim, output_dim)

    for epoch in range(epochs):
        hidden_states, outputs = forward(x_seq, *weights)
        # Loss is measured before this epoch's weight update.
        loss = compute_loss(outputs, targets)
        weights = backward(x_seq, targets, hidden_states, outputs, *weights)

        if not epoch % 100:
            print(f"Epoch {epoch}: Loss = {loss:.4f}")

    Wx, Wh, Wy = weights
    return Wx, Wh, Wy


In [None]:
if __name__ == "__main__":
    # Toy demo: a three-step sequence over a four-symbol one-hot vocabulary.
    input_dim = 4
    hidden_dim = 3
    output_dim = 4

    # Inputs: one-hot vectors for 'd', 'o', 'g' (4-dimensional).
    x_seq = [
        np.array(bits)
        for bits in ([1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0])
    ]
    # Class index expected at each step (the next character in the sequence).
    targets = [1, 2, 3]

    train(x_seq, targets, input_dim, hidden_dim, output_dim)


Epoch 0: Loss = 1.3863
Epoch 100: Loss = 1.3842
Epoch 200: Loss = 1.3536
Epoch 300: Loss = 1.0474
Epoch 400: Loss = 0.5458
Epoch 500: Loss = 0.2979
Epoch 600: Loss = 0.1881
Epoch 700: Loss = 0.1326
Epoch 800: Loss = 0.1008
Epoch 900: Loss = 0.0805
