In [14]:
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)a

# Define the activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# Initialize hyperparameters
input_size = 2
hidden_size = 3
output_size = 1
learning_rate = 0.01
epochs = 1000

# Initialize weights and biases
Wxh = np.random.randn(input_size, hidden_size) * 0.01  # input to hidden
Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # hidden to hidden
Why = np.random.randn(hidden_size, output_size) * 0.01  # hidden to output
bh = np.zeros((1, hidden_size))  # hidden bias
by = np.zeros((1, output_size))  # output bias

# Forward pass
def forward_pass(X, h_prev):
    hs, ys = [], []
    h = h_prev
    for t in range(len(X)):
        h = sigmoid(np.dot(X[t], Wxh) + np.dot(h, Whh) + bh)  # hidden state
        y = sigmoid(np.dot(h, Why) + by)  # output
        hs.append(h)
        ys.append(y)
    return np.array(hs), np.array(ys), h

# Compute loss
def compute_loss(Y, ys):
    return 0.5 * np.sum((Y - ys) ** 2)

# Backward pass
def backward_pass(X, Y, hs, ys, h_prev):
    global Wxh, Whh, Why, bh, by
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dh_next = np.zeros_like(hs[0])

    for t in reversed(range(len(X))):
        dy = ys[t] - Y[t]
        dWhy += np.dot(hs[t].reshape(-1, 1), dy)
        dby += dy
        dh = np.dot(dy, Why.T) + dh_next
        dh_raw = sigmoid_derivative(hs[t]) * dh
        dbh += dh_raw
        dWxh += np.dot(X[t].reshape(-1, 1), dh_raw)
        if t != 0:
            dWhh += np.dot(hs[t-1].reshape(-1, 1), dh_raw)
        else:
            dWhh += np.dot(h_prev.reshape(-1, 1), dh_raw)
        dh_next = np.dot(dh_raw, Whh.T)

    # Gradient clipping to prevent exploding gradients
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -1, 1, out=dparam)

    # Update weights and biases
    Wxh -= learning_rate * dWxh
    Whh -= learning_rate * dWhh
    Why -= learning_rate * dWhy
    bh -= learning_rate * dbh
    by -= learning_rate * dby

# Sample data
X = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5]
])

Y = np.array([
    [0],
    [1],
    [0],
    [1]
])

# Training the RNN
h_prev = np.zeros((1, hidden_size))  # initial hidden state

for epoch in range(epochs):
    hs, ys, h_prev = forward_pass(X, h_prev)
    loss = compute_loss(Y, ys)
    backward_pass(X, Y, hs, ys, h_prev)
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss}')

# Making predictions
def predict(X):
    h_prev = np.zeros((1, hidden_size))  # initial hidden state
    _, ys, _ = forward_pass(X, h_prev)
    return ys

# Sample prediction
X_new = np.array([
    [5, 6],
    [6, 7]
])

predictions = predict(X_new)
print("Predictions:")
print(predictions)


Epoch 0, Loss: 2.0000202660359845
Epoch 100, Loss: 2.0000000014709256
Epoch 200, Loss: 2.0000005293458534
Epoch 300, Loss: 2.0000007760873237
Epoch 400, Loss: 2.000001138365745
Epoch 500, Loss: 2.000002160899667
Epoch 600, Loss: 2.000005252243332
Epoch 700, Loss: 2.000014909146844
Epoch 800, Loss: 2.0000445300506584
Epoch 900, Loss: 2.0001274411842243
Predictions:
[[[0.51420625]]

 [[0.51752195]]]


In [15]:
import numpy as np

# Set random seed for reproducibility
np.random.seed(42)

# Define the activation function and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# Initialize hyperparameters
input_size = 2      # Number of input features
hidden_dim = 100    # Hidden layer size
output_dim = 80     # Number of unique words in the vocabulary
learning_rate = 0.01
epochs = 1000

# Initialize weights and biases
Wxh = np.random.uniform(0, 1, (input_size, hidden_dim))         # input to hidden
Whh = np.random.uniform(0, 1, (hidden_dim, hidden_dim))         # hidden to hidden
Why = np.random.uniform(0, 1, (hidden_dim, output_dim))         # hidden to output
bh = np.zeros((1, hidden_dim))                                  # hidden bias
by = np.zeros((1, output_dim))                                  # output bias

# Forward pass
def forward_pass(X, h_prev):
    hs, ys = [], []
    h = h_prev
    for t in range(len(X)):
        h = sigmoid(np.dot(X[t], Wxh) + np.dot(h, Whh) + bh)  # hidden state
        y = sigmoid(np.dot(h, Why) + by)  # output
        hs.append(h)
        ys.append(y)
    return np.array(hs), np.array(ys), h

# Compute loss
def compute_loss(Y, ys):
    return 0.5 * np.sum((Y - ys) ** 2)

# Backward pass
def backward_pass(X, Y, hs, ys, h_prev):
    global Wxh, Whh, Why, bh, by
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    dh_next = np.zeros_like(hs[0])

    for t in reversed(range(len(X))):
        dy = ys[t] - Y[t]
        dWhy += np.dot(hs[t].T, dy)
        dby += dy
        dh = np.dot(dy, Why.T) + dh_next
        dh_raw = sigmoid_derivative(hs[t]) * dh
        dbh += dh_raw
        dWxh += np.dot(X[t].reshape(-1, 1), dh_raw.reshape(1, -1))
        if t != 0:
            dWhh += np.dot(hs[t-1].T, dh_raw)
        else:
            dWhh += np.dot(h_prev.T, dh_raw)
        dh_next = np.dot(dh_raw, Whh.T)

    # Gradient clipping to prevent exploding gradients
    for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(dparam, -1, 1, out=dparam)

    # Update weights and biases
    Wxh -= learning_rate * dWxh
    Whh -= learning_rate * dWhh
    Why -= learning_rate * dWhy
    bh -= learning_rate * dbh
    by -= learning_rate * dby

# Sample data
X = np.array([
    [1, 2],
    [2, 3],
    [3, 4],
    [4, 5]
])

Y = np.zeros((4, output_dim))  # Sample target output with the shape (4, 80)

# For demonstration, we'll set some random target outputs
Y[1][10] = 1
Y[3][20] = 1

# Training the RNN
h_prev = np.zeros((1, hidden_dim))  # initial hidden state

for epoch in range(epochs):
    hs, ys, h_prev = forward_pass(X, h_prev)
    loss = compute_loss(Y, ys)
    backward_pass(X, Y, hs, ys, h_prev)
    if epoch % 100 == 0:
        print(f'Epoch {epoch}, Loss: {loss}')

# Making predictions
def predict(X):
    h_prev = np.zeros((1, hidden_dim))  # initial hidden state
    _, ys, _ = forward_pass(X, h_prev)
    return ys

# Sample prediction
X_new = np.array([
    [5, 6],
    [6, 7]
])

predictions = predict(X_new)
print("Predictions:")
print(predictions)


Epoch 0, Loss: 636.0
Epoch 100, Loss: 3.0147836318468806
Epoch 200, Loss: 3.0016705204946925
Epoch 300, Loss: 3.0006047184006155
Epoch 400, Loss: 3.0003093698977406
Epoch 500, Loss: 3.0001874554842365
Epoch 600, Loss: 3.0001256242545717
Epoch 700, Loss: 3.000090014983956
Epoch 800, Loss: 3.000067651636078
Epoch 900, Loss: 3.000052694646399
Predictions:
[[[4.64772794e-04 3.64919280e-04 3.61392048e-04 3.43196787e-04
   3.38379246e-04 2.47938500e-04 3.21399764e-04 2.78308806e-04
   4.41324920e-04 3.54342972e-04 3.04295433e-01 3.35455008e-04
   3.28791588e-04 4.26942227e-04 3.51885986e-04 3.46303167e-04
   3.18824836e-04 2.24743182e-04 3.61248742e-04 3.47248646e-04
   2.67127483e-01 4.48779168e-04 3.86545178e-04 3.73710602e-04
   3.46941040e-04 3.50624755e-04 2.94908848e-04 4.30997650e-04
   3.20177204e-04 4.83900455e-04 3.34429344e-04 3.55071713e-04
   2.96161849e-04 2.63630066e-04 2.67032459e-04 2.70776519e-04
   3.93399087e-04 2.93507130e-04 2.31401410e-04 3.46795801e-04
   2.53442321e-