In [14]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

class RecurrentNeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        np.random.seed(42)
        self.Wx = np.random.randn(hidden_size, input_size) * np.sqrt(2 / (input_size + hidden_size))
        self.Wh = np.random.randn(hidden_size, hidden_size) * np.sqrt(1 / hidden_size)
        self.Wy = np.random.randn(output_size, hidden_size) * np.sqrt(2 / (hidden_size + output_size))
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def forward(self, x, h_prev):
        h = np.tanh(self.Wx @ x + self.Wh @ h_prev + self.bh)
        y = self.sigmoid(self.Wy @ h + self.by)
        return y, h
    
    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def tanh_derivative(self, x):
        return 1 - x ** 2

    def backward(self, x, h_prev, y_true, y_pred, h_hist, learning_rate):
        dWy = np.zeros_like(self.Wy)
        dby = np.zeros_like(self.by)

        dh_next = np.zeros_like(h_prev)
        dWh = np.zeros_like(self.Wh)
        dWx = np.zeros_like(self.Wx)
        dbh = np.zeros_like(self.bh)

        for t in reversed(range(len(x))):
            dy = (y_pred[t] - y_true[t]) * self.sigmoid_derivative(y_pred[t])
            dWy += dy @ h_hist[t].T
            dby += dy

            dh = (self.Wy.T @ dy) + dh_next
            dh_raw = dh * self.tanh_derivative(h_hist[t])
            dWh += dh_raw @ (h_hist[t - 1] if t > 0 else h_prev).T
            dWx += dh_raw @ x[t].reshape(-1, 1).T
            dbh += dh_raw
            dh_next = self.Wh.T @ dh_raw

        self.Wy -= learning_rate * dWy
        self.by -= learning_rate * dby
        self.Wh -= learning_rate * dWh
        self.Wx -= learning_rate * dWx
        self.bh -= learning_rate * dbh


input_size, hidden_size, output_size = 1, 5, 1
self = RecurrentNeuralNetwork(input_size, hidden_size, output_size)


In [15]:
# Parameter split_percent defines the ratio of training examples
def get_train_test(url, split_percent=0.8):
    df = pd.read_csv(url, usecols=[1], engine='python')
    data = np.array(df.values.astype('float32'))
    scaler = MinMaxScaler(feature_range=(0, 1))
    data = scaler.fit_transform(data).flatten()
    n = len(data)
    # Point for splitting data into train and test
    split = int(n*split_percent)
    train_data = data[range(split)]
    test_data = data[split:]
    return train_data, test_data, data
 
sunspots_url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/monthly-sunspots.csv'
train_data, test_data, data = get_train_test(sunspots_url)

In [16]:
num_epochs = 1000
learning_rate = 0.01

x = train_data[:-1]  # Input data
y_true = train_data[1:]  # Target data

h_prev = np.zeros((hidden_size, 1))

for epoch in range(num_epochs):
    y_pred, h_hist = [], [h_prev]
    for t in range(len(x)):
        y_t, h_t = self.forward(x[t].reshape(-1, 1), h_hist[-1])
        y_pred.append(y_t)
        h_hist.append(h_t)
    y_pred = np.array(y_pred)

    self.backward(x, h_prev, y_true, y_pred, h_hist, learning_rate)

    if epoch % 100 == 0:
        loss = 0.5 * np.sum((y_true - y_pred) ** 2)
        print(f"Epoch {epoch}, Loss: {loss}")


Epoch 0, Loss: 400119.37406021985
Epoch 100, Loss: 954878.8294869354
Epoch 200, Loss: 955155.3422559198


In [4]:
X

array([[[0.22852638],
        [0.24665089],
        [0.2758077 ],
        ...,
        [0.29944837],
        [0.3506698 ],
        [0.4546887 ]]], dtype=float32)

In [5]:
Y_target

array([[[0.24665089],
        [0.2758077 ],
        [0.21946414],
        ...,
        [0.3506698 ],
        [0.4546887 ],
        [0.4862096 ]]], dtype=float32)

In [6]:
h_prev = np.zeros((hidden_size, 1))

In [7]:
x = X[:,0,:]
y_target = Y_target[:,0,:]

y_pred, h = self.forward(x, h_prev)

In [58]:
dL_dy = y_pred - y_target
dL_dbeta = dL_dy * y_pred * (1 - y_pred)
dL_dWy = h @ dL_dbeta
dL_dby = dL_dbeta
dL_dh = self.Wy.T @ dL_dbeta
dL_dalpha = dL_dh * (1 - h**2)
dL_dWx = dL_dalpha @ x.T
dL_dbx = dL_dalpha

In [61]:
dL_dalpha

array([[-0.01729235],
       [ 0.05386739],
       [-0.00038443],
       [-0.02734253],
       [ 0.02387646]])

In [54]:
dL_dalpha

array([[-0.01729235,  0.05323234, -0.00038789, -0.03039739,  0.02363899],
       [-0.01749865,  0.05386739, -0.00039252, -0.03076003,  0.023921  ],
       [-0.01713792,  0.05275692, -0.00038443, -0.03012591,  0.02342787],
       [-0.01555452,  0.04788262, -0.00034891, -0.02734253,  0.02126333],
       [-0.01746607,  0.05376709, -0.00039179, -0.03070275,  0.02387646]])

In [53]:
dL_dWx = dL_dalpha @ x.T

In [46]:
dL_dbeta

array([[0.02911074]])

In [48]:
h

array([[ 0.11302725],
       [-0.03158653],
       [ 0.14694241],
       [ 0.33464728],
       [-0.05345921]])

In [49]:
self.Wy.T @ dL_dbeta

array([[-0.01751613],
       [ 0.05392119],
       [-0.00039291],
       [-0.03079075],
       [ 0.02394489]])

In [42]:
dL_dbeta

array([[0.02911074]])

In [30]:
dL_dbeta = dL_dy * y_pred * (1 - y_pred)

array([[0.1246985]])

In [31]:
dy_dWy = h * y_pred * (1 - y_pred)
dy_dby = y_pred * (1 - y_pred)

In [35]:
dL_dWy = dy_dWy @ dL_dy
dL_dby = dL_dy * dy_dby

In [40]:
self.Wy

array([[-0.60170661,  1.85227818, -0.01349722, -1.05771093,  0.82254491]])

In [39]:
dL_dh = dL_dy *  y_pred * (1 - y_pred) * self.Wy

In [32]:
dy_dby

array([[0.23344902]])

In [None]:
dL_dy = y_pred - y_target
dy_dWy = h * y_pred * (1 - y_pred)
dy_dby = y_pred * (1 - y_pred)

dL_dWy = np.dot(dL_dy, dy_dWy.T)
dL_dby = dL_dy * dy_dby
dL_dh = np.dot(self.Wy.T, dL_dy)
dL_da = dL_dh * (1 - h**2)

dL_dWx = np.dot(dL_da, x.T)
dL_dWh = np.dot(dL_da, h_prev.T)
dL_dbh = dL_da

In [26]:
y

array([[0.37134939]])

In [27]:
h

array([[ 0.11302725],
       [-0.03158653],
       [ 0.14694241],
       [ 0.33464728],
       [-0.05345921]])

In [12]:
import numpy as np

time_series = np.sin(np.linspace(0, 10, 100))  # Replace this with your 100 time series values

class SimpleRNN:
    # ... (same as before) ...

# Example usage
n_x, n_h, n_y = 1, 5, 1
rnn = SimpleRNN(n_x, n_h, n_y)
learning_rate = 0.01
n_epochs = 1000

# Prepare the input and target sequences
X = time_series[:-1].reshape(1, -1, 1)  # Exclude the last value
Y_target = time_series[1:].reshape(1, -1, 1)  # Exclude the first value

# Training loop
for epoch in range(n_epochs):
    h_prev = np.zeros((n_h, 1))
    loss = 0

    for t in range(X.shape[1]):
        x = X[:, t, :]
        y_target = Y_target[:, t, :]

        # Forward step
        y, h = rnn.forward_step(x, h_prev)
        h_prev = h

        # Calculate loss (mean squared error)
        loss += np.mean((y - y_target)**2)

        # Backward step
        rnn.backward_step(x, h_prev, y, h, y_target, learning_rate)

    # Average loss for the sequence
    loss /= X.shape[1]

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")


IndentationError: expected an indented block (3602867221.py, line 9)

In [2]:
# Parameter split_percent defines the ratio of training examples
def get_train_test(url, split_percent=0.8):
    df = pd.read_csv(url, usecols=[1], engine='python')
    data = np.array(df.values.astype('float32'))
    scaler = MinMaxScaler(feature_range=(0, 1))
    data = scaler.fit_transform(data).flatten()
    n = len(data)
    # Point for splitting data into train and test
    split = int(n*split_percent)
    train_data = data[range(split)]
    test_data = data[split:]
    return train_data, test_data, data
 
sunspots_url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/monthly-sunspots.csv'
train_data, test_data, data = get_train_test(sunspots_url)

In [5]:
# Prepare the input X and target Y
def get_XY(dat, time_steps):
    # Indices of target array
    Y_ind = np.arange(time_steps, len(dat), time_steps)
    Y = dat[Y_ind]
    # Prepare X
    rows_x = len(Y)
    X = dat[range(time_steps*rows_x)]
    X = np.reshape(X, (rows_x, time_steps, 1))    
    return X, Y

time_steps = 12
trainX, trainY = get_XY(train_data, time_steps)
testX, testY = get_XY(test_data, time_steps)

In [9]:
trainX[0]

array([[0.22852638],
       [0.24665089],
       [0.2758077 ],
       [0.21946414],
       [0.33490935],
       [0.3289992 ],
       [0.37352246],
       [0.2612293 ],
       [0.29905435],
       [0.29747832],
       [0.6249015 ],
       [0.33569738]], dtype=float32)

In [8]:
trainY.shape

(187,)

In [10]:
train_data

array([0.22852638, 0.24665089, 0.2758077 , ..., 0.3506698 , 0.4546887 ,
       0.4862096 ], dtype=float32)

In [11]:
trainY[0]

0.28881007