In [27]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

data = pd.read_csv("../../data/clean_weather.csv")
data = data.ffill()

In [28]:
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

scaler = StandardScaler()
data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])

split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]

In [29]:
# Rnn
# Input -> hidden
# hidden -> hidden
# hidden -> output

In [30]:
def init_params(layer_conf):
    layers = []
    for i in range(len(layer_conf)):
        if layer_conf[i]["type"] == "input":
            continue
        elif layer_conf[i]["type"] == "rnn":
            input_weights = np.random.rand(layer_conf[i-1]["units"], layer_conf[i]["hidden"])

            hidden_weights = np.random.rand(layer_conf[i]["hidden"], layer_conf[i]["hidden"])
            hidden_bias = np.random.rand(1, layer_conf[i]["hidden"])

            output_weights = np.random.rand(layer_conf[i]["hidden"], layer_conf[i]["output"])
            output_bias = np.random.rand(1, layer_conf[i]["output"])

            layers.append(
                [[input_weights], [hidden_weights, hidden_bias], [output_weights, output_bias]]
            )
    return layers

In [31]:
def forward(params, x, layer_conf):
    hiddens = []
    outputs = []
    for i in range(len(params)):
        if layer_conf[i+1]["type"] == "rnn":
            [i_weight], [h_weight, h_bias], [o_weight, o_bias] = params[i]
            hidden = np.zeros((x.shape[0], i_weight.shape[1]))
            output = np.zeros((x.shape[0], o_weight.shape[1]))
            for j in range(x.shape[0]):
                input_x = x[j,:] @ i_weight
                hidden_x = input_x + hidden[max(j-1,0),:] @ h_weight + h_bias
                # Activation.  tanh avoids outputs getting larger and larger.
                hidden_x = np.tanh(hidden_x)
                # Store hidden for use in backprop
                hidden[j,:] = hidden_x.copy()

                # Activation
                output_x = hidden_x @ o_weight + o_bias
                output[j,:] = output_x.copy()
            hiddens.append(hidden)
            outputs.append(output)
    return hiddens, outputs

In [32]:
def mse(actual, predicted):
    return np.mean((actual-predicted)**2)

def mse_grad(actual, predicted):
    return (predicted - actual)

In [33]:
def backward(params, x, lr, grad, hiddens, layer_conf):
    for i in range(len(params)):
        if layer_conf[i+1]["type"] == "rnn":
            [i_weight], [h_weight, h_bias], [o_weight, o_bias] = params[i]
            hidden = hiddens[i]
            next_h_grad = None
            i_weight_grad, h_weight_grad, h_bias_grad, o_weight_grad, o_bias_grad = [0] * 5

            for j in range(x.shape[0] - 1, -1, -1):
                out = grad[j,:][:,np.newaxis]
                ho_grad = o_weight @ out
                if x.shape[0] - 1 > j:
                    tanh_deriv_next = 1 - hidden[j+1] ** 2
                    hh_grad = h_weight.T @ np.diag(tanh_deriv_next) @ next_h_grad
                    h_grad = ho_grad + hh_grad
                else:
                    h_grad = ho_grad

                next_h_grad = h_grad.copy()
                tanh_deriv = 1 - hidden[j] ** 2

                if j > 0:
                    h_bias_grad += (np.diag(tanh_deriv) @ h_grad).T
                    h_weight_grad += (np.diag(tanh_deriv) @ h_grad @ hidden[j-1][:,np.newaxis].T).T

                o_bias_grad += out
                o_weight_grad += (out @ hidden[j][:,np.newaxis].T).T

                i_weight_grad += (np.diag(tanh_deriv) @ h_grad @ x[j,:][:,np.newaxis].T).T

            i_weight -= i_weight_grad * lr
            h_weight -= h_weight_grad * lr
            h_bias -= h_bias_grad * lr
            o_weight -= o_weight_grad * lr
            o_bias -= o_bias_grad * lr
            params[i] = [[i_weight], [h_weight, h_bias], [o_weight, o_bias]]
    return params

In [44]:
epochs = 10
lr = 1e-5
sequence_len = 14

layer_conf = [
    {"type":"input", "units": 3},
    {"type": "rnn", "hidden": 10, "output": 1}
]
params = init_params(layer_conf)

for i in range(epochs):
    for j in range(train_x.shape[0] - sequence_len):
        seq_x = train_x[j:(j+sequence_len),]
        seq_y = train_y[j:(j+sequence_len),]
        hiddens, outputs = forward(params, seq_x, layer_conf)
        grad = mse_grad(seq_y, outputs[0])
        params = backward(params, seq_x, lr, grad, hiddens, layer_conf)

    _, outputs = forward(params, valid_x, layer_conf)
    loss = mse(valid_y, outputs[0])

    print(f"Epoch: {i} valid loss {loss}")

Epoch: 0 valid loss 58.5310506179591
Epoch: 1 valid loss 29.754882415847124
Epoch: 2 valid loss 27.2513681715272
Epoch: 3 valid loss 25.59929246629181
Epoch: 4 valid loss 28.737270165094312
Epoch: 5 valid loss 32.271596542941204
Epoch: 6 valid loss 33.42286643624139
Epoch: 7 valid loss 35.316524713856985
Epoch: 8 valid loss 35.029367041892925
Epoch: 9 valid loss 34.33764930373408


In [36]:
outputs

[array([[59.63853951],
        [58.42592827],
        [59.23404144],
        [60.17906532],
        [58.97861198],
        [59.29876741],
        [59.09136193],
        [59.46308188],
        [61.09376653],
        [67.24164872]])]

In [None]:
"""
x = train_x[:10,]
i = 0
lr = 1e-4

[i_weight], [h_weight, h_bias], [o_weight, o_bias] = params[i]
hidden = hiddens[i]
h_grads = np.zeros((x.shape[0], h_weight.shape[1]))
next_h_grad = None
i_weight_grad, h_weight_grad, h_bias_grad, o_weight_grad, o_bias_grad = [0] * 5

for j in range(x.shape[0] - 1, -1, -1):
    out = grad[j,:][:,np.newaxis]
    ho_grad = o_weight @ out
    if x.shape[0] - 1 > j:
        tanh_deriv_next = 1 - hidden[j+1] ** 2
        hh_grad = h_weight.T @ np.diag(tanh_deriv_next) @ next_h_grad
        h_grad = ho_grad + hh_grad
    else:
        h_grad = ho_grad

    next_h_grad = h_grad.copy()
    tanh_deriv = 1 - hidden[j] ** 2

    if j > 0:
        h_bias_grad += (np.diag(tanh_deriv) @ h_grad).T
        h_weight_grad += (np.diag(tanh_deriv) @ h_grad @ hidden[j-1][:,np.newaxis].T).T

    o_bias_grad += out
    o_weight_grad += (out @ hidden[j][:,np.newaxis].T).T

    i_weight_grad += (np.diag(tanh_deriv) @ h_grad @ x[j,:][:,np.newaxis].T).T

i_weight -= i_weight_grad * lr
h_weight -= h_weight_grad * lr
h_bias -= h_bias_grad * lr
o_weight -= o_weight_grad * lr
o_bias -= o_bias_grad * lr
"""