In [319]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from statistics import mean
import math

data = pd.read_csv("../../data/clean_weather.csv")
data = data.ffill()

In [320]:
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

scaler = StandardScaler()
data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])

np.random.seed(0)
split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]

In [321]:
# Rnn
# Input -> hidden
# hidden -> hidden
# hidden -> output

In [322]:
def init_params(layer_conf):
    layers = []
    for i in range(len(layer_conf)):
        if layer_conf[i]["type"] == "input":
            continue
        elif layer_conf[i]["type"] == "rnn":
            np.random.seed(0)
            k = 1/math.sqrt(layer_conf[i]["hidden"])
            input_weights = np.random.rand(layer_conf[i-1]["units"], layer_conf[i]["hidden"]) * 2 * k - k

            hidden_weights = np.random.rand(layer_conf[i]["hidden"], layer_conf[i]["hidden"]) * 2 * k - k
            hidden_bias = np.random.rand(1, layer_conf[i]["hidden"]) * 2 * k - k

            output_weights = np.random.rand(layer_conf[i]["hidden"], layer_conf[i]["output"]) * 2 * k - k
            output_bias = np.random.rand(1, layer_conf[i]["output"]) * 2 * k - k

            layers.append(
                [[input_weights], [hidden_weights, hidden_bias], [output_weights, output_bias]]
            )
    return layers

In [323]:
def forward(params, x, layer_conf):
    hiddens = []
    outputs = []
    for i in range(len(params)):
        if layer_conf[i+1]["type"] == "rnn":
            [i_weight], [h_weight, h_bias], [o_weight, o_bias] = params[i]
            hidden = np.zeros((x.shape[0], i_weight.shape[1]))
            output = np.zeros((x.shape[0], o_weight.shape[1]))
            for j in range(x.shape[0]):
                input_x = x[j,:] @ i_weight
                hidden_x = input_x + hidden[max(j-1,0),:] @ h_weight + h_bias
                # Activation.  tanh avoids outputs getting larger and larger.
                hidden_x = np.tanh(hidden_x)
                # Store hidden for use in backprop
                hidden[j,:] = hidden_x.copy()

                # Activation
                output_x = hidden_x @ o_weight + o_bias
                output[j,:] = output_x.copy()
            hiddens.append(hidden)
            outputs.append(output)
    return hiddens, outputs

In [324]:
def mse(actual, predicted):
    return np.mean((actual-predicted)**2)

def mse_grad(actual, predicted):
    return (predicted - actual)

In [325]:
def backward(params, x, lr, grad, hiddens, layer_conf):
    for i in range(len(params)):
        if layer_conf[i+1]["type"] == "rnn":
            [i_weight], [h_weight, h_bias], [o_weight, o_bias] = params[i]
            hidden = hiddens[i]
            next_h_grad = None
            i_weight_grad, h_weight_grad, h_bias_grad, o_weight_grad, o_bias_grad = [0] * 5

            for j in range(x.shape[0] - 1, -1, -1):
                # 1,1
                out_grad = grad[j,:][:,np.newaxis]

                # Output updates
                # (1,1 @ 1,3).T = 3,1
                o_weight_grad += (out_grad @ hidden[j,:][np.newaxis, :]).T
                # 1,1
                o_bias_grad += out_grad

                # Propagate gradient to hidden unit
                # (3,1 @ 1,1).T = 1,3
                ho_grad = (o_weight @ out_grad).T

                if j < x.shape[0] - 1:
                    tanh_deriv_next = 1 - hidden[j+1] ** 2
                    # 1,3 @ 3,3 @ 3,3
                    hh_grad = next_h_grad @ np.diag(tanh_deriv_next) @ h_weight
                    h_grad = hh_grad + ho_grad
                else:
                    h_grad = ho_grad

                next_h_grad = h_grad.copy()

                tanh_deriv = 1 - hidden[j] ** 2
                # 1,3.T @ (1,3 @ 3,3)
                i_weight_grad += x[j,:][:,np.newaxis] @ (h_grad @ np.diag(tanh_deriv))

                if j > 0:
                    # (1,3 @ 3,3).T = 3,1 @ 1,3
                    # Turn this to match the correct grad values with the associated weights
                    h_weight_grad += ((h_grad @ np.diag(tanh_deriv)).T @ hidden[j-1][np.newaxis,:]).T
                    # (1,3) @ 3,3
                    h_bias_grad += h_grad @ np.diag(tanh_deriv)

            i_weight -= i_weight_grad * lr
            h_weight -= h_weight_grad * lr
            h_bias -= h_bias_grad * lr
            o_weight -= o_weight_grad * lr
            o_bias -= o_bias_grad * lr
            params[i] = [[i_weight], [h_weight, h_bias], [o_weight, o_bias]]
    return params

In [328]:
epochs = 50
lr = 1e-5


layer_conf = [
    {"type":"input", "units": 3},
    {"type": "rnn", "hidden": 4, "output": 1}
]
params = init_params(layer_conf)

for i in range(epochs):
    sequence_len = 7
    for j in range(train_x.shape[0] - sequence_len):
        seq_x = train_x[j:(j+sequence_len),]
        seq_y = train_y[j:(j+sequence_len),]
        hiddens, outputs = forward(params, seq_x, layer_conf)
        grad = mse_grad(seq_y, outputs[0])
        params = backward(params, seq_x, lr, grad, hiddens, layer_conf)

    sequence_len = 7
    losses = []
    for j in range(valid_x.shape[0] - sequence_len):
        seq_x = valid_x[j:(j+sequence_len),]
        seq_y = valid_y[j:(j+sequence_len),]
        _, outputs = forward(params, seq_x, layer_conf)
        losses.append(mse(seq_y, outputs[0]))

    print(f"Epoch: {i} valid loss {mean(losses)}")

Epoch: 0 valid loss 89.86535111568139
Epoch: 1 valid loss 74.38209988279957
Epoch: 2 valid loss 46.68417143453307
Epoch: 3 valid loss 35.27479367955674
Epoch: 4 valid loss 31.616366595603132
Epoch: 5 valid loss 30.51672686115938
Epoch: 6 valid loss 29.53137483865339
Epoch: 7 valid loss 28.4821828286481
Epoch: 8 valid loss 27.915303477724308
Epoch: 9 valid loss 27.7105958059442
Epoch: 10 valid loss 27.798556917758656
Epoch: 11 valid loss 28.69006176689853
Epoch: 12 valid loss 29.77378069212401
Epoch: 13 valid loss 30.006155465845147
Epoch: 14 valid loss 29.912103897964553
Epoch: 15 valid loss 29.96274696364353
Epoch: 16 valid loss 30.119110347572366
Epoch: 17 valid loss 30.310023343212126
Epoch: 18 valid loss 30.495203347830774
Epoch: 19 valid loss 30.656271606191556


KeyboardInterrupt: 