In [25]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from statistics import mean

data = pd.read_csv("../../data/clean_weather.csv", index_col=0)
data = data.ffill()

In [26]:
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

scaler = StandardScaler()
data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])

split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]

In [27]:
def mse(actual, predicted):
    return np.mean((actual-predicted)**2)

def mse_grad(actual, predicted):
    return (predicted - actual)

In [28]:
def init_layers(inputs):
    layers = []
    for i in range(1, len(inputs)):
        layers.append([
            np.random.rand(inputs[i-1], inputs[i]) / 5 - .1,
            np.ones((1,inputs[i]))
        ])
    return layers

In [32]:
def forward(batch, layers):
    hidden = [batch.copy()]
    for i in range(len(layers)):
        batch = np.matmul(batch, layers[i][0]) + layers[i][1]
        if i < len(layers) - 1:
            batch = np.maximum(batch, 0)
        hidden.append(batch.copy())

    return batch, hidden

## Hidden

1,3
1,10
1,10

## Output
1,1

## Weights

3,10
10,10
10,1

## Backwards

### Iter 1 - layer 3

* grad 1,1
* w_grad 1,1 * 1,10 = 1,10 .T = 10,1
* grad 10,1 * 1,1 = 10,1 .T = 1,10

### Iter 2 - layer 2

* grad 10,1
* w_grad 10,1 * 1,10 = 10,10
* grad 10,10 10,1 = 10,1 . T = 1,10

## Iter 3 - layer 1

* grad 10,1
* w_grad 10,1 1,3 = 10,3 .T = 3,10
* grad 3,10 10,1 = 3,1 .T = 1,3

In [33]:
def backward(layers, hidden, grad, lr):
    for i in range(len(layers)-1, -1, -1):
        if i != len(layers) - 1:
            grad = np.multiply(grad, np.heaviside(hidden[i+1], 0))

        grad = grad.T
        w_grad = np.matmul(grad, hidden[i]).T
        b_grad = np.mean(grad.T, axis=0)

        layers[i][0] -= (w_grad + layers[i][0] * .01) * lr
        layers[i][1] -= b_grad * lr
        grad = np.matmul(layers[i][0], grad).T
    return layers

In [34]:
layer_conf = [3,10,10,1]
lr = 1e-6
epochs=10
batch_size = 8

layers = init_layers(layer_conf)

for epoch in range(epochs):
    epoch_loss = []

    for i in range(0, train_x.shape[0], batch_size):
        x_batch = train_x[i:(i+batch_size)]
        y_batch = train_y[i:(i+batch_size)]
        pred, hidden = forward(x_batch, layers)

        loss = mse_grad(y_batch, pred)
        epoch_loss.append(np.mean(loss ** 2))

        layers = backward(layers, hidden, loss, lr)
    

    valid_preds, _ = forward(valid_x, layers)

    print(f"Epoch: {epoch} Train MSE: {mean(epoch_loss)} Valid MSE: {mse(valid_preds,valid_y)}")

Epoch: 0 Train MSE: 3298.8488090041587 Valid MSE: 1699.191865893834
Epoch: 1 Train MSE: 347.13026842327326 Valid MSE: 25.506153413026723
Epoch: 2 Train MSE: 23.084596669639275 Valid MSE: 21.19942451555908
Epoch: 3 Train MSE: 22.388523703717826 Valid MSE: 20.833607483123387
Epoch: 4 Train MSE: 22.184622048882105 Valid MSE: 20.762662004702822
Epoch: 5 Train MSE: 22.1317309020479 Valid MSE: 20.75264472565585
Epoch: 6 Train MSE: 22.116946070297494 Valid MSE: 20.75349128500077
Epoch: 7 Train MSE: 22.111837612133225 Valid MSE: 20.75512212748549
Epoch: 8 Train MSE: 22.109293717522267 Valid MSE: 20.75600995109669
Epoch: 9 Train MSE: 22.107265980091956 Valid MSE: 20.755970386694667
