In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

data = pd.read_csv("../../data/clean_weather.csv", index_col=0)
data = data.ffill()

In [2]:
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

scaler = StandardScaler()
data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])

split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]

In [3]:
def mse(actual, predicted):
    return np.mean((actual-predicted)**2)

def mse_grad(actual, predicted):
    return (predicted - actual)

In [4]:
def init_layers(inputs):
    layers = []
    for i in range(1, len(inputs)):
        layers.append([
            np.random.rand(inputs[i-1], inputs[i]) / 5 - .1,
            np.ones((1,inputs[i]))
        ])
    return layers

In [5]:
def forward(batch, layers):
    hidden = [batch.copy()]
    for i in range(len(layers)):
        batch = np.matmul(batch, layers[i][0]) + layers[i][1]
        hidden.append(batch.copy())
        if i < len(layers) - 1:
            batch = np.maximum(batch, 0)
        
    return layers, batch, hidden

## Hidden

1,3
1,10
1,10

## Output
1,1

## Weights

3,10
10,10
10,1

## Backwards

### Iter 1 - layer 3

* grad 1,1
* w_grad 1,1 * 1,10 = 1,10 .T = 10,1
* grad 10,1 * 1,1 = 10,1 .T = 1,10

### Iter 2 - layer 2

* grad 10,1
* w_grad 10,1 * 1,10 = 10,10
* grad 10,10 10,1 = 10,1 . T = 1,10

## Iter 3 - layer 1

* grad 10,1
* w_grad 10,1 1,3 = 10,3 .T = 3,10
* grad 3,10 10,1 = 3,1 .T = 1,3

In [6]:
def backward(layers, hidden, grad, lr):
    for i in range(len(layers)-1, -1, -1):
        print(f"Layer {i}")
        if i != len(layers) - 1:
            grad = np.multiply(grad, np.heaviside(hidden[i+1], 1))

        grad = grad.T
        print(f"starting grad: {grad.shape}")
        w_grad = np.matmul(grad, hidden[i]).T
        print(f"w_grad: {w_grad.shape}")
        b_grad = grad.T

        layers[i][0] -= (w_grad + layers[i][0] * .01) * lr
        layers[i][1] -= b_grad * lr

        grad = np.matmul(layers[i][0], grad).T
        print(f"ending grad: {grad.shape}")
    return layers

In [None]:
backward(layers, hidden, np.array(loss).reshape(1,1), lr/train_x.shape[0])

In [7]:
layer_conf = [3,10,10,1]
lr = 5e-4
epochs=1

layers = init_layers(layer_conf)

for epoch in range(epochs+1):
    epoch_loss = 0

    for i, row in enumerate(train_x):
        row = row.copy().reshape((1,train_x.shape[1]))
        layers, pred, hidden = forward(row, layers)

        loss = mse_grad(train_y[i,0], pred[0,0])
        epoch_loss += loss ** 2

        layers = backward(layers, hidden, np.array(loss).reshape(1,1), lr/train_x.shape[0])
    
    
    if epoch % 10 == 0:
        _, valid_preds, _ = forward(valid_x, layers)
        
        print(f"Epoch: {epoch} Train MSE: {epoch_loss/train_x.shape[0]} Valid MSE: {mse(valid_preds,valid_y)}")

Layer 2
starting grad: (1, 1)
w_grad: (10, 1)
ending grad: (1, 10)
Layer 1
starting grad: (10, 1)
w_grad: (10, 10)
ending grad: (1, 10)
Layer 0
starting grad: (10, 1)
w_grad: (3, 10)
ending grad: (1, 3)
Layer 2
starting grad: (1, 1)
w_grad: (10, 1)
ending grad: (1, 10)
Layer 1
starting grad: (10, 1)
w_grad: (10, 10)
ending grad: (1, 10)
Layer 0
starting grad: (10, 1)
w_grad: (3, 10)
ending grad: (1, 3)
Layer 2
starting grad: (1, 1)
w_grad: (10, 1)
ending grad: (1, 10)
Layer 1
starting grad: (10, 1)
w_grad: (10, 10)
ending grad: (1, 10)
Layer 0
starting grad: (10, 1)
w_grad: (3, 10)
ending grad: (1, 3)
Layer 2
starting grad: (1, 1)
w_grad: (10, 1)
ending grad: (1, 10)
Layer 1
starting grad: (10, 1)
w_grad: (10, 10)
ending grad: (1, 10)
Layer 0
starting grad: (10, 1)
w_grad: (3, 10)
ending grad: (1, 3)
Layer 2
starting grad: (1, 1)
w_grad: (10, 1)
ending grad: (1, 10)
Layer 1
starting grad: (10, 1)
w_grad: (10, 10)
ending grad: (1, 10)
Layer 0
starting grad: (10, 1)
w_grad: (3, 10)
endin

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [8]:
_, test_preds, _ = forward(test_x, layers)
mse(test_preds,test_y)

23.52496690609325