In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

data = pd.read_csv("clean_weather.csv", index_col=0)
data = data.ffill()

In [2]:
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

scaler = StandardScaler()
data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])

split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]

In [3]:
def mse(actual, predicted):
    return np.mean((actual-predicted)**2)

def mse_grad(actual, predicted):
    return (predicted - actual)

# Shape

## Inputs

1,3

1,10
1,10
1,1

## Weights

3,10
10,10
10,1


In [4]:
def init_layers(inputs):
    layers = []
    for i in range(1, len(inputs)):
        layers.append([
            np.random.rand(inputs[i-1], inputs[i]) / 5 - .1,
            np.ones((1,inputs[i]))
        ])
    return layers

In [5]:
def forward(batch, layers):
    hidden = [batch.copy()]
    for i in range(len(layers)):
        batch = np.matmul(batch, layers[i][0]) + layers[i][1]
        hidden.append(batch.copy())
        if i < len(layers) - 1:
            batch = np.maximum(batch, 0)
        
    return layers, batch, hidden

In [6]:
def backward(layers, hidden, grad, lr):
    for i in range(len(layers)-1, -1, -1):
        if i != len(layers) - 1:
            grad = np.multiply(grad, np.heaviside(hidden[i+1], 1))
        
        grad = grad.T
        w_grad = np.matmul(grad, hidden[i]).T
        b_grad = grad.T
        
        layers[i][0] -= (w_grad + layers[i][0] * .01) * lr
        layers[i][1] -= b_grad * lr
        
        grad = np.matmul(layers[i][0], grad).T
    return layers

In [7]:
layer_conf = [3,10,10,1]
lr = 5e-4
epochs=70

layers = init_layers(layer_conf)

for epoch in range(epochs+1):
    epoch_loss = 0

    for i, row in enumerate(train_x):
        row = row.copy().reshape((1,train_x.shape[1]))
        layers, pred, hidden = forward(row, layers)

        loss = mse_grad(train_y[i,0], pred[0,0])
        epoch_loss += loss ** 2

        layers = backward(layers, hidden, np.array(loss).reshape(1,1), lr/train_x.shape[0])
    
    
    if epoch % 10 == 0:
        _, valid_preds, _ = forward(valid_x, layers)
        
        print(f"Epoch: {epoch} Train MSE: {epoch_loss/train_x.shape[0]} Valid MSE: {mse(valid_preds,valid_y)}")

Epoch: 0 Train MSE: 4165.963038631907 Valid MSE: 4397.895101297194
Epoch: 10 Train MSE: 3316.984073975768 Valid MSE: 3456.46281889481
Epoch: 20 Train MSE: 700.4861748365349 Valid MSE: 669.5895849267408
Epoch: 30 Train MSE: 31.99195938937586 Valid MSE: 32.05093658222849
Epoch: 40 Train MSE: 24.590538881856975 Valid MSE: 23.19375265557206
Epoch: 50 Train MSE: 23.371466740021315 Valid MSE: 22.25158140472315
Epoch: 60 Train MSE: 22.8752792973375 Valid MSE: 21.851917797369588
Epoch: 70 Train MSE: 22.603357662118192 Valid MSE: 21.596051634087654


In [8]:
_, test_preds, _ = forward(test_x, layers)
mse(test_preds,test_y)

23.52496690609325