In [69]:
import pandas as pd
import numpy as np
import math

# Load the cleaned weather table (first CSV column becomes the index) and
# forward-fill missing values so every row carries the last observed value.
data = pd.read_csv("../../data/clean_weather.csv", index_col=0).ffill()

In [70]:
# Feature columns fed to the model and the column it should predict.
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

# Split the rows, in file order and without shuffling, into
# 70% train / 15% validation / 15% test.
# Positional slicing with .iloc replaces np.split(data, ...): np.split on a
# DataFrame relies on the deprecated DataFrame.swapaxes and emits a
# FutureWarning on recent pandas. The boundaries are the same as before.
# `data` is already forward-filled where it is loaded, so it is not filled again.
train_end = int(.7 * len(data))
valid_end = int(.85 * len(data))
split_data = [
    data.iloc[:train_end],
    data.iloc[train_end:valid_end],
    data.iloc[valid_end:],
]

# Each split becomes an (x, y) pair of NumPy arrays. Selecting [[TARGET]]
# (a list) keeps y two-dimensional with shape (rows, 1).
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [
    [d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data
]

In [71]:
def mse(actual, predicted):
    """Return the mean squared error between `actual` and `predicted`.

    The difference is squared element-wise and averaged over every element.
    """
    residual = actual - predicted
    return np.mean(np.square(residual))


def mse_grad(actual, predicted):
    """Return the element-wise error signal `predicted - actual`.

    This is the derivative of the squared error with respect to the
    prediction, with the constant factor 2 (and the 1/N of the mean) left out.
    """
    error = predicted - actual
    return error

In [215]:
def init_params(predictors, targets):
    """Create the initial `[weights, biases]` for a linear model.

    Args:
        predictors: number of input features (rows of the weight matrix).
        targets: number of outputs (columns of the weight matrix).

    Returns:
        A two-element list: weights of shape (predictors, targets) drawn
        uniformly from [-k, k) using NumPy's global random state, and biases
        of shape (1, targets), where k = sqrt(1 / predictors).
    """
    k = math.sqrt(1 / predictors)
    # rand() yields values in [0, 1); scale and shift them into [-k, k).
    uniform_draw = np.random.rand(predictors, targets)
    weights = uniform_draw * 2 * k - k
    # The biases are not random: ones * 2 * k - k equals k for every entry.
    biases = np.full((1, targets), k)
    return [weights, biases]

def forward(params, x):
    """Compute the linear model output `x @ weights + biases`.

    Args:
        params: two-element sequence `[weights, biases]`.
        x: input matrix whose column count matches the rows of `weights`.

    Returns:
        The matrix product of `x` and the weights with the biases added
        (broadcast across rows).
    """
    w, b = params
    projected = x @ w
    return projected + b

def backward(params, x, lr, grad):
    """Apply one gradient-descent update to `params` in place and return it.

    Args:
        params: list `[weights, biases]`; both arrays are modified in place.
        x: input matrix that produced the predictions.
        lr: learning rate multiplying each gradient before subtraction.
        grad: error signal with one row per row of `x`.

    Returns:
        The same `params` list object, after the update.
    """
    n_rows = x.shape[0]
    # Weight gradient: x^T @ grad averaged over the rows of x ...
    weight_step = (x.T @ grad) / n_rows
    # ... then divided by the mean of all entries of x.
    # NOTE(review): this division is not part of the analytical gradient; it
    # acts as an extra step-size scaling and would misbehave if the mean of x
    # were zero or negative - confirm it is intentional.
    weight_step = weight_step / np.mean(x)
    # Bias gradient: column-wise mean of the error signal.
    bias_step = np.mean(grad, axis=0)

    params[0] -= lr * weight_step
    params[1] -= lr * bias_step

    return params

In [225]:
# Hyperparameters for full-batch gradient descent.
lr = 0.01
epochs = 40_000
# One weight row per predictor column, one output column per target column.
params = init_params(train_x.shape[1], train_y.shape[1])

for i in range(epochs):
    # Predict on the whole training set, then take one update step.
    predictions = forward(params, train_x)
    grad = mse_grad(train_y, predictions)
    params = backward(params, train_x, lr, grad)

    # Every 2500 epochs (including epoch 0) report the validation loss,
    # measured after that epoch's update has been applied.
    if not i % 2500:
        predictions = forward(params, valid_x)
        valid_loss = mse(valid_y, predictions)
        print(f"Epoch {i} validation loss: {valid_loss}")

Epoch 0 validation loss: 6571.960202799781
Epoch 2500 validation loss: 22.084500916088228
Epoch 5000 validation loss: 21.805369772490685
Epoch 7500 validation loss: 21.642412830406148
Epoch 10000 validation loss: 21.543413729583673
Epoch 12500 validation loss: 21.480417208763782
Epoch 15000 validation loss: 21.43822793784847
Epoch 17500 validation loss: 21.40842988865444
Epoch 20000 validation loss: 21.3862560754172
Epoch 22500 validation loss: 21.36893614958532
Epoch 25000 validation loss: 21.354814333227946
Epoch 27500 validation loss: 21.34287178668064
Epoch 30000 validation loss: 21.332463334258954
Epoch 32500 validation loss: 21.323169214322775
Epoch 35000 validation loss: 21.314709523518015
Epoch 37500 validation loss: 21.30689349452886


In [226]:
# Display the fitted parameters as [weights, biases].
params

[array([[ 0.7263088 ],
        [ 0.17744623],
        [-0.26382715]]),
 array([[8.94199041]])]

In [227]:
# Run the fitted parameters on the test split and display the resulting
# mean squared error as the cell output.
predictions = forward(params, test_x)
mse(test_y, predictions)

22.557199730563976