In [278]:
import pandas as pd
import numpy as np
import math

# Read the weather CSV (first column becomes the index) and forward-fill
# missing values in a single chained expression.
data = pd.read_csv("../../data/clean_weather.csv", index_col=0).ffill()

In [279]:
# Columns fed to the model and the column it should predict.
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

# Seed NumPy's global RNG (kept from the original cell).
np.random.seed(0)

# Cut the frame, in row order, into 70% train / 15% validation / 15% test.
# Plain .iloc slices are used instead of np.split(data, ...): np.split on a
# DataFrame is routed through DataFrame.swapaxes, which pandas has deprecated,
# so it emits a FutureWarning and is not guaranteed to keep working.
train_end = int(.7 * len(data))
valid_end = int(.85 * len(data))
split_data = [
    data.iloc[:train_end],
    data.iloc[train_end:valid_end],
    data.iloc[valid_end:],
]

# Each split becomes an (x, y) pair of NumPy arrays; selecting [[TARGET]]
# (a list) keeps y two-dimensional with a single column.
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [
    [d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data
]

In [280]:
def mse(actual, predicted):
    """Return the mean squared error between `actual` and `predicted`.

    The element-wise differences are squared and averaged over every
    element, giving a single scalar.
    """
    residual = actual - predicted
    return np.mean(np.square(residual))


def mse_grad(actual, predicted):
    """Return the element-wise error signal `predicted - actual`.

    This is the derivative of the squared error with respect to the
    prediction up to a constant factor; no 2/n scaling is applied here.
    """
    error = predicted - actual
    return error

In [281]:
def init_params(predictors, targets, seed=0):
    """Create the initial weights and biases for a single linear layer.

    Args:
        predictors: Number of input features (rows of the weight matrix).
        targets: Number of outputs (columns of the weight matrix).
        seed: Seed for the private random generator. The default of 0
            yields the same weights as seeding the global NumPy generator
            with 0 and drawing with ``np.random.rand``.

    Returns:
        A two-element list ``[weights, biases]`` where ``weights`` has shape
        ``(predictors, targets)`` with values in ``[-k, k)`` and ``biases``
        has shape ``(1, targets)``, with ``k = sqrt(1 / predictors)``.
    """
    k = math.sqrt(1 / predictors)
    # Use a private legacy generator instead of np.random.seed(): the draws
    # are identical for the same seed, but the caller's global RNG state is
    # no longer reset as a hidden side effect.
    rng = np.random.RandomState(seed)
    weights = rng.rand(predictors, targets) * 2 * k - k
    # ones * 2k - k evaluates to k for every bias entry (not random).
    biases = np.ones((1, targets)) * 2 * k - k
    return [weights, biases]

def forward(params, x):
    """Run the linear layer: matrix-multiply `x` by the weights, add the biases.

    `params` is unpacked as a `(weights, biases)` pair.
    """
    w, b = params
    linear = x @ w
    return linear + b

def backward(params, x, lr, grad):
    """Apply one gradient-descent update to `params` and return it.

    The weight gradient is `(x.T / number_of_rows) @ grad` and the bias
    gradient is the mean of `grad` over axis 0. Both are scaled by `lr` and
    subtracted from `params[0]` and `params[1]` in place; the same `params`
    object is returned.
    """
    n_rows = x.shape[0]
    # Divide x.T by the row count before the matmul (same order of
    # operations as a per-sample average of the outer products).
    weight_step = lr * ((x.T / n_rows) @ grad)
    bias_step = lr * np.mean(grad, axis=0)

    params[0] -= weight_step
    params[1] -= bias_step
    return params

In [285]:
# Training hyperparameters and freshly initialised layer parameters.
lr = 1e-4
epochs = 50000
params = init_params(train_x.shape[1], train_y.shape[1])

for i in range(epochs):
    # One full-batch gradient-descent step on the training split.
    predictions = forward(params, train_x)
    grad = mse_grad(train_y, predictions)
    params = backward(params, train_x, lr, grad)

    # Only report on every 10000th epoch; skip the rest.
    if i % 10000 != 0:
        continue

    predictions = forward(params, valid_x)
    valid_loss = mse(valid_y, predictions)
    print(f"Epoch {i} validation loss: {valid_loss}")

Epoch 0 validation loss: 297.27540770706065
Epoch 10000 validation loss: 22.647249675483117
Epoch 20000 validation loss: 22.613777406314743
Epoch 30000 validation loss: 22.581085439005864
Epoch 40000 validation loss: 22.549154158580198


In [None]:
# Show the trained parameter list ([weights, biases]) as the cell output.
params

In [None]:
# Score the trained parameters on the test split; the bare mse(...) expression
# on the last line is displayed as the cell's output.
predictions = forward(params, test_x)
mse(test_y, predictions)