In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
from torch.utils.data import Dataset

data = pd.read_csv("../../data/clean_weather.csv", index_col=0)
data = data.ffill()

In [5]:
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

scaler = StandardScaler()
data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])

split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[d[PREDICTORS].to_numpy(), d[[TARGET]].to_numpy()] for d in split_data]

In [16]:
class WeatherDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        return torch.from_numpy(self.x[idx]).float(), torch.from_numpy(self.y[idx]).float()

In [25]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32
EPOCHS = 20

train_dataset = WeatherDataset(train_x, train_y)
valid_dataset = WeatherDataset(valid_x, valid_y)

train = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [26]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.dense = nn.Linear(len(PREDICTORS), 1)

    def forward(self, x):
        return self.dense(x)

In [35]:
epochs=50

model = NeuralNetwork()
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

In [36]:
for epoch in range(epochs):
    epoch_loss = 0

    for batch, (x, y) in enumerate(train):
        optimizer.zero_grad()
        pred = model(x)

        loss = loss_fn(pred, y)
        epoch_loss += loss.item()

        loss.backward()
        optimizer.step()

    if epoch % 10 == 0:
        with torch.no_grad():
            valid_loss = 0
            for batch, (x, y) in enumerate(valid):
                pred = model(x)
                loss = loss_fn(pred, y)
                valid_loss += loss.item()

        print(f"Epoch: {epoch} Train MSE: {epoch_loss/len(train)} Valid MSE: {valid_loss/len(valid)}")

Epoch: 0 Train MSE: 2594.5309386382232 Valid MSE: 1499.397117614746
Epoch: 10 Train MSE: 22.527406446031623 Valid MSE: 22.04008938372135
Epoch: 20 Train MSE: 22.13951975912661 Valid MSE: 20.99353812634945
Epoch: 30 Train MSE: 22.167372345924377 Valid MSE: 21.082484543323517
Epoch: 40 Train MSE: 22.142251172581233 Valid MSE: 21.01604261994362
