In [128]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch

data = pd.read_csv("../../data/clean_weather.csv")
data = data.ffill()

In [129]:
PREDICTORS = ["tmax", "tmin", "rain"]
TARGET = "tmax_tomorrow"

scaler = StandardScaler()
data[PREDICTORS] = scaler.fit_transform(data[PREDICTORS])

np.random.seed(0)
split_data = np.split(data, [int(.7*len(data)), int(.85*len(data))])
(train_x, train_y), (valid_x, valid_y), (test_x, test_y) = [[torch.from_numpy(d[PREDICTORS].to_numpy()), torch.from_numpy(d[[TARGET]].to_numpy())] for d in split_data]

In [134]:
import torch

device = torch.device("cpu")

from torch.utils.data import Dataset

class WeatherDataset(Dataset):
    def __init__(self, x, y):
        self.dataset = [x,y]
        self.sequence_length = 7

    def __len__(self):
        return len(self.dataset[0]) - self.sequence_length

    def __getitem__(self, idx):
        x, y = self.dataset[0][idx:(idx+self.sequence_length)], self.dataset[1][idx:(idx+self.sequence_length)]
        return x.float(), y.float()

In [135]:
from torch.utils.data import DataLoader
from torch import nn

BATCH_SIZE = 1

train_dataset = WeatherDataset(train_x, train_y)
valid_dataset = WeatherDataset(valid_x, valid_y)
test_dataset = WeatherDataset(test_x, test_y)

train = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=False)
valid = DataLoader(valid_dataset, batch_size=BATCH_SIZE, shuffle=False)
test = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [136]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.rnn = nn.RNN(3, 4, 1, batch_first=True)
        self.dense = nn.Linear(4, 1)

    def forward(self, x):
        x, hidden = self.rnn(x)
        x = self.dense(x)
        return x

In [138]:
from statistics import mean

model = NeuralNetwork().to(device)
loss_fn = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=5e-5)

EPOCHS = 50
for epoch in range(EPOCHS):
    for batch, (sequence, target) in enumerate(train):
        optimizer.zero_grad()

        sequence = sequence.to(device)
        pred = model(sequence)

        loss = loss_fn(pred, target)
        loss.backward()
        optimizer.step()

    losses = []
    with torch.no_grad():
        for batch, (sequence, target) in enumerate(valid):
            sequence = sequence.to(device)
            pred = model(sequence)
            loss = loss_fn(pred, target)
            losses.append(loss.item())

    print(f"Epoch {epoch} valid loss: {mean(losses)}")

Epoch 0 valid loss: 63.35616726834565
Epoch 1 valid loss: 30.89414867692327
Epoch 2 valid loss: 27.443207366092185
Epoch 3 valid loss: 26.720991076367394
Epoch 4 valid loss: 26.598924968223752
Epoch 5 valid loss: 26.66597429140775
Epoch 6 valid loss: 26.813935584629924
Epoch 7 valid loss: 27.033441203664353
Epoch 8 valid loss: 27.21880263252504
Epoch 9 valid loss: 27.363106326693174
Epoch 10 valid loss: 27.495163866881985
Epoch 11 valid loss: 27.61247335476944
Epoch 12 valid loss: 27.714108669031607
Epoch 13 valid loss: 27.801664669422774
Epoch 14 valid loss: 27.87712899377406
Epoch 15 valid loss: 27.942824626630223
Epoch 16 valid loss: 28.00033164071588
Epoch 17 valid loss: 28.051229641896892
Epoch 18 valid loss: 28.09666900932346
Epoch 19 valid loss: 28.13764070472604
Epoch 20 valid loss: 28.175005164578625
Epoch 21 valid loss: 28.209454651041867
Epoch 22 valid loss: 28.241226136477763
Epoch 23 valid loss: 28.270834976461277
Epoch 24 valid loss: 28.298524421237968
Epoch 25 valid loss