In [1]:
import torch
import torch.nn as nn
import torchinfo
import numpy as np
import pandas as pd

# Visualize dataset

In [3]:
data_path = "dataset/temp.csv"
data = pd.read_csv(data_path)
data.head()

Unnamed: 0.1,Unnamed: 0,Temperature (C)
0,0,9.472222
1,1,9.355556
2,2,9.377778
3,3,8.288889
4,4,8.755556


In [4]:
len(data["Temperature (C)"])

96453

In [8]:
type(data["Temperature (C)"].values)

numpy.ndarray

# Define models

## RNN

In [56]:
class MyRNN(nn.Module):
    def __init__(self, 
                 sequence_length = 5,
                 embed_dim = 3,
                 hidden_dim = 16,
                 num_layers=2):
        super(MyRNN, self).__init__()
        self.fc1 = nn.Linear(sequence_length, embed_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)
        self.relu = nn.ReLU()
        self.rnn = nn.RNN(embed_dim, hidden_dim, num_layers, batch_first=True)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.rnn(x)
        x = self.relu(x)
        x = self.fc2(x)

        return x

In [57]:
sample_rnn = MyRNN()

In [59]:
import torchinfo
torchinfo.summary(sample_rnn, input=(1, 5, 1))

Layer (type:depth-idx)                   Param #
MyRNN                                    --
├─Linear: 1-1                            18
├─Linear: 1-2                            17
├─ReLU: 1-3                              --
├─RNN: 1-4                               880
Total params: 915
Trainable params: 915
Non-trainable params: 0

# Define pytorch dataset and dataloader

In [37]:
import torch
from torch.utils.data import Dataset, DataLoader

class WeatherDataset(Dataset):
    def __init__(self, data, sequence_length):
        # data: numpy array data
        self.data = data
        self.sequence_length = sequence_length

    def __len__(self):
        return len(self.data) - self.sequence_length
    
    def __getitem__(self, index):
        start_index = index
        end_index = index + self.sequence_length

        sequence_data = self.data[start_index:end_index]
        target = self.data[end_index+1]

        # convert to torch tensor
        sequence_data = torch.FloatTensor(sequence_data)
        target = torch.FloatTensor([target])
  
        return sequence_data, target

In [60]:
sample_data = data["Temperature (C)"].values
sample_dataset = WeatherDataset(data=sample_data, sequence_length=5)

print(next(iter(sample_dataset)))

(tensor([9.4722, 9.3556, 9.3778, 8.2889, 8.7556]), tensor([7.7333]))


# Train - Val model

## Split dataset

In [41]:
from sklearn.model_selection import train_test_split

all_data = data['Temperature (C)'].values
train_data, val_data = train_test_split(all_data, test_size=0.2, shuffle=False)
len(train_data), len(val_data)

(77162, 19291)

## Create dataloader

In [43]:
LAG = 5 # forecasting sequence_length
BATCH_SIZE = 64

train_dataset = WeatherDataset(train_data, sequence_length=LAG)
val_dataset = WeatherDataset(val_data, sequence_length=LAG)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

## Train

In [None]:
num_epochs = 300
# Define your loss function and instantiate the model
criterion = nn.MSELoss()
model = MyRNN()

# Instantiate your optimizer (e.g., stochastic gradient descent)
optimizer = optim.SGD(model.parameters(), lr=0.01)

for epoch in range(num_epochs):
    # Training loop
    for batch_idx, (sequence, target) in enumerate(train_dataloader):
        # Your training code here

    # Validation loop
    with torch.no_grad():
        for batch_idx, (sequence, target) in enumerate(val_dataloader):
            # Your validation code here