## RNN, LSTM, and GRU
**Code by**: Noor de Bruijn \
**Date**: Thursday, November 6th 2025 \
**Task**: Forecasting of electricity consumption (regression)

In [119]:
#Import packages
import numpy as np
import pandas as pd
import torch
import torchmetrics
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm

#### I. Import dataset

In [94]:
#Read the ';'-separated file; the first column becomes the index and is parsed as dates
dataset = pd.read_csv(
    "LD2011_2014.txt",
    sep=';',
    index_col=0,
    parse_dates=True,
    low_memory=False,
)

#### II. Create datetime and year variable

In [95]:
#Move the timestamp index into a regular "Datetime" column and derive the
#calendar year of every row from it
dataset = (
    dataset
    .reset_index()
    .rename(columns={"index": "Datetime"})
    .assign(year=lambda frame: frame["Datetime"].dt.year)
)

In [96]:
#Count the missing MT_001 values within each calendar year
selection = dataset[['Datetime', 'MT_001', 'year']]
missing_per_year = selection['MT_001'].isna().groupby(selection['year']).sum()
print(missing_per_year)

year
2011    0
2012    0
2013    0
2014    0
2015    0
Name: MT_001, dtype: int64


#### III. Split data into train_data and test_data

In [97]:
#Split by year: rows from 2011-2013 form the training set, rows from 2014 the test set.
#Only the timestamp and the MT_001 series are kept; MT_001 is renamed to "Consumption".
train_data = (
    dataset
    .loc[dataset["year"].between(2011, 2013), ["Datetime", "MT_001"]]
    .rename(columns={"MT_001": "Consumption"})
)
print(len(train_data))

test_data = (
    dataset
    .loc[dataset["year"] == 2014, ["Datetime", "MT_001"]]
    .rename(columns={"MT_001": "Consumption"})
)
print(len(test_data))

105215
35040


#### IV. Count missing values in Consumption column

In [98]:
#Number of missing Consumption values: training set on the first line, test set on the second
print(
    *(frame['Consumption'].isna().sum() for frame in (train_data, test_data)),
    sep="\n",
)

0
0


#### V. Make sure type = float

In [99]:
#Replace the decimal comma by a dot, then cast to float.
#Going through astype(str) keeps this cell re-runnable: after a first run the
#column is already float, and calling `.str` on it directly would raise an
#AttributeError. regex=False makes the replacement a plain literal match.
train_data['Consumption'] = (
    train_data['Consumption']
    .astype(str)
    .str.replace(',', '.', regex=False)
    .astype(float)
)
test_data['Consumption'] = (
    test_data['Consumption']
    .astype(str)
    .str.replace(',', '.', regex=False)
    .astype(float)
)

#### VI. Create TensorDataset

In [124]:
#Number of consecutive readings that form one input window; the reading that
#directly follows the window is the prediction target (see create_sequences).
#NOTE(review): 35,040 rows in 2014 suggests 15-minute data, so 96 steps would be one day - confirm.
seq_length = 96

def create_sequences(df, seq_length):
    """Build sliding-window samples for one-step-ahead forecasting.

    Sample ``i`` takes the ``seq_length`` consecutive values starting at row
    ``i`` of the second column of ``df`` as input, and the value directly
    after that window as target.

    Args:
        df: DataFrame whose second column (position 1) holds the series.
        seq_length: Number of time steps per input window (non-negative).

    Returns:
        Tuple ``(X, y)`` of NumPy arrays with shapes
        ``(len(df) - seq_length, seq_length)`` and ``(len(df) - seq_length,)``.
        If ``df`` has ``seq_length`` rows or fewer, the arrays are empty but
        still shaped ``(0, seq_length)`` and ``(0,)``.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError("seq_length must be non-negative")

    # Pull the column out once; per-row .iloc slicing in a Python loop is slow.
    values = df.iloc[:, 1].to_numpy()
    n_samples = len(values) - seq_length
    if n_samples <= 0:
        # Not enough rows for a window plus its target: keep a consistent 2-D shape.
        return (
            np.empty((0, seq_length), dtype=values.dtype),
            np.empty((0,), dtype=values.dtype),
        )

    # The view yields len(values) - seq_length + 1 windows; the last one has no
    # following value to predict, so it is dropped.
    windows = np.lib.stride_tricks.sliding_window_view(values, seq_length)
    # Copy so callers get ordinary, writable arrays rather than read-only views.
    return windows[:n_samples].copy(), values[seq_length:].copy()

#Build the sliding-window samples for both splits and report their shapes
X_train, y_train = create_sequences(train_data, seq_length)
print(X_train.shape, y_train.shape)

X_test, y_test = create_sequences(test_data, seq_length)
print(X_test.shape, y_test.shape)

#Cast all four arrays to float32
X_train, y_train, X_test, y_test = (
    array.astype(np.float32) for array in (X_train, y_train, X_test, y_test)
)

#Pair the inputs and targets of each split in a TensorDataset
dataset_train = TensorDataset(*map(torch.from_numpy, (X_train, y_train)))
dataset_test = TensorDataset(*map(torch.from_numpy, (X_test, y_test)))

(105119, 96) (105119,)
(34944, 96) (34944,)


#### VII. Create train_loader and test_loader
**Important**: We do not shuffle the data here. We are working with a dataset where there is a temporal dependency. Shuffling sequences randomly could leak future information into the model or break the natural order it needs to learn.

In [126]:
#Batch both datasets in their stored order (no shuffling)
batch_size = 4

train_loader = DataLoader(
    dataset_train,
    batch_size=batch_size,
    shuffle=False,
)
test_loader = DataLoader(
    dataset_test,
    batch_size=batch_size,
    shuffle=False,
)

#### Define and run RNN
Recurrent Neural Networks

In [127]:
#Define RNN
class RNN(nn.Module):
    """Stacked RNN followed by a linear layer that outputs one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden state of each recurrent layer.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size=1, hidden_size=32, num_layers=2):
        super().__init__()
        # Kept so forward() can build an initial state of matching shape.
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.rnn = nn.RNN(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one prediction per sequence.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, 1).
        """
        # Create the zero initial state with the input's dtype and device;
        # a plain torch.zeros(...) lives on the CPU and fails once the model
        # and batch have been moved to a GPU.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            dtype=x.dtype, device=x.device,
        )
        out, _ = self.rnn(x, h0)
        # Only the output of the last time step feeds the regression head.
        out = self.fc(out[:, -1, :])
        return out

In [128]:
#Seed PyTorch's random number generator before the weights are created, so the
#initialisation (and with it the reported losses) can be reproduced on a re-run
torch.manual_seed(42)

#Initialize model
RNN_model = RNN()

#Loss function
criterion = nn.MSELoss()

#Optimizer
optimizer = optim.Adam(RNN_model.parameters(), lr=0.001)

In [129]:
#Device set-up
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
RNN_model = RNN_model.to(device)

#Training the RNN
num_epochs = 1

for epoch in range(num_epochs):

    #Set to training mode
    RNN_model.train()
    epoch_loss = 0.0

    for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        #Add a trailing feature dimension: (batch, seq_len) -> (batch, seq_len, 1)
        X_batch = X_batch.unsqueeze(-1).to(device)
        y_batch = y_batch.to(device)

        #Clear the gradients left over from the previous step
        optimizer.zero_grad()

        #Forward pass
        outputs = RNN_model(X_batch)

        #Calculate loss. Only the last dimension is squeezed: a bare squeeze()
        #would also drop the batch dimension of a single-sample batch, so the
        #prediction would no longer match the shape of y_batch.
        loss = criterion(outputs.squeeze(-1), y_batch)

        #Backward pass
        loss.backward()

        #Update the model parameters
        optimizer.step()

        #Track loss
        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_loader):.4f}")


Epoch 1/1: 100%|█████████████████████████| 26280/26280 [02:41<00:00, 162.77it/s]

Epoch 1/1, Loss: 5.5845





In [130]:
#Evaluation loop
#Metrics tracking: the metric accumulates its state in tensors, so it is moved
#to the same device as the predictions (otherwise updating it fails on a GPU)
mse = torchmetrics.MeanSquaredError().to(device)

RNN_model.eval()

with torch.no_grad():
    #Fixed label: this loop is not an epoch and should not depend on the
    #`epoch` variable left behind by the training cell
    for X_batch, y_batch in tqdm(test_loader, desc="Evaluating RNN"):
        X_batch = X_batch.unsqueeze(-1).to(device)
        y_batch = y_batch.to(device)

        outputs = RNN_model(X_batch)

        #squeeze(-1) keeps the batch dimension even for a single-sample batch,
        #so predictions and targets always have the same shape
        mse.update(outputs.squeeze(-1), y_batch)

print(f"Test MSE: {mse.compute()}")

Epoch 1/1: 100%|███████████████████████████| 8736/8736 [00:09<00:00, 911.07it/s]

Test MSE: 4.31719970703125





#### II. LSTM
Long Short-Term Memory cell
1. **Forget gate**: What to remove from long-term memory.
2. **Input gate**: What to save to long-term memory.
3. **Output gate**: What to return at the current time step

#### Define and run LSTM

In [None]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer that outputs one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden and cell state of each LSTM layer.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size=32, num_layers=2):
        super().__init__()
        # Kept so forward() can build initial states of matching shape.
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one prediction per sequence.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, 1).
        """
        # Create the zero initial hidden and cell state with the input's dtype
        # and device; plain torch.zeros(...) tensors live on the CPU and fail
        # once the model and batch have been moved to a GPU.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        out, _ = self.lstm(x, (h0, c0))
        # Only the output of the last time step feeds the regression head.
        out = self.fc(out[:, -1, :])
        return out

In [132]:
#Seed PyTorch's random number generator before the weights are created, so the
#initialisation (and with it the reported losses) can be reproduced on a re-run
torch.manual_seed(42)

#Initialize model
LSTM_model = LSTM(input_size=1)

#Loss function
criterion = nn.MSELoss()

#Optimizer
optimizer = optim.Adam(LSTM_model.parameters(), lr=0.001)

In [133]:
#Device set-up
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
LSTM_model = LSTM_model.to(device)

#Quick trial run
num_epochs = 1

for epoch in range(num_epochs):

    #Set to training mode
    LSTM_model.train()
    epoch_loss = 0.0

    for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        #Add a trailing feature dimension: (batch, seq_len) -> (batch, seq_len, 1)
        X_batch = X_batch.unsqueeze(-1).to(device)
        y_batch = y_batch.to(device)

        #Clear the gradients left over from the previous step
        optimizer.zero_grad()

        #Forward pass
        outputs = LSTM_model(X_batch)

        #Calculate loss. Only the last dimension is squeezed: a bare squeeze()
        #would also drop the batch dimension of a single-sample batch, so the
        #prediction would no longer match the shape of y_batch.
        loss = criterion(outputs.squeeze(-1), y_batch)

        #Backward pass
        loss.backward()

        #Update the model parameters
        optimizer.step()

        #Track loss
        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_loader):.4f}")

Epoch 1/1: 100%|██████████████████████████| 26280/26280 [07:21<00:00, 59.46it/s]

Epoch 1/1, Loss: 5.6527





In [134]:
#Evaluation loop
#Metrics tracking: the metric accumulates its state in tensors, so it is moved
#to the same device as the predictions (otherwise updating it fails on a GPU)
mse = torchmetrics.MeanSquaredError().to(device)

LSTM_model.eval()

with torch.no_grad():
    #Fixed label: this loop is not an epoch and should not depend on the
    #`epoch` variable left behind by the training cell
    for X_batch, y_batch in tqdm(test_loader, desc="Evaluating LSTM"):
        X_batch = X_batch.unsqueeze(-1).to(device)
        y_batch = y_batch.to(device)

        outputs = LSTM_model(X_batch)

        #squeeze(-1) keeps the batch dimension even for a single-sample batch,
        #so predictions and targets always have the same shape
        mse.update(outputs.squeeze(-1), y_batch)

print(f"Test MSE: {mse.compute()}")

Epoch 1/1: 100%|███████████████████████████| 8736/8736 [00:26<00:00, 329.23it/s]

Test MSE: 4.1954498291015625





#### III. GRU
Gated Recurrent Unit (simplified version of LSTM cell)
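1. **Update gate**: Takes over the job of the LSTM's forget and input gates, deciding how much of the previous hidden state to keep and how much new information to add.
2. **Reset gate**: Decides how much of the previous hidden state is used when computing the new candidate state.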

**Important to remember**: No output gate and just one hidden state.

In [138]:
#Code below is almost identical to RNN > we replace nn.RNN with nn.GRU

#Define GRU
class GRU(nn.Module):
    """Stacked GRU followed by a linear layer that outputs one value per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the hidden state of each GRU layer.
        num_layers: Number of stacked GRU layers.
    """

    def __init__(self, input_size=1, hidden_size=32, num_layers=2):
        super().__init__()
        # Kept so forward() can build an initial state of matching shape.
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True,
        )
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Return one prediction per sequence.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, 1).
        """
        # Create the zero initial state with the input's dtype and device;
        # a plain torch.zeros(...) lives on the CPU and fails once the model
        # and batch have been moved to a GPU.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_size,
            dtype=x.dtype, device=x.device,
        )
        out, _ = self.gru(x, h0)
        # Only the output of the last time step feeds the regression head.
        out = self.fc(out[:, -1, :])
        return out

In [139]:
#Seed PyTorch's random number generator before the weights are created, so the
#initialisation (and with it the reported losses) can be reproduced on a re-run
torch.manual_seed(42)

#Initialize model
GRU_model = GRU()

#Loss function
criterion = nn.MSELoss()

#Optimizer
optimizer = optim.Adam(GRU_model.parameters(), lr=0.001)

In [140]:
#Device set-up
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
GRU_model = GRU_model.to(device)

#Quick trial run
num_epochs = 1

for epoch in range(num_epochs):

    #Set to training mode
    GRU_model.train()
    epoch_loss = 0.0

    for X_batch, y_batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        #Add a trailing feature dimension: (batch, seq_len) -> (batch, seq_len, 1)
        X_batch = X_batch.unsqueeze(-1).to(device)
        y_batch = y_batch.to(device)

        #Clear the gradients left over from the previous step
        optimizer.zero_grad()

        #Forward pass
        outputs = GRU_model(X_batch)

        #Calculate loss. Only the last dimension is squeezed: a bare squeeze()
        #would also drop the batch dimension of a single-sample batch, so the
        #prediction would no longer match the shape of y_batch.
        loss = criterion(outputs.squeeze(-1), y_batch)

        #Backward pass
        loss.backward()

        #Update the model parameters
        optimizer.step()

        #Track loss
        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_loader):.4f}")

Epoch 1/1: 100%|██████████████████████████| 26280/26280 [08:08<00:00, 53.80it/s]

Epoch 1/1, Loss: 6.2439





In [141]:
#Evaluation loop
#Metrics tracking: the metric accumulates its state in tensors, so it is moved
#to the same device as the predictions (otherwise updating it fails on a GPU)
mse = torchmetrics.MeanSquaredError().to(device)

GRU_model.eval()

with torch.no_grad():
    #Fixed label: this loop is not an epoch and should not depend on the
    #`epoch` variable left behind by the training cell
    for X_batch, y_batch in tqdm(test_loader, desc="Evaluating GRU"):
        X_batch = X_batch.unsqueeze(-1).to(device)
        y_batch = y_batch.to(device)

        outputs = GRU_model(X_batch)

        #squeeze(-1) keeps the batch dimension even for a single-sample batch,
        #so predictions and targets always have the same shape
        mse.update(outputs.squeeze(-1), y_batch)

print(f"Test MSE: {mse.compute()}")

Epoch 1/1: 100%|███████████████████████████| 8736/8736 [00:26<00:00, 332.28it/s]

Test MSE: 4.2448344230651855



