<a href="https://colab.research.google.com/github/fabbz03/load_forecasting_TFT/blob/main/test_code_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor

# Step 1: Load your dataset (replace with your actual dataset path)
data = pd.read_csv("load_forecasting_data.csv")  # Example CSV file

# Step 2: Prepare the dataset
# Ensure that 'time_idx', 'group', and 'target' columns are correctly named.
# 'time_idx' is a sequential index, 'group' is the group identifier (e.g., regions or customers),
# 'target' is the load (energy consumption), and external covariates should also be included.

# Example: assume the following structure in the data
# time_idx (time steps), group (region or customer ID), load (target), and other covariates
data["time_idx"] = pd.to_datetime(data["timestamp"]).astype(int) // 10**9  # Convert timestamp to integer time index

# Step 3: Define the TimeSeriesDataSet
max_encoder_length = 30  # Number of past observations
max_prediction_length = 7  # Number of future steps to predict

# Create the dataset for training
training_cutoff = data["time_idx"].max() - max_prediction_length
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="load",
    group_ids=["group"],
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["group"],
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["load"],  # This is the target we want to predict
    target_normalizer=GroupNormalizer(groups=["group"], transformation="softplus"),
)

# Step 4: Create dataloaders for training and validation
train_dataloader = training.to_dataloader(train=True, batch_size=64, num_workers=4)

# Step 5: Define the TFT Model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,  # Size of the internal layers
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# Step 6: Train the model
# Configure the trainer with early stopping and learning rate monitoring
early_stop_callback = EarlyStopping(monitor="val_loss", patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()

trainer = pl.Trainer(
    max_epochs=30,
    gpus=1 if torch.cuda.is_available() else 0,  # Set GPUs if available
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback, lr_logger],
)

# Fit the model
trainer.fit(tft, train_dataloaders=train_dataloader)

# Step 7: Model Evaluation
# Predicting on new data or test set
test_data = data[lambda x: x.time_idx > training_cutoff]  # Example for testing
predictions = tft.predict(test_data)

print(predictions)


ModuleNotFoundError: No module named 'pytorch_lightning'

In [2]:
!pip install pytorch-lightning


Collecting pytorch-lightning
  Downloading pytorch_lightning-2.4.0-py3-none-any.whl.metadata (21 kB)
Collecting torchmetrics>=0.7.0 (from pytorch-lightning)
  Downloading torchmetrics-1.4.2-py3-none-any.whl.metadata (19 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning)
  Downloading lightning_utilities-0.11.7-py3-none-any.whl.metadata (5.2 kB)
Downloading pytorch_lightning-2.4.0-py3-none-any.whl (815 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m815.2/815.2 kB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning_utilities-0.11.7-py3-none-any.whl (26 kB)
Downloading torchmetrics-1.4.2-py3-none-any.whl (869 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m869.2/869.2 kB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lightning-utilities, torchmetrics, pytorch-lightning
Successfully installed lightning-utilities-0.11.7 pytorch-lightning-2.4.0 torchmetrics-1.4.2


In [4]:

import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor

# Step 1: Load your dataset (replace with your actual dataset path)
data = pd.read_csv("load_forecasting_data.csv")  # Example CSV file

# Step 2: Prepare the dataset
# Ensure that 'time_idx', 'group', and 'target' columns are correctly named.
# 'time_idx' is a sequential index, 'group' is the group identifier (e.g., regions or customers),
# 'target' is the load (energy consumption), and external covariates should also be included.

# Example: assume the following structure in the data
# time_idx (time steps), group (region or customer ID), load (target), and other covariates
data["time_idx"] = pd.to_datetime(data["timestamp"]).astype(int) // 10**9  # Convert timestamp to integer time index

# Step 3: Define the TimeSeriesDataSet
max_encoder_length = 30  # Number of past observations
max_prediction_length = 7  # Number of future steps to predict

# Create the dataset for training
training_cutoff = data["time_idx"].max() - max_prediction_length
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="load",
    group_ids=["group"],
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["group"],
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["load"],  # This is the target we want to predict
    target_normalizer=GroupNormalizer(groups=["group"], transformation="softplus"),
)

# Step 4: Create dataloaders for training and validation
train_dataloader = training.to_dataloader(train=True, batch_size=64, num_workers=4)

# Step 5: Define the TFT Model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,  # Size of the internal layers
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# Step 6: Train the model
# Configure the trainer with early stopping and learning rate monitoring
early_stop_callback = EarlyStopping(monitor="val_loss", patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()

trainer = pl.Trainer(
    max_epochs=30,
    gpus=1 if torch.cuda.is_available() else 0,  # Set GPUs if available
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback, lr_logger],
)

# Fit the model
trainer.fit(tft, train_dataloaders=train_dataloader)

# Step 7: Model Evaluation
# Predicting on new data or test set
test_data = data[lambda x: x.time_idx > training_cutoff]  # Example for testing
predictions = tft.predict(test_data)

print(predictions)

ModuleNotFoundError: No module named 'pytorch_forecasting'

In [5]:
!pip install pytorch-forecasting


Collecting pytorch-forecasting
  Downloading pytorch_forecasting-1.1.1-py3-none-any.whl.metadata (13 kB)
Collecting lightning<3.0.0,>=2.0.0 (from pytorch-forecasting)
  Downloading lightning-2.4.0-py3-none-any.whl.metadata (38 kB)
Downloading pytorch_forecasting-1.1.1-py3-none-any.whl (177 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.6/177.6 kB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lightning-2.4.0-py3-none-any.whl (810 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m811.0/811.0 kB[0m [31m40.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: lightning, pytorch-forecasting
Successfully installed lightning-2.4.0 pytorch-forecasting-1.1.1


In [6]:

# imports
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor

# Step 1: Load your dataset (replace with your actual dataset path)
data = pd.read_csv("load_forecasting_data.csv")  # Example CSV file

# Step 2: Prepare the dataset
# Ensure that 'time_idx', 'group', and 'target' columns are correctly named.
# 'time_idx' is a sequential index, 'group' is the group identifier (e.g., regions or customers),
# 'target' is the load (energy consumption), and external covariates should also be included.

# Example: assume the following structure in the data
# time_idx (time steps), group (region or customer ID), load (target), and other covariates
data["time_idx"] = pd.to_datetime(data["timestamp"]).astype(int) // 10**9  # Convert timestamp to integer time index

# Step 3: Define the TimeSeriesDataSet
max_encoder_length = 30  # Number of past observations
max_prediction_length = 7  # Number of future steps to predict

# Create the dataset for training
training_cutoff = data["time_idx"].max() - max_prediction_length
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="load",
    group_ids=["group"],
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["group"],
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["load"],  # This is the target we want to predict
    target_normalizer=GroupNormalizer(groups=["group"], transformation="softplus"),
)

# Step 4: Create dataloaders for training and validation
train_dataloader = training.to_dataloader(train=True, batch_size=64, num_workers=4)

# Step 5: Define the TFT Model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,  # Size of the internal layers
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# Step 6: Train the model
# Configure the trainer with early stopping and learning rate monitoring
early_stop_callback = EarlyStopping(monitor="val_loss", patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()

trainer = pl.Trainer(
    max_epochs=30,
    gpus=1 if torch.cuda.is_available() else 0,  # Set GPUs if available
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback, lr_logger],
)

# Fit the model
trainer.fit(tft, train_dataloaders=train_dataloader)

# Step 7: Model Evaluation
# Predicting on new data or test set
test_data = data[lambda x: x.time_idx > training_cutoff]  # Example for testing
predictions = tft.predict(test_data)

print(predictions)



FileNotFoundError: [Errno 2] No such file or directory: 'load_forecasting_data.csv'

In [8]:

# imports
import pandas as pd
import pytorch_lightning as pl
import torch
from pytorch_forecasting import TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import QuantileLoss
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor

# Step 1: Load your dataset (replace with your actual dataset path)
data = pd.read_csv("load_forecasting_data.csv")  # Example CSV file

# Step 2: Prepare the dataset
# Ensure that 'time_idx', 'group', and 'target' columns are correctly named.
# 'time_idx' is a sequential index, 'group' is the group identifier (e.g., regions or customers),
# 'target' is the load (energy consumption), and external covariates should also be included.

# Example: assume the following structure in the data
# time_idx (time steps), group (region or customer ID), load (target), and other covariates
data["time_idx"] = pd.to_datetime(data["timestamp"]).astype(int) // 10**9  # Convert timestamp to integer time index

# Step 3: Define the TimeSeriesDataSet
max_encoder_length = 30  # Number of past observations
max_prediction_length = 7  # Number of future steps to predict

# Create the dataset for training
training_cutoff = data["time_idx"].max() - max_prediction_length
training = TimeSeriesDataSet(
    data[lambda x: x.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="load",
    group_ids=["group"],
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=["group"],
    time_varying_known_reals=["time_idx"],
    time_varying_unknown_reals=["load"],  # This is the target we want to predict
    target_normalizer=GroupNormalizer(groups=["group"], transformation="softplus"),
)

# Step 4: Create dataloaders for training and validation
train_dataloader = training.to_dataloader(train=True, batch_size=64, num_workers=4)

# Step 5: Define the TFT Model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,  # Size of the internal layers
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

# Step 6: Train the model
# Configure the trainer with early stopping and learning rate monitoring
early_stop_callback = EarlyStopping(monitor="val_loss", patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()

trainer = pl.Trainer(
    max_epochs=30,
    gpus=1 if torch.cuda.is_available() else 0,  # Set GPUs if available
    gradient_clip_val=0.1,
    callbacks=[early_stop_callback, lr_logger],
)

# Fit the model
trainer.fit(tft, train_dataloaders=train_dataloader)

# Step 7: Model Evaluation
# Predicting on new data or test set
test_data = data[lambda x: x.time_idx > training_cutoff]  # Example for testing
predictions = tft.predict(test_data)

print(predictions)

FileNotFoundError: [Errno 2] No such file or directory: 'load_forecasting_data.csv'

# New Section

In [20]:
import pandas as pd

df = pd.read_csv('/content/drive/My Drive/load_forecasting_data.csv') # removed extra indent

In [21]:
df.head()

Unnamed: 0,utc_timestamp,cet_cest_timestamp,ES_load_actual_entsoe_transparency
0,2014-12-31T23:00:00Z,2015-01-01T00:00:00+0100,
1,2015-01-01T00:00:00Z,2015-01-01T01:00:00+0100,
2,2015-01-01T01:00:00Z,2015-01-01T02:00:00+0100,22734.0
3,2015-01-01T02:00:00Z,2015-01-01T03:00:00+0100,21286.0
4,2015-01-01T03:00:00Z,2015-01-01T04:00:00+0100,20264.0
