# Energy Forecasting with Deep Learning (Part 1)
* In this tutorial, we will walk through how a data scientist would train a deep learning model for time series forecasting.
* In the second notebook (y), we will take the same code and run it in an end-to-end MLOps pipeline using the Machine Learning Data Management system (MLDM).

* The dataset we are using is the energy dataset, loaded from `data/energy_dataset.csv`
* The model we are training is a Temporal Convolutional Network


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pytorch_lightning.callbacks import TQDMProgressBar

from darts import TimeSeries, concatenate
from darts.utils.callbacks import TFMProgressBar
from darts.models import TCNModel, RNNModel
from darts.dataprocessing.transformers import Scaler
from darts.utils.timeseries_generation import datetime_attribute_timeseries
from darts.metrics import mape, r2_score
from darts.utils.missing_values import fill_missing_values
from darts.datasets import AirPassengersDataset, SunspotsDataset, EnergyDataset
import warnings
from darts.metrics import mape, mse
from darts.metrics import mape

# Suppress every warning so the notebook output stays readable.
warnings.filterwarnings("ignore")


def generate_torch_kwargs():
    """Return the extra keyword arguments shared by the torch-based models.

    The result holds a single ``pl_trainer_kwargs`` entry that runs the torch
    models on CPU and disables progress bars for all model stages except
    training.
    """
    train_only_bar = TFMProgressBar(enable_train_bar_only=True)
    trainer_options = {"accelerator": "cpu", "callbacks": [train_only_bar]}
    return {"pl_trainer_kwargs": trainer_options}

In [None]:
import pandas as pd
from darts import TimeSeries

# Location of the raw energy CSV, relative to the working directory.
dataset_url='data/energy_dataset.csv'

# Read the CSV (parsing the 'time' column as dates) and index the frame by it.
df3 = pd.read_csv(dataset_url, parse_dates=['time'])
df3 = df3.set_index('time')

# Demonstration only: wrap a single column ('generation fossil gas')
# in a darts TimeSeries.
gas_generation = df3['generation fossil gas']
time_series_gas = TimeSeries.from_series(gas_generation)

# Print the first rows of the series to confirm the data loaded.
gas_preview = time_series_gas.pd_dataframe().head()
print(gas_preview)


In [None]:
# Average the readings per calendar day: the grouping key is the part of each
# stringified index value that precedes the first space.
day_labels = df3.index.astype(str).str.split(" ").str[0]
df3_day_avg = df3.groupby(day_labels).mean().reset_index()

# Build the target series from the hydro run-of-river column and fill its gaps.
hydro_daily = TimeSeries.from_dataframe(
    df3_day_avg, "time", ["generation hydro run-of-river and poundage"]
)
series_en = fill_missing_values(hydro_daily, "auto")

# Train / validation split: everything after 2017-09-01 goes to validation.
split_point = pd.Timestamp("20170901")
train_en, val_en = series_en.split_after(split_point)

# Fit the scaler on the training split only, then apply it to every series.
scaler_en = Scaler()
train_en_transformed = scaler_en.fit_transform(train_en)
val_en_transformed = scaler_en.transform(val_en)
series_en_transformed = scaler_en.transform(series_en)

# One-hot encoded "day" attribute, used later as a covariate
# (no scaling required because it is one-hot encoded).
day_series = datetime_attribute_timeseries(
    series_en_transformed, attribute="day", one_hot=True
)

# Plot both scaled splits on one figure.
plt.figure(figsize=(10, 3))
train_en_transformed.plot(label="train")
val_en_transformed.plot(label="validation")


In [None]:
# Save the transformed training and validation series, plus the day
# covariates, to files using pickle.
import os
import pickle

# Create the output directory first: open(..., 'wb') raises FileNotFoundError
# when 'cache/' does not exist yet (e.g. on a fresh checkout).
os.makedirs('cache', exist_ok=True)

with open('cache/train_en_transformed.pkl', 'wb') as f:
    pickle.dump(train_en_transformed, f)
with open('cache/val_en_transformed.pkl', 'wb') as f:
    pickle.dump(val_en_transformed, f)
with open('cache/day_series.pkl', 'wb') as f:
    pickle.dump(day_series, f)

In [None]:
# Load the transformed training and validation series (and the day covariates)
# back from the pickle files under 'cache/'.
def _read_pickle(path):
    """Return the object unpickled from the file at ``path``."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


train_en_transformed = _read_pickle('cache/train_en_transformed.pkl')
val_en_transformed = _read_pickle('cache/val_en_transformed.pkl')
day_series = _read_pickle('cache/day_series.pkl')

In [None]:
# Example: reload the cached series through darts' own pickle reader instead of
# calling pickle directly.
# The files are written under 'cache/' by the save cell, so they have to be
# read from that same directory; TimeSeries.from_pickle is the darts reader
# for a pickled TimeSeries.
train_en_transformed2 = TimeSeries.from_pickle('cache/train_en_transformed.pkl')
val_en_transformed2 = TimeSeries.from_pickle('cache/val_en_transformed.pkl')
# The same call works in any script that needs to load the datasets.
day_series2 = TimeSeries.from_pickle('cache/day_series.pkl')

In [None]:
# Name the model is registered under (also passed to TCNModel below).
model_name = "TCN_energy"

# Hyperparameters of the Temporal Convolutional Network, gathered in one place.
tcn_hyperparameters = {
    "input_chunk_length": 365,
    "output_chunk_length": 7,
    "n_epochs": 50,
    "dropout": 0.2,
    "dilation_base": 2,
    "weight_norm": True,
    "kernel_size": 5,
    "num_filters": 8,
    "nr_epochs_val_period": 1,
    "random_state": 0,
    "save_checkpoints": True,
    "model_name": model_name,
    "force_reset": True,
}

# The trainer settings (CPU, train-only progress bar) come from the shared helper.
model_en = TCNModel(**tcn_hyperparameters, **generate_torch_kwargs())

In [None]:
# Warm-start the freshly built model from previously saved weights, then limit
# the upcoming fit to a single epoch.
# NOTE(review): this path has no directory prefix, while the save cell further
# down writes to "models/TCN_model.pt" - confirm which file is intended here.
pretrained_weights_path = "TCN_model.pt"
model_en.load_weights(pretrained_weights_path)
model_en.n_epochs = 1

In [None]:
# Train on the scaled training split and validate on the scaled validation
# split; the same one-hot day covariates are supplied for both.
fit_inputs = {
    "series": train_en_transformed,
    "past_covariates": day_series,
    "val_series": val_en_transformed,
    "val_past_covariates": day_series,
}
model_en.fit(**fit_inputs)

In [None]:
# Training is done, let's save the model.
import os

# Make sure the target directory exists before saving; without it the save
# fails when 'models/' has not been created yet.
os.makedirs("models", exist_ok=True)
model_en.save("models/TCN_model.pt")


In [None]:
# Here is code to show how we can load a model from weights.
# The architecture must be rebuilt with the same hyperparameters that were used
# for training before the saved weights can be loaded into it.
model_name = "TCN_energy"
model_en = TCNModel(
    input_chunk_length=365,
    output_chunk_length=7,
    n_epochs=50,
    dropout=0.2,
    dilation_base=2,
    weight_norm=True,
    kernel_size=5,
    num_filters=8,
    nr_epochs_val_period=1,
    random_state=0,
    save_checkpoints=True,
    model_name=model_name,
    force_reset=True,
    **generate_torch_kwargs()
)
# Load from the path the model was saved to ("models/TCN_model.pt"); the bare
# "TCN_model.pt" used previously pointed at a different file than the one just
# written by model_en.save(...).
model_en.load_weights("models/TCN_model.pt")
# If we want to finetune with less than the pretraining epochs, we need to change the n_epochs attribute
model_en.n_epochs = 2

In [None]:
# Run backtest validation: starting at the first validation timestamp, produce
# 7-step forecasts every 7 steps without retraining the model.
backtest_settings = {
    "series": series_en_transformed,
    "past_covariates": day_series,
    "start": val_en_transformed.start_time(),
    "forecast_horizon": 7,
    "stride": 7,
    "last_points_only": False,
    "retrain": False,
    "verbose": True,
}
weekly_forecasts = model_en.historical_forecasts(**backtest_settings)

# Join the individual forecasts into one series for plotting.
backtest_en = concatenate(weekly_forecasts)

In [None]:
# Overlay the backtest forecasts on the actual (scaled) validation series.
plt.figure(figsize=(10, 6))
for curve, curve_label in (
    (val_en_transformed, "actual"),
    (backtest_en, "backtest (H=7)"),
):
    curve.plot(label=curve_label)
plt.legend()

In [None]:

# Evaluate the model on the training and validation splits: compute historical
# forecasts for each, report the RMSE, and plot actual vs. forecast values.
import matplotlib.pyplot as plt
from darts.metrics import rmse  # rmse is not among the notebook's top-level imports

# Ensure the model is loaded (you should have executed this part already)
# model_en = TCNModel.load_from_checkpoint(model_name=model_name, best=True)

# Forecast on the training set
backtest_train = model_en.historical_forecasts(
    series=train_en_transformed,
    past_covariates=day_series,
    forecast_horizon=7,
    retrain=False,
    verbose=True,
    overlap_end=False,  # Avoid overlapping with validation period
)

# Forecast on the validation set
backtest_val = model_en.historical_forecasts(
    series=val_en_transformed,
    past_covariates=day_series,
    forecast_horizon=7,
    retrain=False,
    verbose=True,
    overlap_end=False,
)

# Compute RMSE for the training and validation sets (on the scaled series)
rmse_train = round(rmse(train_en_transformed, backtest_train), 2)
rmse_val = round(rmse(val_en_transformed, backtest_val), 2)

print(f"RMSE on Training Set: {rmse_train}")
print(f"RMSE on Validation Set: {rmse_val}")

# Plotting the actual vs. forecasted values
plt.figure(figsize=(15, 5))

# Training set plot
plt.subplot(1, 2, 1)
train_en_transformed.plot(label="Actual - Train")
backtest_train.plot(label="Forecast - Train")
# The value shown is the RMSE computed above, so label it as RMSE (not MAPE).
plt.title(f"Training Set Forecasts RMSE: {rmse_train}")
plt.legend()

# Validation set plot
plt.subplot(1, 2, 2)
val_en_transformed.plot(label="Actual - Validation")
backtest_val.plot(label="Forecast - Validation")
plt.title(f"Validation Set Forecasts RMSE: {rmse_val}")
plt.legend()

plt.show()