In [115]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import os
from pathlib import Path

BASE_DIR = Path(os.getcwd()).parent.parent.parent


In [116]:
data_path = os.path.join(BASE_DIR, 'backend', 'data', 'processed', 'FEATURE_ENGINEERED_DATASET.csv')
data = pd.read_csv(data_path)

In [117]:
data['date'] = pd.to_datetime(data['date'])

# Sort by date to ensure chronological order
data = data.sort_values('date').reset_index(drop=True)

# Create time_idx - required by TimeSeriesDataSet (sequential integer index)
data['time_idx'] = range(len(data))

# Create a dummy group_id for single time series (all rows get same ID)
data['group_id'] = 0

# Create time-based features for static encoding (optional but helpful)
data['year'] = data['date'].dt.year
data['month'] = data['date'].dt.month
data['day'] = data['date'].dt.day
data['hour'] = data['date'].dt.hour
data['day_of_week'] = data['date'].dt.dayofweek

In [118]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10943 entries, 0 to 10942
Data columns (total 69 columns):
 #   Column                                   Non-Null Count  Dtype         
---  ------                                   --------------  -----         
 0   date                                     10943 non-null  datetime64[ns]
 1   load                                     10943 non-null  float64       
 2   temperature_2m                           10943 non-null  float64       
 3   apparent_temperature                     10943 non-null  float64       
 4   relative_humidity_2m                     10943 non-null  float64       
 5   vapour_pressure_deficit                  10943 non-null  float64       
 6   pressure_msl                             10943 non-null  float64       
 7   precipitation                            10943 non-null  float64       
 8   cloud_cover                              10943 non-null  float64       
 9   cloud_cover_low                        

In [119]:
def past_mean(series, window):
    return series.shift(1).rolling(window, min_periods=1).mean()

def past_std(series, window):
    return series.shift(1).rolling(window, min_periods=1).std()
    
def past_sum(series, window):
    return series.shift(1).rolling(window, min_periods=1).sum()

In [120]:
# Shifting all the features to avoid data leakage
# Since we want the current row to only contain info from the past

# load-based
data['load_roll_std_3h']    = past_std(data['load'], 3)
data['load_roll_std_24h']   = past_std(data['load'], 24)
data['load_roll_std_168h']  = past_std(data['load'], 168)
data['load_roll_mean_24h']  = past_mean(data['load'], 24)
data['load_roll_mean_168h'] = past_mean(data['load'], 168)

# wind direction
data['wind_dir_cos_10m_roll_mean_3h'] = past_mean(data['wind_dir_cos_10m'], 3)
data['wind_dir_sin_10m_roll_mean_3h'] = past_mean(data['wind_dir_sin_10m'], 3)

# pressure & temperature
data['pressure_msl_roll_mean_3h']  = past_mean(data['pressure_msl'], 3)
data['pressure_msl_roll_mean_24h'] = past_mean(data['pressure_msl'], 24)
data['temperature_2m_roll_mean_3h']  = past_mean(data['temperature_2m'], 3)
data['temperature_2m_roll_mean_24h'] = past_mean(data['temperature_2m'], 24)

# apparent temp & humidity
data['apparent_temperature_roll_mean_3h']  = past_mean(data['apparent_temperature'], 3)
data['relative_humidity_2m_roll_mean_3h']  = past_mean(data['relative_humidity_2m'], 3)
data['relative_humidity_2m_roll_mean_24h'] = past_mean(data['relative_humidity_2m'], 24)

# VPD
data['vapour_pressure_deficit_roll_mean_3h']  = past_mean(data['vapour_pressure_deficit'], 3)
data['vapour_pressure_deficit_roll_mean_24h'] = past_mean(data['vapour_pressure_deficit'], 24)

# clouds/wind/sun
data['cloud_cover_roll_mean_3h']      = past_mean(data['cloud_cover'], 3)
data['cloud_cover_low_roll_mean_3h']  = past_mean(data['cloud_cover_low'], 3)
data['cloud_cover_mid_roll_mean_3h']  = past_mean(data['cloud_cover_mid'], 3)
data['cloud_cover_high_roll_mean_3h'] = past_mean(data['cloud_cover_high'], 3)
data['wind_gusts_10m_roll_mean_3h']   = past_mean(data['wind_gusts_10m'], 3)
data['wind_speed_10m_roll_mean_3h']   = past_mean(data['wind_speed_10m'], 3)
data['sunshine_duration_roll_mean_3h'] = past_mean(data['sunshine_duration'], 3)
data['et0_fao_evapotranspiration_roll_mean_3h'] = past_mean(data['et0_fao_evapotranspiration'], 3)

# precipitation (sum)
data['precipitation_roll_sum_3h'] = past_sum(data['precipitation'], 3)

# safe lag
data['load_lag_1h']   = data['load'].shift(1)
data['load_lag_2h']   = data['load'].shift(2)
data['load_lag_3h']   = data['load'].shift(3)
data['load_lag_24h']  = data['load'].shift(24)
data['load_lag_25h']  = data['load'].shift(25)
data['load_lag_168h'] = data['load'].shift(168)

data['pressure_msl_lag_24h']            = data['pressure_msl'].shift(24)
data['temperature_2m_lag_24h']          = data['temperature_2m'].shift(24)
data['relative_humidity_2m_lag_1h']     = data['relative_humidity_2m'].shift(1)
data['relative_humidity_2m_lag_24h']    = data['relative_humidity_2m'].shift(24)
data['vapour_pressure_deficit_lag_1h']  = data['vapour_pressure_deficit'].shift(1)
data['vapour_pressure_deficit_lag_24h'] = data['vapour_pressure_deficit'].shift(24)
data['apparent_temperature_lag_24h']    = data['apparent_temperature'].shift(24)

In [121]:
data.dropna(inplace=True)

# Drop date column as TimeSeriesDataSet uses time_idx
data = data.drop(['date'], axis=1)

# Verify we have time_idx and group_id
print(f"Data shape: {data.shape}")
print(f"Has time_idx: {'time_idx' in data.columns}")
print(f"Has group_id: {'group_id' in data.columns}")
print(f"Has load: {'load' in data.columns}")
print(f"\nFirst few columns: {list(data.columns[:10])}")

Data shape: (10775, 68)
Has time_idx: True
Has group_id: True
Has load: True

First few columns: ['load', 'temperature_2m', 'apparent_temperature', 'relative_humidity_2m', 'vapour_pressure_deficit', 'pressure_msl', 'precipitation', 'cloud_cover', 'cloud_cover_low', 'cloud_cover_mid']


In [122]:
from pytorch_forecasting import TimeSeriesDataSet

# Identify all feature columns (exclude target, time_idx, group_id, and time features we'll use as static)
exclude_cols = {'load', 'time_idx', 'group_id', 'year', 'month', 'day', 'hour', 'day_of_week'}
feature_cols = [col for col in data.columns if col not in exclude_cols]

# Separate known future features (can be forecasted/known in advance) from unknown (observed only)
# Known features: weather forecasts, time-based cyclical features, rolling features from past
known_reals = [
    # Weather features (assumed to be forecastable)
    "temperature_2m", "apparent_temperature", "relative_humidity_2m",
    "vapour_pressure_deficit", "pressure_msl", "precipitation",
    "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high",
    "et0_fao_evapotranspiration", "sunshine_duration",
    "wind_speed_10m", "wind_gusts_10m",
    # Cyclical time features
    "hour_sin", "hour_cos", "dow_sin", "dow_cos", "month_sin", "month_cos",
    # Wind direction features
    "wind_dir_cos_10m", "wind_dir_sin_10m",
    # Rolling features (derived from past, but computed for future too)
    "wind_dir_cos_10m_roll_mean_3h", "wind_dir_sin_10m_roll_mean_3h",
    "pressure_msl_roll_mean_3h", "pressure_msl_roll_mean_24h",
    "temperature_2m_roll_mean_3h", "temperature_2m_roll_mean_24h",
    "apparent_temperature_roll_mean_3h",
    "relative_humidity_2m_roll_mean_3h", "relative_humidity_2m_roll_mean_24h",
    "vapour_pressure_deficit_roll_mean_3h", "vapour_pressure_deficit_roll_mean_24h",
    "cloud_cover_roll_mean_3h", "cloud_cover_low_roll_mean_3h",
    "cloud_cover_mid_roll_mean_3h", "cloud_cover_high_roll_mean_3h",
    "wind_gusts_10m_roll_mean_3h", "wind_speed_10m_roll_mean_3h",
    "sunshine_duration_roll_mean_3h", "et0_fao_evapotranspiration_roll_mean_3h",
    "precipitation_roll_sum_3h",
    # Lag features (from past observations)
    "pressure_msl_lag_24h", "temperature_2m_lag_24h",
    "relative_humidity_2m_lag_1h", "relative_humidity_2m_lag_24h",
    "vapour_pressure_deficit_lag_1h", "vapour_pressure_deficit_lag_24h",
    "apparent_temperature_lag_24h",
]

# Unknown features: historical load values (observed only, not forecastable)
unknown_reals = [
    "load",  # Target variable (past values only)
    # Load-based features (derived from historical load)
    "load_lag_1h", "load_lag_2h", "load_lag_3h", "load_lag_24h", "load_lag_25h", "load_lag_168h",
    "load_roll_std_3h", "load_roll_std_24h", "load_roll_mean_24h",
    "load_roll_std_168h", "load_roll_mean_168h",
]

# Filter to only include columns that actually exist in the dataframe
known_reals = [col for col in known_reals if col in data.columns]
unknown_reals = [col for col in unknown_reals if col in data.columns]

print(f"Known reals: {len(known_reals)} features")
print(f"Unknown reals: {len(unknown_reals)} features")

# Create TimeSeriesDataSet
max_encoder_length = 168  # 1 week of historical data
max_prediction_length = 24  # Predict 24 hours ahead

training_cutoff = data["time_idx"].max() - max_prediction_length

training = TimeSeriesDataSet(
    data[data.time_idx <= training_cutoff],
    time_idx="time_idx",
    target="load",
    group_ids=["group_id"], 
    min_encoder_length=max_encoder_length // 2,  # Minimum encoder length
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=[],  # No static categorical features
    static_reals=[],  # Can add static real-valued features if needed
    time_varying_known_reals=known_reals,
    time_varying_unknown_reals=unknown_reals,
    target_normalizer=None,  # Will use default GroupNormalizer
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
)

# Create validation dataset
validation = TimeSeriesDataSet.from_dataset(training, data, predict=True, stop_randomization=True)

print(f"\nTraining samples: {len(training)}")
print(f"Validation samples: {len(validation)}")

Known reals: 49 features
Unknown reals: 12 features

Training samples: 10774
Validation samples: 1


In [123]:
# Create dataloaders
batch_size = 64  # Adjust based on your GPU memory

train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)

print(f"Batch size: {batch_size}")
print(f"Train batches: {len(train_dataloader)}")
print(f"Validation batches: {len(val_dataloader)}")

Batch size: 64
Train batches: 168
Validation batches: 1


In [124]:
from pytorch_forecasting import TemporalFusionTransformer
from pytorch_forecasting.metrics import QuantileLoss
import torch

# Configure TFT model
tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,  # Reduced for faster training, increase to 64-128 for better performance
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,  # Size for processing continuous variables
    output_size=7,  # 7 quantiles by default: [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
    loss=QuantileLoss(),
    log_interval=10,
    reduce_on_plateau_patience=4,
)

print(f"Model parameters: {tft.size()/1e3:.1f}k")


c:\Users\atmor\OneDrive\Desktop\Repositories\Prenergyze\prenergyze_venv\Lib\site-packages\lightning\pytorch\utilities\parsing.py:210: Attribute 'loss' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['loss'])`.
c:\Users\atmor\OneDrive\Desktop\Repositories\Prenergyze\prenergyze_venv\Lib\site-packages\lightning\pytorch\utilities\parsing.py:210: Attribute 'logging_metrics' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['logging_metrics'])`.


Model parameters: 94.1k


## IMPORTANT: PyTorch Lightning Version Compatibility

**If you encounter errors**, the best solution is to downgrade PyTorch Lightning:
```python
!pip install pytorch-lightning==1.9.0
```

Then restart your kernel and re-run the cells.


In [124]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
import pytorch_lightning as pl
import torch

# Workaround for pytorch-lightning 2.x compatibility with pytorch-forecasting 1.5.0
# The issue: pytorch-lightning 2.x has stricter type checking that rejects TFT
# Solution: Wrap TFT model to pass isinstance checks
pl_version = pl.__version__
major_version = int(pl_version.split('.')[0])

if major_version >= 2:
    # PyTorch Lightning 2.x compatibility workaround
    # The simplest solution: downgrade to pytorch-lightning 1.9.0
    # But if you must use 2.x, we'll use a direct wrapper approach
    print(f"WARNING: PyTorch Lightning {pl_version} has compatibility issues with pytorch-forecasting 1.5.0")
    print("For best results, downgrade to pytorch-lightning==1.9.0:")
    print("  pip install pytorch-lightning==1.9.0")
    print("")
    print("Attempting workaround...")
    
    # Import TFT class
    from pytorch_forecasting import TemporalFusionTransformer
    TFT_TYPE = TemporalFusionTransformer
    
    # Store original fit
    original_fit = Trainer.fit
    
    # Simple wrapper class that properly delegates everything
    class TFTWrapper(pl.LightningModule):
        """Wrapper that makes TFT pass LightningModule checks"""
        def __init__(self, tft_model):
            # Don't call super().__init__() to avoid conflicts
            object.__setattr__(self, '_tft', tft_model)
            # Copy all attributes
            for key, value in tft_model.__dict__.items():
                object.__setattr__(self, key, value)
        
        def __getattribute__(self, name):
            if name == '_tft':
                return object.__getattribute__(self, '_tft')
            if name in ('__dict__', '__class__'):
                return object.__getattribute__(self, name)
            # Delegate to wrapped TFT
            return getattr(self._tft, name)
        
        def __setattr__(self, name, value):
            if name == '_tft':
                object.__setattr__(self, name, value)
                return
            # Set on wrapped TFT
            setattr(self._tft, name, value)
            # Also set on self for attributes that might be checked
            try:
                object.__setattr__(self, name, value)
            except:
                pass
    
    def patched_fit(self, model, *args, **kwargs):
        # Check if it's TFT
        model_type = type(model)
        is_tft = (model_type == TFT_TYPE or 
                  model_type.__name__ == 'TemporalFusionTransformer')
        
        if is_tft:
            # Wrap the TFT model
            wrapped = TFTWrapper(model)
            try:
                return original_fit(self, wrapped, *args, **kwargs)
            except Exception as e:
                print(f"Workaround failed: {e}")
                print("Please downgrade to pytorch-lightning==1.9.0 for compatibility")
                raise
        else:
            return original_fit(self, model, *args, **kwargs)
    
    Trainer.fit = patched_fit
    print("Applied compatibility patch (may not work - consider downgrading)")
else:
    print(f"Using PyTorch Lightning {pl_version} (1.x - no patch needed)")

# Configure trainer with callbacks
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
checkpoint_callback = ModelCheckpoint(
    monitor="val_loss",
    mode="min",
    save_top_k=1,
    verbose=False,
)

if major_version >= 2:
    # PyTorch Lightning 2.x
    trainer = Trainer(
        max_epochs=30,
        accelerator="auto",  # Automatically detects GPU if available
        enable_model_summary=True,
        gradient_clip_val=0.1,
        callbacks=[early_stop_callback, checkpoint_callback],
        limit_train_batches=50,  # Limit for faster iteration during development
    )
else:
    # PyTorch Lightning 1.x
    trainer = Trainer(
        max_epochs=30,
        gpus=1 if torch.cuda.is_available() else 0,
        enable_model_summary=True,
        gradient_clip_val=0.1,
        callbacks=[early_stop_callback, checkpoint_callback],
        limit_train_batches=50,
    )

print("Starting training...")
# Use appropriate parameter names based on version
if major_version >= 2:
    trainer.fit(
        tft,
        train_dataloaders=train_dataloader,
        val_dataloaders=val_dataloader,
    )
else:
    trainer.fit(
        tft,
        train_dataloader=train_dataloader,
        val_dataloaders=val_dataloader,
    )

GPU available: False, used: False
TPU available: False, using: 0 TPU cores


For best results, downgrade to pytorch-lightning==1.9.0:
  pip install pytorch-lightning==1.9.0

Attempting workaround...
Applied compatibility patch (may not work - consider downgrading)
Starting training...
Workaround failed: maximum recursion depth exceeded
Please downgrade to pytorch-lightning==1.9.0 for compatibility


RecursionError: maximum recursion depth exceeded

KeyboardInterrupt: 

In [None]:
# Load best model from checkpoint
best_model_path = trainer.checkpoint_callback.best_model_path
print(f"Best model path: {best_model_path}")

# Generate predictions (returns quantiles, use median/0.5 quantile for point predictions)
raw_predictions = tft.predict(val_dataloader)
# Extract median (0.5 quantile) which is at index 3 for 7 quantiles: [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
predictions = raw_predictions[..., 3] if raw_predictions.dim() > 2 else raw_predictions

# Get actual values for comparison
actuals_list = []
for x, (y, weight) in iter(val_dataloader):
    actuals_list.append(y)
actuals = torch.cat(actuals_list, dim=0)

# Calculate metrics
from pytorch_forecasting.metrics import MAE, RMSE, MAPE

mae_metric = MAE()
rmse_metric = RMSE()
mape_metric = MAPE()

mae = mae_metric(predictions, actuals)
rmse = rmse_metric(predictions, actuals)
mape = mape_metric(predictions, actuals)

print(f"\nValidation Metrics:")
print(f"MAE: {mae:.2f}")
print(f"RMSE: {rmse:.2f}")
print(f"MAPE: {mape:.2%}")

# Visualize predictions vs actuals
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
axes = axes.flatten()

for idx in range(min(4, len(predictions))):
    ax = axes[idx]
    actual_vals = actuals[idx].cpu().numpy()
    pred_vals = predictions[idx].cpu().numpy()
    ax.plot(actual_vals, label='Actual', marker='o', alpha=0.7)
    ax.plot(pred_vals, label='Predicted', marker='x', alpha=0.7)
    ax.set_title(f'Sample {idx+1} - MAE: {np.abs(actual_vals - pred_vals).mean():.2f}')
    ax.set_xlabel('Hour ahead')
    ax.set_ylabel('Load')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Plot overall prediction accuracy
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
all_actuals = actuals.cpu().numpy().flatten()
all_predictions = predictions.cpu().numpy().flatten()
ax.scatter(all_actuals, all_predictions, alpha=0.5, s=10)
ax.plot([all_actuals.min(), all_actuals.max()], 
        [all_actuals.min(), all_actuals.max()], 'r--', lw=2, label='Perfect prediction')
ax.set_xlabel('Actual Load')
ax.set_ylabel('Predicted Load')
ax.set_title(f'Prediction Accuracy (Overall RMSE: {rmse:.2f})')
ax.legend()
ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()