# Temporal Fusion Transformer (TFT) Time-Series Analysis of Solar and Weather

This notebook uses the NeuralForecast library with the TFT (Temporal Fusion Transformer) model to capture time-series dynamics for solar power prediction.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import datetime
from IPython.display import display, Markdown

# NeuralForecast imports
from neuralforecast import NeuralForecast
from neuralforecast.models import TFT

# Metrics imports
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

## Configuration

In [None]:
# Determine the current path of the notebook.
# os.path.abspath() resolves the (hard-coded) notebook file name against the
# current working directory, so notebook_dir is effectively the directory the
# kernel was started in. Backslashes are normalised to forward slashes so the
# same f-string path building works on Windows as well.
notebook_path = os.path.abspath("big_project_tft_hourly.ipynb")
notebook_dir = os.path.dirname(notebook_path).replace('\\', '/')
print("Current notebook directory:", notebook_dir)

# Project directory layout. Every *_DIR / *_PATH value ends with a single '/',
# and children are appended without an extra separator so the derived paths
# no longer contain doubled slashes (e.g. '.../data//raw_data/').
HOME_DIR = notebook_dir
DATA_DIR = f'{HOME_DIR}/data/'
print("Data directory set to:", DATA_DIR)
RAW_DATA_DIR = f'{DATA_DIR}raw_data/'
TRAIN_DATA_DIR = f'{DATA_DIR}training_data/'
SQL_DB_PATH = f'{DATA_DIR}db_sqlite/'
SQL_DB_FILE = f'{SQL_DB_PATH}big_project_db.sqlite3'
BACKUP_FILE_TYPE = 'feather'  # Options: 'csv', 'feather', 'parquet'

# Matplotlib / seaborn plot styling (the seaborn style is applied last)
plt.style.use('classic')
sns.set_style('whitegrid')

# Solar panel configuration
SOLAR_SITE_POSITION = (53.6985, -6.2080)  # Bettystown, Ireland
LATITUDE, LONGITUDE = SOLAR_SITE_POSITION
ROOF_PANE_I_ANGLE = 30  # degrees
ROOF_PANE_II_ANGLE = 30  # degrees
ROOF_PANE_I_AZIMUTH = 65  # degrees ( East-South-East)
ROOF_PANE_II_AZIMUTH = 245  # degrees ( West-South-West)
ROOF_PANE_I_COUNT = 7
ROOF_PANE_II_COUNT = 12
SOLAR_PANEL_POWER_RATING_W = 440  # Watts per panel

# Installed capacity per roof pane and in total (panel count x rating)
TOTAL_SOLAR_PANE_I_CAPACITY_W = ROOF_PANE_I_COUNT * SOLAR_PANEL_POWER_RATING_W
TOTAL_SOLAR_PANE_II_CAPACITY_W = ROOF_PANE_II_COUNT * SOLAR_PANEL_POWER_RATING_W
TOTAL_SOLAR_CAPACITY_W = TOTAL_SOLAR_PANE_I_CAPACITY_W + TOTAL_SOLAR_PANE_II_CAPACITY_W

# TFT Model Configuration
FORECAST_HORIZON = 24  # 24-hour forecast horizon
INPUT_SIZE = 48  # Look back 48 hours
MAX_TRAINING_STEPS = 500  # Number of training steps

## Load Training Data

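Load the existing hourly training and testing dataframes (`df_daytime_train` and `df_daytime_test`) from feather files. The training dataframe is the same one that was used for the Random Forest model.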

In [None]:
# Read the pre-built hourly train/test splits from the training-data folder
train_feather_path = f"{TRAIN_DATA_DIR}/hourly_solar_training_data.feather"
test_feather_path = f"{TRAIN_DATA_DIR}/hourly_solar_testing_data.feather"
df_daytime_train = pd.read_feather(train_feather_path)
df_daytime_test = pd.read_feather(test_feather_path)

# Size and schema summary so the reader can sanity-check what was loaded
train_shape = df_daytime_train.shape
test_shape = df_daytime_test.shape
train_columns = df_daytime_train.columns.tolist()
print(f"Training data shape: {train_shape}")
print(f"Testing data shape: {test_shape}")
print(f"\nTraining data columns: {train_columns}")

In [None]:
# Preview the first rows, then list every column next to its dtype
column_dtypes = pd.DataFrame({
    "Columns": df_daytime_train.columns,
    "Data Types": df_daytime_train.dtypes,
})
display(df_daytime_train.head(), column_dtypes)

## Transform Data for TFT Model

Transform the dataframe to the format required by NeuralForecast's TFT model:

1. Create a constant `unique_id` column with value "House_1"
2. Rename `DateTime` to `ds` and `PV(W)` to `y`
3. Select only the required columns (excluding manual lag features)
4. Let the TFT model handle temporal mixing internally

In [None]:
# Columns kept for the TFT model: the NeuralForecast identifier/time/target
# triple followed by the exogenous features (no manual lag features).
# This list is reused when the testing data is transformed.
selected_columns = [
    'unique_id',
    'ds',
    'y',
    'Total_Power_ClearSky_Output(W)',
    'Temperature(C)',
    'Humidity(%)',
    'Precipitation(mm)',
    'Wind Speed(m/s)',
    'Month_Sin',
    'Month_Cos',
    'HourOfDay_Sin',
    'HourOfDay_Cos'
]

# Build the NeuralForecast-style training frame in one chain; each step
# returns a new frame, so df_daytime_train itself is left untouched.
df_tft_train = (
    df_daytime_train
    .assign(unique_id='House_1')                            # single constant series id
    .rename(columns={'DateTime': 'ds', 'PV(W)': 'y'})       # NeuralForecast column names
    [selected_columns]                                      # keep only the required columns
    .assign(ds=lambda frame: pd.to_datetime(frame['ds']))   # make sure ds is datetime
    .sort_values(['unique_id', 'ds'])                       # chronological order per series
    .reset_index(drop=True)
)

print(f"Transformed training data shape: {df_tft_train.shape}")
print(f"\nColumns in TFT dataframe: {df_tft_train.columns.tolist()}")
display(df_tft_train.head())

In [None]:
# Apply the same reshaping to the testing data as was applied to the training
# data: constant series id, NeuralForecast column names, the shared
# `selected_columns` subset, datetime `ds`, chronological order.
test_with_id = df_daytime_test.assign(unique_id='House_1')
test_renamed = test_with_id.rename(columns={'DateTime': 'ds', 'PV(W)': 'y'})
test_subset = test_renamed.loc[:, selected_columns]

df_tft_test = (
    test_subset
    .assign(ds=pd.to_datetime(test_subset['ds']))
    .sort_values(by=['unique_id', 'ds'])
    .reset_index(drop=True)
)

print(f"Transformed testing data shape: {df_tft_test.shape}")
display(df_tft_test.head())

## TFT Model Setup

Configure the TFT (Temporal Fusion Transformer) model with:
- Forecast horizon (h): 24 hours
- Input size: 48 hours lookback
- Future exogenous variables: Weather and time features
- Scaler: Standard scaling
- Max steps: 500 training steps

In [None]:
# Future exogenous variables: features treated as "known" over the forecast
# horizon, i.e. values that must be supplied for every predicted timestamp.
# Grouped here by kind; the combined order is clear-sky/weather first, then
# the cyclical calendar encodings.
weather_exog = [
    'Total_Power_ClearSky_Output(W)',
    'Temperature(C)',
    'Humidity(%)',
    'Precipitation(mm)',
    'Wind Speed(m/s)',
]
calendar_exog = [
    'Month_Sin',
    'Month_Cos',
    'HourOfDay_Sin',
    'HourOfDay_Cos',
]
futr_exog_list = weather_exog + calendar_exog

n_futr_exog = len(futr_exog_list)
print(f"Future exogenous features ({n_futr_exog}): {futr_exog_list}")

In [None]:
# Build the single TFT model from the constants in the configuration cell
tft_model = TFT(
    h=FORECAST_HORIZON,             # forecast horizon, in steps
    input_size=INPUT_SIZE,          # lookback window, in steps
    futr_exog_list=futr_exog_list,  # features supplied for the forecast horizon
    scaler_type='standard',
    max_steps=MAX_TRAINING_STEPS,
    random_seed=42,                 # fixed seed for repeatable training runs
)
models = [tft_model]

# Echo the configuration so it is visible next to the training output
config_summary = [
    "TFT model initialized with:",
    f"  - Forecast horizon (h): {FORECAST_HORIZON} hours",
    f"  - Input size: {INPUT_SIZE} hours",
    "  - Scaler type: standard",
    f"  - Max training steps: {MAX_TRAINING_STEPS}",
    f"  - Number of exogenous features: {len(futr_exog_list)}",
]
print("\n".join(config_summary))

## Train the TFT Model

Initialize the NeuralForecast object with hourly frequency and fit the model on the prepared dataframe.

In [None]:
# Initialize NeuralForecast with hourly frequency.
# The lowercase 'h' offset alias is used because the uppercase 'H' alias is
# deprecated in recent pandas releases; 'h' is also accepted by older ones.
nf = NeuralForecast(
    models=models,
    freq='h'  # Hourly frequency
)

print("NeuralForecast initialized with hourly frequency ('h')")
print("\nStarting model training...")
print("This may take several minutes depending on your hardware.")

# Fit the model on the prepared dataframe.
# NOTE(review): the source frame is named `df_daytime_train`, which suggests
# night-time hours may be missing — confirm the series really is gap-free at
# hourly frequency before relying on the forecast timestamps.
nf.fit(df=df_tft_train)

print("\n✓ Model training completed successfully!")

## Generate Predictions

Create a future dataframe with the next 24 hours of "known" future values (Clear Sky + Weather Forecasts) and generate predictions.

In [None]:
# Create future_df for the next FORECAST_HORIZON hours.
# It must hold the exogenous features for every timestamp being forecast.

# The forecast starts one step after the last timestamp seen in training
last_timestamp = df_tft_train['ds'].max()
print(f"Last timestamp in training data: {last_timestamp}")

# For demonstration, the test data stands in for the "future" data.
# In a real scenario, you would have actual weather forecasts.
# Only the first FORECAST_HORIZON rows after the training period are kept.
future_df = (
    df_tft_test[df_tft_test['ds'] > last_timestamp]
    .head(FORECAST_HORIZON)
    .copy()
)

# Sanity check: with an hourly frequency the model forecasts the timestamps
# last_timestamp + 1h ... last_timestamp + FORECAST_HORIZON h. Taking the
# first rows of the test set only lines up with that window if the test set
# begins right after training and has no gaps, so warn (without failing)
# when any expected timestamp is absent.
expected_ds = last_timestamp + pd.to_timedelta(
    np.arange(1, FORECAST_HORIZON + 1), unit='h'
)
missing_ds = expected_ds[~expected_ds.isin(future_df['ds'])]
if len(missing_ds) > 0:
    print(
        f"\nWARNING: {len(missing_ds)} of the {FORECAST_HORIZON} expected hourly "
        f"timestamps after {last_timestamp} are missing from the selected test rows; "
        "the exogenous features may not line up with the forecast horizon."
    )

# For NeuralForecast prediction, we need the exogenous features but NOT the target 'y'.
# The futr_df should only contain: unique_id, ds, and the exogenous features.
futr_df_columns = ['unique_id', 'ds'] + futr_exog_list
futr_df = future_df[futr_df_columns].copy()

print(f"\nFuture dataframe shape: {futr_df.shape}")
print(f"Forecast period: {futr_df['ds'].min()} to {futr_df['ds'].max()}")
print(f"\nFuture dataframe columns: {futr_df.columns.tolist()}")
display(futr_df.head())

In [None]:
# Run the fitted model over the forecast horizon, feeding it the future
# exogenous features prepared in `futr_df`
print("Generating predictions for the next 24 hours...")
predictions = nf.predict(futr_df=futr_df)

# Report what came back, then preview the first forecast rows
print(
    "\n✓ Predictions generated successfully!",
    f"\nPredictions shape: {predictions.shape}",
    f"Predictions columns: {predictions.columns.tolist()}",
    sep="\n",
)
forecast_preview = predictions.head()
display(forecast_preview)

## Visualize Predictions

Compare the predicted values with actual values (if available in test set).

In [None]:
# Merge predictions with actual values from the test set.
# A left join keeps every forecast row; a forecast timestamp with no matching
# observation in future_df ends up with y = NaN.
predictions_with_actual = predictions.merge(
    future_df[['ds', 'y']],
    on='ds',
    how='left'
)

# Plot predictions vs actual ('TFT' is the forecast column read from the model output)
plt.figure(figsize=(15, 6))
plt.plot(predictions_with_actual['ds'], predictions_with_actual['y'],
         label='Actual PV(W)', marker='o', linewidth=2)
plt.plot(predictions_with_actual['ds'], predictions_with_actual['TFT'],
         label='Predicted PV(W)', marker='s', linewidth=2, linestyle='--')
plt.xlabel('DateTime', fontsize=12)
plt.ylabel('Power Output (W)', fontsize=12)
plt.title('TFT Model: 24-Hour Solar Power Predictions vs Actual', fontsize=14, fontweight='bold')
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Calculate basic metrics.
# The 'y' column always exists after the merge, so checking for the column
# says nothing about whether actuals are available. Score only the rows that
# have both an observation and a forecast: the sklearn metrics reject NaN.
scored_rows = predictions_with_actual.dropna(subset=['y', 'TFT'])

if scored_rows.empty:
    print("\nNo actual PV(W) values match the forecast timestamps; skipping metrics.")
else:
    actual = scored_rows['y'].values
    predicted = scored_rows['TFT'].values

    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)

    print("\n" + "="*50)
    print("TFT Model Performance Metrics (24-hour forecast)")
    print("="*50)
    if len(scored_rows) < len(predictions_with_actual):
        # Make partial coverage visible instead of silently scoring a subset
        print(f"Rows evaluated: {len(scored_rows)} of {len(predictions_with_actual)}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.2f} W")
    print(f"Mean Absolute Error (MAE): {mae:.2f} W")
    print(f"R² Score: {r2:.4f}")
    print(f"Mean Actual Power: {actual.mean():.2f} W")
    print(f"Mean Predicted Power: {predicted.mean():.2f} W")
    print("="*50)

## Save Results and Model

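Save the predictions (with the matching actual values) and the transformed training data for later use. Note that the cell below does not save the trained model itself; NeuralForecast's `nf.save(...)` can be used if the fitted model needs to be persisted.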

In [None]:
# Make sure the output folder exists (no error if it is already there)
results_dir = f"{HOME_DIR}/results/tft_model/"
os.makedirs(results_dir, exist_ok=True)

# Target files: the forecast (with actuals) and the transformed training data
predictions_file = f"{results_dir}/tft_predictions_24h.feather"
train_data_file = f"{results_dir}/tft_training_data.feather"

# Write each frame with a fresh 0..n-1 index, reporting each file as it is written
exports = [
    ("Predictions", predictions_with_actual, predictions_file),
    ("Transformed training data", df_tft_train, train_data_file),
]
for export_label, export_frame, export_path in exports:
    export_frame.reset_index(drop=True).to_feather(export_path)
    print(f"{export_label} saved to: {export_path}")

print("\n✓ All results saved successfully!")

## Summary

This notebook demonstrates the complete workflow for using the Temporal Fusion Transformer (TFT) model for solar power forecasting:

1. **Data Preparation**: Transformed the existing `df_daytime_train` dataframe to the format required by NeuralForecast
2. **Model Configuration**: Set up the TFT model with 24-hour forecast horizon and 48-hour lookback window
3. **Training**: Fitted the model using hourly frequency with weather and time features as exogenous variables
4. **Prediction**: Generated 24-hour ahead forecasts, using the weather values from the held-out test set as a stand-in for real weather forecasts
5. **Evaluation**: Visualized and evaluated the model performance

The TFT model can capture complex temporal patterns and relationships between weather conditions and solar power output, potentially providing better forecasts than traditional models like Random Forest for time-series data.