In [16]:
import pandas as pd
import numpy as np
import torch
from pytorch_forecasting import TimeSeriesDataSet, TemporalFusionTransformer
from pytorch_forecasting.data import GroupNormalizer
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import EarlyStopping
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

# Load CSV
df = pd.read_csv("final-modified.csv")  # change path as needed


In [17]:
# Fix data types for categoricals
df["PLATFORM"] = df["PLATFORM"].astype(str)
df["PRACTICE"] = df["PRACTICE"].astype(str)
df["HORIZON"] = df["HORIZON"].astype(str)


In [18]:

# Add ID and time_idx
df["ID"] = df["PLATFORM"].astype(str) + "_" + df["PRACTICE"].astype(str) + "_" + df["HORIZON"].astype(str)
df["YEAR-MONTH"] = pd.to_datetime(df["YEAR-MONTH"])
df = df.sort_values(by=["ID", "YEAR-MONTH"])
df["time_idx"] = df.groupby("ID")["YEAR-MONTH"].rank(method="dense").astype(int) - 1

# Split based on TRAIN/TEST column
train_df = df[df["TRAIN/TEST"] == "TRAIN"]
val_df = df[df["TRAIN/TEST"] == "VALIDATION"]
test_df = df[df["TRAIN/TEST"] == "TEST"]

# Set up model parameters
max_encoder_length = 5
max_prediction_length = 1  # Monthly data, 1-step ahead


In [19]:

# Identify features
static_categoricals = ["PLATFORM", "PRACTICE", "HORIZON"]
known_reals = ["time_idx"] + [col for col in df.columns if "__regressor" in col or "Expected_Inflation" in col]
unknown_reals = ["ACTUAL"]

# Create training dataset
training = TimeSeriesDataSet(
    train_df,
    time_idx="time_idx",
    target="ACTUAL",
    group_ids=["ID"],
    max_encoder_length=max_encoder_length,
    max_prediction_length=max_prediction_length,
    static_categoricals=static_categoricals,
    time_varying_known_reals=known_reals,
    time_varying_unknown_reals=unknown_reals,
    target_normalizer=GroupNormalizer(groups=["ID"]),
)

# Create validation and test datasets
validation = training.from_parameters(training.get_parameters(), val_df)
test = training.from_parameters(training.get_parameters(), test_df)


In [23]:

# Dataloaders
train_dataloader = training.to_dataloader(train=True, batch_size=8, num_workers=0)
val_dataloader = validation.to_dataloader(train=False, batch_size=8, num_workers=0)
test_dataloader = test.to_dataloader(train=False, batch_size=8, num_workers=0)

from pytorch_forecasting.metrics import QuantileLoss

tft = TemporalFusionTransformer.from_dataset(
    training,
    learning_rate=0.03,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    loss=QuantileLoss(),
    log_interval=10,
    log_val_interval=1,
)


# Trainer
trainer = Trainer(
    max_epochs=50,
    gradient_clip_val=0.1,
    callbacks=[EarlyStopping(monitor="val_loss", patience=5)],
)




GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [24]:
# Train the model
trainer.fit(tft, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)



TypeError: `model` must be a `LightningModule` or `torch._dynamo.OptimizedModule`, got `TemporalFusionTransformer`

In [None]:
# Predict on test set
predictions = tft.predict(test_dataloader, return_y=True)
y_pred = predictions[0].numpy()
y_true = predictions[1].numpy()

# Evaluation metrics
def calculate_mape(y_true, y_pred):
    mask = y_true != 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

print("Test Results:")
print("RMSE:", np.sqrt(mean_squared_error(y_true, y_pred)))
print("MAE:", mean_absolute_error(y_true, y_pred))
print("R2:", r2_score(y_true, y_pred))
print("MAPE:", calculate_mape(y_true, y_pred))

In [None]:

# Train the model
