# Forecasting Library Example Usage

In [1]:
import numpy as np

from forecasting_library.datasets import load_caiso_data
from forecasting_library.forecasting.evaluation import backtest_model
from forecasting_library.forecasting.forecasting_api import ForecastingAPI
from forecasting_library.forecasting.preprocessing import create_preprocessing_pipeline

### Load Data

In [2]:
# Load the CAISO dataset and separate it into features (X) and target (y).
# The object returned by load_caiso_data() is used like a pandas DataFrame
# below (column indexing and .drop(columns=...)) -- presumably it is one.
data = load_caiso_data()

# Name the target column once so the feature/target split cannot drift apart.
target_name = "CAISO_system_load"
y = data[target_name]
X = data.drop(columns=[target_name])

### Define Preprocessing Pipeline

In [3]:
# "hour" is the only feature handled as categorical; every other column of X
# is handled as numeric.
categorical_cols = ["hour"]
numeric_cols = [name for name in X if name not in categorical_cols]

# NOTE(review): datetime_column="index" presumably tells the pipeline that the
# timestamps live in the frame's index rather than in a column -- confirm
# against create_preprocessing_pipeline.
preprocessing_pipeline = create_preprocessing_pipeline(
    numeric_cols,
    categorical_cols,
    datetime_column="index",
)

### Define Forecasting Model Params

In [4]:
# Identifier of the model family passed to ForecastingAPI.
model_type = "xgboost"

# Hyperparameter grid for the model: two candidate values for each of four
# parameters, i.e. 2 ** 4 = 16 combinations in total.
param_grid = dict(
    n_estimators=[50, 100],
    max_depth=[3, 6],
    learning_rate=[0.01, 0.1],
    subsample=[0.8, 1.0],
)

### Initialize the ForecastingAPI with the Preprocessing Pipeline

In [5]:
# Build the API object that the remaining cells use for splitting, training,
# forecasting and backtesting.
forecasting_api = ForecastingAPI(
    model_type=model_type,
    preprocessing_pipeline=preprocessing_pipeline,
)

### Split Training and Test Data
Split off the final month (July 2023) to use as the test set.

In [6]:
# Split the full dataset into train/test features and targets at a fixed date.
# NOTE(review): the section heading says the final month (Jul 2023) is held
# out, but split_date here is "2022-08-01" -- confirm which one is intended.
X_train, X_test, y_train, y_test = forecasting_api.split_train_test(
    data,
    target_col="CAISO_system_load",
    split_date="2022-08-01",
)

### Train the Model Using the Complete Training Pipeline
(using only the data from before July 2023)

In [7]:
# Fit the pipeline on the training split, searching over param_grid.
# NOTE(review): test_size=0.2 presumably reserves 20% of the training data
# for validation inside train_pipeline -- confirm against its documentation.
forecasting_api.train_pipeline(
    X_train,
    y_train,
    param_grid=param_grid,
    test_size=0.2,
)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
Best hyperparameters: {'learning_rate': 0.1, 'max_depth': 6, 'n_estimators': 100, 'subsample': 0.8}


### Make Predictions on Test Data

In [8]:
# Forecast the target for the held-out test features with the trained model.
# Note: `predictions` is not compared against y_test anywhere in this notebook.
predictions = forecasting_api.forecast(X_test)

### Run Backtesting

In [9]:
# Backtest on the full feature/target set (not just the training split).
# NOTE(review): n_splits=14 with test_size=24 presumably means 14 folds of 24
# observations each (one day of hourly data?) -- confirm against
# ForecastingAPI.backtest_model.
# numpy is imported as `np` in the import cell at the top of the notebook.
backtest_results = forecasting_api.backtest_model(X, y, n_splits=14, test_size=24)

# Each backtest result is a mapping with a "MAPE" entry; report the mean
# across all results.
average_mape = np.mean([r["MAPE"] for r in backtest_results])
print(f"Average Mean Absolute Percent Error (MAPE) during backtesting: {average_mape}")

Average Mean Absolute Percent Error (MAPE) during backtesting: 0.06877690184913757
