In [1]:
import pandas as pd

In [2]:
from sklearn.metrics import mean_absolute_error

def time_series_mae(y_true, y_pred):
    """
    Calculate Mean Absolute Error (MAE) for time series cross validation.

    Parameters:
    y_true (array-like): Original/actual values.
    y_pred (array-like): Model predicted values.

    Returns:
    float: MAE value.
    """
    return mean_absolute_error(y_true, y_pred)

In [3]:
data = pd.read_csv(r'C:\Users\hp\Documents\GitHub\Forecast_Treasury_Curve\Dataset\final_feature_library_all_features.csv')

In [4]:
data['Spread'] = data['USGG10YR_mean'] - data['USGG2YR_mean']

In [6]:
# Split the time series into training and test sets (e.g., last 12 months as test)
test_size = 36
train = data['Spread'][:-test_size]
test = data['Spread'][-test_size:]

print(f"Training set length: {len(train)}")
print(f"Test set length: {len(test)}")

Training set length: 267
Test set length: 36


In [8]:
from sklearn.model_selection import TimeSeriesSplit
import numpy as np

# Naive forecast for time series cross-validation

tscv = TimeSeriesSplit(n_splits=5)
mae_scores = []

spread = test.values

for train_index, test_index in tscv.split(spread):
    y_train, y_test = spread[train_index], spread[test_index]
    # Naive forecast: predict each test value as the last value of the train set
    y_pred = np.full_like(y_test, fill_value=y_train[-1], dtype=np.float64)
    mae = time_series_mae(y_test, y_pred)
    mae_scores.append(mae)

print(f"MAE scores for each fold: {mae_scores}")
print(f"Average MAE: {np.mean(mae_scores)}")

MAE scores for each fold: [0.30222209846916304, 0.14135950702678968, 0.30990410471171126, 0.17054479358805627, 0.12320418266516049]
Average MAE: 0.20944693729217617


In [11]:
# Drift method forecast for time series cross-validation

mae_scores_drift = []

for train_index, test_index in tscv.split(spread):
    y_train, y_test = spread[train_index], spread[test_index]
    n_train = len(y_train)
    drift = (y_train[-1] - y_train[0]) / (n_train - 1) if n_train > 1 else 0
    # Forecast for each test point: y_hat_t = y_train[-1] + drift * (t+1)
    y_pred = np.array([y_train[-1] + drift * (i + 1) for i in range(len(y_test))])
    mae = time_series_mae(y_test, y_pred)
    mae_scores_drift.append(mae)

print(f"Drift method MAE scores for each fold: {mae_scores_drift}")
print(f"Drift method average MAE: {np.mean(mae_scores_drift)}")

Drift method MAE scores for each fold: [0.11370144409937939, 0.1922973073906776, 0.4942070458881815, 0.24855191574126576, 0.14020533209044775]
Drift method average MAE: 0.2377926090419904


In [23]:
from statsmodels.tsa.holtwinters import SimpleExpSmoothing

# Fit Simple Exponential Smoothing (SES) on training set and select best smoothing_level using MAE
best_mae = np.inf
best_alpha = None
alphas = np.linspace(0.01, 1, 20)
for alpha in alphas:
    try:
        ses_model = SimpleExpSmoothing(train).fit(smoothing_level=alpha, optimized=False)
        y_pred_train = ses_model.fittedvalues
        mae_train = mean_absolute_error(train, y_pred_train)
        if mae_train < best_mae:
            best_mae = mae_train
            best_alpha = alpha
    except Exception:
        continue

print(f"Best smoothing_level (alpha): {best_alpha:.2f} (Train MAE={best_mae:.4f})")

# Fit SES with best_alpha on train, forecast test set
ses_model = SimpleExpSmoothing(train).fit(smoothing_level=best_alpha, optimized=False)
y_forecast = ses_model.forecast(len(test))
mae_test = mean_absolute_error(test, y_forecast)

print(f"Test set MAE using SES: {mae_test:.4f}")

Best smoothing_level (alpha): 1.00 (Train MAE=0.1066)
Test set MAE using SES: 0.5399


In [26]:
# Try different window sizes to find the best rolling average parameter
best_mae_rolling = np.inf
best_window = None
for window in range(2, 21):  # Try window sizes from 2 to 20
    rolling_mean = train.rolling(window=window).mean()
    if rolling_mean.dropna().empty:
        continue
    last_rolling_avg = rolling_mean.dropna().iloc[-1]
    y_forecast_rolling = np.full(len(test), last_rolling_avg)
    mae_test_rolling = mean_absolute_error(test, y_forecast_rolling)
    if mae_test_rolling < best_mae_rolling:
        best_mae_rolling = mae_test_rolling
        best_window = window

print(f"Best window size for rolling average: {best_window} (Test MAE={best_mae_rolling:.4f})")


Best window size for rolling average: 2 (Test MAE=0.6628)


In [27]:
# Forecast with best window
rolling_mean = train.rolling(window=best_window).mean()
last_rolling_avg = rolling_mean.dropna().iloc[-1]
y_forecast_rolling = np.full(len(test), last_rolling_avg)
mae_test_rolling = mean_absolute_error(test, y_forecast_rolling)
print(f"Test MAE using rolling average (window={best_window}): {mae_test_rolling:.4f}")

Test MAE using rolling average (window=2): 0.6628
