In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

def load_data(file_path):
    """Load and prepare the time series data"""
    df = pd.read_csv(file_path)
    
    # Convert time column to datetime
    df['time'] = pd.to_datetime(df['time'])
    
    # Set time as index
    df.set_index('time', inplace=True)
    
    return df

def engineer_features(df):
    """Create additional features for modeling"""
    df_copy = df.copy()
    
    # Create lag features
    for i in range(1, 13):
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_lag_{i}'] = df_copy['cpi_mom'].shift(i)
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_lag_{i}'] = df_copy['cpi_yoy'].shift(i)
    
    # Create rolling window features
    for window in [3, 6, 12]:
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_rolling_mean_{window}'] = df_copy['cpi_mom'].rolling(window=window).mean()
            df_copy[f'cpi_mom_rolling_std_{window}'] = df_copy['cpi_mom'].rolling(window=window).std()
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_rolling_mean_{window}'] = df_copy['cpi_yoy'].rolling(window=window).mean()
            df_copy[f'cpi_yoy_rolling_std_{window}'] = df_copy['cpi_yoy'].rolling(window=window).std()
    
    # Create economic indicator lag features
    for i in range(1, 4):
        if 'oil_price' in df_copy.columns:
            df_copy[f'oil_price_lag_{i}'] = df_copy['oil_price'].shift(i)
        if 'gold_price' in df_copy.columns:
            df_copy[f'gold_price_lag_{i}'] = df_copy['gold_price'].shift(i)
        if 'interest_rate' in df_copy.columns:
            df_copy[f'interest_rate_lag_{i}'] = df_copy['interest_rate'].shift(i)
    
    # Add month and year as cyclical features
    if 'month' in df_copy.columns:
        df_copy['month_sin'] = np.sin(2 * np.pi * df_copy['month']/12)
        df_copy['month_cos'] = np.cos(2 * np.pi * df_copy['month']/12)
    
    # Create interaction features
    if all(col in df_copy.columns for col in ['oil_price', 'gold_price']):
        df_copy['oil_gold_ratio'] = df_copy['oil_price'] / df_copy['gold_price']
    
    # Drop rows with NaN values (due to lag features)
    df_clean = df_copy.dropna()
    
    return df_clean

def calculate_metrics(actual, predicted):
    """Calculate evaluation metrics"""
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    
    return rmse, mae, mape, r2

def create_sequences(X, y, time_steps=12):
    """
    Create sequences for LSTM input
    
    Parameters:
    -----------
    X : np.array
        Feature array
    y : np.array
        Target array
    time_steps : int
        Number of time steps to use for each sequence
        
    Returns:
    --------
    tuple
        X_seq, y_seq arrays with shape [samples, time_steps, features] and [samples]
    """
    X_seq, y_seq = [], []
    for i in range(len(X) - time_steps):
        X_seq.append(X[i:i + time_steps])
        y_seq.append(y[i + time_steps])
    
    return np.array(X_seq), np.array(y_seq)

def build_lstm_model(input_shape, lstm_units=50, dropout_rate=0.2, learning_rate=0.001):
    """
    Build an LSTM model
    
    Parameters:
    -----------
    input_shape : tuple
        Shape of input data (time_steps, features)
    lstm_units : int
        Number of LSTM units
    dropout_rate : float
        Dropout rate for regularization
    learning_rate : float
        Learning rate for optimizer
        
    Returns:
    --------
    tf.keras.Model
        Compiled LSTM model
    """
    model = Sequential([
        LSTM(lstm_units, activation='relu', return_sequences=True, input_shape=input_shape),
        BatchNormalization(),
        Dropout(dropout_rate),
        LSTM(lstm_units // 2, activation='relu'),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(1)
    ])
    
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mse'
    )
    
    return model

def run_lstm(df, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32):
    """
    Train and evaluate an LSTM model for time series forecasting
    
    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe with engineered features
    target_col : str
        Name of the target column
    test_size : float
        Proportion of data to use for testing
    time_steps : int
        Number of time steps to use for each sequence
    epochs : int
        Number of training epochs
    batch_size : int
        Batch size for training
        
    Returns:
    --------
    tuple
        Model, predictions, metrics, history
    """
    print(f"Running LSTM model for {target_col}...")
    
    # Define features and target
    X = df.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df.columns], axis=1).values
    y = df[target_col].values
    
    # Scale features and target
    X_scaler = StandardScaler()
    X_scaled = X_scaler.fit_transform(X)
    
    y_scaler = MinMaxScaler()
    y_scaled = y_scaler.fit_transform(y.reshape(-1, 1)).flatten()
    
    # Create sequences
    X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)
    
    # Split data into training and testing sets (time-based split)
    split_idx = int(len(X_seq) * (1 - test_size))
    X_train, X_test = X_seq[:split_idx], X_seq[split_idx:]
    y_train, y_test = y_seq[:split_idx], y_seq[split_idx:]
    
    print(f"Training data: {X_train.shape}, Test data: {X_test.shape}")
    
    # Build model
    model = build_lstm_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        lstm_units=64,
        dropout_rate=0.2,
        learning_rate=0.001
    )
    
    # Define callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.0001),
        ModelCheckpoint(f'models/lstm_{target_col}.h5', monitor='val_loss', save_best_only=True)
    ]
    
    # Train model
    print("Training LSTM model...")
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )
    
    # Make predictions
    y_pred_train_scaled = model.predict(X_train)
    y_pred_test_scaled = model.predict(X_test)
    
    # Inverse transform predictions
    y_pred_train = y_scaler.inverse_transform(y_pred_train_scaled).flatten()
    y_pred_test = y_scaler.inverse_transform(y_pred_test_scaled).flatten()
    
    # Inverse transform actual values
    y_train_actual = y_scaler.inverse_transform(y_train.reshape(-1, 1)).flatten()
    y_test_actual = y_scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
    
    # Calculate metrics
    train_rmse, train_mae, train_mape, train_r2 = calculate_metrics(y_train_actual, y_pred_train)
    test_rmse, test_mae, test_mape, test_r2 = calculate_metrics(y_test_actual, y_pred_test)
    
    print(f"Training - RMSE: {train_rmse:.4f}, MAE: {train_mae:.4f}, MAPE: {train_mape:.2f}%, R²: {train_r2:.4f}")
    print(f"Testing - RMSE: {test_rmse:.4f}, MAE: {test_mae:.4f}, MAPE: {test_mape:.2f}%, R²: {test_r2:.4f}")
    
    # Plot training history
    plt.figure(figsize=(12, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'LSTM Training History for {target_col}')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/lstm_training_history_{target_col}.png')
    plt.close()
    
    # Plot actual vs predicted
    # Get the original dates for the test set
    test_dates = df.index[time_steps+split_idx:time_steps+len(X_seq)]
    
    plt.figure(figsize=(14, 7))
    plt.plot(test_dates, y_test_actual, label='Actual', color='blue')
    plt.plot(test_dates, y_pred_test, label='Predicted', color='red', linestyle='--')
    plt.title(f'LSTM: Actual vs Predicted {target_col}')
    plt.xlabel('Date')
    plt.ylabel(target_col)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/lstm_predictions_{target_col}.png')
    plt.close()
    
    # Create a DataFrame with predictions
    predictions = pd.DataFrame({
        'Actual': y_test_actual,
        'Predicted': y_pred_test,
        'Error': y_test_actual - y_pred_test
    }, index=test_dates)
    
    # Plot error distribution
    plt.figure(figsize=(10, 6))
    sns.histplot(predictions['Error'], kde=True)
    plt.title(f'Error Distribution for {target_col}')
    plt.xlabel('Error')
    plt.savefig(f'plots/lstm_error_distribution_{target_col}.png')
    plt.close()
    
    # Return results
    metrics = {
        'train_rmse': train_rmse,
        'train_mae': train_mae,
        'train_mape': train_mape,
        'train_r2': train_r2,
        'test_rmse': test_rmse,
        'test_mae': test_mae,
        'test_mape': test_mape,
        'test_r2': test_r2
    }
    
    return model, predictions, metrics, history, X_scaler, y_scaler

def forecast_future_lstm(model, df, target_col, X_scaler, y_scaler, time_steps=12, forecast_horizon=24):
    """
    Generate future forecasts using the trained LSTM model
    
    Parameters:
    -----------
    model : tf.keras.Model
        Trained LSTM model
    df : pd.DataFrame
        Input dataframe with features
    target_col : str
        Name of the target column
    X_scaler : StandardScaler
        Scaler used for features
    y_scaler : MinMaxScaler
        Scaler used for target
    time_steps : int
        Number of time steps used for training
    forecast_horizon : int
        Number of periods to forecast
        
    Returns:
    --------
    pd.Series
        Forecasted values
    """
    print(f"Generating {forecast_horizon} period forecast...")
    
    # Get the last time_steps data points
    X_last = df.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df.columns], axis=1).values[-time_steps:]
    X_last_scaled = X_scaler.transform(X_last)
    
    # Reshape for LSTM input [1, time_steps, features]
    X_last_scaled = X_last_scaled.reshape(1, time_steps, X_last_scaled.shape[1])
    
    # Get the last date in the dataframe
    last_date = df.index[-1]
    
    # Create a list to store forecasts
    forecasts = []
    
    # Generate forecasts recursively
    for i in range(forecast_horizon):
        # Make prediction
        forecast_scaled = model.predict(X_last_scaled)
        
        # Inverse transform prediction
        forecast = y_scaler.inverse_transform(forecast_scaled)[0][0]
        
        # Store forecast
        forecasts.append(forecast)
        
        # For a more accurate implementation, we would need to update all features
        # based on the new forecast, but this is a simplified version
        # Here we just shift the input sequence and append the new prediction
        X_last_scaled = np.roll(X_last_scaled, -1, axis=1)
        X_last_scaled[0, -1, :] = X_last_scaled[0, -2, :]  # Simple copy of the last known features
    
    # Create a Series with the forecasts
    future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=forecast_horizon, freq='MS')
    forecast_series = pd.Series(forecasts, index=future_dates)
    
    # Plot historical data with forecasts
    plt.figure(figsize=(14, 7))
    plt.plot(df[target_col].index, df[target_col], label='Historical Data')
    plt.plot(forecast_series.index, forecast_series, label='Forecast', color='red', linestyle='--')
    plt.title(f'LSTM: {target_col} Forecast')
    plt.xlabel('Date')
    plt.ylabel(target_col)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/lstm_future_forecast_{target_col}.png')
    plt.close()
    
    return forecast_series

def main():
    # Create plots and models directories if they don't exist
    import os
    for directory in ['plots', 'models']:
        if not os.path.exists(directory):
            os.makedirs(directory)
    
    # Load data
    file_path = 'data/analyzed_time_series.csv'
    df = load_data(file_path)
    
    # Engineer features
    df_engineered = engineer_features(df)
    
    # Run LSTM for CPI Year-over-Year
    target_col = 'cpi_yoy'
    model_yoy, predictions_yoy, metrics_yoy, history_yoy, X_scaler_yoy, y_scaler_yoy = run_lstm(
        df_engineered, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32
    )
    
    # Generate future forecasts for YoY
    forecast_yoy = forecast_future_lstm(
        model_yoy, df_engineered, target_col, X_scaler_yoy, y_scaler_yoy, time_steps=12, forecast_horizon=24
    )
    print(f"\nFuture {target_col} forecasts:")
    print(forecast_yoy)
    
    # Run LSTM for CPI Month-over-Month
    target_col = 'cpi_mom'
    model_mom, predictions_mom, metrics_mom, history_mom, X_scaler_mom, y_scaler_mom = run_lstm(
        df_engineered, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32
    )
    
    # Generate future forecasts for MoM
    forecast_mom = forecast_future_lstm(
        model_mom, df_engineered, target_col, X_scaler_mom, y_scaler_mom, time_steps=12, forecast_horizon=24
    )
    print(f"\nFuture {target_col} forecasts:")
    print(forecast_mom)
    
    # Save metrics to CSV
    metrics_data = [
        {
            'Model': 'LSTM', 'Target': 'CPI MoM',
            'Train_RMSE': metrics_mom['train_rmse'], 'Train_MAE': metrics_mom['train_mae'], 
            'Train_MAPE': metrics_mom['train_mape'], 'Train_R2': metrics_mom['train_r2'],
            'Test_RMSE': metrics_mom['test_rmse'], 'Test_MAE': metrics_mom['test_mae'], 
            'Test_MAPE': metrics_mom['test_mape'], 'Test_R2': metrics_mom['test_r2']
        },
        {
            'Model': 'LSTM', 'Target': 'CPI YoY',
            'Train_RMSE': metrics_yoy['train_rmse'], 'Train_MAE': metrics_yoy['train_mae'], 
            'Train_MAPE': metrics_yoy['train_mape'], 'Train_R2': metrics_yoy['train_r2'],
            'Test_RMSE': metrics_yoy['test_rmse'], 'Test_MAE': metrics_yoy['test_mae'], 
            'Test_MAPE': metrics_yoy['test_mape'], 'Test_R2': metrics_yoy['test_r2']
        }
    ]
    
    metrics_df = pd.DataFrame(metrics_data)
    metrics_df.to_csv('plots/lstm_metrics.csv', index=False)
    print("\nMetrics saved to plots/lstm_metrics.csv")

if __name__ == "__main__":
    main()

Running LSTM model for cpi_yoy...
Training data: (260, 12, 63), Test data: (65, 12, 63)
Training LSTM model...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Training - RMSE: 7.9077, MAE: 5.6517, MAPE: 5.23%, R²: -0.7570
Testing - RMSE: 3.4181, MAE: 2.7573, MAPE: 2.68%, R²: -6.5927
Generating 24 period forecast...

Future cpi_yoy forecasts:
2025-01-01    113.654999
2025-02-01    114.412773
2025-03-01    115.104546
2025-04-01    114.555603
2025-05-01    114.184265
2025-06-01    113.717079
2025-07-01    113.084297
2025-08-01    112.553818
2025-09-01    112.189323
2025-10-01    111.886482
2025-11-01    111.989037
2025-12-01    111.833817
2026-01-01    111.833817
2026-02-01 

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

def load_data(file_path):
    """Load and prepare the time series data"""
    df = pd.read_csv(file_path)
    
    # Convert time column to datetime
    df['time'] = pd.to_datetime(df['time'])
    
    # Set time as index
    df.set_index('time', inplace=True)
    
    return df

def engineer_features(df):
    """Create additional features for modeling"""
    df_copy = df.copy()
    
    # Create lag features
    for i in range(1, 13):
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_lag_{i}'] = df_copy['cpi_mom'].shift(i)
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_lag_{i}'] = df_copy['cpi_yoy'].shift(i)
    
    # Create rolling window features
    for window in [3, 6, 12]:
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_rolling_mean_{window}'] = df_copy['cpi_mom'].rolling(window=window).mean()
            df_copy[f'cpi_mom_rolling_std_{window}'] = df_copy['cpi_mom'].rolling(window=window).std()
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_rolling_mean_{window}'] = df_copy['cpi_yoy'].rolling(window=window).mean()
            df_copy[f'cpi_yoy_rolling_std_{window}'] = df_copy['cpi_yoy'].rolling(window=window).std()
    
    # Create economic indicator lag features
    for i in range(1, 4):
        if 'oil_price' in df_copy.columns:
            df_copy[f'oil_price_lag_{i}'] = df_copy['oil_price'].shift(i)
        if 'gold_price' in df_copy.columns:
            df_copy[f'gold_price_lag_{i}'] = df_copy['gold_price'].shift(i)
        if 'interest_rate' in df_copy.columns:
            df_copy[f'interest_rate_lag_{i}'] = df_copy['interest_rate'].shift(i)
    
    # Add month and year as cyclical features
    if 'month' in df_copy.columns:
        df_copy['month_sin'] = np.sin(2 * np.pi * df_copy['month']/12)
        df_copy['month_cos'] = np.cos(2 * np.pi * df_copy['month']/12)
    
    # Create interaction features
    if all(col in df_copy.columns for col in ['oil_price', 'gold_price']):
        df_copy['oil_gold_ratio'] = df_copy['oil_price'] / df_copy['gold_price']
    
    # Drop rows with NaN values (due to lag features)
    df_clean = df_copy.dropna()
    
    return df_clean

def calculate_metrics(actual, predicted):
    """Calculate evaluation metrics"""
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    
    return rmse, mae, mape, r2

def create_sequences(X, y, time_steps=12):
    """
    Create sequences for GRU input
    
    Parameters:
    -----------
    X : np.array
        Feature array
    y : np.array
        Target array
    time_steps : int
        Number of time steps to use for each sequence
        
    Returns:
    --------
    tuple
        X_seq, y_seq arrays with shape [samples, time_steps, features] and [samples]
    """
    X_seq, y_seq = [], []
    for i in range(len(X) - time_steps):
        X_seq.append(X[i:i + time_steps])
        y_seq.append(y[i + time_steps])
    
    return np.array(X_seq), np.array(y_seq)

def build_gru_model(input_shape, gru_units=50, dropout_rate=0.2, learning_rate=0.001):
    """
    Build a GRU model
    
    Parameters:
    -----------
    input_shape : tuple
        Shape of input data (time_steps, features)
    gru_units : int
        Number of GRU units
    dropout_rate : float
        Dropout rate for regularization
    learning_rate : float
        Learning rate for optimizer
        
    Returns:
    --------
    tf.keras.Model
        Compiled GRU model
    """
    model = Sequential([
        GRU(gru_units, activation='tanh', return_sequences=True, input_shape=input_shape, 
            recurrent_dropout=0.0, reset_after=True),
        BatchNormalization(),
        Dropout(dropout_rate),
        GRU(gru_units // 2, activation='tanh', recurrent_dropout=0.0, reset_after=True),
        BatchNormalization(),
        Dropout(dropout_rate),
        Dense(1)
    ])
    
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mse'
    )
    
    return model

def run_gru(df, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32):
    """
    Train and evaluate a GRU model for time series forecasting
    
    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe with engineered features
    target_col : str
        Name of the target column
    test_size : float
        Proportion of data to use for testing
    time_steps : int
        Number of time steps to use for each sequence
    epochs : int
        Number of training epochs
    batch_size : int
        Batch size for training
        
    Returns:
    --------
    tuple
        Model, predictions, metrics, history
    """
    print(f"Running GRU model for {target_col}...")
    
    # Define features and target
    X = df.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df.columns], axis=1).values
    y = df[target_col].values
    
    # Scale features and target
    X_scaler = StandardScaler()
    X_scaled = X_scaler.fit_transform(X)
    
    y_scaler = MinMaxScaler()
    y_scaled = y_scaler.fit_transform(y.reshape(-1, 1)).flatten()
    
    # Create sequences
    X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)
    
    # Split data into training and testing sets (time-based split)
    split_idx = int(len(X_seq) * (1 - test_size))
    X_train, X_test = X_seq[:split_idx], X_seq[split_idx:]
    y_train, y_test = y_seq[:split_idx], y_seq[split_idx:]
    
    print(f"Training data: {X_train.shape}, Test data: {X_test.shape}")
    
    # Build model
    model = build_gru_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        gru_units=64,
        dropout_rate=0.2,
        learning_rate=0.001
    )
    
    # Define callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.0001),
        ModelCheckpoint(f'models/gru_{target_col}.h5', monitor='val_loss', save_best_only=True)
    ]
    
    # Train model
    print("Training GRU model...")
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )
    
    # Make predictions
    y_pred_train_scaled = model.predict(X_train)
    y_pred_test_scaled = model.predict(X_test)
    
    # Inverse transform predictions
    y_pred_train = y_scaler.inverse_transform(y_pred_train_scaled).flatten()
    y_pred_test = y_scaler.inverse_transform(y_pred_test_scaled).flatten()
    
    # Inverse transform actual values
    y_train_actual = y_scaler.inverse_transform(y_train.reshape(-1, 1)).flatten()
    y_test_actual = y_scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
    
    # Calculate metrics
    train_rmse, train_mae, train_mape, train_r2 = calculate_metrics(y_train_actual, y_pred_train)
    test_rmse, test_mae, test_mape, test_r2 = calculate_metrics(y_test_actual, y_pred_test)
    
    print(f"Training - RMSE: {train_rmse:.4f}, MAE: {train_mae:.4f}, MAPE: {train_mape:.2f}%, R²: {train_r2:.4f}")
    print(f"Testing - RMSE: {test_rmse:.4f}, MAE: {test_mae:.4f}, MAPE: {test_mape:.2f}%, R²: {test_r2:.4f}")
    
    # Plot training history
    plt.figure(figsize=(12, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'GRU Training History for {target_col}')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/gru_training_history_{target_col}.png')
    plt.close()
    
    # Plot actual vs predicted
    # Get the original dates for the test set
    test_dates = df.index[time_steps+split_idx:time_steps+len(X_seq)]
    
    plt.figure(figsize=(14, 7))
    plt.plot(test_dates, y_test_actual, label='Actual', color='blue')
    plt.plot(test_dates, y_pred_test, label='Predicted', color='red', linestyle='--')
    plt.title(f'GRU: Actual vs Predicted {target_col}')
    plt.xlabel('Date')
    plt.ylabel(target_col)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/gru_predictions_{target_col}.png')
    plt.close()
    
    # Create a DataFrame with predictions
    predictions = pd.DataFrame({
        'Actual': y_test_actual,
        'Predicted': y_pred_test,
        'Error': y_test_actual - y_pred_test
    }, index=test_dates)
    
    # Plot error distribution
    plt.figure(figsize=(10, 6))
    sns.histplot(predictions['Error'], kde=True)
    plt.title(f'Error Distribution for {target_col}')
    plt.xlabel('Error')
    plt.savefig(f'plots/gru_error_distribution_{target_col}.png')
    plt.close()
    
    # Return results
    metrics = {
        'train_rmse': train_rmse,
        'train_mae': train_mae,
        'train_mape': train_mape,
        'train_r2': train_r2,
        'test_rmse': test_rmse,
        'test_mae': test_mae,
        'test_mape': test_mape,
        'test_r2': test_r2
    }
    
    return model, predictions, metrics, history, X_scaler, y_scaler

def forecast_future_gru(model, df, target_col, X_scaler, y_scaler, time_steps=12, forecast_horizon=24):
    """
    Generate future forecasts using the trained GRU model
    
    Parameters:
    -----------
    model : tf.keras.Model
        Trained GRU model
    df : pd.DataFrame
        Input dataframe with features
    target_col : str
        Name of the target column
    X_scaler : StandardScaler
        Scaler used for features
    y_scaler : MinMaxScaler
        Scaler used for target
    time_steps : int
        Number of time steps used for training
    forecast_horizon : int
        Number of periods to forecast
        
    Returns:
    --------
    pd.Series
        Forecasted values
    """
    print(f"Generating {forecast_horizon} period forecast...")
    
    # Get the last time_steps data points
    X_last = df.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df.columns], axis=1).values[-time_steps:]
    X_last_scaled = X_scaler.transform(X_last)
    
    # Reshape for GRU input [1, time_steps, features]
    X_last_scaled = X_last_scaled.reshape(1, time_steps, X_last_scaled.shape[1])
    
    # Get the last date in the dataframe
    last_date = df.index[-1]
    
    # Create a list to store forecasts
    forecasts = []
    
    # Generate forecasts recursively
    for i in range(forecast_horizon):
        # Make prediction
        forecast_scaled = model.predict(X_last_scaled)
        
        # Inverse transform prediction
        forecast = y_scaler.inverse_transform(forecast_scaled)[0][0]
        
        # Store forecast
        forecasts.append(forecast)
        
        # For a more accurate implementation, we would need to update all features
        # based on the new forecast, but this is a simplified version
        # Here we just shift the input sequence and append the new prediction
        X_last_scaled = np.roll(X_last_scaled, -1, axis=1)
        X_last_scaled[0, -1, :] = X_last_scaled[0, -2, :]  # Simple copy of the last known features
    
    # Create a Series with the forecasts
    future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=forecast_horizon, freq='MS')
    forecast_series = pd.Series(forecasts, index=future_dates)
    
    # Plot historical data with forecasts
    plt.figure(figsize=(14, 7))
    plt.plot(df[target_col].index, df[target_col], label='Historical Data')
    plt.plot(forecast_series.index, forecast_series, label='Forecast', color='red', linestyle='--')
    plt.title(f'GRU: {target_col} Forecast')
    plt.xlabel('Date')
    plt.ylabel(target_col)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/gru_future_forecast_{target_col}.png')
    plt.close()
    
    return forecast_series

def main():
    # Create plots and models directories if they don't exist
    import os
    for directory in ['plots', 'models']:
        if not os.path.exists(directory):
            os.makedirs(directory)
    
    # Load data
    file_path = 'data/analyzed_time_series.csv'
    df = load_data(file_path)
    
    # Engineer features
    df_engineered = engineer_features(df)
    
    # Run GRU for CPI Year-over-Year
    target_col = 'cpi_yoy'
    model_yoy, predictions_yoy, metrics_yoy, history_yoy, X_scaler_yoy, y_scaler_yoy = run_gru(
        df_engineered, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32
    )
    
    # Generate future forecasts for YoY
    forecast_yoy = forecast_future_gru(
        model_yoy, df_engineered, target_col, X_scaler_yoy, y_scaler_yoy, time_steps=12, forecast_horizon=24
    )
    print(f"\nFuture {target_col} forecasts:")
    print(forecast_yoy)
    
    # Run GRU for CPI Month-over-Month
    target_col = 'cpi_mom'
    model_mom, predictions_mom, metrics_mom, history_mom, X_scaler_mom, y_scaler_mom = run_gru(
        df_engineered, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32
    )
    
    # Generate future forecasts for MoM
    forecast_mom = forecast_future_gru(
        model_mom, df_engineered, target_col, X_scaler_mom, y_scaler_mom, time_steps=12, forecast_horizon=24
    )
    print(f"\nFuture {target_col} forecasts:")
    print(forecast_mom)
    
    # Compare with LSTM (if available)
    try:
        lstm_metrics = pd.read_csv('plots/lstm_metrics.csv')
        gru_metrics = pd.DataFrame([
            {
                'Model': 'GRU', 'Target': 'CPI MoM',
                'Train_RMSE': metrics_mom['train_rmse'], 'Train_MAE': metrics_mom['train_mae'], 
                'Train_MAPE': metrics_mom['train_mape'], 'Train_R2': metrics_mom['train_r2'],
                'Test_RMSE': metrics_mom['test_rmse'], 'Test_MAE': metrics_mom['test_mae'], 
                'Test_MAPE': metrics_mom['test_mape'], 'Test_R2': metrics_mom['test_r2']
            },
            {
                'Model': 'GRU', 'Target': 'CPI YoY',
                'Train_RMSE': metrics_yoy['train_rmse'], 'Train_MAE': metrics_yoy['train_mae'], 
                'Train_MAPE': metrics_yoy['train_mape'], 'Train_R2': metrics_yoy['train_r2'],
                'Test_RMSE': metrics_yoy['test_rmse'], 'Test_MAE': metrics_yoy['test_mae'], 
                'Test_MAPE': metrics_yoy['test_mape'], 'Test_R2': metrics_yoy['test_r2']
            }
        ])
        
        # Combine metrics
        combined_metrics = pd.concat([lstm_metrics, gru_metrics])
        combined_metrics.to_csv('plots/rnn_comparison_metrics.csv', index=False)
        print("\nComparison with LSTM saved to plots/rnn_comparison_metrics.csv")
        
        # Create comparison plots
        for target, target_name in [('CPI MoM', 'cpi_mom'), ('CPI YoY', 'cpi_yoy')]:
            lstm_row = lstm_metrics[lstm_metrics['Target'] == target].iloc[0]
            gru_row = gru_metrics[gru_metrics['Target'] == target].iloc[0]
            
            # Compare test metrics
            metrics = ['Test_RMSE', 'Test_MAE', 'Test_MAPE']
            values = [
                [lstm_row[metric], gru_row[metric]] for metric in metrics
            ]
            
            plt.figure(figsize=(10, 6))
            x = np.arange(len(metrics))
            width = 0.35
            
            plt.bar(x - width/2, [v[0] for v in values], width, label='LSTM')
            plt.bar(x + width/2, [v[1] for v in values], width, label='GRU')
            
            plt.xlabel('Metric')
            plt.ylabel('Value')
            plt.title(f'LSTM vs GRU Performance Comparison for {target}')
            plt.xticks(x, metrics)
            plt.legend()
            plt.tight_layout()
            plt.savefig(f'plots/lstm_vs_gru_{target_name}.png')
            plt.close()
    except:
        # Save metrics to CSV
        metrics_data = [
            {
                'Model': 'GRU', 'Target': 'CPI MoM',
                'Train_RMSE': metrics_mom['train_rmse'], 'Train_MAE': metrics_mom['train_mae'], 
                'Train_MAPE': metrics_mom['train_mape'], 'Train_R2': metrics_mom['train_r2'],
                'Test_RMSE': metrics_mom['test_rmse'], 'Test_MAE': metrics_mom['test_mae'], 
                'Test_MAPE': metrics_mom['test_mape'], 'Test_R2': metrics_mom['test_r2']
            },
            {
                'Model': 'GRU', 'Target': 'CPI YoY',
                'Train_RMSE': metrics_yoy['train_rmse'], 'Train_MAE': metrics_yoy['train_mae'], 
                'Train_MAPE': metrics_yoy['train_mape'], 'Train_R2': metrics_yoy['train_r2'],
                'Test_RMSE': metrics_yoy['test_rmse'], 'Test_MAE': metrics_yoy['test_mae'], 
                'Test_MAPE': metrics_yoy['test_mape'], 'Test_R2': metrics_yoy['test_r2']
            }
        ]
        
        metrics_df = pd.DataFrame(metrics_data)
        metrics_df.to_csv('plots/gru_metrics.csv', index=False)
        print("\nMetrics saved to plots/gru_metrics.csv")

if __name__ == "__main__":
    main()

Running GRU model for cpi_yoy...
Training data: (260, 12, 63), Test data: (65, 12, 63)
Training GRU model...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Training - RMSE: 9.9214, MAE: 8.2575, MAPE: 7.63%, R²: -1.7658
Testing - RMSE: 8.7249, MAE: 7.3740, MAPE: 7.18%, R²: -48.4703
Generating 24 period forecast...

Future cpi_yoy forecasts:
2025-01-01    104.716835
2025-02-01    105.649406
2025-03-01    106.437927
2025-04-01    106.994568
2025-05-01    107.284180
2025-06-01    107.444801
2025-07-01    107.556892
2025-08-01    107.559547
2025-09-01    107.475830
2025-10-01    107.467003
2025-11-01    107.379791
2025-12-01    107.463058
2026-01-01    107.463058
2026-02-01    107.463058
2026-03-01    107.463058
2026-04-01    107.463058
2026-05-01    1

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization, MultiHeadAttention
from tensorflow.keras.layers import GlobalAveragePooling1D, Embedding, Add, Lambda
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

def load_data(file_path):
    """Load and prepare the time series data"""
    df = pd.read_csv(file_path)
    
    # Convert time column to datetime
    df['time'] = pd.to_datetime(df['time'])
    
    # Set time as index
    df.set_index('time', inplace=True)
    
    return df

def engineer_features(df):
    """Create additional features for modeling"""
    df_copy = df.copy()
    
    # Create lag features
    for i in range(1, 13):
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_lag_{i}'] = df_copy['cpi_mom'].shift(i)
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_lag_{i}'] = df_copy['cpi_yoy'].shift(i)
    
    # Create rolling window features
    for window in [3, 6, 12]:
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_rolling_mean_{window}'] = df_copy['cpi_mom'].rolling(window=window).mean()
            df_copy[f'cpi_mom_rolling_std_{window}'] = df_copy['cpi_mom'].rolling(window=window).std()
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_rolling_mean_{window}'] = df_copy['cpi_yoy'].rolling(window=window).mean()
            df_copy[f'cpi_yoy_rolling_std_{window}'] = df_copy['cpi_yoy'].rolling(window=window).std()
    
    # Create economic indicator lag features
    for i in range(1, 4):
        if 'oil_price' in df_copy.columns:
            df_copy[f'oil_price_lag_{i}'] = df_copy['oil_price'].shift(i)
        if 'gold_price' in df_copy.columns:
            df_copy[f'gold_price_lag_{i}'] = df_copy['gold_price'].shift(i)
        if 'interest_rate' in df_copy.columns:
            df_copy[f'interest_rate_lag_{i}'] = df_copy['interest_rate'].shift(i)
    
    # Add month and year as cyclical features
    if 'month' in df_copy.columns:
        df_copy['month_sin'] = np.sin(2 * np.pi * df_copy['month']/12)
        df_copy['month_cos'] = np.cos(2 * np.pi * df_copy['month']/12)
    
    # Create interaction features
    if all(col in df_copy.columns for col in ['oil_price', 'gold_price']):
        df_copy['oil_gold_ratio'] = df_copy['oil_price'] / df_copy['gold_price']
    
    # Drop rows with NaN values (due to lag features)
    df_clean = df_copy.dropna()
    
    return df_clean

def calculate_metrics(actual, predicted):
    """Calculate evaluation metrics"""
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    
    return rmse, mae, mape, r2

def create_sequences(X, y, time_steps=12):
    """
    Create sequences for Transformer input
    
    Parameters:
    -----------
    X : np.array
        Feature array
    y : np.array
        Target array
    time_steps : int
        Number of time steps to use for each sequence
        
    Returns:
    --------
    tuple
        X_seq, y_seq arrays with shape [samples, time_steps, features] and [samples]
    """
    X_seq, y_seq = [], []
    for i in range(len(X) - time_steps):
        X_seq.append(X[i:i + time_steps])
        y_seq.append(y[i + time_steps])
    
    return np.array(X_seq), np.array(y_seq)

def positional_encoding(seq_len, d_model):
    """
    Create positional encoding for transformer
    
    Parameters:
    -----------
    seq_len : int
        Sequence length
    d_model : int
        Dimension of the model
        
    Returns:
    --------
    tf.Tensor
        Positional encoding tensor
    """
    positions = np.arange(seq_len)[:, np.newaxis]
    angles = np.arange(d_model)[np.newaxis, :] / np.power(10000, (2 * (np.arange(d_model)[np.newaxis, :] // 2)) / d_model)
    
    # Apply sin to even indices
    sines = np.sin(positions * angles[:, 0::2])
    
    # Apply cos to odd indices
    cosines = np.cos(positions * angles[:, 1::2])
    
    # Combine sin and cos
    pos_encoding = np.zeros((seq_len, d_model))
    pos_encoding[:, 0::2] = sines
    pos_encoding[:, 1::2] = cosines
    
    return tf.cast(pos_encoding, dtype=tf.float32)

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    """
    Create a transformer encoder block
    
    Parameters:
    -----------
    inputs : tf.Tensor
        Input tensor
    head_size : int
        Size of each attention head
    num_heads : int
        Number of attention heads
    ff_dim : int
        Hidden layer size in feed forward network
    dropout : float
        Dropout rate
        
    Returns:
    --------
    tf.Tensor
        Output tensor
    """
    # Multi-head attention
    attention_output = MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(inputs, inputs)
    
    # Add & Norm
    attention_output = LayerNormalization(epsilon=1e-6)(inputs + attention_output)
    
    # Feed Forward
    ff_output = Dense(ff_dim, activation='relu')(attention_output)
    ff_output = Dense(inputs.shape[-1])(ff_output)
    ff_output = Dropout(dropout)(ff_output)
    
    # Add & Norm
    return LayerNormalization(epsilon=1e-6)(attention_output + ff_output)

def build_transformer_model(input_shape, head_size=64, num_heads=4, ff_dim=256, num_transformer_blocks=4, mlp_units=[128, 64], dropout=0.1, mlp_dropout=0.1):
    """
    Build a transformer model for time series forecasting
    
    Parameters:
    -----------
    input_shape : tuple
        Shape of input data (time_steps, features)
    head_size : int
        Size of each attention head
    num_heads : int
        Number of attention heads
    ff_dim : int
        Hidden layer size in feed forward network
    num_transformer_blocks : int
        Number of transformer blocks
    mlp_units : list
        Number of units in MLP layers
    dropout : float
        Dropout rate in transformer blocks
    mlp_dropout : float
        Dropout rate in MLP layers
        
    Returns:
    --------
    tf.keras.Model
        Compiled transformer model
    """
    inputs = Input(shape=input_shape)
    
    # Add positional encoding
    pos_encoding = positional_encoding(input_shape[0], input_shape[1])
    x = inputs + pos_encoding
    
    # Transformer blocks
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)
    
    # Global average pooling
    x = GlobalAveragePooling1D()(x)
    
    # MLP layers
    for dim in mlp_units:
        x = Dense(dim, activation='relu')(x)
        x = Dropout(mlp_dropout)(x)
    
    # Output layer
    outputs = Dense(1)(x)
    
    model = Model(inputs=inputs, outputs=outputs)
    
    model.compile(
        optimizer=Adam(learning_rate=0.001),
        loss='mse'
    )
    
    return model

def run_transformer(df, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32):
    """
    Train and evaluate a Transformer model for time series forecasting
    
    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe with engineered features
    target_col : str
        Name of the target column
    test_size : float
        Proportion of data to use for testing
    time_steps : int
        Number of time steps to use for each sequence
    epochs : int
        Number of training epochs
    batch_size : int
        Batch size for training
        
    Returns:
    --------
    tuple
        Model, predictions, metrics, history
    """
    print(f"Running Transformer model for {target_col}...")
    
    # Define features and target
    X = df.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df.columns], axis=1).values
    y = df[target_col].values
    
    # Scale features and target
    X_scaler = StandardScaler()
    X_scaled = X_scaler.fit_transform(X)
    
    y_scaler = MinMaxScaler()
    y_scaled = y_scaler.fit_transform(y.reshape(-1, 1)).flatten()
    
    # Create sequences
    X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)
    
    # Split data into training and testing sets (time-based split)
    split_idx = int(len(X_seq) * (1 - test_size))
    X_train, X_test = X_seq[:split_idx], X_seq[split_idx:]
    y_train, y_test = y_seq[:split_idx], y_seq[split_idx:]
    
    print(f"Training data: {X_train.shape}, Test data: {X_test.shape}")
    
    # Build model
    model = build_transformer_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        head_size=64,
        num_heads=4,
        ff_dim=256,
        num_transformer_blocks=2,
        mlp_units=[128, 64],
        dropout=0.1,
        mlp_dropout=0.1
    )
    
    # Define callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.0001),
        ModelCheckpoint(f'models/transformer_{target_col}.h5', monitor='val_loss', save_best_only=True)
    ]
    
    # Train model
    print("Training Transformer model...")
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )
    
    # Make predictions
    y_pred_train_scaled = model.predict(X_train)
    y_pred_test_scaled = model.predict(X_test)
    
    # Inverse transform predictions
    y_pred_train = y_scaler.inverse_transform(y_pred_train_scaled).flatten()
    y_pred_test = y_scaler.inverse_transform(y_pred_test_scaled).flatten()
    
    # Inverse transform actual values
    y_train_actual = y_scaler.inverse_transform(y_train.reshape(-1, 1)).flatten()
    y_test_actual = y_scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
    
    # Calculate metrics
    train_rmse, train_mae, train_mape, train_r2 = calculate_metrics(y_train_actual, y_pred_train)
    test_rmse, test_mae, test_mape, test_r2 = calculate_metrics(y_test_actual, y_pred_test)
    
    print(f"Training - RMSE: {train_rmse:.4f}, MAE: {train_mae:.4f}, MAPE: {train_mape:.2f}%, R²: {train_r2:.4f}")
    print(f"Testing - RMSE: {test_rmse:.4f}, MAE: {test_mae:.4f}, MAPE: {test_mape:.2f}%, R²: {test_r2:.4f}")
    
    # Plot training history
    plt.figure(figsize=(12, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'Transformer Training History for {target_col}')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/transformer_training_history_{target_col}.png')
    plt.close()
    
    # Plot actual vs predicted
    # Get the original dates for the test set
    test_dates = df.index[time_steps+split_idx:time_steps+len(X_seq)]
    
    plt.figure(figsize=(14, 7))
    plt.plot(test_dates, y_test_actual, label='Actual', color='blue')
    plt.plot(test_dates, y_pred_test, label='Predicted', color='red', linestyle='--')
    plt.title(f'Transformer: Actual vs Predicted {target_col}')
    plt.xlabel('Date')
    plt.ylabel(target_col)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/transformer_predictions_{target_col}.png')
    plt.close()
    
    # Create a DataFrame with predictions
    predictions = pd.DataFrame({
        'Actual': y_test_actual,
        'Predicted': y_pred_test,
        'Error': y_test_actual - y_pred_test
    }, index=test_dates)
    
    # Plot error distribution
    plt.figure(figsize=(10, 6))
    sns.histplot(predictions['Error'], kde=True)
    plt.title(f'Error Distribution for {target_col}')
    plt.xlabel('Error')
    plt.savefig(f'plots/transformer_error_distribution_{target_col}.png')
    plt.close()
    
    # Return results
    metrics = {
        'train_rmse': train_rmse,
        'train_mae': train_mae,
        'train_mape': train_mape,
        'train_r2': train_r2,
        'test_rmse': test_rmse,
        'test_mae': test_mae,
        'test_mape': test_mape,
        'test_r2': test_r2
    }
    
    return model, predictions, metrics, history, X_scaler, y_scaler

def forecast_future_transformer(model, df, target_col, X_scaler, y_scaler, time_steps=12, forecast_horizon=24):
    """
    Generate future forecasts using the trained Transformer model
    
    Parameters:
    -----------
    model : tf.keras.Model
        Trained Transformer model
    df : pd.DataFrame
        Input dataframe with features
    target_col : str
        Name of the target column
    X_scaler : StandardScaler
        Scaler used for features
    y_scaler : MinMaxScaler
        Scaler used for target
    time_steps : int
        Number of time steps used for training
    forecast_horizon : int
        Number of periods to forecast
        
    Returns:
    --------
    pd.Series
        Forecasted values
    """
    print(f"Generating {forecast_horizon} period forecast...")
    
    # Get the last time_steps data points
    X_last = df.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df.columns], axis=1).values[-time_steps:]
    X_last_scaled = X_scaler.transform(X_last)
    
    # Reshape for Transformer input [1, time_steps, features]
    X_last_scaled = X_last_scaled.reshape(1, time_steps, X_last_scaled.shape[1])
    
    # Get the last date in the dataframe
    last_date = df.index[-1]
    
    # Create a list to store forecasts
    forecasts = []
    
    # Generate forecasts recursively
    for i in range(forecast_horizon):
        # Make prediction
        forecast_scaled = model.predict(X_last_scaled)
        
        # Inverse transform prediction
        forecast = y_scaler.inverse_transform(forecast_scaled)[0][0]
        
        # Store forecast
        forecasts.append(forecast)
        
        # For a more accurate implementation, we would need to update all features
        # based on the new forecast, but this is a simplified version
        # Here we just shift the input sequence and append the new prediction
        X_last_scaled = np.roll(X_last_scaled, -1, axis=1)
        X_last_scaled[0, -1, :] = X_last_scaled[0, -2, :]  # Simple copy of the last known features
    
    # Create a Series with the forecasts
    future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=forecast_horizon, freq='MS')
    forecast_series = pd.Series(forecasts, index=future_dates)
    
    # Plot historical data with forecasts
    plt.figure(figsize=(14, 7))
    plt.plot(df[target_col].index, df[target_col], label='Historical Data')
    plt.plot(forecast_series.index, forecast_series, label='Forecast', color='red', linestyle='--')
    plt.title(f'Transformer: {target_col} Forecast')
    plt.xlabel('Date')
    plt.ylabel(target_col)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/transformer_future_forecast_{target_col}.png')
    plt.close()
    
    return forecast_series

def main():
    # Create plots and models directories if they don't exist
    import os
    for directory in ['plots', 'models']:
        if not os.path.exists(directory):
            os.makedirs(directory)
    
    # Load data
    file_path = 'data/analyzed_time_series.csv'
    df = load_data(file_path)
    
    # Engineer features
    df_engineered = engineer_features(df)
    
    # Run Transformer for CPI Year-over-Year
    target_col = 'cpi_yoy'
    model_yoy, predictions_yoy, metrics_yoy, history_yoy, X_scaler_yoy, y_scaler_yoy = run_transformer(
        df_engineered, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32
    )
    
    # Generate future forecasts for YoY
    forecast_yoy = forecast_future_transformer(
        model_yoy, df_engineered, target_col, X_scaler_yoy, y_scaler_yoy, time_steps=12, forecast_horizon=24
    )
    print(f"\nFuture {target_col} forecasts:")
    print(forecast_yoy)
    
    # Run Transformer for CPI Month-over-Month
    target_col = 'cpi_mom'
    model_mom, predictions_mom, metrics_mom, history_mom, X_scaler_mom, y_scaler_mom = run_transformer(
        df_engineered, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32
    )
    
    # Generate future forecasts for MoM
    forecast_mom = forecast_future_transformer(
        model_mom, df_engineered, target_col, X_scaler_mom, y_scaler_mom, time_steps=12, forecast_horizon=24
    )
    print(f"\nFuture {target_col} forecasts:")
    print(forecast_mom)
    
    # Compare with other deep learning models (if available)
    try:
        # Try to load metrics from other models
        lstm_metrics = pd.read_csv('plots/lstm_metrics.csv')
        gru_metrics = pd.read_csv('plots/gru_metrics.csv')
        tcn_metrics = pd.read_csv('plots/tcn_metrics.csv')
        
        # Create Transformer metrics dataframe
        transformer_metrics = pd.DataFrame([
            {
                'Model': 'Transformer', 'Target': 'CPI MoM',
                'Train_RMSE': metrics_mom['train_rmse'], 'Train_MAE': metrics_mom['train_mae'], 
                'Train_MAPE': metrics_mom['train_mape'], 'Train_R2': metrics_mom['train_r2'],
                'Test_RMSE': metrics_mom['test_rmse'], 'Test_MAE': metrics_mom['test_mae'], 
                'Test_MAPE': metrics_mom['test_mape'], 'Test_R2': metrics_mom['test_r2']
            },
            {
                'Model': 'Transformer', 'Target': 'CPI YoY',
                'Train_RMSE': metrics_yoy['train_rmse'], 'Train_MAE': metrics_yoy['train_mae'], 
                'Train_MAPE': metrics_yoy['train_mape'], 'Train_R2': metrics_yoy['train_r2'],
                'Test_RMSE': metrics_yoy['test_rmse'], 'Test_MAE': metrics_yoy['test_mae'], 
                'Test_MAPE': metrics_yoy['test_mape'], 'Test_R2': metrics_yoy['test_r2']
            }
        ])
        
        # Combine metrics
        combined_metrics = pd.concat([lstm_metrics, gru_metrics, tcn_metrics, transformer_metrics])
        combined_metrics.to_csv('plots/all_models_comparison_metrics.csv', index=False)
        print("\nComparison with other deep learning models saved to plots/all_models_comparison_metrics.csv")
        
        # Create comparison plots
        for target, target_name in [('CPI MoM', 'cpi_mom'), ('CPI YoY', 'cpi_yoy')]:
            # Filter metrics for the target
            target_metrics = combined_metrics[combined_metrics['Target'] == target]
            
            # Compare test metrics
            metrics_to_compare = ['Test_RMSE', 'Test_MAE', 'Test_MAPE']
            
            plt.figure(figsize=(14, 7))
            
            # Create grouped bar chart
            x = np.arange(len(metrics_to_compare))
            width = 0.2
            
            # Plot bars for each model
            models = ['LSTM', 'GRU', 'TCN', 'Transformer']
            for i, model_name in enumerate(models):
                model_data = target_metrics[target_metrics['Model'] == model_name]
                if not model_data.empty:
                    values = [model_data[metric].values[0] for metric in metrics_to_compare]
                    plt.bar(x + (i-1.5)*width, values, width, label=model_name)
            
            plt.xlabel('Metric')
            plt.ylabel('Value')
            plt.title(f'Deep Learning Models Comparison for {target}')
            plt.xticks(x, metrics_to_compare)
            plt.legend()
            plt.tight_layout()
            plt.savefig(f'plots/all_models_comparison_{target_name}.png')
            plt.close()
            
    except Exception as e:
        print(f"Could not compare with other models: {e}")
        # Save metrics to CSV
        metrics_data = [
            {
                'Model': 'Transformer', 'Target': 'CPI MoM',
                'Train_RMSE': metrics_mom['train_rmse'], 'Train_MAE': metrics_mom['train_mae'], 
                'Train_MAPE': metrics_mom['train_mape'], 'Train_R2': metrics_mom['train_r2'],
                'Test_RMSE': metrics_mom['test_rmse'], 'Test_MAE': metrics_mom['test_mae'], 
                'Test_MAPE': metrics_mom['test_mape'], 'Test_R2': metrics_mom['test_r2']
            },
            {
                'Model': 'Transformer', 'Target': 'CPI YoY',
                'Train_RMSE': metrics_yoy['train_rmse'], 'Train_MAE': metrics_yoy['train_mae'], 
                'Train_MAPE': metrics_yoy['train_mape'], 'Train_R2': metrics_yoy['train_r2'],
                'Test_RMSE': metrics_yoy['test_rmse'], 'Test_MAE': metrics_yoy['test_mae'], 
                'Test_MAPE': metrics_yoy['test_mape'], 'Test_R2': metrics_yoy['test_r2']
            }
        ]
        
        metrics_df = pd.DataFrame(metrics_data)
        metrics_df.to_csv('plots/transformer_metrics.csv', index=False)
        print("\nMetrics saved to plots/transformer_metrics.csv")

if __name__ == "__main__":
    main()

Running Transformer model for cpi_yoy...
Training data: (260, 12, 63), Test data: (65, 12, 63)
Training Transformer model...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Training - RMSE: 2.6557, MAE: 2.0813, MAPE: 1.94%, R²: 0.8018
Testing - RMSE: 1.8976, MAE: 1.5027, MAPE: 1.46%, R²: -1.3401
Generating 24 period forecast...

Future cpi_yoy forecasts:
2025-01-01    103.279755
2025-02-01    103.890457
2025-03-01    103.712593
2025-04-01    104.517929
2025-05-01    104.370888
2025-06-01    103.729355
2025-07-01    103.118645
2025-08-01    102.262581
2025-09-01    102.084023
2025-10-01    101.919800
2025-11-01    101.800255
2025-12-01    101.455254
2026-01-01    101.455254
2026-02-01    101.455254
2026-03-01    101.455254
2026-04-01    101.455254
2026-05-01    

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Conv1D, BatchNormalization, Activation
from tensorflow.keras.layers import Add, Dropout, Lambda, Reshape
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

def load_data(file_path):
    """Load and prepare the time series data"""
    df = pd.read_csv(file_path)
    
    # Convert time column to datetime
    df['time'] = pd.to_datetime(df['time'])
    
    # Set time as index
    df.set_index('time', inplace=True)
    
    return df

def engineer_features(df):
    """Create additional features for modeling"""
    df_copy = df.copy()
    
    # Create lag features
    for i in range(1, 13):
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_lag_{i}'] = df_copy['cpi_mom'].shift(i)
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_lag_{i}'] = df_copy['cpi_yoy'].shift(i)
    
    # Create rolling window features
    for window in [3, 6, 12]:
        if 'cpi_mom' in df_copy.columns:
            df_copy[f'cpi_mom_rolling_mean_{window}'] = df_copy['cpi_mom'].rolling(window=window).mean()
            df_copy[f'cpi_mom_rolling_std_{window}'] = df_copy['cpi_mom'].rolling(window=window).std()
        if 'cpi_yoy' in df_copy.columns:
            df_copy[f'cpi_yoy_rolling_mean_{window}'] = df_copy['cpi_yoy'].rolling(window=window).mean()
            df_copy[f'cpi_yoy_rolling_std_{window}'] = df_copy['cpi_yoy'].rolling(window=window).std()
    
    # Create economic indicator lag features
    for i in range(1, 4):
        if 'oil_price' in df_copy.columns:
            df_copy[f'oil_price_lag_{i}'] = df_copy['oil_price'].shift(i)
        if 'gold_price' in df_copy.columns:
            df_copy[f'gold_price_lag_{i}'] = df_copy['gold_price'].shift(i)
        if 'interest_rate' in df_copy.columns:
            df_copy[f'interest_rate_lag_{i}'] = df_copy['interest_rate'].shift(i)
    
    # Add month and year as cyclical features
    if 'month' in df_copy.columns:
        df_copy['month_sin'] = np.sin(2 * np.pi * df_copy['month']/12)
        df_copy['month_cos'] = np.cos(2 * np.pi * df_copy['month']/12)
    
    # Create interaction features
    if all(col in df_copy.columns for col in ['oil_price', 'gold_price']):
        df_copy['oil_gold_ratio'] = df_copy['oil_price'] / df_copy['gold_price']
    
    # Drop rows with NaN values (due to lag features)
    df_clean = df_copy.dropna()
    
    return df_clean

def calculate_metrics(actual, predicted):
    """Calculate evaluation metrics"""
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    
    return rmse, mae, mape, r2

def create_sequences(X, y, time_steps=12):
    """
    Create sequences for TCN input
    
    Parameters:
    -----------
    X : np.array
        Feature array
    y : np.array
        Target array
    time_steps : int
        Number of time steps to use for each sequence
        
    Returns:
    --------
    tuple
        X_seq, y_seq arrays with shape [samples, time_steps, features] and [samples]
    """
    X_seq, y_seq = [], []
    for i in range(len(X) - time_steps):
        X_seq.append(X[i:i + time_steps])
        y_seq.append(y[i + time_steps])
    
    return np.array(X_seq), np.array(y_seq)

def residual_block(x, dilation_rate, nb_filters, kernel_size, dropout_rate=0.2):
    """
    Defines the residual block for the TCN
    
    Parameters:
    -----------
    x : tensor
        Input tensor
    dilation_rate : int
        Dilation rate for the dilated convolution
    nb_filters : int
        Number of filters in the convolution
    kernel_size : int
        Size of the kernel
    dropout_rate : float
        Dropout rate
        
    Returns:
    --------
    tensor
        Output tensor
    """
    prev_x = x
    
    # First dilated convolution
    x = Conv1D(filters=nb_filters, kernel_size=kernel_size, 
               dilation_rate=dilation_rate, padding='causal')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dropout_rate)(x)
    
    # Second dilated convolution
    x = Conv1D(filters=nb_filters, kernel_size=kernel_size,
               dilation_rate=dilation_rate, padding='causal')(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = Dropout(dropout_rate)(x)
    
    # If the input shape is different from the output shape, use a 1x1 conv to match dimensions
    if prev_x.shape[-1] != nb_filters:
        prev_x = Conv1D(nb_filters, kernel_size=1)(prev_x)
    
    # Add skip connection
    res = Add()([prev_x, x])
    
    return res

def build_tcn_model(input_shape, nb_filters=64, kernel_size=3, nb_stacks=1, dilations=None, 
                   dropout_rate=0.2, learning_rate=0.001):
    """
    Build a TCN model
    
    Parameters:
    -----------
    input_shape : tuple
        Shape of input data (time_steps, features)
    nb_filters : int
        Number of filters in the convolution
    kernel_size : int
        Size of the kernel
    nb_stacks : int
        Number of stacks of residual blocks
    dilations : list
        List of dilations for each residual block
    dropout_rate : float
        Dropout rate
    learning_rate : float
        Learning rate for optimizer
        
    Returns:
    --------
    tf.keras.Model
        Compiled TCN model
    """
    if dilations is None:
        dilations = [1, 2, 4, 8, 16, 32]
    
    input_layer = Input(shape=input_shape)
    x = input_layer
    
    # Create TCN architecture
    for stack in range(nb_stacks):
        for dilation_rate in dilations:
            x = residual_block(x, dilation_rate, nb_filters, kernel_size, dropout_rate)
    
    # Apply a final convolution to get the output
    x = Conv1D(filters=nb_filters, kernel_size=1)(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    
    # Global average pooling to get a single vector
    x = Lambda(lambda z: tf.reduce_mean(z, axis=1))(x)
    
    # Output layer
    output_layer = Dense(1)(x)
    
    model = Model(inputs=input_layer, outputs=output_layer)
    
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='mse'
    )
    
    return model

def run_tcn(df, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32):
    """
    Train and evaluate a TCN model for time series forecasting
    
    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe with engineered features
    target_col : str
        Name of the target column
    test_size : float
        Proportion of data to use for testing
    time_steps : int
        Number of time steps to use for each sequence
    epochs : int
        Number of training epochs
    batch_size : int
        Batch size for training
        
    Returns:
    --------
    tuple
        Model, predictions, metrics, history
    """
    print(f"Running TCN model for {target_col}...")
    
    # Define features and target
    X = df.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df.columns], axis=1).values
    y = df[target_col].values
    
    # Scale features and target
    X_scaler = StandardScaler()
    X_scaled = X_scaler.fit_transform(X)
    
    y_scaler = MinMaxScaler()
    y_scaled = y_scaler.fit_transform(y.reshape(-1, 1)).flatten()
    
    # Create sequences
    X_seq, y_seq = create_sequences(X_scaled, y_scaled, time_steps)
    
    # Split data into training and testing sets (time-based split)
    split_idx = int(len(X_seq) * (1 - test_size))
    X_train, X_test = X_seq[:split_idx], X_seq[split_idx:]
    y_train, y_test = y_seq[:split_idx], y_seq[split_idx:]
    
    print(f"Training data: {X_train.shape}, Test data: {X_test.shape}")
    
    # Build model
    model = build_tcn_model(
        input_shape=(X_train.shape[1], X_train.shape[2]),
        nb_filters=64,
        kernel_size=3,
        nb_stacks=1,
        dilations=[1, 2, 4, 8, 16],
        dropout_rate=0.2,
        learning_rate=0.001
    )
    
    # Define callbacks
    callbacks = [
        EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, min_lr=0.0001),
        ModelCheckpoint(f'models/tcn_{target_col}.h5', monitor='val_loss', save_best_only=True)
    ]
    
    # Train model
    print("Training TCN model...")
    history = model.fit(
        X_train, y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=1
    )
    
    # Make predictions
    y_pred_train_scaled = model.predict(X_train)
    y_pred_test_scaled = model.predict(X_test)
    
    # Inverse transform predictions
    y_pred_train = y_scaler.inverse_transform(y_pred_train_scaled).flatten()
    y_pred_test = y_scaler.inverse_transform(y_pred_test_scaled).flatten()
    
    # Inverse transform actual values
    y_train_actual = y_scaler.inverse_transform(y_train.reshape(-1, 1)).flatten()
    y_test_actual = y_scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
    
    # Calculate metrics
    train_rmse, train_mae, train_mape, train_r2 = calculate_metrics(y_train_actual, y_pred_train)
    test_rmse, test_mae, test_mape, test_r2 = calculate_metrics(y_test_actual, y_pred_test)
    
    print(f"Training - RMSE: {train_rmse:.4f}, MAE: {train_mae:.4f}, MAPE: {train_mape:.2f}%, R²: {train_r2:.4f}")
    print(f"Testing - RMSE: {test_rmse:.4f}, MAE: {test_mae:.4f}, MAPE: {test_mape:.2f}%, R²: {test_r2:.4f}")
    
    # Plot training history
    plt.figure(figsize=(12, 6))
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title(f'TCN Training History for {target_col}')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/tcn_training_history_{target_col}.png')
    plt.close()
    
    # Plot actual vs predicted
    # Get the original dates for the test set
    test_dates = df.index[time_steps+split_idx:time_steps+len(X_seq)]
    
    plt.figure(figsize=(14, 7))
    plt.plot(test_dates, y_test_actual, label='Actual', color='blue')
    plt.plot(test_dates, y_pred_test, label='Predicted', color='red', linestyle='--')
    plt.title(f'TCN: Actual vs Predicted {target_col}')
    plt.xlabel('Date')
    plt.ylabel(target_col)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/tcn_predictions_{target_col}.png')
    plt.close()
    
    # Create a DataFrame with predictions
    predictions = pd.DataFrame({
        'Actual': y_test_actual,
        'Predicted': y_pred_test,
        'Error': y_test_actual - y_pred_test
    }, index=test_dates)
    
    # Plot error distribution
    plt.figure(figsize=(10, 6))
    sns.histplot(predictions['Error'], kde=True)
    plt.title(f'Error Distribution for {target_col}')
    plt.xlabel('Error')
    plt.savefig(f'plots/tcn_error_distribution_{target_col}.png')
    plt.close()
    
    # Return results
    metrics = {
        'train_rmse': train_rmse,
        'train_mae': train_mae,
        'train_mape': train_mape,
        'train_r2': train_r2,
        'test_rmse': test_rmse,
        'test_mae': test_mae,
        'test_mape': test_mape,
        'test_r2': test_r2
    }
    
    return model, predictions, metrics, history, X_scaler, y_scaler

def forecast_future_tcn(model, df, target_col, X_scaler, y_scaler, time_steps=12, forecast_horizon=24):
    """
    Generate future forecasts using the trained TCN model
    
    Parameters:
    -----------
    model : tf.keras.Model
        Trained TCN model
    df : pd.DataFrame
        Input dataframe with features
    target_col : str
        Name of the target column
    X_scaler : StandardScaler
        Scaler used for features
    y_scaler : MinMaxScaler
        Scaler used for target
    time_steps : int
        Number of time steps used for training
    forecast_horizon : int
        Number of periods to forecast
        
    Returns:
    --------
    pd.Series
        Forecasted values
    """
    print(f"Generating {forecast_horizon} period forecast...")
    
    # Get the last time_steps data points
    X_last = df.drop([col for col in ['cpi_mom', 'cpi_yoy', target_col] if col in df.columns], axis=1).values[-time_steps:]
    X_last_scaled = X_scaler.transform(X_last)
    
    # Reshape for TCN input [1, time_steps, features]
    X_last_scaled = X_last_scaled.reshape(1, time_steps, X_last_scaled.shape[1])
    
    # Get the last date in the dataframe
    last_date = df.index[-1]
    
    # Create a list to store forecasts
    forecasts = []
    
    # Generate forecasts recursively
    for i in range(forecast_horizon):
        # Make prediction
        forecast_scaled = model.predict(X_last_scaled)
        
        # Inverse transform prediction
        forecast = y_scaler.inverse_transform(forecast_scaled)[0][0]
        
        # Store forecast
        forecasts.append(forecast)
        
        # For a more accurate implementation, we would need to update all features
        # based on the new forecast, but this is a simplified version
        # Here we just shift the input sequence and append the new prediction
        X_last_scaled = np.roll(X_last_scaled, -1, axis=1)
        X_last_scaled[0, -1, :] = X_last_scaled[0, -2, :]  # Simple copy of the last known features
    
    # Create a Series with the forecasts
    future_dates = pd.date_range(start=last_date + pd.DateOffset(months=1), periods=forecast_horizon, freq='MS')
    forecast_series = pd.Series(forecasts, index=future_dates)
    
    # Plot historical data with forecasts
    plt.figure(figsize=(14, 7))
    plt.plot(df[target_col].index, df[target_col], label='Historical Data')
    plt.plot(forecast_series.index, forecast_series, label='Forecast', color='red', linestyle='--')
    plt.title(f'TCN: {target_col} Forecast')
    plt.xlabel('Date')
    plt.ylabel(target_col)
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f'plots/tcn_future_forecast_{target_col}.png')
    plt.close()
    
    return forecast_series

def main():
    # Create plots and models directories if they don't exist
    import os
    for directory in ['plots', 'models']:
        if not os.path.exists(directory):
            os.makedirs(directory)
    
    # Load data
    file_path = 'data/analyzed_time_series.csv'
    df = load_data(file_path)
    
    # Engineer features
    df_engineered = engineer_features(df)
    
    # Run TCN for CPI Year-over-Year
    target_col = 'cpi_yoy'
    model_yoy, predictions_yoy, metrics_yoy, history_yoy, X_scaler_yoy, y_scaler_yoy = run_tcn(
        df_engineered, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32
    )
    
    # Generate future forecasts for YoY
    forecast_yoy = forecast_future_tcn(
        model_yoy, df_engineered, target_col, X_scaler_yoy, y_scaler_yoy, time_steps=12, forecast_horizon=24
    )
    print(f"\nFuture {target_col} forecasts:")
    print(forecast_yoy)
    
    # Run TCN for CPI Month-over-Month
    target_col = 'cpi_mom'
    model_mom, predictions_mom, metrics_mom, history_mom, X_scaler_mom, y_scaler_mom = run_tcn(
        df_engineered, target_col, test_size=0.2, time_steps=12, epochs=100, batch_size=32
    )
    
    # Generate future forecasts for MoM
    forecast_mom = forecast_future_tcn(
        model_mom, df_engineered, target_col, X_scaler_mom, y_scaler_mom, time_steps=12, forecast_horizon=24
    )
    print(f"\nFuture {target_col} forecasts:")
    print(forecast_mom)
    
    # Compare with other deep learning models (if available)
    try:
        # Try to load metrics from other models
        lstm_metrics = pd.read_csv('plots/lstm_metrics.csv')
        gru_metrics = pd.read_csv('plots/gru_metrics.csv')
        
        # Create TCN metrics dataframe
        tcn_metrics = pd.DataFrame([
            {
                'Model': 'TCN', 'Target': 'CPI MoM',
                'Train_RMSE': metrics_mom['train_rmse'], 'Train_MAE': metrics_mom['train_mae'], 
                'Train_MAPE': metrics_mom['train_mape'], 'Train_R2': metrics_mom['train_r2'],
                'Test_RMSE': metrics_mom['test_rmse'], 'Test_MAE': metrics_mom['test_mae'], 
                'Test_MAPE': metrics_mom['test_mape'], 'Test_R2': metrics_mom['test_r2']
            },
            {
                'Model': 'TCN', 'Target': 'CPI YoY',
                'Train_RMSE': metrics_yoy['train_rmse'], 'Train_MAE': metrics_yoy['train_mae'], 
                'Train_MAPE': metrics_yoy['train_mape'], 'Train_R2': metrics_yoy['train_r2'],
                'Test_RMSE': metrics_yoy['test_rmse'], 'Test_MAE': metrics_yoy['test_mae'], 
                'Test_MAPE': metrics_yoy['test_mape'], 'Test_R2': metrics_yoy['test_r2']
            }
        ])
        
        # Combine metrics
        combined_metrics = pd.concat([lstm_metrics, gru_metrics, tcn_metrics])
        combined_metrics.to_csv('plots/deep_learning_comparison_metrics.csv', index=False)
        print("\nComparison with other deep learning models saved to plots/deep_learning_comparison_metrics.csv")
        
        # Create comparison plots
        for target, target_name in [('CPI MoM', 'cpi_mom'), ('CPI YoY', 'cpi_yoy')]:
            # Filter metrics for the target
            target_metrics = combined_metrics[combined_metrics['Target'] == target]
            
            # Compare test metrics
            metrics_to_compare = ['Test_RMSE', 'Test_MAE', 'Test_MAPE']
            
            plt.figure(figsize=(12, 6))
            
            # Create grouped bar chart
            x = np.arange(len(metrics_to_compare))
            width = 0.25
            
            # Plot bars for each model
            for i, model_name in enumerate(['LSTM', 'GRU', 'TCN']):
                model_data = target_metrics[target_metrics['Model'] == model_name]
                if not model_data.empty:
                    values = [model_data[metric].values[0] for metric in metrics_to_compare]
                    plt.bar(x + (i-1)*width, values, width, label=model_name)
            
            plt.xlabel('Metric')
            plt.ylabel('Value')
            plt.title(f'Deep Learning Models Comparison for {target}')
            plt.xticks(x, metrics_to_compare)
            plt.legend()
            plt.tight_layout()
            plt.savefig(f'plots/deep_learning_comparison_{target_name}.png')
            plt.close()
            
    except Exception as e:
        print(f"Could not compare with other models: {e}")
        # Save metrics to CSV
        metrics_data = [
            {
                'Model': 'TCN', 'Target': 'CPI MoM',
                'Train_RMSE': metrics_mom['train_rmse'], 'Train_MAE': metrics_mom['train_mae'], 
                'Train_MAPE': metrics_mom['train_mape'], 'Train_R2': metrics_mom['train_r2'],
                'Test_RMSE': metrics_mom['test_rmse'], 'Test_MAE': metrics_mom['test_mae'], 
                'Test_MAPE': metrics_mom['test_mape'], 'Test_R2': metrics_mom['test_r2']
            },
            {
                'Model': 'TCN', 'Target': 'CPI YoY',
                'Train_RMSE': metrics_yoy['train_rmse'], 'Train_MAE': metrics_yoy['train_mae'], 
                'Train_MAPE': metrics_yoy['train_mape'], 'Train_R2': metrics_yoy['train_r2'],
                'Test_RMSE': metrics_yoy['test_rmse'], 'Test_MAE': metrics_yoy['test_mae'], 
                'Test_MAPE': metrics_yoy['test_mape'], 'Test_R2': metrics_yoy['test_r2']
            }
        ]
        
        metrics_df = pd.DataFrame(metrics_data)
        metrics_df.to_csv('plots/tcn_metrics.csv', index=False)
        print("\nMetrics saved to plots/tcn_metrics.csv")

if __name__ == "__main__":
    main()

Running TCN model for cpi_yoy...
Training data: (260, 12, 63), Test data: (65, 12, 63)
Training TCN model...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epo