# 02. Econometric Models (RW, VAR, ARIMA)

**Objective:** Implement and evaluate traditional econometric forecasting models

**Models:**
1. Random Walk (RW)
2. Vector Autoregression (VAR)
3. ARIMA (AutoRegressive Integrated Moving Average)

**Evaluation Metrics:**
- RMSFE (Root Mean Squared Forecast Error)
- MAPE (Mean Absolute Percentage Error)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Econometric models
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import adfuller

# Custom metrics
import sys
sys.path.append('../')
from utils.metrics import rmsfe, mape, evaluate_model

# Paths
PROCESSED_DATA_PATH = Path('../data/processed')
RESULTS_PATH = Path('../results')
# Later cells write plots to RESULTS_PATH/figures and CSVs to RESULTS_PATH/tables.
# Neither plt.savefig nor DataFrame.to_csv creates missing parent directories,
# so create both sub-folders here (parents=True also creates RESULTS_PATH itself).
for _subdir in ('figures', 'tables'):
    (RESULTS_PATH / _subdir).mkdir(parents=True, exist_ok=True)

## 1. Load Processed Data

In [None]:
# Read the pre-split train and test frames; the first CSV column becomes the
# index and pandas attempts to parse it as dates.
df_train, df_test = (
    pd.read_csv(PROCESSED_DATA_PATH / csv_name, index_col=0, parse_dates=True)
    for csv_name in ('df_train.csv', 'df_test.csv')
)

print(f"Train set: {df_train.shape}")
print(f"Test set:  {df_test.shape}")

# The forecasting target is taken to be the first data column
# (assumed to be inflation -- confirm against the preprocessing notebook).
target_col = df_train.columns[0]
print(f"\nTarget variable: {target_col}")

## 2. Model 1: Random Walk (RW)

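The Random Walk model assumes that the best forecast for the next period is the current value:
$$\hat{y}_{t+1} = y_t$$

Because the forecasts below are made from the end of the training sample (time $T$) without updating on test observations, every horizon $h$ receives the same forecast: $\hat{y}_{T+h} = y_T$.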

In [None]:
def random_walk_forecast(train_data, test_size):
    """
    Random Walk forecast: every future value = last observed training value.

    Only the training data is available to this function, so the forecast is
    a multi-step one made from the end of the training sample: the last
    training observation is repeated for each of the ``test_size`` horizons.

    Parameters
    ----------
    train_data : pandas.Series or sequence
        Observed history. A pandas object is read positionally via ``.iloc``;
        any other indexable sequence (list, tuple, ndarray) is read with
        ``[-1]``.
    test_size : int
        Number of forecasts to produce. Values <= 0 yield an empty array.

    Returns
    -------
    numpy.ndarray
        Array of length ``test_size`` filled with the last training value.

    Raises
    ------
    IndexError
        If ``train_data`` is empty and ``test_size`` is positive.
    """
    # Nothing to forecast: return before touching train_data so that an empty
    # history is still acceptable in this case.
    if test_size <= 0:
        return np.array([])

    # The history is never extended with new observations, so the forecast is
    # the same value for every step; read it once instead of once per step.
    if hasattr(train_data, "iloc"):
        last_value = train_data.iloc[-1]
    else:
        last_value = train_data[-1]

    return np.array([last_value] * test_size)

# Produce one Random Walk forecast per test observation
n_test_obs = len(df_test)
rw_predictions = random_walk_forecast(df_train[target_col], n_test_obs)

# Score the forecasts against the held-out target values
rw_results = evaluate_model(
    df_test[target_col].values,
    rw_predictions,
    "Random Walk",
)

## 3. Model 2: ARIMA

ARIMA(p, d, q) model:
- p: autoregressive order
- d: differencing order
- q: moving average order

In [None]:
# Augmented Dickey-Fuller unit-root test on the training target (NaNs removed).
# The first two entries of the result are the test statistic and its p-value.
adf_result = adfuller(df_train[target_col].dropna())
adf_stat, adf_pvalue = adf_result[:2]

print(f"ADF Statistic: {adf_stat:.4f}")
print(f"p-value: {adf_pvalue:.4f}")
# Reported as stationary when the unit-root null is rejected at the 5% level
print(f"Stationary: {adf_pvalue < 0.05}")

In [None]:
# ARIMA specification as (p, d, q); tune these orders for your own data.
# The default below is ARIMA(1,1,1).
arima_order = (1, 1, 1)

# Build the model on the training target, then estimate its parameters
model_arima = ARIMA(endog=df_train[target_col], order=arima_order)
fitted_arima = model_arima.fit()

# Show the estimation report
print(fitted_arima.summary())

In [None]:
# ARIMA predictions
# forecast() returns a pandas Series that carries its own forecast index. That
# index need not equal df_test.index (for example, statsmodels falls back to an
# integer index when the training index has no usable frequency), so any
# index-aligned operation downstream could misalign values or produce NaN.
# Converting to a plain array makes every later use strictly positional.
arima_predictions = np.asarray(fitted_arima.forecast(steps=len(df_test)))

# Evaluate
arima_results = evaluate_model(df_test[target_col].values, arima_predictions, "ARIMA")

## 4. Model 3: VAR (Vector Autoregression)

The VAR model forecasts several time series jointly, so it is only fitted when the dataset contains more than one variable.

In [None]:
# VAR requires multiple time series
# Skip if you only have one variable

if df_train.shape[1] > 1:
    # Fit VAR model on all columns of the training frame
    model_var = VAR(df_train)

    # Select optimal lag order
    lag_order = model_var.select_order(maxlags=12)
    print(lag_order.summary())

    # Fit with the AIC-selected lag, but never with fewer than one lag.
    # If AIC picks 0, k_ar would be 0 and the slice `values[-0:]` below would
    # return the ENTIRE training sample instead of an empty window.
    selected_lag = max(lag_order.aic, 1)
    fitted_var = model_var.fit(selected_lag)
    print(fitted_var.summary())

    # VAR predictions: seed the forecast with the last k_ar training rows
    var_predictions = fitted_var.forecast(df_train.values[-fitted_var.k_ar:], steps=len(df_test))
    # target_col is defined as the first column, so its forecasts are column 0
    var_predictions_target = var_predictions[:, 0]

    # Evaluate
    var_results = evaluate_model(df_test[target_col].values, var_predictions_target, "VAR")
else:
    print("VAR requires multiple variables. Skipping VAR model.")
    # Define both names so later cells can test them without a NameError
    var_predictions_target = None
    var_results = None

## 5. Visualize Results

In [None]:
# Plot predictions vs actual
plt.figure(figsize=(14, 8))

# Actual values
plt.plot(df_test.index, df_test[target_col], label='Actual', linewidth=2, marker='o')

# Random Walk
plt.plot(df_test.index, rw_predictions, label='Random Walk', linewidth=2, marker='s', alpha=0.7)

# ARIMA
plt.plot(df_test.index, arima_predictions, label='ARIMA', linewidth=2, marker='^', alpha=0.7)

# VAR (if available); compare against None explicitly so the check does not
# depend on the truthiness of whatever evaluate_model returns
if var_results is not None:
    plt.plot(df_test.index, var_predictions_target, label='VAR', linewidth=2, marker='d', alpha=0.7)

plt.title('Econometric Models - Forecasts vs Actual', fontsize=16)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Inflation Rate (%)', fontsize=12)
plt.legend(fontsize=11)
plt.grid(True, alpha=0.3)
plt.tight_layout()

# savefig does not create missing directories, so make sure the target exists
figures_dir = RESULTS_PATH / 'figures'
figures_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(figures_dir / 'econometric_forecasts.png', dpi=300, bbox_inches='tight')
plt.show()

## 6. Compare Model Performance

In [None]:
# Compile results; VAR is included only when it was actually fitted
results_list = [rw_results, arima_results]
if var_results is not None:
    results_list.append(var_results)

# One row per model, best (lowest) RMSFE first
results_df = pd.DataFrame(results_list)
results_df = results_df.sort_values('RMSFE')

print("\n" + "="*50)
print("ECONOMETRIC MODELS PERFORMANCE SUMMARY")
print("="*50)
print(results_df.to_string(index=False))
print("="*50)

# Save results; to_csv fails if the directory is missing, so create it first
tables_dir = RESULTS_PATH / 'tables'
tables_dir.mkdir(parents=True, exist_ok=True)
results_df.to_csv(tables_dir / 'econometric_results.csv', index=False)
print("\n✓ Results saved to results/tables/econometric_results.csv")

In [None]:
# Save predictions for later comparison
# Every column is passed as a plain array so values are placed positionally
# against df_test.index. A pandas Series (which ARIMA's forecast() can return)
# would instead be re-aligned on its own index by the DataFrame constructor
# and turn into NaN wherever that index differs from df_test.index.
predictions_df = pd.DataFrame({
    'Actual': df_test[target_col].values,
    'RW': np.asarray(rw_predictions),
    'ARIMA': np.asarray(arima_predictions)
}, index=df_test.index)

# VAR column only when the VAR model was actually fitted
if var_results is not None:
    predictions_df['VAR'] = np.asarray(var_predictions_target)

# to_csv fails if the directory is missing, so create it first
tables_dir = RESULTS_PATH / 'tables'
tables_dir.mkdir(parents=True, exist_ok=True)
predictions_df.to_csv(tables_dir / 'econometric_predictions.csv')
print("✓ Predictions saved to results/tables/econometric_predictions.csv")

## Summary

**Econometric models completed:**
- ✓ Random Walk (RW)
- ✓ ARIMA
- ✓ VAR (if applicable)

**Next steps:**
- Run linear ML models (Notebook 03)
- Run nonlinear ML models (Notebook 04)
- Compare all models (Notebook 05)