[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/danpele/Time-Series-Analysis/blob/main/chapter10_lecture_notebook.ipynb)

---

# Chapter 10: Comprehensive Review

**Complete Time Series Analysis with Real Data**

**Course:** Time Series Analysis and Forecasting  
**Program:** Bachelor program, Faculty of Cybernetics, Statistics and Economic Informatics, Bucharest University of Economic Studies, Romania  
**Academic Year:** 2025-2026

---

## Learning Objectives

This comprehensive review demonstrates the complete time series analysis workflow using **real data** (the S&P 500 series in Case Study 1 is simulated to reproduce real 2019-2024 market patterns):

1. **Case Study 1: S&P 500** - Financial data analysis with ARIMA-GARCH
2. **Case Study 2: Air Passengers** - Classical seasonal data with SARIMA and Prophet
3. **Case Study 3: US Retail Sales** - Economic data with structural breaks

We will apply ALL methods covered in the course:
- Stationarity testing (ADF, KPSS)
- ARIMA/SARIMA modeling
- GARCH for volatility
- Prophet (TBATS appears in the model selection guide as an option for multiple seasonalities)
- Model comparison and evaluation

## Setup and Imports

In [None]:
# Install required packages (for Colab)
import sys
if 'google.colab' in sys.modules:
    !pip install prophet arch statsmodels -q

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Statistical tests and models
from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.seasonal import seasonal_decompose

# GARCH
try:
    from arch import arch_model
    HAS_ARCH = True
except ImportError:
    HAS_ARCH = False
    print("arch not installed. Install with: pip install arch")

# Prophet
try:
    from prophet import Prophet
    HAS_PROPHET = True
except ImportError:
    HAS_PROPHET = False
    print("Prophet not installed. Install with: pip install prophet")

from sklearn.metrics import mean_squared_error, mean_absolute_error

# Shared matplotlib defaults: wide figures, transparent backgrounds,
# no grid, no top/right spines, frameless legends
plt.rcParams.update({
    'figure.figsize': (14, 5),
    'font.size': 11,
    'axes.facecolor': 'none',
    'figure.facecolor': 'none',
    'axes.grid': False,
    'axes.spines.top': False,
    'axes.spines.right': False,
    'legend.frameon': False,
})

# Colour palette reused by every figure in the notebook
COLORS = dict(blue='#1A3A6E', red='#DC3545', green='#2E7D32', orange='#E67E22', gray='#666666')

# Report which optional packages were importable
print("Setup complete!")
print("ARCH/GARCH available:", HAS_ARCH)
print("Prophet available:", HAS_PROPHET)

## Real Data Loading Functions

In [None]:
def get_sp500_data():
    """Simulate S&P 500 daily prices patterned on 2019-2024 market regimes.

    Prices start at 2500 on the first business day and compound a daily
    return ``drift + vol * N(0, 1)``. Drift and volatility depend on which
    date window the day falls in (COVID crash, COVID recovery, 2022 bear
    market, or the default regime).

    Returns:
        pandas.DataFrame: one row per business day with columns ``ds``
        (date), ``price`` and ``returns`` (percentage change of ``price``,
        NaN on the first row).
    """
    # Private generator: a RandomState seeded with 42 yields the same draws
    # as np.random.seed(42) + np.random.randn(), but does not reseed the
    # global NumPy random state of whoever calls this function.
    rng = np.random.RandomState(42)
    dates = pd.date_range('2019-01-01', '2024-01-01', freq='B')

    # (first day, last day, daily drift, daily volatility). Bounds are
    # inclusive; the timestamps are built once instead of on every iteration.
    regimes = (
        (pd.Timestamp('2020-02-20'), pd.Timestamp('2020-03-23'), -0.015, 0.04),    # COVID crash
        (pd.Timestamp('2020-03-24'), pd.Timestamp('2020-08-01'), 0.003, 0.025),    # COVID recovery
        (pd.Timestamp('2022-01-01'), pd.Timestamp('2022-10-01'), -0.0005, 0.015),  # 2022 bear market
    )
    normal_regime = (0.0003, 0.01)

    prices = [2500]
    for date in dates[1:]:
        # The windows do not overlap, so the first match is the only match
        drift, vol = next(
            ((d, v) for start, end, d, v in regimes if start <= date <= end),
            normal_regime,
        )
        ret = drift + vol * rng.randn()
        prices.append(prices[-1] * (1 + ret))

    df = pd.DataFrame({'ds': dates, 'price': prices})
    df['returns'] = df['price'].pct_change() * 100
    return df


def get_air_passengers():
    """Return the classic monthly Air Passengers series (1949-1960).

    Returns:
        pandas.DataFrame: 144 rows with ``ds`` (month-start dates beginning
        1949-01-01) and ``y`` (the passenger counts listed below).
    """
    # One row per calendar year, January through December
    by_year = (
        (112, 118, 132, 129, 121, 135, 148, 148, 136, 119, 104, 118),  # 1949
        (115, 126, 141, 135, 125, 149, 170, 170, 158, 133, 114, 140),  # 1950
        (145, 150, 178, 163, 172, 178, 199, 199, 184, 162, 146, 166),  # 1951
        (171, 180, 193, 181, 183, 218, 230, 242, 209, 191, 172, 194),  # 1952
        (196, 196, 236, 235, 229, 243, 264, 272, 237, 211, 180, 201),  # 1953
        (204, 188, 235, 227, 234, 264, 302, 293, 259, 229, 203, 229),  # 1954
        (242, 233, 267, 269, 270, 315, 364, 347, 312, 274, 237, 278),  # 1955
        (284, 277, 317, 313, 318, 374, 413, 405, 355, 306, 271, 306),  # 1956
        (315, 301, 356, 348, 355, 422, 465, 467, 404, 347, 305, 336),  # 1957
        (340, 318, 362, 348, 363, 435, 491, 505, 404, 359, 310, 337),  # 1958
        (360, 342, 406, 396, 420, 472, 548, 559, 463, 407, 362, 405),  # 1959
        (417, 391, 419, 461, 472, 535, 622, 606, 508, 461, 390, 432),  # 1960
    )
    passengers = [count for year in by_year for count in year]
    months = pd.date_range(start='1949-01-01', periods=len(passengers), freq='MS')
    return pd.DataFrame({'ds': months, 'y': passengers})


def get_retail_sales():
    """Return monthly US Retail Sales figures for 2018-2023.

    Returns:
        pandas.DataFrame: 72 rows with ``ds`` (month-start dates beginning
        2018-01-01) and ``y`` (the sales values listed below).
    """
    # One row per calendar year, January through December
    by_year = (
        (457.6, 459.1, 468.2, 469.2, 473.9, 477.6, 482.1, 483.0, 473.7, 476.2, 477.9, 502.7),  # 2018
        (455.6, 459.8, 472.0, 470.5, 479.3, 480.7, 485.9, 488.6, 479.9, 483.6, 481.7, 516.0),  # 2019
        (461.2, 461.5, 414.7, 384.9, 476.4, 509.3, 516.1, 521.7, 527.0, 524.7, 519.6, 553.3),  # 2020
        (510.6, 507.4, 560.1, 561.1, 567.0, 574.0, 582.0, 585.0, 581.0, 596.1, 595.6, 630.1),  # 2021
        (581.9, 587.8, 631.5, 613.8, 629.3, 633.0, 631.8, 638.7, 625.5, 641.0, 633.7, 671.9),  # 2022
        (620.6, 624.0, 670.2, 656.5, 666.3, 670.1, 673.2, 679.3, 668.6, 686.1, 672.3, 724.5),  # 2023
    )
    sales = [value for year in by_year for value in year]
    months = pd.date_range(start='2018-01-01', periods=len(sales), freq='MS')
    return pd.DataFrame({'ds': months, 'y': sales})


print("Data loading functions defined!")

---
# Case Study 1: S&P 500 Financial Analysis

Financial time series require special treatment:
- Returns (not prices) for modeling
- GARCH for volatility clustering
- Often weak predictability in mean

In [None]:
# Build the S&P 500 frame used throughout Case Study 1
sp500 = get_sp500_data()

span_start = sp500['ds'].min().date()
span_end = sp500['ds'].max().date()
price_col = sp500['price']
return_col = sp500['returns']

# Assemble the whole report first, then emit it in one write
overview_lines = [
    "S&P 500 Data Overview",
    "=" * 50,
    f"Period: {span_start} to {span_end}",
    f"Observations: {len(sp500)} trading days",
    "\nPrice Statistics:",
    f"  Min: ${price_col.min():.2f}",
    f"  Max: ${price_col.max():.2f}",
    "\nReturn Statistics:",
    f"  Mean: {return_col.mean():.4f}%",
    f"  Std: {return_col.std():.4f}%",
    f"  Skewness: {return_col.skew():.4f}",
    f"  Kurtosis: {return_col.kurtosis():.4f}",
]
print("\n".join(overview_lines))

In [None]:
# Two stacked panels: price level above, daily percentage returns below
fig, axes = plt.subplots(2, 1, figsize=(14, 8))
price_ax, return_ax = axes

# Price path, with the crash window shaded
crash_window = (pd.Timestamp('2020-02-20'), pd.Timestamp('2020-03-23'))
price_ax.plot(sp500['ds'], sp500['price'], color=COLORS['blue'], linewidth=1)
price_ax.axvspan(*crash_window, alpha=0.3, color=COLORS['red'], label='COVID-19 Crash')
price_ax.set_title('S&P 500 Daily Prices (2019-2024)', fontweight='bold')
price_ax.set_ylabel('Price')
price_ax.legend(loc='upper left')

# Daily returns around a zero reference line
return_ax.plot(sp500['ds'], sp500['returns'], color=COLORS['green'], linewidth=0.5)
return_ax.axhline(y=0, color='black', linewidth=0.5, alpha=0.3)
return_ax.set_title('S&P 500 Daily Returns (%)', fontweight='bold')
return_ax.set_xlabel('Date')
return_ax.set_ylabel('Return (%)')

plt.tight_layout()
plt.show()

for note in (
    "Key Observations:",
    "- COVID-19 crash (Feb-Mar 2020): Sharp decline",
    "- Volatility clustering: Large returns followed by large returns",
    "- Returns appear stationary, prices do not",
):
    print(note)

## Step 1: Stationarity Testing

In [None]:
def test_stationarity(series, name):
    """Print ADF and KPSS results for ``series`` and combine both verdicts.

    Missing values are dropped before testing. The ADF result is read as
    stationary when its p-value is below 0.05; the KPSS result is read as
    stationary when its p-value is above 0.05.

    Args:
        series: pandas Series to test.
        name: label used in the printed header.

    Returns:
        True only when both tests point to stationarity at the 5% level.
    """
    clean = series.dropna()

    print(f"\nStationarity Tests for {name}")
    print("-" * 40)

    # ADF: lag length chosen by AIC
    adf_stat, adf_pvalue = adfuller(clean, autolag='AIC')[:2]
    adf_ok = adf_pvalue < 0.05
    print("ADF Test:")
    print(f"  Statistic: {adf_stat:.4f}")
    print(f"  p-value: {adf_pvalue:.4f}")
    print(f"  Conclusion: {'Stationary' if adf_ok else 'Non-stationary'}")

    # KPSS: constant-only regression, automatic lag selection
    kpss_stat, kpss_pvalue = kpss(clean, regression='c', nlags='auto')[:2]
    kpss_ok = kpss_pvalue > 0.05
    print("\nKPSS Test:")
    print(f"  Statistic: {kpss_stat:.4f}")
    print(f"  p-value: {kpss_pvalue:.4f}")
    print(f"  Conclusion: {'Stationary' if kpss_ok else 'Non-stationary'}")

    return adf_ok and kpss_ok

# Run the ADF/KPSS pair on the price level first, then on the returns
prices_stationary, returns_stationary = (
    test_stationarity(candidate, label)
    for candidate, label in (
        (sp500['price'], 'Prices'),
        (sp500['returns'].dropna(), 'Returns'),
    )
)

print(f"\n{'=' * 50}")
print("CONCLUSION: Use RETURNS for modeling (stationary)")

## Step 2: ACF/PACF Analysis

In [None]:
returns = sp500['returns'].dropna()

fig, axes = plt.subplots(2, 2, figsize=(14, 8))

# Row 0: raw returns; row 1: squared returns (volatility).
# Left column is the ACF, right column the PACF.
panels = (
    (returns, 'Returns'),
    (returns**2, 'Squared Returns (Volatility)'),
)
for row, (panel_series, panel_label) in enumerate(panels):
    plot_acf(panel_series, ax=axes[row, 0], lags=30, alpha=0.05)
    axes[row, 0].set_title(f'ACF: {panel_label}', fontweight='bold')

    plot_pacf(panel_series, ax=axes[row, 1], lags=30, alpha=0.05)
    axes[row, 1].set_title(f'PACF: {panel_label}', fontweight='bold')

plt.tight_layout()
plt.show()

print(
    "Key Findings:\n"
    "- Returns: Near white noise (weak autocorrelation)\n"
    "- Squared returns: Strong persistence → GARCH needed!"
)

## Step 3: ARIMA-GARCH Model (AR(1) mean with GARCH(1,1) volatility)

In [None]:
if not HAS_ARCH:
    print("ARCH package not available. Install with: pip install arch")
else:
    print("Fitting GARCH(1,1) Model")
    print("=" * 50)

    # AR(1) conditional mean combined with a GARCH(1,1) conditional variance
    model = arch_model(returns, mean='AR', lags=1, vol='Garch', p=1, q=1)
    results = model.fit(disp='off')  # disp='off' silences optimizer output

    print(results.summary())

In [None]:
if HAS_ARCH:
    # Plot conditional volatility
    fig, axes = plt.subplots(2, 1, figsize=(14, 8))

    cond_vol = results.conditional_volatility
    # Look the dates up through the returns' own index instead of slicing
    # sp500['ds'][1:], so the x-axis stays aligned with `returns` however
    # many rows dropna() removed.
    return_dates = sp500.loc[returns.index, 'ds']

    # Returns with volatility bands
    axes[0].plot(return_dates, returns, color=COLORS['blue'], linewidth=0.5, alpha=0.7)
    axes[0].plot(return_dates, 2*cond_vol, color=COLORS['red'], linewidth=1, label='+2σ')
    axes[0].plot(return_dates, -2*cond_vol, color=COLORS['red'], linewidth=1, label='-2σ')
    axes[0].axhline(y=0, color='black', linewidth=0.5, alpha=0.3)
    axes[0].set_title('S&P 500 Returns with GARCH(1,1) Volatility Bands', fontweight='bold')
    axes[0].set_ylabel('Return (%)')
    axes[0].legend()

    # Conditional volatility as a filled area
    axes[1].fill_between(return_dates, 0, cond_vol, color=COLORS['orange'], alpha=0.7)
    axes[1].set_title('GARCH(1,1) Conditional Volatility', fontweight='bold')
    axes[1].set_xlabel('Date')
    axes[1].set_ylabel('Volatility (σ)')

    # Mark COVID
    axes[1].axvspan(pd.Timestamp('2020-02-20'), pd.Timestamp('2020-04-30'),
                    alpha=0.3, color=COLORS['red'])

    plt.tight_layout()
    plt.show()

    # Read each coefficient once; persistence is their sum
    arch_alpha = results.params['alpha[1]']
    garch_beta = results.params['beta[1]']

    print("GARCH Model Interpretation:")
    print(f"- α (ARCH): {arch_alpha:.4f}")
    print(f"- β (GARCH): {garch_beta:.4f}")
    print(f"- Persistence (α+β): {arch_alpha + garch_beta:.4f}")
    print("\nHigh persistence indicates volatility shocks are long-lasting.")

---
# Case Study 2: Air Passengers with Seasonality

The classic dataset demonstrating:
- Trend + Seasonality
- Multiplicative pattern
- SARIMA vs Prophet comparison

In [None]:
# Build the Air Passengers frame used throughout Case Study 2
air = get_air_passengers()

passengers = air['y']
# Months in which the series reaches its lowest and highest values
trough_month = air.loc[passengers.idxmin(), 'ds']
peak_month = air.loc[passengers.idxmax(), 'ds']

overview_lines = [
    "Air Passengers Data Overview",
    "=" * 50,
    f"Period: {air['ds'].min().strftime('%Y-%m')} to {air['ds'].max().strftime('%Y-%m')}",
    f"Observations: {len(air)} months",
    "\nStatistics:",
    f"  Mean: {passengers.mean():.1f}",
    f"  Std: {passengers.std():.1f}",
    f"  Min: {passengers.min():.0f} ({trough_month.strftime('%Y-%m')})",
    f"  Max: {passengers.max():.0f} ({peak_month.strftime('%Y-%m')})",
]
print("\n".join(overview_lines))

In [None]:
# 2x2 grid: observed series plus the three multiplicative components
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Top-left: the series as observed
observed_ax = axes[0, 0]
observed_ax.plot(air['ds'], air['y'], color=COLORS['blue'], linewidth=1.5)
observed_ax.set_title('Air Passengers (1949-1960)', fontweight='bold')
observed_ax.set_ylabel('Passengers (thousands)')

# Multiplicative decomposition with a 12-month period
air_series = pd.Series(air['y'].values, index=air['ds'])
decomposition = seasonal_decompose(air_series, model='multiplicative', period=12)

# (axis, component, colour, line width, title, reference line at y=1?)
component_panels = (
    (axes[0, 1], decomposition.trend, COLORS['green'], 2, 'Trend Component', False),
    (axes[1, 0], decomposition.seasonal, COLORS['orange'], 1, 'Seasonal Component (Multiplicative)', True),
    (axes[1, 1], decomposition.resid, COLORS['red'], 1, 'Residual Component', True),
)
for panel_ax, component, shade, width, heading, mark_unity in component_panels:
    panel_ax.plot(air['ds'], component, color=shade, linewidth=width)
    if mark_unity:
        # Dashed reference line at 1 for the multiplicative factors
        panel_ax.axhline(y=1, color='black', linewidth=0.5, linestyle='--')
    panel_ax.set_title(heading, fontweight='bold')

plt.tight_layout()
plt.show()

print("Multiplicative Model: Y = Trend × Seasonal × Residual")
print("Peak season: July-August (vacation travel)")

## SARIMA Model Fitting

In [None]:
# Split the series: the first 120 months train the model, the rest are held out
y = air['y'].to_numpy()
train_size = 120  # 10 years training
train, test = np.split(y, [train_size])
test_dates = air['ds'].iloc[train_size:]

# The model is fitted on the log of the training data
log_train = np.log(train)

print("Fitting SARIMA(2,1,1)(0,1,1)12 Model")
print("=" * 50)

nonseasonal_order = (2, 1, 1)
seasonal_spec = (0, 1, 1, 12)
sarima_model = SARIMAX(log_train, order=nonseasonal_order, seasonal_order=seasonal_spec)
sarima_results = sarima_model.fit(disp=False)

# tables[1] of the summary is the coefficient table
coefficient_table = sarima_results.summary().tables[1]
print(coefficient_table)

In [None]:
# Forecast the hold-out horizon on the log scale, then map back with exp
sarima_forecast_log = sarima_results.get_forecast(steps=len(test))
sarima_forecast = np.exp(sarima_forecast_log.predicted_mean)
sarima_ci = np.exp(sarima_forecast_log.conf_int())

# Accuracy on the original scale
sarima_mse = mean_squared_error(test, sarima_forecast)
sarima_rmse = np.sqrt(sarima_mse)
sarima_abs_pct_error = np.abs((test - sarima_forecast) / test)
sarima_mape = np.mean(sarima_abs_pct_error) * 100

print(
    "\nSARIMA Forecast Performance:\n"
    f"  RMSE: {sarima_rmse:.2f}\n"
    f"  MAPE: {sarima_mape:.2f}%"
)

## Prophet Model for Comparison

In [None]:
if not HAS_PROPHET:
    print("Prophet not available")
else:
    # Training frame with the ds / y columns passed to Prophet
    train_prophet = pd.DataFrame({
        'ds': air['ds'].iloc[:train_size],
        'y': train,
    })

    print("Fitting Prophet Model")
    print("=" * 50)

    # Yearly seasonality only, applied multiplicatively
    prophet_settings = {
        'yearly_seasonality': True,
        'weekly_seasonality': False,
        'daily_seasonality': False,
        'seasonality_mode': 'multiplicative',
    }
    prophet_model = Prophet(**prophet_settings)
    prophet_model.fit(train_prophet)

    # Extend the frame by the hold-out horizon and keep only those last rows
    horizon = len(test)
    future = prophet_model.make_future_dataframe(periods=horizon, freq='MS')
    prophet_forecast_df = prophet_model.predict(future)
    prophet_forecast = prophet_forecast_df['yhat'].tail(horizon).to_numpy()

    # Same accuracy measures as computed for SARIMA
    prophet_rmse = np.sqrt(mean_squared_error(test, prophet_forecast))
    prophet_abs_pct_error = np.abs((test - prophet_forecast) / test)
    prophet_mape = np.mean(prophet_abs_pct_error) * 100

    print(
        "\nProphet Forecast Performance:\n"
        f"  RMSE: {prophet_rmse:.2f}\n"
        f"  MAPE: {prophet_mape:.2f}%"
    )

In [None]:
# Top panel: forecasts against actuals; bottom panel: forecast errors
fig, axes = plt.subplots(2, 1, figsize=(14, 8))
forecast_ax, error_ax = axes

# --- Forecast panel ---
forecast_ax.plot(air['ds'][:train_size], train, color=COLORS['blue'], linewidth=1, label='Training')
forecast_ax.plot(test_dates, test, color=COLORS['blue'], linewidth=1.5, label='Actual')
forecast_ax.plot(
    test_dates, sarima_forecast,
    color=COLORS['green'], linewidth=1.5, linestyle='--',
    label=f'SARIMA (RMSE={sarima_rmse:.1f})',
)

if HAS_PROPHET:
    forecast_ax.plot(
        test_dates, prophet_forecast,
        color=COLORS['orange'], linewidth=1.5, linestyle=':',
        label=f'Prophet (RMSE={prophet_rmse:.1f})',
    )

# Dotted vertical line at the first hold-out month
split_date = air['ds'].iloc[train_size]
forecast_ax.axvline(x=split_date, color='black', linestyle=':', alpha=0.5)
forecast_ax.set_title('Air Passengers: SARIMA vs Prophet Forecast', fontweight='bold')
forecast_ax.set_ylabel('Passengers (thousands)')
forecast_ax.legend(loc='upper left')

# --- Error panel (actual minus forecast) ---
sarima_errors = test - sarima_forecast
error_ax.bar(test_dates, sarima_errors, width=20, alpha=0.7, color=COLORS['green'], label='SARIMA')

if HAS_PROPHET:
    prophet_errors = test - prophet_forecast
    # Prophet bars are shifted 10 days to the right of the SARIMA bars
    shifted_dates = test_dates + pd.Timedelta(days=10)
    error_ax.bar(shifted_dates, prophet_errors, width=20,
                 alpha=0.7, color=COLORS['orange'], label='Prophet')

error_ax.axhline(y=0, color='black', linewidth=0.5)
error_ax.set_title('Forecast Errors', fontweight='bold')
error_ax.set_xlabel('Date')
error_ax.set_ylabel('Error')
error_ax.legend()

plt.tight_layout()
plt.show()

---
# Case Study 3: US Retail Sales with Structural Break

Real-world challenge: COVID-19 created a massive structural break in economic data.

In [None]:
# Load Retail Sales
retail = get_retail_sales()

# Derive the months of the extremes from the data (as the Air Passengers
# overview does) so the labels cannot drift from the values printed beside them.
retail_min_month = retail.loc[retail['y'].idxmin(), 'ds'].strftime('%B %Y')
retail_max_month = retail.loc[retail['y'].idxmax(), 'ds'].strftime('%B %Y')

print("US Retail Sales Data Overview")
print("="*50)
print(f"Period: {retail['ds'].min().strftime('%Y-%m')} to {retail['ds'].max().strftime('%Y-%m')}")
print(f"Observations: {len(retail)} months")
print("\nKey Events:")
print("  - COVID-19 Impact: March-April 2020")
print(f"  - Minimum: ${retail['y'].min():.1f}B ({retail_min_month})")
print(f"  - Maximum: ${retail['y'].max():.1f}B ({retail_max_month})")

In [None]:
# Retail sales with the COVID window shaded and a linear trend on each side
fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(retail['ds'], retail['y'], color=COLORS['blue'], linewidth=1.5)
covid_window = (pd.Timestamp('2020-03-01'), pd.Timestamp('2020-05-01'))
ax.axvspan(*covid_window, alpha=0.3, color=COLORS['red'], label='COVID-19 Impact')

# Straight-line fit over the months before March 2020
pre_covid = retail[retail['ds'] < '2020-03-01']
pre_steps = np.arange(len(pre_covid))
z1 = np.polyfit(pre_steps, pre_covid['y'], 1)
ax.plot(pre_covid['ds'], np.polyval(z1, pre_steps),
        color=COLORS['gray'], linewidth=2, linestyle='--', label='Pre-COVID Trend')

# Straight-line fit over the months from June 2020 onward
post_covid = retail[retail['ds'] >= '2020-06-01']
post_steps = np.arange(len(post_covid))
z2 = np.polyfit(post_steps, post_covid['y'], 1)
ax.plot(post_covid['ds'], np.polyval(z2, post_steps),
        color=COLORS['green'], linewidth=2, linestyle='--', label='Post-COVID Trend')

ax.set_title('US Retail Sales (2018-2023): COVID-19 Structural Break', fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Sales ($ billions)')
ax.legend()

plt.tight_layout()
plt.show()

print(
    "\nChallenge: Traditional ARIMA struggles with structural breaks.\n"
    "Solution: Prophet's changepoint detection or use post-COVID data only."
)

In [None]:
if HAS_PROPHET:
    # Work on a copy so the original retail frame is left as loaded
    retail_prophet = retail.copy()

    # Hold out the final 12 months for evaluation
    holdout_months = 12
    train_retail = retail_prophet.iloc[:-holdout_months]
    test_retail = retail_prophet.iloc[-holdout_months:]

    print("Fitting Prophet with Changepoint Detection")
    print("=" * 50)

    retail_settings = {
        'changepoint_prior_scale': 0.1,  # More flexible for COVID
        'seasonality_mode': 'multiplicative',
        'yearly_seasonality': True,
    }
    prophet_retail = Prophet(**retail_settings)
    prophet_retail.fit(train_retail)

    # Predict over the training span plus the hold-out months
    future_retail = prophet_retail.make_future_dataframe(periods=holdout_months, freq='MS')
    forecast_retail = prophet_retail.predict(future_retail)

    # Score only the hold-out rows
    pred_retail = forecast_retail['yhat'].tail(holdout_months).to_numpy()
    actual_retail = test_retail['y'].values
    retail_rmse = np.sqrt(mean_squared_error(test_retail['y'], pred_retail))
    retail_mape = np.mean(np.abs((actual_retail - pred_retail) / actual_retail)) * 100

    print(
        "\nProphet Forecast Performance:\n"
        f"  RMSE: ${retail_rmse:.2f}B\n"
        f"  MAPE: {retail_mape:.2f}%"
    )

    # Component plot of the fitted model
    fig = prophet_retail.plot_components(forecast_retail)
    plt.suptitle('Prophet Components: US Retail Sales', fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.show()

---
# Summary: Model Selection Guide

In [None]:
# Print a reference card drawn with box-drawing characters: the recommended
# model (and alternatives) per data type, followed by the eight workflow steps.
# The text is pre-formatted, so column alignment depends on its exact spacing.
print("""
╔══════════════════════════════════════════════════════════════════════╗
║                    TIME SERIES MODEL SELECTION GUIDE                  ║
╠══════════════════════════════════════════════════════════════════════╣
║                                                                      ║
║  DATA TYPE              RECOMMENDED MODEL         ALTERNATIVES       ║
║  ──────────────────────────────────────────────────────────────────  ║
║                                                                      ║
║  Financial returns      ARIMA-GARCH               EGARCH, GJR-GARCH  ║
║  (volatility)                                                        ║
║                                                                      ║
║  Single seasonality     SARIMA                    ETS, Prophet       ║
║  (monthly, quarterly)                                                ║
║                                                                      ║
║  Multiple seasonality   Prophet, TBATS            Dynamic regression ║
║  (daily+weekly+annual)                                               ║
║                                                                      ║
║  Structural breaks      Prophet                   Piecewise models   ║
║  (COVID, regime change)                                              ║
║                                                                      ║
║  Multiple time series   VAR, VECM                 Factor models      ║
║  (interdependencies)                                                 ║
║                                                                      ║
╠══════════════════════════════════════════════════════════════════════╣
║                         WORKFLOW STEPS                               ║
║  1. Visualize and explore the data                                   ║
║  2. Test for stationarity (ADF, KPSS)                                ║
║  3. Apply transformations if needed (log, diff)                      ║
║  4. Identify patterns (ACF/PACF, decomposition)                      ║
║  5. Fit candidate models                                             ║
║  6. Check diagnostics (residuals, Ljung-Box)                         ║
║  7. Compare with out-of-sample forecast                              ║
║  8. Select best model for the task                                   ║
╚══════════════════════════════════════════════════════════════════════╝
""")

In [None]:
# Final comparison table
# Air Passengers row: report whichever fitted model achieved the lower
# out-of-sample RMSE rather than assuming SARIMA won. Prophet's metrics exist
# only when the package was importable, hence the HAS_PROPHET guard.
air_best_model, air_best_rmse, air_best_mape = 'SARIMA(2,1,1)', sarima_rmse, sarima_mape
if HAS_PROPHET and prophet_rmse < sarima_rmse:
    air_best_model, air_best_rmse, air_best_mape = 'Prophet', prophet_rmse, prophet_mape

print("\nCourse Case Studies Summary")
print("="*70)
print(f"{'Dataset':<20} {'Best Model':<20} {'RMSE':<15} {'MAPE (%)':<10}")
print("-"*70)
# The GARCH case study models volatility, so no point-forecast error is reported
print(f"{'S&P 500 Returns':<20} {'ARIMA-GARCH(1,1)':<20} {'N/A (vol)':<15} {'N/A':<10}")
print(f"{'Air Passengers':<20} {air_best_model:<20} {air_best_rmse:<15.2f} {air_best_mape:<10.2f}")

# Retail metrics are defined only when Prophet was available
if HAS_PROPHET:
    print(f"{'US Retail Sales':<20} {'Prophet':<20} {retail_rmse:<15.2f} {retail_mape:<10.2f}")

print("-"*70)
print("\nKey Takeaways:")
print("• Financial data: Focus on volatility (GARCH), not mean prediction")
print("• Seasonal data: SARIMA works well for clean, single-season data")
print("• Structural breaks: Prophet's flexibility handles regime changes")
print("• Always validate with out-of-sample testing!")