# TSA Chapter 1: Unit Root Tests (ADF and KPSS)

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_Ch1/TSA_ch1_unit_root_tests/TSA_ch1_unit_root_tests.ipynb)

This notebook demonstrates:
- ADF test on S&P 500 prices (non-stationary) and returns (stationary)
- KPSS test for stationarity
- Combined ADF + KPSS testing framework
- Visual comparison of stationary vs non-stationary series

In [None]:
!pip install yfinance matplotlib numpy scipy statsmodels pandas -q

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller, kpss
import warnings
warnings.filterwarnings('ignore')

try:
    import yfinance as yf
    YF_AVAILABLE = True
except ImportError:
    YF_AVAILABLE = False

In [None]:
# Style configuration
# Named palette shared by every figure in this notebook.
COLORS = dict(
    blue='#1A3A6E',
    red='#DC3545',
    green='#2E7D32',
    orange='#E67E22',
    gray='#666666',
    purple='#8E44AD',
)

# Matplotlib defaults, grouped by purpose and applied in a single update.
_TRANSPARENT_CANVAS = {
    'axes.facecolor': 'none',
    'figure.facecolor': 'none',
    'savefig.transparent': True,
}
_MINIMAL_AXES = {
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    'axes.edgecolor': '#333333',
    'axes.linewidth': 0.8,
}
_SMALL_TEXT = {
    'font.size': 9,
    'axes.titlesize': 10,
    'axes.labelsize': 9,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'legend.fontsize': 8,
}
_RENDERING = {
    'figure.dpi': 150,
    'lines.linewidth': 1.2,
}
plt.rcParams.update({**_TRANSPARENT_CANVAS, **_MINIMAL_AXES, **_SMALL_TEXT, **_RENDERING})

# Seed NumPy's global generator so the simulated series are reproducible.
np.random.seed(42)

def save_chart(fig, name):
    """Save ``fig`` as ``<name>.pdf`` and ``<name>.png`` and mirror both files.

    Parameters
    ----------
    fig : object with a ``savefig(path, **kwargs)`` method
        The figure to write.
    name : str
        Output path without extension, relative to the working directory.

    Notes
    -----
    The two primary files are always written; errors there propagate.
    The mirror copies go to ``../../../charts/<name>.*`` and are optional:
    they are skipped when that folder does not exist, and an ``OSError``
    raised while writing them is reported rather than silently discarded.
    """
    save_kwargs = dict(bbox_inches='tight', transparent=True, dpi=150)
    extensions = ('pdf', 'png')

    for ext in extensions:
        fig.savefig(f'{name}.{ext}', **save_kwargs)

    charts_path = os.path.join('..', '..', '..', 'charts', name)
    charts_dir = os.path.dirname(charts_path)
    # Outside the repository layout (e.g. on Colab) the shared folder is
    # absent; skipping is the expected outcome there, not an error.
    if os.path.isdir(charts_dir):
        try:
            for ext in extensions:
                fig.savefig(f'{charts_path}.{ext}', **save_kwargs)
        except OSError as exc:
            # Best-effort copy: report the problem but keep the notebook running.
            print(f'Warning: could not copy {name} to {charts_dir}: {exc}')

    print(f'Saved: {name}.pdf + .png')

def generate_ar1(n, phi, sigma=1.0):
    """Simulate ``n`` points of an AR(1) path that starts at zero.

    The recursion is ``x[t] = phi * x[t-1] + eps[t]`` for ``t >= 1`` with
    ``x[0] = 0``. ``n`` Gaussian shocks (mean 0, scale ``sigma``) are drawn
    from NumPy's global random state; the first shock is drawn but not used.

    Returns a 1-D float array of length ``n``.
    """
    path = np.zeros(n)
    shocks = np.random.normal(0, sigma, n)
    for step, shock in enumerate(shocks[1:], start=1):
        path[step] = phi * path[step - 1] + shock
    return path

def generate_random_walk(n, sigma=1.0, drift=0.0):
    """Simulate ``n`` points of a Gaussian random walk with an optional linear drift.

    The walk is the cumulative sum of ``n`` shocks (mean 0, scale ``sigma``)
    drawn from NumPy's global random state; ``drift * t`` for ``t = 0..n-1``
    is then added on top.

    Returns a 1-D float array of length ``n``.
    """
    increments = np.random.normal(0, sigma, n)
    level = np.cumsum(increments)
    trend = drift * np.arange(n)
    return level + trend

In [None]:
# Load S&P 500 data
# Produces the names used by later cells: `prices` (close levels), `log_ret`
# (daily log returns, one element shorter than `prices`), `dates`, and the
# flag `SP500_LOADED`.
SP500_LOADED = False
if YF_AVAILABLE:
    try:
        df = yf.download('^GSPC', start='2020-01-01', end='2025-12-31', progress=False)
        close = df['Close'].squeeze().dropna()
        # yfinance may hand back an empty frame instead of raising when the
        # request fails. Without this check the flag would flip to True and
        # the ADF cell would later fail on empty arrays. Two closes are the
        # minimum needed to form one log return.
        if len(close) < 2:
            raise ValueError('download returned no usable closing prices')
        prices = close.values
        log_ret = np.diff(np.log(prices))
        dates = close.index
        SP500_LOADED = True
        print(f'S&P 500 loaded: {len(close)} observations')
    except Exception as e:
        # Any download/parsing problem routes to the synthetic fallback below.
        print(f'Yahoo Finance failed: {e}')

if not SP500_LOADED:
    # Offline fallback: seeded synthetic log returns compounded from a level of 3000.
    print('Using synthetic price data in place of the S&P 500 download')
    np.random.seed(42)
    n_synth = 1250
    lr = np.random.normal(0.0003, 0.012, n_synth)
    prices = 3000 * np.exp(np.cumsum(lr))
    # Dropping the first draw matches np.diff(np.log(prices)) on the path above.
    log_ret = lr[1:]
    dates = pd.date_range('2020-01-02', periods=n_synth, freq='B')

In [None]:
# Chart: adf_test_visualization
# ADF test visualization with prices and returns
# The ADF null hypothesis is a unit root, so a p-value below ADF_ALPHA rejects
# it. Panel verdicts are derived from the computed p-values instead of being
# hard-coded, and the series label follows SP500_LOADED, so the figure cannot
# contradict its own statistics or present synthetic data as the S&P 500.
ADF_ALPHA = 0.05

adf_prices = adfuller(prices, maxlag=20, autolag='AIC')
adf_returns = adfuller(log_ret, maxlag=20, autolag='AIC')

series_name = 'S&P 500' if SP500_LOADED else 'Synthetic'


def _adf_verdict(pvalue, alpha=ADF_ALPHA):
    """Map an ADF p-value to ``(title_label, note_text, color_key)``."""
    if pvalue < alpha:
        return 'Stationary', 'Reject $H_0$\n(stationary)', 'green'
    return 'Non-stationary', 'Do not reject $H_0$\n(unit root)', 'red'


def _draw_adf_panel(ax, x, y, headline, pvalue, line_kwargs, ylabel):
    """Plot one series on ``ax`` and label it with its ADF outcome."""
    label, note, color_key = _adf_verdict(pvalue)
    ax.plot(x, y, **line_kwargs)
    ax.set_title(f'{headline}\n{label}', fontsize=9, fontweight='bold')
    ax.set_xlabel('Date')
    ax.set_ylabel(ylabel)
    ax.tick_params(axis='x', rotation=30)
    ax.annotate(note,
                xy=(0.5, 0.85), xycoords='axes fraction',
                fontsize=8, color=COLORS[color_key], fontweight='bold',
                ha='center',
                bbox=dict(boxstyle='round,pad=0.3', facecolor='white',
                          edgecolor=COLORS[color_key], alpha=0.8))


fig, axes = plt.subplots(1, 2, figsize=(8, 2.8))

# Left panel: price levels.
_draw_adf_panel(
    axes[0], dates[:len(prices)], prices,
    headline=f'{series_name} Prices: ADF = {adf_prices[0]:.2f} (p = {adf_prices[1]:.2f})',
    pvalue=adf_prices[1],
    line_kwargs=dict(color=COLORS['blue'], linewidth=0.8),
    ylabel='Price',
)

# Right panel: log returns. Very small p-values are shown as a bound.
p_str = f'p = {adf_returns[1]:.4f}' if adf_returns[1] >= 0.01 else 'p < 0.01'
_draw_adf_panel(
    axes[1], dates[1:len(log_ret)+1], log_ret,
    headline=f'{series_name} Returns: ADF = {adf_returns[0]:.1f} ({p_str})',
    pvalue=adf_returns[1],
    line_kwargs=dict(color=COLORS['green'], linewidth=0.4, alpha=0.8),
    ylabel='Return',
)
axes[1].axhline(0, color=COLORS['gray'], linewidth=0.5, linestyle='--')

fig.tight_layout(rect=[0, 0.05, 1, 1])
save_chart(fig, 'adf_test_visualization')
plt.show()

In [None]:
# Chart: ch1_stationarity (KPSS illustration)
# Two simulated paths side by side: an AR(1) series and a random walk. No KPSS
# statistic is computed in this cell; the panel labels are fixed text.
np.random.seed(42)
n = 200

# Simulate both paths first, keeping the draw order: AR(1), then random walk.
y_stat = generate_ar1(n, phi=0.5, sigma=1.0)
y_ns = generate_random_walk(n)

fig, axes = plt.subplots(1, 2, figsize=(8, 2.8))
ax_stat, ax_ns = axes

# Formatting shared by both panels.
title_font = dict(fontsize=9, fontweight='bold')
note_anchor = dict(xy=(0.5, 0.9), xycoords='axes fraction', ha='center')
stat_color = COLORS['green']
ns_color = COLORS['red']

# Left panel: stationary series with its sample mean as a dashed reference line.
ax_stat.plot(y_stat, color=stat_color, linewidth=0.7)
ax_stat.axhline(y_stat.mean(), color=ns_color, linewidth=0.8, linestyle='--')
ax_stat.set_title('KPSS: Do not reject $H_0$\n(Stationary)', color=stat_color, **title_font)
ax_stat.set_xlabel('Time')
ax_stat.set_ylabel(r'$X_t$')
ax_stat.annotate(
    '$H_0$: Stationary',
    fontsize=8, color=stat_color, fontweight='bold',
    bbox=dict(boxstyle='round', facecolor='white', edgecolor=stat_color, alpha=0.8),
    **note_anchor,
)

# Right panel: random walk.
ax_ns.plot(y_ns, color=ns_color, linewidth=0.8)
ax_ns.set_title('KPSS: Reject $H_0$\n(Non-stationary)', color=ns_color, **title_font)
ax_ns.set_xlabel('Time')
ax_ns.set_ylabel(r'$X_t$')
ax_ns.annotate(
    '$H_1$: Unit root',
    fontsize=8, color=ns_color, fontweight='bold',
    bbox=dict(boxstyle='round', facecolor='white', edgecolor=ns_color, alpha=0.8),
    **note_anchor,
)

fig.tight_layout(rect=[0, 0.05, 1, 1])
save_chart(fig, 'ch1_stationarity')
plt.show()

In [None]:
# Combined ADF + KPSS testing framework
# Seed once so the four simulated series below are reproducible. They all draw
# from NumPy's global generator, so the order of these statements determines
# which random values each series receives.
np.random.seed(42)
n = 500  # length of every test series

# Test series
ar1_stationary = generate_ar1(n, phi=0.7)  # AR(1) path with coefficient 0.7
random_walk = generate_random_walk(n)  # default arguments: sigma=1.0, drift=0.0
t_arr = np.arange(n)  # time index 0..n-1 for the deterministic trend
trend_stationary = 0.05 * t_arr + np.random.normal(0, 1, n)  # linear trend (slope 0.05) plus Gaussian noise, mean 0 and scale 1
rw_drift = generate_random_walk(n, drift=0.1)  # random walk plus 0.1 * t added on top

def run_tests(series, name, alpha=0.05):
    """Run ADF and KPSS on ``series`` and combine the two decisions.

    Parameters
    ----------
    series : array-like
        Observations passed unchanged to ``adfuller`` and ``kpss``.
    name : str
        Label copied into the result under ``'name'``.
    alpha : float, default 0.05
        Significance level: a test "rejects" when its p-value is below it.

    Returns
    -------
    dict
        Keys ``'name'``, ``'adf_stat'``, ``'adf_pval'``, ``'kpss_stat'``,
        ``'kpss_pval'`` and ``'conclusion'``. The conclusion is one of
        ``'STATIONARY'``, ``'UNIT ROOT'``, ``'TREND STAT.'`` or ``'UNCERTAIN'``.

    Notes
    -----
    ADF is run with ``autolag='AIC'``; KPSS with ``regression='c'`` and
    ``nlags='auto'``. ADF's null is a unit root and KPSS's null is
    stationarity, so the two rejections point in opposite directions.
    """
    # Only the first two entries (statistic, p-value) of each result are used.
    adf_stat, adf_pval = adfuller(series, autolag='AIC')[:2]
    kpss_stat, kpss_pval = kpss(series, regression='c', nlags='auto')[:2]

    # bool() so NumPy booleans index the lookup table like plain Python ones.
    adf_reject = bool(adf_pval < alpha)
    kpss_reject = bool(kpss_pval < alpha)

    # (ADF rejects unit root, KPSS rejects stationarity) -> verdict
    verdicts = {
        (True, False): 'STATIONARY',
        (False, True): 'UNIT ROOT',
        (True, True): 'TREND STAT.',
        (False, False): 'UNCERTAIN',
    }

    return {'name': name, 'adf_stat': adf_stat, 'adf_pval': adf_pval,
            'kpss_stat': kpss_stat, 'kpss_pval': kpss_pval,
            'conclusion': verdicts[(adf_reject, kpss_reject)]}

# Pair each simulated series with its display label, then test them in order.
labelled_series = (
    (ar1_stationary, 'AR(1) Stationary'),
    (random_walk, 'Random Walk'),
    (trend_stationary, 'Trend Stationary'),
    (rw_drift, 'RW with Drift'),
)
results = [run_tests(values, label) for values, label in labelled_series]

# Fixed-width text table: banner, column header, one row per series.
heavy_rule = '='*80
print(heavy_rule)
print('COMBINED ADF + KPSS TEST RESULTS')
print(heavy_rule)
print(f'{"Series":<20} {"ADF Stat":>10} {"ADF p-val":>10} {"KPSS Stat":>10} {"KPSS p-val":>10} {"Conclusion":>15}')
print('-'*80)
for r in results:
    print(f'{r["name"]:<20} {r["adf_stat"]:>10.3f} {r["adf_pval"]:>10.4f} {r["kpss_stat"]:>10.3f} {r["kpss_pval"]:>10.4f} {r["conclusion"]:>15}')

# Legend for the four possible conclusions.
print('\nDecision framework:')
for rule_text in (
    '  ADF rejects + KPSS does not reject => STATIONARY',
    '  ADF does not reject + KPSS rejects => UNIT ROOT',
    '  Both reject => TREND STATIONARY',
    '  Neither rejects => UNCERTAIN',
):
    print(rule_text)