# TSA Chapter 1: Stochastic Processes and Stationarity

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_Ch1/TSA_ch1_stationarity/TSA_ch1_stationarity.ipynb)

This notebook demonstrates:
- Stochastic processes: multiple realizations
- Strict vs weak (covariance) stationarity
- Counterexample: weakly stationary but NOT strictly stationary
- Ergodicity: time average converges to ensemble average
- Stationary vs non-stationary examples (S&P 500)

In [None]:
!pip install yfinance matplotlib numpy scipy statsmodels pandas -q

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from statsmodels.tsa.stattools import acf

try:
    import yfinance as yf
    YF_AVAILABLE = True
except ImportError:
    YF_AVAILABLE = False

In [None]:
# Style configuration
# Named hex colours shared by every chart in this notebook.
COLORS = dict(
    blue='#1A3A6E',
    red='#DC3545',
    green='#2E7D32',
    orange='#E67E22',
    gray='#666666',
    purple='#8E44AD',
    light_blue='#4A7ABE',
    light_red='#E8808A',
)

# Backgrounds: no fill on screen, transparent when saved.
plt.rcParams.update({
    'axes.facecolor': 'none',
    'figure.facecolor': 'none',
    'savefig.transparent': True,
})

# Axes frame: hide the top/right spines, no grid, thin dark edges.
plt.rcParams.update({
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': False,
    'axes.edgecolor': '#333333',
    'axes.linewidth': 0.8,
})

# Text sizes.
plt.rcParams.update({
    'font.size': 9,
    'axes.titlesize': 10,
    'axes.labelsize': 9,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    'legend.fontsize': 8,
})

# Resolution and default line width.
plt.rcParams.update({
    'figure.dpi': 150,
    'lines.linewidth': 1.2,
})

# Seed the global NumPy RNG so the simulated series are reproducible.
np.random.seed(42)

def save_chart(fig, name):
    """Save ``fig`` as ``<name>.pdf`` and ``<name>.png``, then mirror both files.

    The two files are first written relative to the current working
    directory. A second copy is then attempted under ``../../../charts/``.
    That mirror is best-effort: if the target cannot be written (for example
    the directory does not exist), a short note is printed and the function
    carries on instead of raising.

    Parameters
    ----------
    fig : object exposing ``savefig(path, **kwargs)``
        The figure to write out.
    name : str
        File name stem, without extension.
    """
    save_kwargs = {'bbox_inches': 'tight', 'transparent': True, 'dpi': 150}
    extensions = ('pdf', 'png')

    # Primary copies: failures here are real errors and are allowed to propagate.
    for ext in extensions:
        fig.savefig(f'{name}.{ext}', **save_kwargs)

    charts_path = os.path.join('..', '..', '..', 'charts', name)
    try:
        for ext in extensions:
            fig.savefig(f'{charts_path}.{ext}', **save_kwargs)
    except OSError as exc:
        # The same figure was just written successfully above, so only a
        # filesystem problem is expected here; report it rather than hide it.
        print(f'Note: charts copy of {name} skipped ({exc})')
    print(f'Saved: {name}.pdf + .png')

def add_legend_below(ax, ncol=3):
    """Attach a frameless legend to ``ax``, anchored under the plot in ``ncol`` columns."""
    legend_options = {
        'loc': 'upper center',
        'bbox_to_anchor': (0.5, -0.18),
        'ncol': ncol,
        'frameon': False,
    }
    ax.legend(**legend_options)

def generate_ar1(n, phi, sigma=1.0):
    """Simulate ``n`` observations of a zero-mean AR(1) series.

    The recursion is ``x[t] = phi * x[t-1] + eps[t]`` with Gaussian shocks of
    standard deviation ``sigma`` drawn from the global NumPy RNG. The series
    always starts at ``x[0] = 0``; ``n`` shocks are drawn, but the first one
    is never used.

    Returns a 1-D float array of length ``n``.
    """
    shocks = np.random.normal(0, sigma, n)
    series = np.zeros(n)
    # Start at index 1: series[0] stays at its initial value of zero.
    for t, shock in enumerate(shocks[1:], start=1):
        series[t] = phi * series[t - 1] + shock
    return series

def generate_random_walk(n, sigma=1.0, drift=0.0):
    """Simulate a Gaussian random walk of length ``n`` with an optional linear drift.

    The result is the running sum of ``n`` N(0, sigma^2) shocks (drawn from
    the global NumPy RNG) plus ``drift * t`` for ``t = 0, ..., n-1``.
    """
    innovations = np.random.normal(0, sigma, n)
    stochastic_part = np.cumsum(innovations)
    deterministic_part = drift * np.arange(n)
    return stochastic_part + deterministic_part

def generate_white_noise(n, sigma=1.0):
    """Return ``n`` independent N(0, sigma^2) draws from the global NumPy RNG."""
    noise = np.random.normal(loc=0, scale=sigma, size=n)
    return noise

In [None]:
# Chart: ch1_def_stochastic
# Eight independent AR(1) paths drawn on one axis to show that a stochastic
# process is a family of possible realizations.
np.random.seed(42)
n = 200
colors_list = [
    COLORS[key]
    for key in ('blue', 'red', 'green', 'orange',
                'purple', 'gray', 'light_blue', 'light_red')
]

fig, ax = plt.subplots(figsize=(7, 3.0))
# One path per palette entry; only the first five get a legend entry.
for i, line_color in enumerate(colors_list):
    y = generate_ar1(n, phi=0.7, sigma=1.0)
    line_label = f'Realization {i+1}' if i < 5 else None
    ax.plot(y, color=line_color, alpha=0.6, linewidth=0.8, label=line_label)

# Zero reference line.
ax.axhline(0, color='black', linewidth=0.5, linestyle='--', alpha=0.4)
ax.set_title('Multiple realizations of an AR(1) stochastic process', fontsize=10, fontweight='bold')
ax.set_xlabel('Time (t)')
ax.set_ylabel(r'$X_t(\omega)$')
add_legend_below(ax, ncol=5)
# Reserve the bottom 5% of the figure for the legend placed under the axes.
fig.tight_layout(rect=[0, 0.05, 1, 1])
save_chart(fig, 'ch1_def_stochastic')
plt.show()

In [None]:
# Chart: ch1_def_strict_stationarity
# Strict stationarity: the distribution looks the same in two different time windows
np.random.seed(42)
n = 300
y = generate_ar1(n, phi=0.5, sigma=1.0)


def _plot_window_density(ax, sample, color, title):
    """Draw a density histogram of ``sample`` with a normal curve matched to its mean/std."""
    ax.hist(sample, bins=15, color=color, alpha=0.6, edgecolor='white', density=True)
    grid = np.linspace(sample.min()-1, sample.max()+1, 100)
    ax.plot(grid, stats.norm.pdf(grid, sample.mean(), sample.std()), color=color, linewidth=1.5)
    ax.set_title(title, fontsize=9, fontweight='bold')
    ax.set_xlabel(r'$X_t$')
    ax.set_ylabel('Density')


fig, axes = plt.subplots(1, 3, figsize=(8, 2.8))
series_ax, window1_ax, window2_ax = axes

# Left panel: the full series with the two comparison windows shaded.
series_ax.plot(y, color=COLORS['blue'], linewidth=0.7)
series_ax.axhline(0, color=COLORS['gray'], linewidth=0.5, linestyle='--')
series_ax.axvspan(50, 100, alpha=0.15, color=COLORS['red'])
series_ax.axvspan(180, 230, alpha=0.15, color=COLORS['green'])
series_ax.set_title('Stationary series', fontsize=9, fontweight='bold')
series_ax.set_xlabel('Time')
series_ax.set_ylabel(r'$X_t$')

# Middle and right panels: empirical distribution inside each shaded window.
w1 = y[50:100]
_plot_window_density(window1_ax, w1, COLORS['red'], 'Window 1 (t=50-100)')

w2 = y[180:230]
_plot_window_density(window2_ax, w2, COLORS['green'], 'Window 2 (t=180-230)')

fig.tight_layout(rect=[0, 0.05, 1, 1])
save_chart(fig, 'ch1_def_strict_stationarity')
plt.show()

In [None]:
# Chart: ch1_def_weak_stationarity
# Weak stationarity: constant mean, constant variance, autocovariance depending only on the lag
np.random.seed(42)
n = 300
y = generate_ar1(n, phi=0.5, sigma=1.0)

fig, axes = plt.subplots(1, 3, figsize=(8, 2.8))
ax_series, ax_mean, ax_acf = axes

# Panel 1: the series with its sample mean and a +/- 2 standard deviation band.
mu = y.mean()
sigma = y.std()
upper_band = mu + 2*sigma
lower_band = mu - 2*sigma
ax_series.plot(y, color=COLORS['blue'], linewidth=0.7, label=r'$X_t$')
ax_series.axhline(mu, color=COLORS['red'], linewidth=1.0, linestyle='--', label=r'$\mu$')
ax_series.axhline(upper_band, color=COLORS['orange'], linewidth=0.8, linestyle=':', label=r'$\mu \pm 2\sigma$')
ax_series.axhline(lower_band, color=COLORS['orange'], linewidth=0.8, linestyle=':')
ax_series.set_title(r'$E[X_t] = \mu$, $Var(X_t) = \sigma^2$', fontsize=9, fontweight='bold')
ax_series.set_xlabel('Time')
ax_series.set_ylabel(r'$X_t$')
ax_series.legend(fontsize=7, loc='upper right', frameon=False)

# Panel 2: 50-observation rolling mean against the full-sample mean.
window = 50
rolling_mean = pd.Series(y).rolling(window).mean()
ax_mean.plot(rolling_mean, color=COLORS['red'], linewidth=1.0, label='Rolling mean')
ax_mean.axhline(mu, color=COLORS['gray'], linewidth=0.8, linestyle='--', label=r'$\mu$ total')
ax_mean.set_title('Constant mean over time', fontsize=9, fontweight='bold')
ax_mean.set_xlabel('Time')
ax_mean.set_ylabel('Mean')
ax_mean.legend(fontsize=7, loc='upper right', frameon=False)

# Panel 3: sample ACF up to lag 20 with the +/- 1.96/sqrt(T) reference lines.
acf_vals = acf(y, nlags=20)
lags = range(len(acf_vals))
acf_bound = 1.96/np.sqrt(n)
ax_acf.bar(lags, acf_vals, color=COLORS['blue'], width=0.5, alpha=0.7)
ax_acf.axhline(acf_bound, color=COLORS['red'], linewidth=0.8, linestyle='--', label=r'$\pm 1.96/\sqrt{T}$')
ax_acf.axhline(-acf_bound, color=COLORS['red'], linewidth=0.8, linestyle='--')
ax_acf.set_title(r'ACF: $\gamma(h)$ depends only on $h$', fontsize=9, fontweight='bold')
ax_acf.set_xlabel('Lag')
ax_acf.set_ylabel(r'$\hat{\rho}(h)$')
ax_acf.legend(fontsize=7, loc='upper right', frameon=False)

fig.tight_layout(w_pad=2.0)
save_chart(fig, 'ch1_def_weak_stationarity')
plt.show()

In [None]:
# Chart: ch1_counterexample_stationarity
# Weakly stationary but NOT strictly stationary
np.random.seed(42)
n = 200

# A chi-square with k degrees of freedom has mean k and variance 2k, so
# (chi2(5) - 5) / sqrt(10) has mean 0 and variance 1, matching N(0, 1) in its
# first two moments while keeping a skewed shape.
chi_df = 5
chi_scale = np.sqrt(2 * chi_df)


def _draw_observation(t):
    """Standard normal draw at even ``t``, standardized chi-square draw at odd ``t``."""
    if t % 2 == 0:
        return np.random.normal(0, 1)
    return (np.random.chisquare(chi_df) - chi_df) / chi_scale


# Draws are made one at a time, in time order, from the global RNG.
x = np.array([_draw_observation(t) for t in range(n)])

fig, axes = plt.subplots(1, 3, figsize=(8, 2.5))
ax_full, ax_even, ax_odd = axes

# Panel 1: the interleaved series.
ax_full.plot(x, color=COLORS['blue'], linewidth=0.6)
ax_full.axhline(0, color=COLORS['gray'], linewidth=0.5, linestyle='--')
ax_full.set_title('Complete series', fontsize=9, fontweight='bold')
ax_full.set_xlabel('Time')
ax_full.set_ylabel(r'$X_t$')

# Panel 2: even-indexed observations against the N(0, 1) density.
x_even = x[0::2]
xr = np.linspace(-4, 4, 100)
ax_even.hist(x_even, bins=20, color=COLORS['green'], alpha=0.6, edgecolor='white', density=True, label='t even: N(0,1)')
ax_even.plot(xr, stats.norm.pdf(xr, 0, 1), color=COLORS['green'], linewidth=1.5)
ax_even.set_title('t even: Symmetric', fontsize=9, fontweight='bold')
ax_even.set_xlabel(r'$X_t$')
ax_even.legend(fontsize=7, loc='upper right', frameon=False)

# Panel 3: odd-indexed observations against the density of the standardized
# chi-square (change of variables: evaluate at scale*x + df, multiply by scale).
x_odd = x[1::2]
xr2 = np.linspace(-3, 5, 100)
chi_pdf = stats.chi2.pdf(xr2 * chi_scale + chi_df, chi_df) * chi_scale
ax_odd.hist(x_odd, bins=20, color=COLORS['red'], alpha=0.6, edgecolor='white', density=True, label=r't odd: scaled $\chi^2$')
ax_odd.plot(xr2, chi_pdf, color=COLORS['red'], linewidth=1.5)
ax_odd.set_title('t odd: Asymmetric', fontsize=9, fontweight='bold')
ax_odd.set_xlabel(r'$X_t$')
ax_odd.legend(fontsize=7, loc='upper right', frameon=False)

fig.tight_layout(w_pad=2.0)
save_chart(fig, 'ch1_counterexample_stationarity')
plt.show()

In [None]:
# Chart: ch1_ergodicity
# Ergodicity illustration: the running time average of one path and the
# running average across many paths both settle near the same mean.
np.random.seed(42)
n = 500
mu_true = 2.0
num_realizations = 30

fig, axes = plt.subplots(1, 2, figsize=(8, 3.0))
ax_time, ax_paths = axes

# Left panel: running mean of a single AR(1) path shifted to mean mu_true.
y = generate_ar1(n, phi=0.5, sigma=1.0) + mu_true
observation_counts = np.arange(1, n+1)
cumulative_mean = np.cumsum(y) / observation_counts
ax_time.plot(cumulative_mean, color=COLORS['blue'], linewidth=1.0, label=r'$\bar{X}_T$ (time average)')
ax_time.axhline(mu_true, color=COLORS['red'], linewidth=1.2, linestyle='--', label=r'$\mu = 2.0$ (population mean)')
ax_time.set_title('Time average (one realization)', fontsize=9, fontweight='bold')
ax_time.set_xlabel('T (no. observations)')
ax_time.set_ylabel(r'$\bar{X}_T$')
ax_time.set_ylim(mu_true - 2, mu_true + 2)
ax_time.legend(fontsize=7, loc='upper right', frameon=False)

# Right panel: simulate the realizations, keep each one's sample mean, and
# draw the first 100 points of the first eight paths as a faint backdrop.
ensemble_means = []
for i in range(num_realizations):
    yi = generate_ar1(n, phi=0.5, sigma=1.0) + mu_true
    ensemble_means.append(yi.mean())
    if i < 8:
        ax_paths.plot(yi[:100], color=COLORS['gray'], alpha=0.2, linewidth=0.5)

# Running average of the per-realization means, drawn on a secondary y-axis.
realization_counts = np.arange(1, num_realizations+1)
cumulative_ensemble = np.cumsum(ensemble_means) / realization_counts
ax2 = ax_paths.twinx()
ax2.plot(range(1, num_realizations+1), cumulative_ensemble, color=COLORS['green'], linewidth=1.5,
         marker='o', markersize=3, label='Ensemble mean')
ax2.axhline(mu_true, color=COLORS['red'], linewidth=1.2, linestyle='--', label=r'$\mu = 2.0$')
ax2.set_ylabel('Ensemble mean', color=COLORS['green'])
ax2.spines['top'].set_visible(False)

ax_paths.set_title('Ensemble mean (multiple realizations)', fontsize=9, fontweight='bold')
ax_paths.set_xlabel('Time / No. realizations')
ax_paths.set_ylabel(r'$X_t$')
ax2.legend(fontsize=7, loc='upper right', frameon=False)

fig.tight_layout(w_pad=2.0)
save_chart(fig, 'ch1_ergodicity')
plt.show()

In [None]:
# Chart: ch1_stationary_nonstationary_examples
# S&P 500 prices (nonstationary) vs log returns (stationary)

# Load S&P 500 data.
# Any failure inside the try block (network error, unexpected frame layout, or
# too few rows coming back) switches YF_AVAILABLE off so that the synthetic
# fallback below supplies `prices`, `log_returns` and `dates` instead.
if YF_AVAILABLE:
    try:
        df = yf.download('^GSPC', start='2020-01-01', end='2025-12-31', progress=False)
        close = df['Close'].squeeze().dropna()
        # A download can come back empty without raising (NOTE(review): confirm
        # against the installed yfinance version). At least two prices are
        # needed to form a single log return, so treat fewer as a failure
        # rather than plotting empty panels.
        if len(close) < 2:
            raise ValueError(f'only {len(close)} closing prices returned')
        prices = close.values
        log_returns = np.diff(np.log(prices))
        dates = close.index
        print(f'S&P 500 loaded: {len(close)} observations')
    except Exception as e:
        # Deliberately broad: this is a best-effort download with a fallback.
        print(f'Yahoo Finance failed: {e}, using synthetic data')
        YF_AVAILABLE = False

if not YF_AVAILABLE:
    # Synthetic stand-in: i.i.d. Gaussian daily log returns compounded from a
    # starting level of 3000, indexed by business days from 2020-01-02.
    np.random.seed(42)
    n_synth = 1250
    log_ret = np.random.normal(0.0003, 0.012, n_synth)
    prices = 3000 * np.exp(np.cumsum(log_ret))
    # Same length convention as np.diff(np.log(prices)): one fewer than prices.
    log_returns = log_ret[1:]
    dates = pd.date_range('2020-01-02', periods=n_synth, freq='B')

# Two side-by-side panels: price level on the left, log returns on the right.
fig, axes = plt.subplots(1, 2, figsize=(8, 2.8))
price_ax, return_ax = axes

# Align the date index with each series; returns start one observation later.
price_dates = dates[:len(prices)]
return_dates = dates[1:len(log_returns)+1]

price_ax.plot(price_dates, prices, color=COLORS['blue'], linewidth=0.9)
price_ax.set_title('S&P 500 Prices (Non-stationary)', fontsize=9, fontweight='bold')
price_ax.set_ylabel('Price')

return_ax.plot(return_dates, log_returns, color=COLORS['red'], linewidth=0.5, alpha=0.8)
return_ax.axhline(0, color=COLORS['gray'], linewidth=0.5, linestyle='--')
return_ax.set_title('S&P 500 Log returns (Stationary)', fontsize=9, fontweight='bold')
return_ax.set_ylabel('Return')

# Shared x-axis cosmetics for both panels.
for panel in (price_ax, return_ax):
    panel.set_xlabel('Date')
    panel.tick_params(axis='x', rotation=30)

fig.tight_layout(rect=[0, 0.05, 1, 1])
save_chart(fig, 'ch1_stationary_nonstationary_examples')
plt.show()