# TSA Chapter 1: Wold Decomposition, Lag Operator and Differencing

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_Ch1/TSA_ch1_operators/TSA_ch1_operators.ipynb)

This notebook demonstrates:
- Wold decomposition: stochastic MA($\infty$) component + deterministic component
- Lag operator and differencing operator
- Differencing for stationarity (regular and seasonal)
- Transformation sequence: raw prices -> log prices -> log returns

In [None]:
!pip install yfinance matplotlib numpy scipy statsmodels pandas -q

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats

try:
    import yfinance as yf
    YF_AVAILABLE = True
except ImportError:
    YF_AVAILABLE = False

In [None]:
# Style configuration
# Named hex colours shared by the charts in this notebook.
COLORS = dict(
    blue='#1A3A6E',
    red='#DC3545',
    green='#2E7D32',
    orange='#E67E22',
    gray='#666666',
    purple='#8E44AD',
    light_blue='#4A7ABE',
    light_red='#E8808A',
)

# Matplotlib defaults, applied one (key, value) pair at a time.
for rc_key, rc_value in (
    # transparent backgrounds, also when saving
    ('axes.facecolor', 'none'),
    ('figure.facecolor', 'none'),
    ('savefig.transparent', True),
    # no top/right spines, no grid
    ('axes.spines.top', False),
    ('axes.spines.right', False),
    ('axes.grid', False),
    # font sizes
    ('font.size', 9),
    ('axes.titlesize', 10),
    ('axes.labelsize', 9),
    ('xtick.labelsize', 8),
    ('ytick.labelsize', 8),
    ('legend.fontsize', 8),
    # resolution and line/frame appearance
    ('figure.dpi', 150),
    ('lines.linewidth', 1.2),
    ('axes.edgecolor', '#333333'),
    ('axes.linewidth', 0.8),
):
    plt.rcParams[rc_key] = rc_value

# Fix NumPy's global random state so the simulated series are repeatable.
np.random.seed(42)

def save_chart(fig, name):
    """Save ``fig`` as PDF and PNG locally and, best-effort, in the shared charts folder.

    Parameters
    ----------
    fig : object with a matplotlib-style ``savefig(path, **kwargs)`` method
        The figure to write out.
    name : str
        Base file name without extension.

    The two local files (``<name>.pdf`` and ``<name>.png``) are mandatory: an
    error while writing them propagates to the caller. The copies under
    ``../../../charts`` are optional; if writing them fails, the reason is
    printed instead of being silently discarded and the function still
    completes normally.
    """
    save_kwargs = dict(bbox_inches='tight', transparent=True, dpi=150)
    extensions = ('pdf', 'png')

    for ext in extensions:
        fig.savefig(f'{name}.{ext}', **save_kwargs)

    try:
        charts_path = os.path.join('..', '..', '..', 'charts', name)
        for ext in extensions:
            fig.savefig(f'{charts_path}.{ext}', **save_kwargs)
    except Exception as exc:
        # Deliberately best-effort (the folder may not exist when the notebook
        # runs outside the repository layout), but say why it was skipped.
        print(f'Note: shared charts copy skipped ({type(exc).__name__}: {exc})')
    print(f'Saved: {name}.pdf + .png')

def add_legend_below(ax, ncol=3):
    """Draw a frameless legend for ``ax`` using ``ncol`` columns.

    The legend's upper-centre point is pinned to the anchor (0.5, -0.18)
    passed as ``bbox_to_anchor``.
    """
    anchor = (0.5, -0.18)
    ax.legend(
        loc='upper center',
        bbox_to_anchor=anchor,
        ncol=ncol,
        frameon=False,
    )

def generate_ar1(n, phi, sigma=1.0):
    """Simulate ``n`` points of x[t] = phi * x[t-1] + eps[t], starting from x[0] = 0.

    The shocks eps are normal draws (mean 0, scale ``sigma``) taken from
    NumPy's global random state. ``n`` shocks are drawn, although the first
    one never enters the path because x[0] stays at zero.

    Returns a 1-D float array of length ``n``.
    """
    shocks = np.random.normal(0, sigma, n)
    path = np.zeros(n)
    for step, shock in enumerate(shocks[1:], start=1):
        path[step] = phi * path[step - 1] + shock
    return path

def generate_random_walk(n, sigma=1.0, drift=0.0):
    """Return a length-``n`` random walk: cumulative normal shocks plus ``drift * t``.

    The shocks (mean 0, scale ``sigma``) come from NumPy's global random
    state; the trend index t runs over 0, 1, ..., n-1.
    """
    shocks = np.random.normal(0, sigma, n)
    trend = drift * np.arange(n)
    return shocks.cumsum() + trend

In [None]:
# Chart: ch1_wold_decomposition
# Wold decomposition: stochastic (MA infinity) + deterministic
np.random.seed(42)
n = 200

# Wold coefficients (exponentially decaying), truncated after n_lags terms
n_lags = 50
psi = 0.7 ** np.arange(n_lags)

# Generate process. The first n_lags shocks are pre-sample burn-in, so every
# observation -- including the very first -- is built from the full set of
# n_lags terms and the series has no start-up transient.
eps = np.random.normal(0, 1, n + n_lags)
# Entry k of the 'valid' convolution is sum_j psi[j] * eps[k + n_lags - 1 - j];
# dropping entry 0 gives stochastic[t] = sum_j psi[j] * eps[t + n_lags - j].
stochastic = np.convolve(eps, psi, mode='valid')[1:]

# Deterministic part: sine wave with period 50 around a level of 1.0
deterministic = 0.5 * np.sin(2 * np.pi * np.arange(n) / 50) + 1.0
total = stochastic + deterministic

# 2x2 figure: coefficients and full series on top, the two components below.
fig, axes = plt.subplots(2, 2, figsize=(8, 5.0))
(coef_ax, total_ax), (stoch_ax, det_ax) = axes
bold_title = {'fontsize': 9, 'fontweight': 'bold'}

# Top-left: the first 20 Wold coefficients as bars.
n_shown = 20
coef_ax.bar(range(n_shown), psi[:n_shown], color=COLORS['blue'], alpha=0.7, width=0.6)
coef_ax.set_title(r'Wold coefficients $\psi_j$', **bold_title)
coef_ax.set_xlabel('j')
coef_ax.set_ylabel(r'$\psi_j$')

# Top-right: the observed series (stochastic + deterministic).
total_ax.plot(total, color=COLORS['blue'], linewidth=0.8)
total_ax.set_title(r'$X_t = \sum \psi_j \varepsilon_{t-j} + \eta_t$', **bold_title)
total_ax.set_xlabel('Time')

# Bottom-left: stochastic component with a dashed zero reference line.
stoch_ax.plot(stochastic, color=COLORS['red'], linewidth=0.8)
stoch_ax.axhline(0, color=COLORS['gray'], linewidth=0.5, linestyle='--')
stoch_ax.set_title(r'Stochastic: $\sum \psi_j \varepsilon_{t-j}$', **bold_title)
stoch_ax.set_xlabel('Time')

# Bottom-right: deterministic component.
det_ax.plot(deterministic, color=COLORS['green'], linewidth=1.2)
det_ax.set_title(r'Deterministic: $\eta_t$', **bold_title)
det_ax.set_xlabel('Time')

fig.tight_layout(h_pad=2.0)
save_chart(fig, 'ch1_wold_decomposition')
plt.show()

In [None]:
# Chart: ch1_def_lag_operator
# Lag operator illustration
np.random.seed(42)
n = 100
y = generate_ar1(n, phi=0.8, sigma=1.0) + 5.0  # AR(1) path shifted up by 5
t = np.arange(n)

fig, axes = plt.subplots(1, 3, figsize=(9, 3.0))

# Panel 1: the series and its one-period lag. L X_t = X_{t-1}, so the value
# observed at t-1 is drawn at time t and the lagged line starts at t = 1.
axes[0].plot(t, y, color=COLORS['blue'], linewidth=1.0, label=r'$X_t$')
axes[0].plot(t[1:], y[:-1], color=COLORS['red'], linewidth=1.0, linestyle='--', label=r'$LX_t = X_{t-1}$')
axes[0].set_title(r'$LX_t = X_{t-1}$', fontsize=9, fontweight='bold')
axes[0].set_xlabel('Time')
axes[0].set_ylabel('Value')
axes[0].legend(fontsize=7, loc='upper right', frameon=False)

# Panel 2: first difference. It exists from t = 1 on, so it is plotted against
# t[1:] to share the time axis of panel 1 instead of being shifted one period left.
diff1 = np.diff(y)
axes[1].plot(t[1:], diff1, color=COLORS['green'], linewidth=0.8)
axes[1].axhline(0, color=COLORS['gray'], linewidth=0.5, linestyle='--')
axes[1].set_title(r'$\Delta X_t = (1-L)X_t$', fontsize=9, fontweight='bold')
axes[1].set_xlabel('Time')
axes[1].set_ylabel(r'$\Delta X_t$')

# Panel 3: second difference. It exists from t = 2 on, hence t[2:].
diff2 = np.diff(diff1)
axes[2].plot(t[2:], diff2, color=COLORS['purple'], linewidth=0.8)
axes[2].axhline(0, color=COLORS['gray'], linewidth=0.5, linestyle='--')
axes[2].set_title(r'$\Delta^2 X_t = (1-L)^2 X_t$', fontsize=9, fontweight='bold')
axes[2].set_xlabel('Time')
axes[2].set_ylabel(r'$\Delta^2 X_t$')

fig.tight_layout(w_pad=2.0)
save_chart(fig, 'ch1_def_lag_operator')
plt.show()

In [None]:
# Chart: ch1_differencing
# Differencing for stationarity
np.random.seed(42)
n = 200
rw = generate_random_walk(n, drift=0.1)
time_index = np.arange(n)

fig, axes = plt.subplots(1, 2, figsize=(8, 2.8))

# Left panel: the random walk with drift in levels.
axes[0].plot(time_index, rw, color=COLORS['blue'], linewidth=0.8, label=r'$X_t$ (random walk)')
axes[0].set_title('Original series (non-stationary)', fontsize=9, fontweight='bold')
axes[0].set_xlabel('Time')
axes[0].set_ylabel(r'$X_t$')
axes[0].legend(fontsize=7, loc='upper right', frameon=False)

# Right panel: first difference. Delta X_t = X_t - X_{t-1} exists for
# t = 1..n-1, so it is plotted against time_index[1:] to stay aligned with
# the time axis of the left panel.
diff = np.diff(rw)
axes[1].plot(time_index[1:], diff, color=COLORS['green'], linewidth=0.5, label=r'$\Delta X_t = X_t - X_{t-1}$')
# Dashed line at the sample mean of the differenced series.
axes[1].axhline(diff.mean(), color=COLORS['red'], linewidth=0.8, linestyle='--', label=f'Mean = {diff.mean():.2f}')
axes[1].set_title('After differencing (stationary)', fontsize=9, fontweight='bold')
axes[1].set_xlabel('Time')
axes[1].set_ylabel(r'$\Delta X_t$')
axes[1].legend(fontsize=7, loc='upper right', frameon=False)

fig.tight_layout(w_pad=2.0)
save_chart(fig, 'ch1_differencing')
plt.show()

In [None]:
# Chart: differencing_effect
# S&P 500 prices vs log returns (differencing effect)

# Load S&P 500 data. On success this cell defines `prices` (closing prices),
# `log_ret` (their log differences, one element shorter) and `dates`.
SP500_LOADED = False
if YF_AVAILABLE:
    try:
        df = yf.download('^GSPC', start='2020-01-01', end='2025-12-31', progress=False)
        close = df['Close'].squeeze().dropna()
        # NOTE(review): yfinance may signal a failed request with an empty
        # frame rather than an exception -- confirm against its docs. Either
        # way, fewer than two prices cannot produce a single log return, so
        # treat that as a failed load and use the synthetic fallback below.
        if len(close) < 2:
            raise ValueError('no usable S&P 500 closing prices returned')
        prices = close.values
        log_ret = np.diff(np.log(prices))
        dates = close.index
        SP500_LOADED = True
        print(f'S&P 500 loaded: {len(close)} observations')
    except Exception as e:
        print(f'Yahoo Finance failed: {e}')

if not SP500_LOADED:
    # Synthetic fallback: i.i.d. normal log returns compounded from 3000.
    print('Using synthetic price data (S&P 500 download unavailable)')
    np.random.seed(42)
    n_synth = 1250
    lr = np.random.normal(0.0003, 0.012, n_synth)
    prices = 3000 * np.exp(np.cumsum(lr))
    # log(prices) is log(3000) + cumsum(lr), so its first difference is lr[1:].
    log_ret = lr[1:]
    dates = pd.date_range('2020-01-02', periods=n_synth, freq='B')

# Two stacked panels: price level on top, log returns underneath.
fig, axes = plt.subplots(2, 1, figsize=(7, 3.5))
price_ax, return_ax = axes
bold_title = {'fontsize': 9, 'fontweight': 'bold'}

# Upper panel: one date per price.
price_dates = dates[:len(prices)]
price_ax.plot(price_dates, prices, color=COLORS['blue'], linewidth=0.8)
price_ax.set_title('S&P 500: Prices (non-stationary, I(1))', **bold_title)
price_ax.set_ylabel('Price')
price_ax.tick_params(axis='x', rotation=30)

# Lower panel: returns start at the second date, hence the slice from index 1.
return_dates = dates[1:len(log_ret) + 1]
return_ax.plot(return_dates, log_ret, color=COLORS['red'], linewidth=0.4, alpha=0.8)
return_ax.axhline(0, color=COLORS['gray'], linewidth=0.5, linestyle='--')
return_ax.set_title(r'Log returns: $r_t = \ln P_t - \ln P_{t-1}$ (stationary)', **bold_title)
return_ax.set_xlabel('Date')
return_ax.set_ylabel('Return')
return_ax.tick_params(axis='x', rotation=30)

fig.tight_layout(rect=[0, 0.05, 1, 1])
save_chart(fig, 'differencing_effect')
plt.show()

In [None]:
# Chart: ch1_transform_sequence_ro
# Transformation sequence: raw -> log -> diff
# Reuses `prices`, `log_ret` and `dates` defined by the S&P 500 loading cell.
log_prices = np.log(prices)
returns = log_ret

fig, axes = plt.subplots(1, 3, figsize=(8, 2.5))

# One spec per panel, left to right:
# (x values, y values, line keyword arguments, title, y-axis label, draw zero line?)
panel_specs = [
    (dates[:len(prices)], prices,
     dict(color=COLORS['blue'], linewidth=0.8, label=r'$P_t$'),
     r'S&P 500 $P_t$', 'Price', False),
    (dates[:len(log_prices)], log_prices,
     dict(color=COLORS['green'], linewidth=0.8, label=r'$\ln(P_t)$'),
     r'$\ln(P_t)$', 'Log price', False),
    # Returns start at the second date, hence the slice from index 1.
    (dates[1:len(returns)+1], returns,
     dict(color=COLORS['red'], linewidth=0.4, alpha=0.8, label=r'$r_t = \Delta \ln(P_t)$'),
     r'Returns $r_t$', 'Return', True),
]

for panel_ax, (x_vals, y_vals, line_kw, panel_title, y_label, zero_line) in zip(axes, panel_specs):
    panel_ax.plot(x_vals, y_vals, **line_kw)
    if zero_line:
        panel_ax.axhline(0, color=COLORS['gray'], linewidth=0.5, linestyle='--')
    panel_ax.set_title(panel_title, fontsize=9, fontweight='bold')
    panel_ax.set_xlabel('Date')
    panel_ax.set_ylabel(y_label)
    panel_ax.tick_params(axis='x', rotation=30)
    panel_ax.legend(fontsize=7, loc='upper right', frameon=False)

fig.tight_layout(w_pad=2.0)
save_chart(fig, 'ch1_transform_sequence_ro')
plt.show()