# TSA Chapter 0: Detrending Methods

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/QuantLet/TSA/blob/main/TSA_Ch0/TSA_ch0_detrending/TSA_ch0_detrending.ipynb)

This notebook demonstrates detrending methods:
- Differencing, linear/polynomial detrending, LOESS, moving average detrending
- HP filter with lambda selection
- Business cycle extraction
- Deterministic vs stochastic trends
- Trend comparison side by side

In [None]:
!pip install matplotlib numpy scipy pandas -q

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
from scipy.ndimage import uniform_filter1d
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Plot styling shared by every chart in this notebook.
# COLORS doubles as the named palette and (in insertion order) the default
# matplotlib colour cycle.
COLORS = dict(
    blue='#1A3A6E',
    red='#DC3545',
    green='#2E7D32',
    orange='#E67E22',
    gray='#666666',
    purple='#8E44AD',
)

plt.rcParams.update({
    # transparent backgrounds (on screen and in saved files)
    'axes.facecolor': 'none',
    'figure.facecolor': 'none',
    'savefig.transparent': True,
    # axes frame: left/bottom spines only, thin dark edge, no grid
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.edgecolor': '#333333',
    'axes.linewidth': 0.8,
    'axes.grid': False,
    # font sizes
    'font.size': 10,
    'axes.titlesize': 11,
    'axes.labelsize': 10,
    'legend.fontsize': 8,
    'xtick.labelsize': 8,
    'ytick.labelsize': 8,
    # lines and default colour cycle
    'lines.linewidth': 1.5,
    'axes.prop_cycle': plt.cycler('color', list(COLORS.values())),
})

# Seed the global NumPy RNG so simulated series are reproducible.
np.random.seed(42)

# Secondary output directory used by save_chart.
# NOTE(review): dirname(abspath('.')) is the parent of the working directory,
# and the two '..' components climb two more levels, so this points three
# levels above the working directory -- confirm that is the intended location.
CHARTS_DIR = os.path.join(
    os.path.dirname(os.path.abspath('.')),
    '..',
    '..',
    'charts',
)

def save_chart(fig, name):
    """Save ``fig`` as ``<name>.pdf`` and ``<name>.png``.

    The files are always written relative to the working directory. A second
    copy is attempted under ``CHARTS_DIR``; that copy is best-effort, and a
    filesystem error (for example the directory not existing) is reported
    and skipped instead of aborting the cell.

    Parameters
    ----------
    fig : object with a ``savefig(path, **kwargs)`` method
        The figure to export.
    name : str
        Base file name without extension.
    """
    save_kwargs = dict(bbox_inches='tight', transparent=True, dpi=150)
    extensions = ('pdf', 'png')

    for ext in extensions:
        fig.savefig(f'{name}.{ext}', **save_kwargs)

    # Also save to main charts directory for the lecture
    charts_path = os.path.join(CHARTS_DIR, name)
    try:
        for ext in extensions:
            fig.savefig(f'{charts_path}.{ext}', **save_kwargs)
    except OSError as exc:
        # e.g. running on Colab without the charts dir: keep going, but say so
        print(f'Skipped lecture copy of {name}: {exc}')
    print(f'Saved: {name}.pdf + .png')

def add_legend_below(ax, ncol=3):
    """Attach a frameless legend centred underneath ``ax``.

    ``ncol`` is the number of legend columns (default 3).
    """
    legend_options = {
        'loc': 'upper center',
        'bbox_to_anchor': (0.5, -0.12),  # horizontally centred, just below the axes
        'ncol': ncol,
        'frameon': False,
    }
    ax.legend(**legend_options)

def simple_hp(y, lam):
    """Hodrick-Prescott trend of ``y`` for smoothing parameter ``lam``.

    Solves the linear system ``(I + lam * D'D) trend = y`` where ``D`` is the
    ``(n-2) x n`` second-difference matrix whose rows are ``[1, -2, 1]``.

    Parameters
    ----------
    y : array-like
        Series to smooth; its first axis has length ``n``.
    lam : float
        Weight on the squared second differences of the trend. ``lam = 0``
        returns the series itself.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``y`` holding the trend. For
        ``n < 3`` no second difference exists, so a copy of ``y`` is returned.

    Notes
    -----
    The system is built and solved densely, which is fine for the
    ~100-point series used in this notebook.
    """
    y = np.asarray(y, dtype=float)
    n = y.shape[0]
    if n < 3:
        # The penalty term is empty: the minimiser is the data itself.
        return y.copy()
    # Differencing the identity twice along the rows yields exactly the
    # [1, -2, 1] banded second-difference operator.
    D = np.diff(np.eye(n), n=2, axis=0)
    trend = np.linalg.solve(np.eye(n) + lam * D.T @ D, y)
    return trend

In [None]:
# Chart: detrending_methods
# Six detrending approaches on real US GDP data (a simulated series is used,
# and labelled as such, when the FRED download is unavailable).
np.random.seed(42)

try:
    # Only the GDPC1 column is needed. The date column is neither parsed nor
    # referenced by name, so a differently named date header in the export
    # cannot raise here and silently divert the cell to the simulated series.
    gdp_dt_df = pd.read_csv('https://fred.stlouisfed.org/graph/fredgraph.csv?id=GDPC1')
    gdp_dt_df['GDPC1'] = pd.to_numeric(gdp_dt_df['GDPC1'], errors='coerce')
    gdp_dt_df = gdp_dt_df.dropna(subset=['GDPC1']).tail(100)
    if gdp_dt_df.empty:
        raise ValueError('no numeric GDPC1 observations in the download')
    t = np.arange(len(gdp_dt_df))
    y = gdp_dt_df['GDPC1'].values.astype(float)
    data_label = 'US Real GDP'
    print(f'US Real GDP loaded: {len(y)} observations')
except Exception as exc:
    # Best-effort fallback so the notebook still runs offline -- but report it
    # and label the figure accordingly instead of presenting it as GDP.
    t = np.arange(100)
    trend = 50 + 0.5 * t + 0.003 * t**2
    y = trend + np.random.randn(100) * 4
    data_label = 'Simulated GDP'
    print(f'FRED download failed ({exc!r}); using simulated data instead')

fig, axes = plt.subplots(2, 3, figsize=(10, 5.5))

# Original
axes[0, 0].plot(t, y, color=COLORS['blue'], linewidth=1.0)
axes[0, 0].set_title(f'Original ({data_label})', fontweight='bold', fontsize=9)

# Differencing
axes[0, 1].plot(t[1:], np.diff(y), color=COLORS['green'], linewidth=0.8)
axes[0, 1].axhline(y=0, color='black', linewidth=0.5)
axes[0, 1].set_title('Differencing ($\\Delta X_t$)', fontweight='bold', fontsize=9)

# Linear detrend
coef = np.polyfit(t, y, 1)
linear_trend = np.polyval(coef, t)
axes[0, 2].plot(t, y - linear_trend, color=COLORS['red'], linewidth=0.8)
axes[0, 2].axhline(y=0, color='black', linewidth=0.5)
axes[0, 2].set_title('Linear Detrend', fontweight='bold', fontsize=9)

# Polynomial detrend (cubic)
coef3 = np.polyfit(t, y, 3)
poly_trend = np.polyval(coef3, t)
axes[1, 0].plot(t, y - poly_trend, color=COLORS['orange'], linewidth=0.8)
axes[1, 0].axhline(y=0, color='black', linewidth=0.5)
axes[1, 0].set_title('Polynomial Detrend', fontweight='bold', fontsize=9)
axes[1, 0].set_xlabel('Time')

# Moving average detrend
# 'valid' convolution drops w-1 points; with odd w the average of
# y[i:i+w] is centred on index i + w//2, hence the offset.
w = 15
kernel = np.ones(w) / w
ma = np.convolve(y, kernel, mode='valid')
offset = w // 2
detrended_ma = y[offset:offset+len(ma)] - ma
axes[1, 1].plot(t[offset:offset+len(ma)], detrended_ma, color=COLORS['purple'], linewidth=0.8)
axes[1, 1].axhline(y=0, color='black', linewidth=0.5)
axes[1, 1].set_title('MA(15) Detrend', fontweight='bold', fontsize=9)
axes[1, 1].set_xlabel('Time')

# LOESS approximation
# NOTE(review): this is a 15-point uniform (boxcar) filter, i.e. the same
# smoother as the MA(15) panel away from the edges, not a local regression.
# Consider a true LOESS fit if the panels are meant to differ.
loess_approx = uniform_filter1d(y.astype(float), size=15)
axes[1, 2].plot(t, y - loess_approx, color=COLORS['gray'], linewidth=0.8)
axes[1, 2].axhline(y=0, color='black', linewidth=0.5)
axes[1, 2].set_title('LOESS Detrend', fontweight='bold', fontsize=9)
axes[1, 2].set_xlabel('Time')

fig.suptitle('Detrending Methods', fontweight='bold', fontsize=12, y=1.02)
fig.tight_layout()
save_chart(fig, 'detrending_methods')
plt.show()

In [None]:
# Chart: trend_estimation_comparison
# MA, polynomial, HP filter comparison on real US GDP (a simulated series is
# used, and labelled as such, when the FRED download is unavailable).
np.random.seed(42)

try:
    # Only the GDPC1 column is needed. The date column is neither parsed nor
    # referenced by name, so a differently named date header in the export
    # cannot raise here and silently divert the cell to the simulated series.
    gdp_te_df = pd.read_csv('https://fred.stlouisfed.org/graph/fredgraph.csv?id=GDPC1')
    gdp_te_df['GDPC1'] = pd.to_numeric(gdp_te_df['GDPC1'], errors='coerce')
    gdp_te_df = gdp_te_df.dropna(subset=['GDPC1']).tail(100)
    if gdp_te_df.empty:
        raise ValueError('no numeric GDPC1 observations in the download')
    t = np.arange(len(gdp_te_df))
    y = gdp_te_df['GDPC1'].values.astype(float)
    data_label = 'US Real GDP'
except Exception as exc:
    # Best-effort fallback so the notebook still runs offline -- but report it
    # and label the figure accordingly instead of presenting it as GDP.
    t = np.arange(100)
    trend_true = 50 + 0.4 * t + 0.002 * t**2
    y = trend_true + 10 * np.sin(2*np.pi*t/12) + np.random.randn(100) * 3
    data_label = 'Simulated GDP'
    print(f'FRED download failed ({exc!r}); using simulated data instead')

fig, ax = plt.subplots(figsize=(8, 4.5))
ax.plot(t, y, color=COLORS['gray'], linewidth=0.6, alpha=0.4, label=data_label)

# Moving average
# ma[i] averages y[i:i+w], whose midpoint is i + (w-1)/2. For the even
# window used here that is a half-integer, so the curve is drawn at
# fractional time positions rather than shifted half a step to the right.
w = 12
kernel = np.ones(w) / w
ma = np.convolve(y, kernel, mode='valid')
ma_time = t[:len(ma)] + (w - 1) / 2
ax.plot(ma_time, ma, color=COLORS['blue'], linewidth=1.5, label='MA(12)')

# Polynomial regression
coef = np.polyfit(t, y, 3)
ax.plot(t, np.polyval(coef, t), color=COLORS['green'], linewidth=1.5, linestyle='--', label='Polynomial (degree 3)')

# HP filter
hp_trend = simple_hp(y, 1600)
ax.plot(t, hp_trend, color=COLORS['red'], linewidth=1.5, linestyle='-.', label='HP filter ($\\lambda=1600$)')

ax.set_title('Trend Estimation: Method Comparison', fontweight='bold')
ax.set_xlabel('Time ($t$)')
ax.set_ylabel('$\\hat{T}_t$')
add_legend_below(ax, ncol=4)

fig.tight_layout()
save_chart(fig, 'trend_estimation_comparison')
plt.show()

In [None]:
# Chart: ch1_hp_filter_lambda
# HP filter with different lambda values on real US GDP (a simulated series
# is used, and labelled as such, when the FRED download is unavailable).
np.random.seed(42)

try:
    # Only the GDPC1 column is needed. The date column is neither parsed nor
    # referenced by name, so a differently named date header in the export
    # cannot raise here and silently divert the cell to the simulated series.
    gdp_hp_df = pd.read_csv('https://fred.stlouisfed.org/graph/fredgraph.csv?id=GDPC1')
    gdp_hp_df['GDPC1'] = pd.to_numeric(gdp_hp_df['GDPC1'], errors='coerce')
    gdp_hp_df = gdp_hp_df.dropna(subset=['GDPC1']).tail(100)
    if gdp_hp_df.empty:
        raise ValueError('no numeric GDPC1 observations in the download')
    t = np.arange(len(gdp_hp_df))
    y = gdp_hp_df['GDPC1'].values.astype(float)
    data_label = 'US Real GDP'
except Exception as exc:
    # Best-effort fallback so the notebook still runs offline -- but report it
    # and label the figure accordingly instead of presenting it as GDP.
    t = np.arange(100)
    y = 50 + 0.3 * t + 0.002 * t**2 + 8 * np.sin(2*np.pi*t/20) + np.random.randn(100) * 3
    data_label = 'Simulated GDP'
    print(f'FRED download failed ({exc!r}); using simulated data instead')

fig, axes = plt.subplots(1, 3, figsize=(10, 3.5))

# One panel per smoothing parameter, from most flexible to smoothest trend
for ax, lam, title in zip(axes, [10, 1600, 100000],
                            ['$\\lambda = 10$ (flexible)', '$\\lambda = 1600$ (standard)',
                             '$\\lambda = 100000$ (smooth)']):
    hp_trend = simple_hp(y, lam)
    ax.plot(t, y, color=COLORS['gray'], linewidth=0.6, alpha=0.4, label=data_label)
    ax.plot(t, hp_trend, color=COLORS['red'], linewidth=2, label='HP trend')
    ax.set_title(title, fontweight='bold', fontsize=9)
    ax.set_xlabel('Time')
    add_legend_below(ax, ncol=2)

axes[0].set_ylabel('$X_t$')

fig.suptitle('HP Filter: Effect of Parameter $\\lambda$', fontweight='bold', fontsize=11, y=1.02)
fig.tight_layout(rect=[0, 0.02, 1, 1])
save_chart(fig, 'ch1_hp_filter_lambda')
plt.show()

In [None]:
# Chart: ch1_hp_filter_cycle
# Business cycle extraction using HP filter on real US GDP (a simulated
# series is used, and labelled as such, when the FRED download is unavailable).
np.random.seed(42)

try:
    # Only the GDPC1 column is needed. The date column is neither parsed nor
    # referenced by name, so a differently named date header in the export
    # cannot raise here and silently divert the cell to the simulated series.
    gdp_bc_df = pd.read_csv('https://fred.stlouisfed.org/graph/fredgraph.csv?id=GDPC1')
    gdp_bc_df['GDPC1'] = pd.to_numeric(gdp_bc_df['GDPC1'], errors='coerce')
    gdp_bc_df = gdp_bc_df.dropna(subset=['GDPC1']).tail(120)
    if gdp_bc_df.empty:
        raise ValueError('no numeric GDPC1 observations in the download')
    t = np.arange(len(gdp_bc_df))
    y = gdp_bc_df['GDPC1'].values.astype(float)
    data_label = 'US Real GDP'
except Exception as exc:
    # Best-effort fallback so the notebook still runs offline -- but report it
    # and label the figure accordingly instead of presenting it as GDP.
    t = np.arange(120)
    trend_true = 100 + 0.3 * t + 0.001 * t**2
    cycle_true = 5 * np.sin(2*np.pi*t/40) + 3 * np.sin(2*np.pi*t/20)
    y = trend_true + cycle_true + np.random.randn(120) * 1.5
    data_label = 'Simulated GDP'
    print(f'FRED download failed ({exc!r}); using simulated data instead')

fig, axes = plt.subplots(2, 1, figsize=(8, 5), sharex=True)

# The cyclical component is whatever the HP trend leaves behind
hp_trend = simple_hp(y, 1600)
hp_cycle = y - hp_trend

axes[0].plot(t, y, color=COLORS['blue'], linewidth=1.0, alpha=0.7, label=data_label)
axes[0].plot(t, hp_trend, color=COLORS['red'], linewidth=2, label='HP trend ($\\lambda=1600$)')
# Title follows the data actually plotted rather than always claiming GDP
axes[0].set_title(f'{data_label}: Data and HP Trend', fontweight='bold')
axes[0].set_ylabel('Value')
add_legend_below(axes[0], ncol=2)

axes[1].plot(t, hp_cycle, color=COLORS['orange'], linewidth=1.2)
axes[1].axhline(y=0, color='black', linewidth=0.5)
# Shade periods above trend and below trend separately
axes[1].fill_between(t, 0, hp_cycle, where=hp_cycle > 0, alpha=0.2, color=COLORS['green'], label='Expansion')
axes[1].fill_between(t, 0, hp_cycle, where=hp_cycle < 0, alpha=0.2, color=COLORS['red'], label='Recession')
axes[1].set_title('Extracted Business Cycle (Cyclic Component)', fontweight='bold')
axes[1].set_xlabel('Quarters')
axes[1].set_ylabel('Cyclic Deviation')
add_legend_below(axes[1], ncol=2)

fig.tight_layout(rect=[0, 0.02, 1, 1])
save_chart(fig, 'ch1_hp_filter_cycle')
plt.show()

In [None]:
# Chart: deterministic_trend_example
# Deterministic trend with regression residuals and ACF
np.random.seed(42)
fig, axes = plt.subplots(1, 3, figsize=(10, 3.5))

# Simulate X_t = 20 + 0.5 t + eps_t
t = np.arange(100)
eps = np.random.randn(100) * 3
y = 20 + 0.5 * t + eps

# Estimate the linear trend by OLS so the dashed line really is the fitted
# trend and the residuals below are regression residuals, rather than the
# simulated noise recovered by subtracting the true coefficients.
slope_hat, intercept_hat = np.polyfit(t, y, 1)
trend_fit = intercept_hat + slope_hat * t

axes[0].plot(t, y, color=COLORS['blue'], linewidth=1.0, label='$X_t$')
axes[0].plot(t, trend_fit, color=COLORS['red'], linewidth=1.5, linestyle='--', label='$\\hat{T}_t$')
axes[0].set_title('Series with Deterministic Trend', fontweight='bold', fontsize=9)
axes[0].set_xlabel('Time')
axes[0].set_ylabel('$X_t$')
add_legend_below(axes[0], ncol=2)

# Residuals
resid = y - trend_fit
axes[1].plot(t, resid, color=COLORS['green'], linewidth=0.8)
axes[1].axhline(y=0, color='black', linewidth=0.5)
axes[1].set_title('Residuals (Stationary)', fontweight='bold', fontsize=9)
axes[1].set_xlabel('Time')
axes[1].set_ylabel('$e_t$')

# ACF of residuals: lag-k autocovariance over lag-0 autocovariance, lags 0..15
n = len(resid)
r_centered = resid - np.mean(resid)
acf_denom = np.sum(r_centered**2)
acf_vals = [1.0] + [np.sum(r_centered[:n-k] * r_centered[k:]) / acf_denom
                    for k in range(1, 16)]

axes[2].bar(range(16), acf_vals, color=COLORS['blue'], alpha=0.7, width=0.6)
# Approximate 95% band for the sample ACF of white noise
ci = 1.96 / np.sqrt(n)
axes[2].axhline(y=ci, color=COLORS['red'], linestyle='--', linewidth=0.8)
axes[2].axhline(y=-ci, color=COLORS['red'], linestyle='--', linewidth=0.8)
axes[2].axhline(y=0, color='black', linewidth=0.5)
axes[2].set_title('ACF of Residuals', fontweight='bold', fontsize=9)
axes[2].set_xlabel('Lag')
axes[2].set_ylabel('$\\rho_k$')

fig.suptitle('Deterministic Trend: $X_t = \\beta_0 + \\beta_1 t + \\varepsilon_t$',
             fontweight='bold', fontsize=11, y=1.02)
fig.tight_layout(rect=[0, 0.02, 1, 1])
save_chart(fig, 'deterministic_trend_example')
plt.show()

In [None]:
# Chart: stochastic_trend_example
# A simulated random walk, its first differences, and the sample ACF of
# those differences.
np.random.seed(42)
fig, axes = plt.subplots(1, 3, figsize=(10, 3.5))
ax_level, ax_diff, ax_acf = axes

t = np.arange(100)
eps = np.random.randn(100) * 2
y = np.cumsum(eps) + 50  # Random walk

# Panel 1: the level series
ax_level.plot(t, y, color=COLORS['blue'], linewidth=1.2)
ax_level.set_title('Random Walk ($X_t = X_{t-1} + \\varepsilon_t$)', fontweight='bold', fontsize=9)
ax_level.set_xlabel('Time')
ax_level.set_ylabel('$X_t$')

# Panel 2: first differences (one observation shorter than the level series)
diff_y = np.diff(y)
ax_diff.plot(t[1:], diff_y, color=COLORS['green'], linewidth=0.8)
ax_diff.axhline(y=0, color='black', linewidth=0.5)
ax_diff.set_title('Differences $\\Delta X_t$ (Stationary)', fontweight='bold', fontsize=9)
ax_diff.set_xlabel('Time')
ax_diff.set_ylabel('$\\Delta X_t$')

# Panel 3: sample ACF of the differences for lags 0..15
n = len(diff_y)
r_centered = diff_y - np.mean(diff_y)
acf_vals = [1.0]  # lag 0 is 1 by definition
acf_vals.extend(
    np.sum(r_centered[:n-lag] * r_centered[lag:]) / np.sum(r_centered**2)
    for lag in range(1, 16)
)

ax_acf.bar(range(16), acf_vals, color=COLORS['blue'], alpha=0.7, width=0.6)
ci = 1.96 / np.sqrt(n)
for level in (ci, -ci):
    ax_acf.axhline(y=level, color=COLORS['red'], linestyle='--', linewidth=0.8)
ax_acf.axhline(y=0, color='black', linewidth=0.5)
ax_acf.set_title('ACF of Differences', fontweight='bold', fontsize=9)
ax_acf.set_xlabel('Lag')
ax_acf.set_ylabel('$\\rho_k$')

fig.suptitle('Stochastic Trend: Removal by Differencing',
             fontweight='bold', fontsize=11, y=1.02)
fig.tight_layout(rect=[0, 0.02, 1, 1])
save_chart(fig, 'stochastic_trend_example')
plt.show()

In [None]:
# Chart: trend_comparison_sidebyside
# Deterministic vs stochastic trend comparison: top row shows the series,
# bottom row shows each one after the matching detrending method.
np.random.seed(42)
fig, axes = plt.subplots(2, 2, figsize=(10, 5.5))

t = np.arange(100)

# Deterministic trend
eps_d = np.random.randn(100) * 3
y_det = 20 + 0.5 * t + eps_d
# Fit the trend by OLS so the "Detrend by Regression" panel shows actual
# regression residuals instead of the simulated noise obtained by
# subtracting the true coefficients.
trend_det = np.polyval(np.polyfit(t, y_det, 1), t)

axes[0, 0].plot(t, y_det, color=COLORS['blue'], linewidth=1.2, label='Data')
axes[0, 0].plot(t, trend_det, color=COLORS['red'], linewidth=1.5, linestyle='--', label='Trend')
axes[0, 0].set_title('Deterministic Trend', fontweight='bold', fontsize=10)
axes[0, 0].set_ylabel('$X_t$')
add_legend_below(axes[0, 0], ncol=2)

# Stochastic trend
eps_s = np.random.randn(100) * 2
y_sto = np.cumsum(eps_s) + 50

axes[0, 1].plot(t, y_sto, color=COLORS['blue'], linewidth=1.2, label='Data')
axes[0, 1].set_title('Stochastic Trend (Random Walk)', fontweight='bold', fontsize=10)
add_legend_below(axes[0, 1], ncol=1)

# Detrended (regression)
resid_det = y_det - trend_det
axes[1, 0].plot(t, resid_det, color=COLORS['green'], linewidth=0.8)
axes[1, 0].axhline(y=0, color='black', linewidth=0.5)
axes[1, 0].set_title('Detrend by Regression (Stationary)', fontweight='bold', fontsize=10)
axes[1, 0].set_xlabel('Time')
axes[1, 0].set_ylabel('$X_t - \\hat{T}_t$')

# Differenced (one observation shorter than the level series)
diff_sto = np.diff(y_sto)
axes[1, 1].plot(t[1:], diff_sto, color=COLORS['green'], linewidth=0.8)
axes[1, 1].axhline(y=0, color='black', linewidth=0.5)
axes[1, 1].set_title('Detrend by Differencing (Stationary)', fontweight='bold', fontsize=10)
axes[1, 1].set_xlabel('Time')
axes[1, 1].set_ylabel('$\\Delta X_t$')

fig.tight_layout(rect=[0, 0.02, 1, 1])
save_chart(fig, 'trend_comparison_sidebyside')
plt.show()