# TSA_ch8_hurst_estimation

## Hurst Exponent Estimation for Real Financial Data

**Data**: EUR/RON, S&P 500, Bitcoin, Gold

**Author**: Daniel Traian PELE

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import yfinance as yf
import warnings
# Silence every warning category so library notices do not clutter the output.
warnings.simplefilter('ignore')

# Plot defaults: seaborn "whitegrid" theme and a 12x6 default figure size.
plt.style.use('seaborn-v0_8-whitegrid')
plt.rc('figure', figsize=(12, 6))

## 1. Hurst Estimation Methods

In [None]:
def hurst_rs(series, min_window=10, max_window=None):
    """
    Estimate Hurst exponent using R/S (Rescaled Range) analysis.

    H < 0.5: Anti-persistent (mean-reverting)
    H = 0.5: Random walk
    H > 0.5: Persistent (trending)

    The series is cut into non-overlapping windows for up to 20
    log-spaced window sizes between ``min_window`` and ``max_window``.
    For each size the rescaled range R/S is averaged over the windows,
    and H is the slope of log(R/S) against log(window size).

    Parameters
    ----------
    series : array-like
        Observations; flattened to a 1-D float array.
    min_window : int
        Smallest window size to try.
    max_window : int or None
        Largest window size to try; defaults to ``len(series) // 4``.

    Returns
    -------
    H : float
        Slope of the log-log regression.
    window_sizes : ndarray of int
        Window sizes that produced at least one valid R/S value.
    rs_values : ndarray of float
        Mean R/S per entry of ``window_sizes`` (same length and order).
    r_squared : float
        Squared correlation coefficient of the log-log fit.

    Raises
    ------
    ValueError
        If the window bounds are not positive or no window size yields
        a usable R/S value.
    """
    # Flatten so a single-column 2-D input behaves like a 1-D series.
    values = np.asarray(series, dtype=float).ravel()
    n = values.size
    if max_window is None:
        max_window = n // 4

    # log10 of a non-positive bound is undefined; fail with a clear message.
    if min_window < 1 or max_window < 1:
        raise ValueError(
            f"window bounds must be >= 1 (min_window={min_window}, "
            f"max_window={max_window}); the series may be too short"
        )

    candidate_windows = np.unique(np.logspace(
        np.log10(min_window), np.log10(max_window), num=20
    ).astype(int))

    # Only window sizes that actually contribute an R/S value are recorded,
    # so the two lists stay aligned even when some sizes are skipped.
    used_windows = []
    rs_values = []

    for window in candidate_windows:
        if window < 2:
            # The sample std (ddof=1) is undefined for a single observation.
            continue

        n_windows = n // window
        rs_list = []

        for i in range(n_windows):
            subseries = values[i * window:(i + 1) * window]
            mean = np.mean(subseries)
            std = np.std(subseries, ddof=1)

            # Constant (or NaN-contaminated) windows have no usable R/S.
            if std > 0:
                Z = np.cumsum(subseries - mean)
                R = np.max(Z) - np.min(Z)
                rs_list.append(R / std)

        if rs_list:
            used_windows.append(window)
            rs_values.append(np.mean(rs_list))

    if not rs_values:
        raise ValueError("no window size produced a valid R/S value")

    used_windows = np.array(used_windows)
    rs_values = np.array(rs_values)

    slope, _, r_value, _, _ = stats.linregress(
        np.log(used_windows), np.log(rs_values)
    )

    return slope, used_windows, rs_values, r_value**2

In [None]:
def gph_estimator(series, m=None):
    """
    GPH (Geweke-Porter-Hudak) estimator for fractional differencing parameter d.

    Relationship: d = H - 0.5

    The log periodogram at the first ``m`` Fourier frequencies is regressed
    on ``log(4 * sin(freq / 2) ** 2)``; ``d`` is minus the fitted slope.

    Parameters
    ----------
    series : array-like
        Observations; flattened to a 1-D float array.
    m : int or None
        Number of low Fourier frequencies used in the regression;
        defaults to ``int(sqrt(len(series)))``.

    Returns
    -------
    d : float
        Estimated fractional differencing parameter.
    std_err : float
        Standard error of the regression slope.
    H : float
        Implied Hurst exponent, ``d + 0.5``.

    Raises
    ------
    ValueError
        If ``m`` is not between 1 and ``len(series) - 1``.
    """
    # Flatten first: on an (n, 1) column np.fft.fft would transform along
    # the length-1 last axis and return the demeaned data unchanged.
    values = np.asarray(series, dtype=float).ravel()
    n = values.size
    if m is None:
        m = int(np.sqrt(n))

    # Ordinates 1..m must exist in the length-n periodogram.
    if not 1 <= m <= n - 1:
        raise ValueError(
            f"m must satisfy 1 <= m <= n - 1 (got m={m}, n={n})"
        )

    fft_vals = np.fft.fft(values - np.mean(values))
    periodogram = (np.abs(fft_vals) ** 2) / n

    # Skip index 0: it is the zero frequency of the demeaned series.
    freqs = 2 * np.pi * np.arange(1, m + 1) / n
    y = np.log(periodogram[1:m + 1])
    x = np.log(4 * np.sin(freqs / 2) ** 2)

    slope, _, _, _, std_err = stats.linregress(x, y)
    d = -slope

    return d, std_err, d + 0.5  # d, se, H

## 2. Load Real Financial Data

In [None]:
# Download multiple assets
tickers = {
    'EUR/RON': 'EURRON=X',
    'S&P 500': '^GSPC',
    'Bitcoin': 'BTC-USD',
    'Gold': 'GC=F'
}

# data[name] holds the price series plus three return transforms, each a
# 1-D pandas Series. Assets whose download fails are reported and left out.
data = {}
for name, ticker in tickers.items():
    try:
        df = yf.download(ticker, start='2015-01-01', end='2024-12-31', progress=False)
        close = df['Close']
        # Depending on the yfinance version, a single-ticker download may
        # come back with MultiIndex columns, in which case df['Close'] is a
        # one-column DataFrame. Reduce it to a Series so the estimators
        # always receive 1-D data.
        if isinstance(close, pd.DataFrame):
            close = close.iloc[:, 0]
        prices = close.dropna()
        # Treat an empty result as a failure here rather than storing an
        # empty series that would break the analysis below.
        if len(prices) < 2:
            raise ValueError("no price data returned")
        # Daily log returns, in percent.
        returns = np.log(prices / prices.shift(1)).dropna() * 100
        data[name] = {
            'prices': prices,
            'returns': returns,
            'squared_returns': returns ** 2,
            'abs_returns': np.abs(returns)
        }
        print(f"{name}: {len(returns)} observations")
    except Exception as e:
        # Best-effort download: report the problem and continue with the
        # remaining assets.
        print(f"{name}: Failed to download - {e}")

## 3. Hurst Exponent Analysis

In [None]:
# Print a table of Hurst estimates (R/S and GPH) for every asset and return
# transform, and collect the same numbers in `results` for the plots below.
rule = "=" * 80
print(rule)
print("HURST EXPONENT ANALYSIS - REAL FINANCIAL DATA")
print(rule)
print(f"\n{'Asset':<12} {'Series':<18} {'H (R/S)':<10} {'d (GPH)':<10} {'H (GPH)':<10} {'Memory'}")
print("-" * 80)

results = []

# (label shown in the table, key into each asset's dict)
series_keys = (
    ('Returns', 'returns'),
    ('Squared Returns', 'squared_returns'),
    ('|Returns|', 'abs_returns'),
)

for name, asset_data in data.items():
    for series_name, key in series_keys:
        sample = asset_data[key].values

        # R/S estimate (only the exponent is needed here)
        H_rs = hurst_rs(sample)[0]

        # GPH estimate (the standard error is not reported in the table)
        d_gph, _, H_gph = gph_estimator(sample)

        # The label is based on the R/S estimate, with a +/-0.05 band
        # around 0.5 counted as a random walk.
        memory = (
            'Anti-persistent' if H_rs < 0.45
            else 'LONG MEMORY' if H_rs > 0.55
            else 'Random walk'
        )

        print(f"{name:<12} {series_name:<18} {H_rs:<10.3f} {d_gph:<10.3f} {H_gph:<10.3f} {memory}")

        results.append({
            'Asset': name, 'Series': series_name,
            'H_RS': H_rs, 'd_GPH': d_gph, 'H_GPH': H_gph
        })
    # Blank line between assets
    print()

## 4. Visualization

In [None]:
# R/S Analysis for EUR/RON
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# Top row: one log-log R/S panel each for returns and squared returns.
# Each entry: (data key, target axes, empirical line style, fit line style, title)
rs_panels = [
    ('returns', axes[0, 0], 'bo-', 'r--',
     'EUR/RON Returns: R/S Analysis'),
    ('squared_returns', axes[0, 1], 'ro-', 'b--',
     'EUR/RON Squared Returns: R/S Analysis (LONG MEMORY)'),
]

for key, ax, empirical_fmt, fit_fmt, title in rs_panels:
    series = data['EUR/RON'][key].values
    H, n_vals, rs_vals, r2 = hurst_rs(series)

    ax.loglog(n_vals, rs_vals, empirical_fmt, markersize=6, label='Empirical R/S')

    # Both reference lines are power laws scaled to pass through the first
    # empirical point: the fitted exponent H and the H = 0.5 benchmark.
    reference_lines = (
        (H, fit_fmt, f'Fit: H = {H:.3f}'),
        (0.5, 'g:', 'H = 0.5 (random walk)'),
    )
    for exponent, fmt, label in reference_lines:
        ax.loglog(n_vals, n_vals**exponent * (rs_vals[0] / n_vals[0]**exponent), fmt,
                  linewidth=2, label=label)

    ax.set_xlabel('Window size n')
    ax.set_ylabel('R/S(n)')
    ax.set_title(title, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

# Bar chart comparison: R/S Hurst exponent of returns vs squared returns,
# one pair of bars per asset.
ax = axes[1, 0]
results_df = pd.DataFrame(results)
is_returns = results_df['Series'] == 'Returns'
is_squared = results_df['Series'] == 'Squared Returns'
returns_h = results_df.loc[is_returns, 'H_RS'].values
squared_h = results_df.loc[is_squared, 'H_RS'].values
assets = results_df.loc[is_returns, 'Asset'].values

x = np.arange(len(assets))
width = 0.35
half_width = width / 2

# Left bar of each pair is returns, right bar is squared returns.
bar_groups = (
    (-half_width, returns_h, 'Returns', 'steelblue'),
    (half_width, squared_h, 'Squared Returns', 'darkred'),
)
for offset, heights, label, color in bar_groups:
    ax.bar(x + offset, heights, width, label=label, color=color)

# Reference level H = 0.5
ax.axhline(y=0.5, color='green', linestyle='--', linewidth=2, label='H=0.5')
ax.set_ylabel('Hurst Exponent')
ax.set_title('Hurst Exponent: Returns vs Volatility', fontweight='bold')
ax.set_xticks(x)
ax.set_xticklabels(assets)
ax.legend()
ax.grid(True, alpha=0.3, axis='y')

# Interpretation guide: shade the H < 0.5 and H > 0.5 halves of the unit
# interval and mark H = 0.5.
ax = axes[1, 1]
shaded_regions = (
    ([0, 0.5], 'red', 'Anti-persistent'),
    ([0.5, 1], 'blue', 'Persistent'),
)
for h_range, color, label in shaded_regions:
    ax.fill_between(h_range, 0, 1, alpha=0.3, color=color, label=label)
ax.axvline(x=0.5, color='green', linestyle='--', linewidth=3, label='Random walk')

# Plot actual H values: one row per asset, circle = returns,
# square = squared returns, asset name at the left edge.
for row, (asset, h_ret, h_sq) in enumerate(zip(assets, returns_h, squared_h)):
    y_pos = 0.3 + row*0.15
    for h_value, marker, color in ((h_ret, 'o', 'steelblue'), (h_sq, 's', 'darkred')):
        ax.scatter([h_value], [y_pos], marker=marker, s=100, color=color)
    ax.text(0.02, y_pos, asset, fontsize=10, va='center')

ax.set_xlabel('Hurst Exponent (H)')
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.set_title('Interpretation: H < 0.5 (mean-revert), H > 0.5 (trend)', fontweight='bold')
ax.legend(loc='upper right')

# Finalize the 2x2 figure: tidy the layout, write it to PDF, then display it.
plt.tight_layout()
plt.savefig('ch8_hurst_estimation.pdf', bbox_inches='tight', dpi=150)
plt.show()

## 5. Key Findings

**Stylized Facts from Real Data:**

1. **Returns** have H ≈ 0.5 (no memory, efficient market)
2. **Squared returns / |returns|** have H > 0.5 (long memory in volatility)
3. This justifies using ARFIMA/FIGARCH for volatility modeling