# Extended PCMCI+ Analysis

Deeper exploration:
1. Different alpha levels (catch weaker signals)
2. Different lag windows
3. CMI vs ParCorr (nonlinear detection)
4. Lead-lag deep dive (lag-by-lag analysis for key pairs)
5. Rolling window analysis (regime changes)
6. Crisis period deep-dive
7. Distance correlation heatmap

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import pcmci

# Print the version string reported by the pcmci package.
print(f"PCMCI+ version: {pcmci.version()}")

In [None]:
# Asset universe: ticker -> human-readable label
ASSETS = {
    'SPY': 'US Equities',
    'TLT': 'Treasuries',
    'GLD': 'Gold',
    'UUP': 'US Dollar',
    'EEM': 'EM Equities',
}

# Trailing window of 730 calendar days ending at the moment the cell runs
end_date = datetime.now()
start_date = end_date - timedelta(days=730)

# One download call for every ticker in the universe
data = yf.download(
    list(ASSETS),
    start=start_date,
    end=end_date,
    progress=False,
)

def parkinson_volatility(high, low, window=20):
    """Rolling Parkinson (high/low range) volatility estimate.

    Args:
        high: pandas object of period highs (must support ``.rolling``
            after element-wise arithmetic).
        low: pandas object of period lows, aligned with ``high``.
        window: Number of observations averaged in each rolling window.

    Returns:
        ``sqrt(mean(ln(high/low)^2) / (4 ln 2) * 252)`` per rolling window;
        positions without a full window are NaN.
    """
    squared_log_range = np.log(high / low) ** 2
    parkinson_scale = 1.0 / (4.0 * np.log(2))
    # 252 is presumably trading days per year (annualization) -- confirm
    scaled_variance = parkinson_scale * squared_log_range.rolling(window).mean() * 252
    return np.sqrt(scaled_variance)

# One Parkinson-volatility column per ticker, built from the downloaded highs and lows
volatility = pd.DataFrame()
for symbol in ASSETS:
    volatility[symbol] = parkinson_volatility(
        data['High'][symbol],
        data['Low'][symbol],
        20,
    )
# Keep only rows where every asset has a value (the rolling window leaves leading NaNs)
volatility = volatility.dropna()

var_names = list(ASSETS)

# Same numbers in two layouts: rows = days (indexed column-wise by the pairwise tests) ...
vol_arr = volatility.values
# ... and rows = assets (the array handed to run_pcmci)
vol_matrix = volatility[var_names].values.T

print(f"Data: {len(volatility)} days, {len(var_names)} assets")

## 1. Different Alpha Levels

Relaxing alpha catches weaker but potentially real signals.

In [None]:
print("Cross-Asset Spillovers at Different Significance Levels")
print("=" * 70)

# Re-run discovery at progressively looser significance thresholds
for alpha in (0.01, 0.05, 0.10, 0.20):
    result = pcmci.run_pcmci(vol_matrix, tau_max=5, alpha=alpha, var_names=var_names)

    # Keep links whose source and target are different variables
    cross = [
        found
        for found in result.significant_links
        if found.source_var != found.target_var
    ]

    print(f"\nα = {alpha:.2f}: {len(cross)} cross-asset spillovers")
    for link in cross:
        src = var_names[link.source_var]
        tgt = var_names[link.target_var]
        print(f"  {src}(t-{link.tau}) → {tgt}(t): r={link.val:+.3f}, p={link.pval:.4f}")

## 2. Different Lag Windows

Longer lags may reveal slower transmission channels.

In [None]:
print("Cross-Asset Spillovers at Different Lag Windows")
print("=" * 70)

# Same data and alpha each time; only the maximum lag considered changes
for tau_max in (3, 5, 10, 15, 20):
    result = pcmci.run_pcmci(vol_matrix, tau_max=tau_max, alpha=0.10, var_names=var_names)

    # Links between two different variables only
    cross = [
        found
        for found in result.significant_links
        if found.source_var != found.target_var
    ]

    print(f"\nτ_max = {tau_max:2d}: {len(cross)} links ({result.runtime*1000:.1f}ms)")

    # Report at most the five links with the largest absolute strength
    strongest_first = sorted(cross, key=lambda x: abs(x.val), reverse=True)
    for link in strongest_first[:5]:
        src = var_names[link.source_var]
        tgt = var_names[link.target_var]
        print(f"  {src}(t-{link.tau:2d}) → {tgt}(t): r={link.val:+.3f}")

## 3. Linear vs Nonlinear Dependencies

CMI can detect relationships that partial correlation misses.

In [None]:
print("Pairwise Comparison: Partial Correlation vs CMI")
print("=" * 70)
print(f"{'Pair':<12} {'ParCorr':>9} {'p':>8} {'CMI':>9} {'p':>8}  Notes")
print("-" * 70)

# Visit every unordered pair of assets exactly once (i < j)
for i, first_name in enumerate(var_names):
    for j in range(i + 1, len(var_names)):
        X = vol_arr[:, i]
        Y = vol_arr[:, j]

        # Both tests are called on the two series alone (no conditioning variables passed)
        r, p_r = pcmci.parcorr_test(X, Y)
        cmi_result = pcmci.cmi_test(X, Y, n_perm=100)

        pair = f"{first_name}-{var_names[j]}"

        # Flag pairs the linear test misses but CMI finds, else flag large |r|
        if p_r > 0.05 and cmi_result.pvalue < 0.05:
            note = "← NONLINEAR!"
        elif abs(r) > 0.5:
            note = "← Strong"
        else:
            note = ""

        print(f"{pair:<12} {r:>+9.3f} {p_r:>8.4f} {cmi_result.cmi:>9.3f} {cmi_result.pvalue:>8.4f}  {note}")

## 4. Lead-Lag Deep Dive

Detailed lag-by-lag analysis for key pairs.

In [None]:
def lead_lag_plot(source_idx, target_idx, max_lag=15):
    """Plot and summarize lagged dependence between two volatility series.

    For every lag from 1 to ``max_lag`` the source column of ``vol_arr`` is
    paired with the target column ``lag`` rows later, and the pair is scored
    with ``pcmci.parcorr_test`` and ``pcmci.mi``. Both score profiles are
    drawn as bar charts and the best lag under each measure is printed.

    Args:
        source_idx: Column of ``vol_arr`` (position in ``var_names``) treated
            as the leading series.
        target_idx: Column of ``vol_arr`` treated as the lagging series.
        max_lag: Largest lag, in rows of ``vol_arr``, to evaluate.

    Raises:
        ValueError: If ``max_lag`` is below 1 or not smaller than the number
            of rows in ``vol_arr`` (no overlapping observations would remain).
    """
    n_obs = len(vol_arr)
    if not 1 <= max_lag < n_obs:
        raise ValueError(
            f"max_lag must be between 1 and {n_obs - 1} (got {max_lag})"
        )

    src_name, tgt_name = var_names[source_idx], var_names[target_idx]

    lags = list(range(1, max_lag + 1))
    parcorrs = []
    pvalues = []
    mis = []

    for lag in lags:
        # Align source at row t-lag with target at row t
        source = vol_arr[:-lag, source_idx]
        target = vol_arr[lag:, target_idx]

        r, p = pcmci.parcorr_test(source, target)
        mi = pcmci.mi(source, target)

        parcorrs.append(r)
        pvalues.append(p)
        mis.append(mi)

    # Plot
    fig, axes = plt.subplots(1, 2, figsize=(14, 4))

    # Partial correlation: bars are green where the test p-value is below 0.05
    colors = ['green' if p < 0.05 else 'lightgray' for p in pvalues]
    axes[0].bar(lags, parcorrs, color=colors, edgecolor='black', alpha=0.7)
    axes[0].axhline(0, color='black', linewidth=0.5)
    axes[0].set_xlabel('Lag (days)')
    axes[0].set_ylabel('Partial Correlation')
    axes[0].set_title(f'{src_name}(t-lag) → {tgt_name}(t)\nGreen = p < 0.05')
    axes[0].grid(True, alpha=0.3)

    # Mutual information (no conditioning set, so this is MI rather than CMI)
    axes[1].bar(lags, mis, color='steelblue', edgecolor='black', alpha=0.7)
    axes[1].set_xlabel('Lag (days)')
    axes[1].set_ylabel('Mutual Information (nats)')
    axes[1].set_title(f'{src_name}(t-lag) → {tgt_name}(t)\nMI (includes nonlinear)')
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Best lag = largest |r| for the correlation, largest value for MI
    best_parcorr_idx = np.argmax(np.abs(parcorrs))
    best_mi_idx = np.argmax(mis)

    print(f"Best ParCorr: lag={lags[best_parcorr_idx]}, r={parcorrs[best_parcorr_idx]:+.3f}, p={pvalues[best_parcorr_idx]:.4f}")
    # Label matches what is computed and plotted (MI), not CMI
    print(f"Best MI:      lag={lags[best_mi_idx]}, MI={mis[best_mi_idx]:.3f}")

# Analyze key pairs
# Indices are looked up by ticker so the plotted pair always matches the label,
# even if the asset list is reordered.
print("TLT → SPY (Do rates lead equities?)")
lead_lag_plot(var_names.index('TLT'), var_names.index('SPY'))

In [None]:
# Look the columns up by ticker so the plotted pair always matches the label
print("SPY → EEM (Do US equities lead EM?)")
lead_lag_plot(var_names.index('SPY'), var_names.index('EEM'))

In [None]:
# Look the columns up by ticker so the plotted pair always matches the label
print("TLT → UUP (Do bonds lead dollar?)")
lead_lag_plot(var_names.index('TLT'), var_names.index('UUP'))

In [None]:
# Look the columns up by ticker so the plotted pair always matches the label
print("GLD → SPY (Does gold lead equities?)")
lead_lag_plot(var_names.index('GLD'), var_names.index('SPY'))

## 5. Rolling Window Analysis (Time-Varying Causality)

Causal relationships change over time. Let's track them.

In [None]:
window_size = 120  # ~6 months
step = 10  # ~2 weeks

# One strength series per ordered pair of distinct assets, keyed "SRC->TGT"
results_over_time = {f"{s}->{t}": [] for s in var_names for t in var_names if s != t}
dates = []

print(f"Running rolling PCMCI+ (window={window_size}, step={step})...")

n_obs = vol_matrix.shape[1]

# The "+ 1" keeps the window that ends exactly on the last observation;
# without it that window is dropped whenever (n_obs - window_size) is a
# multiple of step.
for start in range(0, n_obs - window_size + 1, step):
    end = start + window_size
    window_data = vol_matrix[:, start:end]
    # Each window is stamped with the date of its last observation
    window_date = volatility.index[end - 1]

    result = pcmci.run_pcmci(window_data, tau_max=3, alpha=0.20, var_names=var_names)

    # Extract all cross-asset links
    link_dict = {}
    for link in result.significant_links:
        if link.source_var == link.target_var:
            continue
        key = f"{var_names[link.source_var]}->{var_names[link.target_var]}"
        # A pair can be significant at several lags; keep the strongest one
        # so the recorded value does not depend on the order of the links.
        if key not in link_dict or abs(link.val) > abs(link_dict[key]):
            link_dict[key] = link.val

    # Pairs with no significant link in this window are recorded as 0
    for key, series in results_over_time.items():
        series.append(link_dict.get(key, 0))

    dates.append(window_date)

print(f"Completed {len(dates)} windows")

In [None]:
# Plot key relationships over time
key_pairs = ['SPY->EEM', 'TLT->SPY', 'TLT->UUP', 'GLD->SPY']

# squeeze=False always returns a 2-D array of axes, so the loop below also
# works when key_pairs holds a single entry.
fig, axes = plt.subplots(
    len(key_pairs), 1,
    figsize=(14, 3*len(key_pairs)),
    sharex=True,
    squeeze=False,
)
axes = axes[:, 0]

for ax, pair in zip(axes, key_pairs):
    values = np.asarray(results_over_time[pair], dtype=float)

    ax.plot(dates, values, 'b-', linewidth=1.5)
    # A single fill_between call draws one polygon with one colour, so a
    # per-point colour list cannot shade by sign. Shade the positive and
    # negative regions in separate calls; interpolate=True extends each fill
    # to the zero crossing, which matters because isolated non-zero windows
    # are flanked by zeros.
    ax.fill_between(dates, values, 0, where=values > 0, interpolate=True,
                    alpha=0.3, color='green')
    ax.fill_between(dates, values, 0, where=values < 0, interpolate=True,
                    alpha=0.3, color='red')
    ax.axhline(0, color='gray', linestyle='--', linewidth=0.5)
    ax.set_ylabel('Strength')
    ax.set_title(f'{pair} (Rolling 6-month PCMCI+)')
    ax.grid(True, alpha=0.3)

axes[-1].set_xlabel('Date')
plt.tight_layout()
plt.show()

## 6. April 2025 Crisis Deep-Dive

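Analyze the volatility spike in detail. The crisis window is detected from the data as the span of days on which SPY's volatility estimate exceeds 25%.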

In [None]:
# Flag the days on which SPY's volatility estimate is above 0.25
crisis_mask = volatility['SPY'] > 0.25

if not crisis_mask.any():
    print("No crisis period found (SPY vol never exceeded 25%)")
else:
    # First and last flagged day, plus the day of the overall SPY maximum
    flagged_days = volatility.index[crisis_mask]
    crisis_start = flagged_days.min()
    crisis_end = flagged_days.max()
    peak_date = volatility['SPY'].idxmax()

    print(f"Crisis period: {crisis_start.date()} to {crisis_end.date()}")
    print(f"Peak date: {peak_date.date()} (SPY vol = {volatility.loc[peak_date, 'SPY']:.1%})")

    # Slice from 10 calendar days before the peak to 5 calendar days after it
    print("\nDay-by-day volatility around peak:")
    window = volatility.loc[peak_date - timedelta(days=10):peak_date + timedelta(days=5)]

    # Show values as percentages rounded to one decimal, with "(%)" in each header
    display_df = (window * 100).round(1)
    display_df.columns = [f"{c} (%)" for c in display_df.columns]
    display(display_df)

In [None]:
# Causal discovery restricted to the crisis span plus a lead-in period
if crisis_mask.any():
    # Start 60 calendar days before the first crisis day for lead detection
    pre_crisis_start = crisis_start - timedelta(days=60)
    crisis_data = volatility.loc[pre_crisis_start:crisis_end]

    first_day = crisis_data.index[0]
    last_day = crisis_data.index[-1]
    print(f"Crisis analysis window: {first_day.date()} to {last_day.date()}")
    print(f"Observations: {len(crisis_data)}\n")

    # Same row-per-asset layout as the full-sample runs
    crisis_matrix = crisis_data[var_names].values.T

    result = pcmci.run_pcmci(crisis_matrix, tau_max=5, alpha=0.10, var_names=var_names)

    print("Causal links during crisis build-up:")
    print("=" * 50)

    # Links between different variables, listed strongest first by |val|
    cross_links = [
        found
        for found in result.significant_links
        if found.source_var != found.target_var
    ]
    for link in sorted(cross_links, key=lambda x: abs(x.val), reverse=True):
        src = var_names[link.source_var]
        tgt = var_names[link.target_var]
        print(f"  {src}(t-{link.tau}) → {tgt}(t): r={link.val:+.3f}, p={link.pval:.4f}")

## 7. Distance Correlation Heatmap

Detect any dependence (including nonlinear).

In [None]:
n_assets = len(var_names)

# Start from the identity (diagonal of ones) and fill both triangles
# from a single dcor call per unordered pair.
dcor_matrix = np.eye(n_assets)
upper_rows, upper_cols = np.triu_indices(n_assets, k=1)
for i, j in zip(upper_rows, upper_cols):
    dc = pcmci.dcor(vol_arr[:, i], vol_arr[:, j])
    dcor_matrix[i, j] = dcor_matrix[j, i] = dc

# Heatmap on a fixed 0..1 colour scale
fig, ax = plt.subplots(figsize=(8, 6))
im = ax.imshow(dcor_matrix, cmap='YlOrRd', vmin=0, vmax=1)

ticks = range(n_assets)
ax.set_xticks(ticks)
ax.set_yticks(ticks)
ax.set_xticklabels(var_names, fontsize=12)
ax.set_yticklabels(var_names, fontsize=12)

# Write each cell's value on top of it (x = column, y = row)
for (row, col), cell in np.ndenumerate(dcor_matrix):
    ax.text(col, row, f'{cell:.2f}', ha='center', va='center', fontsize=14, fontweight='bold')

ax.set_title('Distance Correlation Matrix\n(Detects Any Dependence)', fontsize=14)
plt.colorbar(im, label='dCor')
plt.tight_layout()
plt.show()

## Summary

### Key Findings

| Finding | Tradeable? | Action |
|---------|------------|--------|
| SPY ↔ EEM contemporaneous | ❌ | No lead time |
| TLT → UUP at lag 5 | ✅ | Watch bond vol for dollar vol |
| GLD independent | ✅ | True diversifier |
| Strong AR(1) everywhere | ✅ | Vol clustering = predictable |

### Next Steps
1. Add more assets (VIX, BTC, sector ETFs)
2. Try intraday data for faster signals
3. Integrate with your BOCPD for regime detection
4. Build alerting system for spillover detection