# Volatility Path States: Quick Start

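This notebook walks through the basic workflow of the volatility path states package: generating synthetic data, exploring the regimes, comparing factor returns by regime, building baseline and state-conditioned portfolios, and analyzing momentum crashes.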

In [None]:
# Setup
import sys
sys.path.insert(0, '..')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Import package modules
from src.data.synthetic_data import SyntheticDataGenerator
from src.regimes.regime_classifier import RegimeClassifier
from src.portfolio.baseline import BaselinePortfolio
from src.portfolio.state_conditioned import StateConditionedPortfolio
from src.analysis.performance import PerformanceAnalyzer
from src.visualization.styles import set_publication_style, PlotStyles

# Apply the project's plotting style (helper imported from src.visualization.styles).
# NOTE(review): presumably adjusts matplotlib defaults for every figure below — confirm.
set_publication_style()
# Seed NumPy's legacy global RNG so code relying on np.random's global state is repeatable.
np.random.seed(42)

## 1. Generate Synthetic Data

In [None]:
# Build the synthetic dataset (intended to match the paper's statistics).
# A fixed seed keeps the run repeatable; 732 months corresponds to 1963-2023.
generator = SyntheticDataGenerator(seed=42)
data = generator.generate(n_months=732)

# Report the shape of every entry that exposes a `shape` attribute.
print("Generated data:")
shaped_entries = [
    (name, item.shape)
    for name, item in data.items()
    if hasattr(item, 'shape')
]
for name, shape in shaped_entries:
    print(f"  {name}: {shape}")

In [None]:
# Pull the individual pieces out of the generated dataset.
factors, volatility = data['factors'], data['volatility']

# Regimes may arrive as a DataFrame; if so, keep only its 'regime' column.
regimes = data['regimes']
regimes = regimes['regime'] if isinstance(regimes, pd.DataFrame) else regimes

# Blank line, heading, then the descriptive statistics of the factor returns.
print("\nFactor returns summary:", factors.describe(), sep="\n")

## 2. Explore Regimes

In [None]:
# Tally how often each regime occurs, as raw counts and as percentages.
regime_counts = regimes.value_counts()
regime_percentages = regimes.value_counts(normalize=True) * 100

print("Regime frequencies:")
print(regime_counts)
print("\nPercentages:")
print(regime_percentages)

In [None]:
# Regime timeline: shade each interval between consecutive dates with the
# colour of the regime observed at the start of that interval.
fig, ax = plt.subplots(figsize=(12, 3))

dates = regimes.index
for span_start, span_end, regime in zip(dates[:-1], dates[1:], regimes.iloc[:-1]):
    ax.axvspan(
        span_start,
        span_end,
        alpha=0.7,
        color=PlotStyles.get_regime_color(regime),
    )

ax.set_xlim(dates[0], dates[-1])
ax.set_yticks([])  # the y-axis carries no information in this strip chart
ax.set_title('Regime Classification Over Time')
plt.tight_layout()
plt.show()

## 3. Analyze Factor Returns by Regime

In [None]:
# Average monthly factor return (in percent) within each regime:
# one column per regime, one row per factor.
regime_returns = pd.DataFrame({
    regime: factors.loc[regimes == regime].mean() * 100
    for regime in PlotStyles.REGIME_ORDER
})

# Transpose for display so that each regime is a row.
print("Mean Monthly Returns by Regime (%):")
print(regime_returns.T)

In [None]:
# Grouped bar chart of mean factor returns: one group per regime, one bar per factor.
fig, ax = plt.subplots(figsize=(10, 6))

regime_labels = list(PlotStyles.REGIME_ORDER)
factor_names = list(factors.columns)
x = np.arange(len(regime_labels))

# Derive the bar width from the number of factors so each group always fills
# 80% of the slot between ticks. A fixed width of 0.2 only fits exactly four
# factors; with more, bars would spill into the neighbouring regime's group.
group_width = 0.8
width = group_width / max(len(factor_names), 1)

for i, factor in enumerate(factor_names):
    vals = [regime_returns.loc[factor, r] for r in regime_labels]
    ax.bar(x + i * width, vals, width, label=factor,
           color=PlotStyles.get_factor_color(factor))

ax.axhline(y=0, color='black', linewidth=0.5)
ax.set_ylabel('Mean Monthly Return (%)')
# Centre each tick under its group: halfway between the first and last bar.
ax.set_xticks(x + width * (len(factor_names) - 1) / 2)
ax.set_xticklabels(regime_labels, rotation=15, ha='right')
ax.legend()
ax.set_title('Factor Returns by Path State Regime')
plt.tight_layout()
plt.show()

## 4. Build Portfolios

In [None]:
# Define the evaluation period. The training cutoff is derived from eval_start
# (the day before) rather than hard-coded as a second literal, so changing the
# evaluation start can never silently overlap the training window or leave a gap.
eval_start = '2000-01-01'
training_end = (pd.Timestamp(eval_start) - pd.Timedelta(days=1)).strftime('%Y-%m-%d')

# Baseline portfolio (unconditional): built from the factor returns only.
baseline = BaselinePortfolio(factors)
baseline_result = baseline.backtest(start=eval_start)

# State-conditioned portfolio: also receives the regime labels.
# NOTE(review): fit() presumably uses only data up to training_end — confirm in
# StateConditionedPortfolio.
state_cond = StateConditionedPortfolio(factors, regimes)
state_cond.fit(training_end=training_end)
state_cond_result = state_cond.backtest(start=eval_start)

print("Portfolio exposures by regime:")
print(state_cond.summary())

In [None]:
# Run both strategies' net returns through the same analyzer and print the
# headline metrics, transposed so that each metric is a row.
analyzer = PerformanceAnalyzer()

net_returns_by_strategy = {
    'Baseline': baseline_result.returns['net'],
    'State-Conditioned': state_cond_result.returns['net'],
}
headline_metrics = ['mean_return', 'volatility', 'sharpe_ratio', 'max_drawdown']

comparison = analyzer.compare_strategies(net_returns_by_strategy)

print("Performance Comparison:")
print(comparison.loc[:, headline_metrics].transpose())

In [None]:
# Cumulative performance: running sum of each strategy's net returns, in percent.
fig, ax = plt.subplots(figsize=(10, 5))

cum_baseline = np.cumsum(baseline_result.returns['net']) * 100
cum_cond = np.cumsum(state_cond_result.returns['net']) * 100

# (series, line format, legend label, line width) for each curve, in draw order.
curves = [
    (cum_baseline, 'b--', 'Baseline', 1.5),
    (cum_cond, 'b-', 'State-Conditioned', 2),
]
for curve, line_format, legend_label, line_width in curves:
    ax.plot(curve.index, curve.values, line_format,
            label=legend_label, linewidth=line_width)

ax.set(
    xlabel='Date',
    ylabel='Cumulative Return (%)',
    title='Cumulative Performance: Baseline vs State-Conditioned',
)
ax.legend()
plt.tight_layout()
plt.show()

## 5. Analyze Momentum Crashes

In [None]:
# Treat the months at or below the 5th percentile of Momentum returns as
# "crashes" and look up the regime each of those months belongs to.
momentum = factors['Momentum']
threshold = momentum.quantile(0.05)

crashes = momentum.loc[momentum <= threshold]
crash_regimes = regimes.loc[crashes.index]

# Boolean mask of crash months labelled Crash-Spike, reused for count and share.
in_crash_spike = crash_regimes == 'Crash-Spike'

print(f"Crash threshold (5th percentile): {threshold*100:.2f}%")
print("\nCrashes by regime:")
print(crash_regimes.value_counts())
print(f"\nCrash-Spike contains {in_crash_spike.sum()} of {len(crashes)} crashes")
print(f"That's {in_crash_spike.mean()*100:.1f}% of all crashes")

## 6. Key Takeaways

1. **Factor returns vary dramatically by regime**: Momentum earns positive returns in Calm Trend but negative returns in Crash-Spike
2. **Crashes are concentrated**: A disproportionate share of momentum crashes occurs in Crash-Spike states
3. **State conditioning improves performance**: By reducing exposure in high-risk states, we can improve Sharpe ratios and reduce drawdowns
4. **Not all factors need timing**: Quality and Low-Risk factors perform well even in stress periods