# Volatility Exploratory Data Analysis
**FinRisk Analytics - Project 2**

Explore volatility patterns across asset classes.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yaml

from src.data.fetch_data import load_data
from src.data.preprocess import prepare_returns, align_returns, calculate_realized_volatility, calculate_summary_statistics
from src.utils.plotting import plot_returns, plot_volatility, plot_distribution, plot_qq, ensure_plot_dir

# Set display options: show every column, 100-character display width
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 100)

# Load config
# Pin the encoding so reading the YAML file does not depend on the
# platform's locale default (which is not UTF-8 on every system).
with open('../configs/config.yaml', 'r', encoding='utf-8') as f:
    config = yaml.safe_load(f)

print('✅ Ready to analyze volatility!')

In [None]:
## Volatility Clustering
# Analyze volatility persistence via the autocorrelation of squared returns.
# The plotting helper is imported once here rather than on every loop pass.
from pandas.plotting import autocorrelation_plot

print("🔍 Volatility Clustering Analysis")
print("=" * 60)

for ticker in returns_df.columns:
    returns = returns_df[ticker]
    squared_returns = returns ** 2

    # Autocorrelation of squared returns (measure of volatility clustering);
    # missing values are dropped before plotting.
    fig, ax = plt.subplots(figsize=(12, 6))
    autocorrelation_plot(squared_returns.dropna(), ax=ax)
    ax.set_title(f'{ticker} - Autocorrelation of Squared Returns\n(Evidence of Volatility Clustering)')
    ax.set_xlabel('Lag')
    ax.set_ylabel('Autocorrelation')

    # Save through the explicit figure handle rather than the implicit
    # "current figure" so the file always contains this ticker's plot.
    fig.savefig(f'../results/plots/{ticker}_vol_clustering.png', dpi=300, bbox_inches='tight')
    plt.show()

print("\n✅ Volatility EDA Complete!")

In [None]:
## Realized Volatility
# For each asset: compute realized volatility over a 20-observation window,
# keep the series for later comparison, and plot/save it.
volatilities = {}

for ticker in returns_df:
    vol = calculate_realized_volatility(returns_df[ticker], window=20)
    volatilities[ticker] = vol

    chart_title = f'{ticker} Realized Volatility (20-day)'
    chart_path = f'../results/plots/{ticker}_realized_vol.png'
    fig, ax = plot_volatility(vol, title=chart_title, save_path=chart_path)
    plt.show()

# Combine the per-asset series into one frame and rank assets by their mean
vol_df = pd.DataFrame(volatilities)
print("\n📈 Average Annualized Volatility:")
ranked_mean_vol = vol_df.mean().sort_values(ascending=False)
print(ranked_mean_vol)

In [None]:
## Normality Check: Q-Q Plots
# Draw and save one Q-Q plot per asset as a visual normality check
for ticker in returns_df:
    asset_returns = returns_df[ticker]
    chart_title = f'{ticker} Q-Q Plot'
    chart_path = f'../results/plots/{ticker}_qq.png'
    fig, ax = plot_qq(asset_returns, title=chart_title, save_path=chart_path)
    plt.show()

In [None]:
## Return Distribution Analysis
# Draw and save the return distribution plot for every asset
for ticker in returns_df:
    asset_returns = returns_df[ticker]
    chart_title = f'{ticker} Return Distribution'
    chart_path = f'../results/plots/{ticker}_distribution.png'
    fig, ax = plot_distribution(asset_returns, title=chart_title, save_path=chart_path)
    plt.show()

In [None]:
## Returns Visualization
# Prepare the output directory first, then plot and save each asset's
# daily return series.
ensure_plot_dir('../results/plots')

for ticker in returns_df:
    asset_returns = returns_df[ticker]
    chart_title = f'{ticker} Daily Returns'
    chart_path = f'../results/plots/{ticker}_returns.png'
    fig, ax = plot_returns(asset_returns, title=chart_title, save_path=chart_path)
    plt.show()

In [None]:
## Summary Statistics
# Build a {asset: statistics} mapping in one pass over the return columns
summary_stats = {
    col: calculate_summary_statistics(returns_df[col])
    for col in returns_df
}

# Transpose so that each asset becomes a row of the resulting table
stats_df = pd.DataFrame(summary_stats).T
print("\n📊 Summary Statistics:")
stats_df

In [None]:
# Load data for case studies
# Tickers come from the `assets.case_studies` entry of the loaded config.
case_studies = config['assets']['case_studies']
price_data = {}
missing_tickers = []

for ticker in case_studies:
    df = load_data(ticker, data_dir='../data/raw')
    if df is None:
        # Record the gap instead of silently dropping the ticker, so a
        # shorter-than-expected asset list is visible in the output.
        missing_tickers.append(ticker)
        continue
    price_data[ticker] = df
    print(f"Loaded {ticker}: {df.shape[0]} rows")

if missing_tickers:
    print(f"⚠️ No data loaded for: {', '.join(map(str, missing_tickers))}")

# Calculate returns
returns_dict = prepare_returns(price_data)
returns_df = align_returns(returns_dict)

print(f"\nReturns DataFrame shape: {returns_df.shape}")
# Indexing the first/last label raises IndexError when there are no rows,
# so report the empty case explicitly instead.
if len(returns_df.index) == 0:
    print("Date range: n/a (no aligned observations)")
else:
    print(f"Date range: {returns_df.index[0]} to {returns_df.index[-1]}")
returns_df.head()