In [None]:
# ==========================================
# PROJECT: Cross-Asset Contagion in Stress Regimes
# Evidence from Bitcoin Crash Events and Equity Correlation Breakdown
# ==========================================

# 1. SETUP & LIBRARIES
!pip install yfinance pandas numpy matplotlib seaborn scikit-learn tensorflow -q

import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from datetime import datetime
import warnings
# Suppress every warning category for the remainder of the session,
# presumably to keep the printed report free of library notices.
# NOTE(review): this also hides deprecation/runtime warnings raised by the
# analysis itself (e.g. divide-by-zero) — consider a narrower filter.
warnings.filterwarnings('ignore')

# Progress marker: imports and global configuration are done.
print(" Setup complete!")

# ==========================================
# 2. DATA COLLECTION
# ==========================================

print("\n" + "="*70)
print("PHASE 1: DATA COLLECTION")
print("="*70)

# Settings
start_date = '2020-01-01'
end_date = datetime.now().strftime('%Y-%m-%d')
crisis_threshold = -0.10  # BTC drops >10% in a week = "Crisis Regime"

# Stock Selection - Behavioral Classification
tickers = {
    'Crypto': ['BTC-USD'],
    'High_Beta_Speculative': ['COIN', 'MSTR', 'TSLA', 'NVDA'],  # Sentiment-driven equities
    'Defensive_Low_Beta': ['JPM', 'JNJ', 'KO', 'PG']            # Defensive equities
}

# Flatten the category -> symbols mapping into one download list (order preserved).
all_tickers = [symbol for group in tickers.values() for symbol in group]

print(f"\n Downloading data for: {all_tickers}")
print(f"   Period: {start_date} to Present")

# Download data
raw_data = yf.download(all_tickers, start=start_date, end=end_date)


def _extract_price_table(frame, preferred=('Adj Close', 'Close')):
    """Pull one price field out of a downloaded frame.

    The field may sit on any level of a column MultiIndex or be a plain
    column. Fields are tried in the order given by ``preferred``.

    Returns:
        The selected prices (DataFrame, or Series for a flat single column).

    Raises:
        ValueError: if the frame is empty or none of the fields is present,
            so the failure surfaces here rather than as an unrelated KeyError
            further down the script.
    """
    if frame is None or frame.empty:
        raise ValueError("Download returned no data; check tickers, dates and connectivity.")

    columns = frame.columns
    for field in preferred:
        selected = None
        if isinstance(columns, pd.MultiIndex):
            for level in range(columns.nlevels):
                # get_level_values only reports labels that are actually in use.
                if field in columns.get_level_values(level):
                    selected = frame.xs(field, level=level, axis=1)
                    break
        elif field in columns:
            selected = frame[field]

        if selected is not None:
            if field != preferred[0]:
                print(f"WARNING: '{preferred[0]}' not found. Using '{field}'.")
            return selected

    raise ValueError(
        f"None of the price fields {list(preferred)} found in downloaded columns: {list(columns)}"
    )


# Robust data extraction
data = _extract_price_table(raw_data)

if isinstance(data, pd.Series):
    # A lone series is named after the price field; label it with the ticker
    # so lookups such as data['BTC-USD'] keep working.
    data = data.to_frame(name=all_tickers[0]) if len(all_tickers) == 1 else data.to_frame()

# A symbol with no quotes at all would make the later returns.dropna() discard
# every row. Drop such columns so the per-ticker availability checks further
# down can skip them instead.
# NOTE(review): assumes a failed symbol shows up as an all-NaN column —
# confirm for the installed yfinance version.
empty_columns = [col for col in data.columns if data[col].isna().all()]
if empty_columns:
    print(f"WARNING: no price data returned for {empty_columns}; dropping.")
    data = data.drop(columns=empty_columns)

if 'BTC-USD' not in data.columns:
    raise ValueError("BTC-USD price data is required for regime detection but was not downloaded.")

# Make the effect of the back-fill below visible: a ticker whose history starts
# late gets a constant synthetic price (zero returns) before its first quote.
for col in data.columns:
    first_quote = data[col].first_valid_index()
    if (first_quote - data.index[0]).days > 7:
        print(f"WARNING: {col} has no prices before {first_quote:%Y-%m-%d}; "
              f"earlier rows are back-filled with its first price.")

# Forward-fill gaps (e.g. days on which only some assets have quotes), then
# back-fill leading gaps so every column is fully populated.
data = data.ffill().bfill()
print(f" Data Shape: {data.shape}")
print(f"   Columns: {list(data.columns)}")

# ==========================================
# 3. REGIME DETECTION
# ==========================================

print("\n" + "="*70)
print("PHASE 2: REGIME DETECTION")
print("="*70)

# Compute percentage returns
returns = data.pct_change().dropna()

# Identify stress regimes based on Bitcoin weekly returns.
# price_t / price_{t-7} - 1 is the compounded return over the same seven daily
# returns a 7-row rolling window covers. Summing simple returns instead is only
# an approximation and can misclassify volatile weeks (e.g. +20% then -25%
# sums to -5% but compounds to -10%).
btc_weekly_return = data['BTC-USD'].pct_change(periods=7).reindex(returns.index)

# Rows without a full 7-row history have a NaN weekly return; NaN compares as
# False against the threshold, so those rows are labelled 'Normal'.
regime = pd.DataFrame(index=returns.index)
regime['Regime'] = np.where(btc_weekly_return < crisis_threshold, 'Crisis', 'Normal')

crisis_days = (regime['Regime'] == 'Crisis').sum()
normal_days = (regime['Regime'] == 'Normal').sum()

print(f"\nRegime Classification:")
print(f"  Crisis days: {crisis_days} ({crisis_days/len(regime)*100:.1f}%)")
print(f"  Normal days: {normal_days} ({normal_days/len(regime)*100:.1f}%)")
print(f"  Threshold: Bitcoin weekly return < {crisis_threshold*100:.0f}%")
print(f"  (Aligns with institutional 'Correction' definition)")

# ==========================================
# 4. CORRELATION ANALYSIS
# ==========================================

print("\n" + "="*70)
print("PHASE 3: CORRELATION ANALYSIS")
print("="*70)

correlation_columns = ['Stock', 'Category', 'Normal_Corr', 'Crisis_Corr', 'Change']
returns_with_regime = returns.join(regime)

# The regime split is the same for every ticker, so do it once up front.
normal_data = returns_with_regime[returns_with_regime['Regime'] == 'Normal']
crisis_data = returns_with_regime[returns_with_regime['Regime'] == 'Crisis']

# Collect one record per equity and build the frame once at the end; growing a
# DataFrame with pd.concat inside the loop is quadratic and starts from an
# empty frame, which recent pandas versions warn about.
correlation_rows = []

for category, ticker_list in tickers.items():
    if category == 'Crypto':
        # Bitcoin is the reference series, not a subject of the comparison.
        continue

    category_label = 'High-Beta' if category == 'High_Beta_Speculative' else 'Defensive'

    for ticker in ticker_list:
        if ticker not in returns.columns:
            print(f" {ticker} not available, skipping")
            continue

        # Pearson correlation of daily returns with Bitcoin within each regime.
        normal_corr = normal_data[ticker].corr(normal_data['BTC-USD'])
        crisis_corr = crisis_data[ticker].corr(crisis_data['BTC-USD'])
        change = crisis_corr - normal_corr

        correlation_rows.append({
            'Stock': ticker,
            'Category': category_label,
            'Normal_Corr': normal_corr,
            'Crisis_Corr': crisis_corr,
            'Change': change,
        })

        print(f"{ticker:6s} ({category:25s}): {normal_corr:>6.3f} → {crisis_corr:>6.3f} (Δ {change:>+6.3f})")

# Passing the column list keeps the expected schema even when no ticker qualified.
correlations = pd.DataFrame(correlation_rows, columns=correlation_columns)

print("\n Correlations Calculated.")

# ==========================================
# 5. KEY FINDINGS ANALYSIS
# ==========================================

print("\n" + "="*70)
print("PHASE 4: KEY FINDINGS")
print("="*70)


def _relative_change_pct(current, baseline):
    """Percent change of ``current`` versus ``|baseline|``; NaN if the baseline is missing or ~0."""
    if pd.isna(baseline) or np.isclose(baseline, 0.0):
        return float('nan')
    return (current - baseline) / abs(baseline) * 100


# Calculate group averages (cast to float so the mean is numeric regardless of
# how the correlation table was assembled).
highbeta_rows = correlations[correlations['Category'] == 'High-Beta']
defensive_rows = correlations[correlations['Category'] == 'Defensive']

highbeta_avg_normal = highbeta_rows['Normal_Corr'].astype(float).mean()
highbeta_crisis_avg = highbeta_rows['Crisis_Corr'].astype(float).mean()
defensive_avg_normal = defensive_rows['Normal_Corr'].astype(float).mean()
defensive_crisis_avg = defensive_rows['Crisis_Corr'].astype(float).mean()

# Calculate absolute changes
highbeta_absolute_change = highbeta_crisis_avg - highbeta_avg_normal
defensive_absolute_change = defensive_crisis_avg - defensive_avg_normal

# Calculate percentage increases (relative to the magnitude of the normal-regime level)
highbeta_increase_pct = _relative_change_pct(highbeta_crisis_avg, highbeta_avg_normal)
defensive_increase_pct = _relative_change_pct(defensive_crisis_avg, defensive_avg_normal)

# Calculate diversification gap: how much more the high-beta group co-moves
# with Bitcoin than the defensive group does.
gap_normal = highbeta_avg_normal - defensive_avg_normal
gap_crisis = highbeta_crisis_avg - defensive_crisis_avg

# Share of the normal-regime gap that vanishes in crises; undefined when there
# was no gap to begin with.
if pd.isna(gap_normal) or np.isclose(gap_normal, 0.0):
    gap_reduction_pct = float('nan')
else:
    gap_reduction_pct = (gap_normal - gap_crisis) / gap_normal * 100

print(f"\n CORRELATION CONVERGENCE ANALYSIS:")
print(f"\n  High-Beta / Sentiment-Driven Equities:")
print(f"    Normal correlation:  {highbeta_avg_normal:>6.3f}")
print(f"    Crisis correlation:  {highbeta_crisis_avg:>6.3f}")
print(f"    Absolute change:     {highbeta_absolute_change:>+6.3f}")
print(f"    Percentage increase: {highbeta_increase_pct:>+6.1f}%")

print(f"\n  Defensive / Low-Beta Equities:")
print(f"    Normal correlation:  {defensive_avg_normal:>6.3f}")
print(f"    Crisis correlation:  {defensive_crisis_avg:>6.3f}")
print(f"    Absolute change:     {defensive_absolute_change:>+6.3f}")
print(f"    Percentage increase: {defensive_increase_pct:>+6.1f}%")

print(f"\n  📉 DIVERSIFICATION GAP:")
print(f"    Normal regime gap:   {gap_normal:>6.3f}")
print(f"    Crisis regime gap:   {gap_crisis:>6.3f}")
print(f"    Gap compression:     {gap_normal - gap_crisis:>6.3f} ({gap_reduction_pct:.1f}% reduction)")

# Determine finding type.
# The gap has "essentially disappeared" only when its magnitude is small; a
# large negative gap means the ordering reversed, not that the groups converged.
if abs(gap_crisis) < 0.10:
    print(f"\n FINDING: SYSTEMIC CONTAGION DETECTED")
    print(f"   During Bitcoin crashes, defensive equities correlate almost")
    print(f"   as strongly as high-beta equities ({defensive_crisis_avg:.3f} vs {highbeta_crisis_avg:.3f}).")
    print(f"   → Diversification gap collapses from {gap_normal:.3f} to {gap_crisis:.3f}")
    print(f"   → Bitcoin crashes signal MARKET-WIDE liquidity stress")
    finding_type = "SYSTEMIC"
else:
    print(f"\n FINDING: ASYMMETRIC SECTOR-SPECIFIC CONTAGION")
    print(f"   Defensive equities show disproportionate correlation increases")
    print(f"   ({defensive_increase_pct:.1f}% vs {highbeta_increase_pct:.1f}%)")
    print(f"   → Gap narrows but persists: {gap_crisis:.3f}")
    finding_type = "SECTOR-SPECIFIC"

# ==========================================
# 6. COMPREHENSIVE VISUALIZATIONS
# ==========================================

print("\n" + "="*70)
print("PHASE 5: VISUALIZATION")
print("="*70)

fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# Plot 1: Correlation levels during crises
categories = ['High-Beta\nSpeculative', 'Defensive\nLow-Beta']
values = [highbeta_crisis_avg, defensive_crisis_avg]
colors = ['crimson', 'steelblue']

bars = axes[0, 0].bar(categories, values, color=colors, alpha=0.8, edgecolor='black', linewidth=2)
axes[0, 0].set_title('Crisis Correlation Levels:\nStructural Break in Diversification',
                      fontsize=14, fontweight='bold')
axes[0, 0].set_ylabel('Correlation with Bitcoin', fontsize=12)
axes[0, 0].grid(axis='y', alpha=0.3)

# Keep the 0–0.6 window as the default view, but widen it when a bar would
# otherwise be clipped (negative correlation, or one above 0.6).
finite_values = [v for v in values if np.isfinite(v)]
y_upper = max([0.6] + [v * 1.15 for v in finite_values])
y_lower = min([0.0] + [v * 1.15 for v in finite_values])
axes[0, 0].set_ylim(y_lower, y_upper)

for bar in bars:
    height = bar.get_height()
    if not np.isfinite(height):
        continue  # nothing meaningful to label
    # Put the label just outside the bar tip, whichever direction it points.
    below_axis = height < 0
    axes[0, 0].text(bar.get_x() + bar.get_width()/2.,
                    height - 0.01 if below_axis else height + 0.01,
                    f'{height:.3f}', ha='center',
                    va='top' if below_axis else 'bottom',
                    fontsize=12, fontweight='bold')

# Plot 2: Individual stock ranking
correlations_sorted = correlations.sort_values('Change', ascending=True)
colors_map = {'High-Beta': 'crimson', 'Defensive': 'steelblue'}
colors_rank = [colors_map[cat] for cat in correlations_sorted['Category']]

axes[0, 1].barh(correlations_sorted['Stock'], correlations_sorted['Change'],
                color=colors_rank, alpha=0.7, edgecolor='black', linewidth=1.5)
axes[0, 1].set_xlabel('Correlation Change (Crisis - Normal)', fontsize=12, fontweight='bold')
axes[0, 1].set_title('Individual Asset Exposure Ranking', fontsize=14, fontweight='bold')
axes[0, 1].axvline(x=0, color='black', linestyle='-', alpha=0.3, linewidth=1.5)
axes[0, 1].grid(True, alpha=0.3, axis='x')

# Proxy artists: the bars are coloured per row, so the legend is built by hand.
from matplotlib.patches import Patch
legend_elements = [Patch(facecolor='crimson', alpha=0.7, label='High-Beta Speculative'),
                   Patch(facecolor='steelblue', alpha=0.7, label='Defensive Low-Beta')]
axes[0, 1].legend(handles=legend_elements, fontsize=10)

# Plot 3: Regime comparison
x = np.arange(len(categories))
width = 0.35
normal_means = [highbeta_avg_normal, defensive_avg_normal]
crisis_means = [highbeta_crisis_avg, defensive_crisis_avg]

bars1 = axes[1, 0].bar(x - width/2, normal_means, width, label='Normal Regime',
                       color='green', alpha=0.6, edgecolor='black', linewidth=1.5)
bars2 = axes[1, 0].bar(x + width/2, crisis_means, width, label='Crisis Regime',
                       color='red', alpha=0.8, edgecolor='black', linewidth=1.5)

axes[1, 0].set_ylabel('Correlation with Bitcoin', fontsize=12)
axes[1, 0].set_title('Regime-Dependent Correlation Dynamics', fontsize=14, fontweight='bold')
axes[1, 0].set_xticks(x)
axes[1, 0].set_xticklabels(categories, fontsize=11)
axes[1, 0].legend(fontsize=11)
axes[1, 0].grid(True, alpha=0.3, axis='y')

# Separate loop name so the Plot 1 container in `bars` is not overwritten.
for bar_group in (bars1, bars2):
    for bar in bar_group:
        height = bar.get_height()
        if not np.isfinite(height):
            continue
        below_axis = height < 0
        axes[1, 0].annotate(f'{height:.3f}',
                           xy=(bar.get_x() + bar.get_width() / 2, height),
                           xytext=(0, -3 if below_axis else 3),
                           textcoords="offset points",
                           ha='center', va='top' if below_axis else 'bottom',
                           fontsize=10, fontweight='bold')

# Plot 4: Key findings summary
axes[1, 1].axis('off')

# Relative gap reduction is undefined when the normal-regime gap is zero.
summary_gap_reduction = ((gap_normal - gap_crisis) / gap_normal) * 100 if gap_normal else float('nan')

summary_text = f"""
KEY RESEARCH FINDINGS

1. CORRELATION CONVERGENCE
   • High-Beta Normal:    {highbeta_avg_normal:.3f}
   • High-Beta Crisis:    {highbeta_crisis_avg:.3f}
   • Defensive Normal:    {defensive_avg_normal:.3f}
   • Defensive Crisis:    {defensive_crisis_avg:.3f}

2. DIVERSIFICATION GAP
   • Normal regime:       {gap_normal:.3f}
   • Crisis regime:       {gap_crisis:.3f}
   • Gap reduction:       {summary_gap_reduction:.1f}%

3. MARKET REGIMES
   • Crisis days:         {crisis_days} ({crisis_days/len(regime)*100:.1f}%)
   • Normal days:         {normal_days} ({normal_days/len(regime)*100:.1f}%)

4. INTERPRETATION
   {finding_type} CONTAGION

   Bitcoin crashes coincide with periods
   where traditional diversification
   benefits temporarily weaken.

5. IMPLICATION
   Static correlation assumptions may
   underestimate portfolio risk during
   crypto-led liquidity stress events.
"""
axes[1, 1].text(0.1, 0.5, summary_text, fontsize=10.5, verticalalignment='center',
                family='monospace', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))

plt.tight_layout()
plt.show()

print(" Visualizations complete!")

# ==========================================
# 7. PREDICTIVE MODELING (LSTM)
# ==========================================

print("\n" + "="*70)
print("PHASE 6: PREDICTIVE MODELING (Optional Extension)")
print("="*70)

target_stock = 'COIN'

if target_stock in data.columns:
    print(f"\n Building LSTM model to predict {target_stock}...")
    print(f"   (Incorporating Bitcoin volatility as systemic stress indicator)")

    # Feature table: column 0 is the prediction target, the others are inputs.
    # NOTE(review): `data` is back-filled when it is built, so rows before the
    # target's first real quote carry a constant synthetic price — consider
    # trimming them before training.
    df_model = pd.DataFrame()
    df_model['Target'] = data[target_stock]
    df_model['BTC_Price'] = data['BTC-USD']
    df_model['BTC_Volatility'] = returns['BTC-USD'].rolling(window=10).std()

    # Drops the rows at the start where the 10-row rolling std is undefined.
    df_model = df_model.dropna()
    print(f"   Dataset size: {len(df_model)} samples")

    seq_length = 60

    def create_sequences(data, seq_length=60):
        """Slice a 2-D array into overlapping windows for supervised learning.

        Args:
            data: array of shape (rows, features); column 0 is the target.
                (The name shadows the module-level price table inside this
                function only.)
            seq_length: number of consecutive rows in each input window.

        Returns:
            (X, y): X stacks the windows data[i:i+seq_length]; y holds the
            column-0 value of the row immediately after each window.
        """
        X, y = [], []
        for i in range(len(data) - seq_length):
            X.append(data[i:i+seq_length])
            y.append(data[i+seq_length, 0])
        return np.array(X), np.array(y)

    # Work out the chronological split before scaling so the scaler can be
    # fitted on training rows only.
    n_sequences = len(df_model) - seq_length
    train_size = int(n_sequences * 0.8)

    if train_size < 1 or train_size >= n_sequences:
        # Too little history to form at least one training and one test window.
        print(f"   Only {len(df_model)} usable rows; need more than {seq_length} "
              f"for train/test sequences, skipping LSTM modeling")
    else:
        # Training window i uses rows i .. i+seq_length (inputs plus target),
        # so training touches rows [0, train_size + seq_length). Fitting the
        # scaler on just those rows keeps test-period min/max out of training.
        # Test values may therefore fall outside [0, 1].
        scaler = MinMaxScaler()
        scaler.fit(df_model.iloc[:train_size + seq_length])
        scaled_data = scaler.transform(df_model)

        X, y = create_sequences(scaled_data, seq_length=seq_length)
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]

        print(f"   Training samples: {len(X_train)}")
        print(f"   Testing samples: {len(X_test)}")

        # Two stacked LSTM layers with dropout, then a small dense head that
        # emits one scaled target value.
        model = Sequential([
            LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
            Dropout(0.2),
            LSTM(50, return_sequences=False),
            Dropout(0.2),
            Dense(25),
            Dense(1)
        ])

        model.compile(optimizer='adam', loss='mean_squared_error')

        print("\n   Training model...")
        # The held-out test windows double as the validation set, so the
        # reported validation loss and the test loss are the same quantity.
        history = model.fit(X_train, y_train,
                           batch_size=32,
                           epochs=10,
                           validation_data=(X_test, y_test),
                           verbose=1)

        train_loss = model.evaluate(X_train, y_train, verbose=0)
        test_loss = model.evaluate(X_test, y_test, verbose=0)

        print(f"\n LSTM Training Complete!")
        print(f"   Train Loss: {train_loss:.6f}")
        print(f"   Test Loss: {test_loss:.6f}")

        plt.figure(figsize=(10, 6))
        plt.plot(history.history['loss'], label='Training Loss', linewidth=2, color='blue')
        plt.plot(history.history['val_loss'], label='Validation Loss', linewidth=2, color='red')
        plt.title(f'LSTM Model: Predicting {target_stock} with Bitcoin Volatility Features',
                  fontsize=14, fontweight='bold')
        plt.xlabel('Epoch', fontsize=12)
        plt.ylabel('Loss (MSE)', fontsize=12)
        plt.legend(fontsize=11)
        plt.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.show()

else:
    print(f" {target_stock} not available in dataset, skipping LSTM modeling")

# ==========================================
# 8. FINAL SUMMARY
# ==========================================

print("\n" + "="*70)
print("PROJECT SUMMARY")
print("="*70)

# Derive the descriptive figures from the configuration instead of repeating
# literals, so the report cannot drift from the settings actually used.
n_crypto_assets = len(tickers['Crypto'])
n_equity_assets = len(all_tickers) - n_crypto_assets
period_start_label = pd.Timestamp(start_date).strftime('%B %Y')

# Relative gap reduction is undefined when the normal-regime gap is zero.
final_gap_reduction = ((gap_normal - gap_crisis) / gap_normal) * 100 if gap_normal else float('nan')

contagion_scope = 'market-wide' if finding_type == 'SYSTEMIC' else 'asymmetric sector-specific'

print(f"""
 ANALYSIS COMPLETE

DATA:
• Trading days analyzed: {len(data)}
• Assets: {len(all_tickers)} ({n_crypto_assets} crypto + {n_equity_assets} equities)
• Period: {period_start_label} - Present

REGIME CLASSIFICATION:
• Crisis regimes identified: {crisis_days} days ({crisis_days/len(regime)*100:.1f}%)
• Normal regimes: {normal_days} days ({normal_days/len(regime)*100:.1f}%)
• Threshold: Weekly Bitcoin return < {crisis_threshold*100:.0f}% (institutional correction level)

KEY FINDINGS:
• High-Beta equities: {highbeta_avg_normal:.3f} → {highbeta_crisis_avg:.3f} correlation
• Defensive equities: {defensive_avg_normal:.3f} → {defensive_crisis_avg:.3f} correlation
• Diversification gap: {gap_normal:.3f} → {gap_crisis:.3f} ({final_gap_reduction:.1f}% reduction)

INTERPRETATION:
{finding_type} CONTAGION - Bitcoin crashes coincide with {contagion_scope} correlation increases

IMPLICATION:
Static correlation assumptions may underestimate portfolio risk during
crypto-led liquidity stress events, potentially leading to VaR breaches
and unexpected drawdowns in "diversified" portfolios.

 FINAL ANALYSIS:
• High-Beta increase: {highbeta_increase_pct:+.1f}%
• Defensive increase: {defensive_increase_pct:+.1f}%
• Gap compression: {final_gap_reduction:.1f}%
• Finding type: {finding_type}
""")

print("\n PROJECT COMPLETE!")