In [None]:
!pip install hmmlearn

Collecting hmmlearn
  Downloading hmmlearn-0.3.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Downloading hmmlearn-0.3.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (165 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m166.0/166.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: hmmlearn
Successfully installed hmmlearn-0.3.3


In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from hmmlearn import hmm

# --- Configuration ---
# Tickers
TICKERS = ['SPY', '^VIX']
# Date Range (Adjust as needed)
START_DATE = '2015-01-01'
END_DATE = '2024-01-01'
# HMM Parameters
N_COMPONENTS = 3 # Number of hidden states (regimes)

# 1. Download Data
# auto_adjust is passed explicitly so the prices used do not depend on which
# default the installed yfinance version happens to ship with.
close_prices = yf.download(
    TICKERS, start=START_DATE, end=END_DATE, auto_adjust=True
)['Close']

# 2. Feature Engineering
# Columns are renamed by ticker name (not by position) so a change in the
# column order returned by the download cannot silently swap SPY and VIX.
# Selecting ['SPY', 'VIX'] afterwards fails loudly if either series is missing.
# Rows with a missing price are dropped *before* computing percentage changes,
# and fill_method=None disables implicit forward-filling, so every change is
# computed between two genuinely observed consecutive rows.
data = (
    close_prices
    .rename(columns={'^VIX': 'VIX'})
    .loc[:, ['SPY', 'VIX']]
    .dropna()
    .assign(
        # SPY returns are key for direction/magnitude of movement
        SPY_Returns=lambda df: df['SPY'].pct_change(fill_method=None) * 100,
        # VIX change is a good measure of increasing/decreasing fear/volatility
        VIX_Change=lambda df: df['VIX'].pct_change(fill_method=None) * 100,
    )
    # Drop the first row, which contains NaN from the pct_change calculation
    .dropna()
)

# 3. Prepare Observation Data (X)
# We combine the two features: SPY Returns and VIX Change
# The HMM input (X) must be a 2D numpy array
X = data[['SPY_Returns', 'VIX_Change']].values

print(f"Data shape (Samples, Features): {X.shape}")

  data = yf.download(TICKERS, start=START_DATE, end=END_DATE)['Close']
[*********************100%***********************]  2 of 2 completed

Data shape (Samples, Features): (2263, 2)





In [None]:
# 4. Initialize and Train the HMM
# The HMM's parameter initialisation is random, so without a fixed seed the
# regime numbering and the fitted parameters can change from run to run.
RANDOM_STATE = 42

# GaussianHMM is suitable for continuous data
model = hmm.GaussianHMM(
    n_components=N_COMPONENTS,
    covariance_type="full", # Allows covariance between SPY_Returns and VIX_Change
    n_iter=100,
    random_state=RANDOM_STATE
)

# Fit the model to the observation data
print("\nTraining HMM...")
model.fit(X)
print("Training complete.")

# EM stops after n_iter iterations even if it has not converged; surface that
# instead of silently analysing a half-fitted model.
if not model.monitor_.converged:
    print("WARNING: EM did not converge within n_iter iterations; consider increasing n_iter.")

# 5. Predict the Hidden States
# Predict the most likely sequence of hidden states (regimes)
hidden_states = model.predict(X)

# Add the states back to the original DataFrame for analysis
data['Regime'] = hidden_states


Training HMM...
Training complete.


In [None]:
# 6. Analyze the Regimes
# Per-regime mean of each feature, computed over the days assigned to that
# regime by the decoded state sequence.
print("\n--- Regime Analysis (Mean of Features) ---")
regime_analysis = data.groupby('Regime')[['SPY_Returns', 'VIX_Change']].mean()
print(regime_analysis)

# A state that the decoded sequence never visits has no row in regime_analysis.
# Reindexing over every state id lets the per-state lookups below return NaN
# for such a state instead of raising KeyError.
regime_mean_returns = regime_analysis['SPY_Returns'].reindex(range(N_COMPONENTS))

# 7. Directional Prediction Insight
# Identify the most "Bullish" and "Bearish" regimes
# Sort by SPY_Returns to find the order (only regimes that were actually
# observed take part, so an unvisited state can never be labelled bearish).
sorted_regimes = regime_analysis.sort_values(by='SPY_Returns', ascending=False)

print("\n--- Directional Insight ---")
print(f"The most **Bullish Regime** (highest positive SPY Return) is Regime **{sorted_regimes.index[0]}**")
print(f"The most **Bearish Regime** (highest negative SPY Return) is Regime **{sorted_regimes.index[-1]}**")

# 8. Making a Forward Prediction (The core step)
# To predict the next day's regime, you use the HMM's transition matrix and the current state.
# Let's get the state of the *last* day in the dataset
current_regime = data['Regime'].iloc[-1]
transition_matrix = model.transmat_

# Get the transition probabilities from the current regime to all others
# (row = state today, column = state tomorrow)
next_state_probs = transition_matrix[current_regime]

print("\n--- Next Day Regime Probability ---")
print(f"Current Regime (Today's Close): **Regime {current_regime}**")
print("Probability of transitioning to each regime tomorrow:")
for i in range(N_COMPONENTS):
    # Lookup the mean SPY return for this predicted state (NaN if never observed)
    mean_return = regime_mean_returns.loc[i]
    print(f"  - Regime {i}: {next_state_probs[i]:.2f} (Avg SPY Return: {mean_return:.2f}%)")

# The regime with the highest probability is your prediction
predicted_next_regime = np.argmax(next_state_probs)
predicted_return = regime_mean_returns.loc[predicted_next_regime]

print(f"\n**Prediction:** The most likely regime for tomorrow is **Regime {predicted_next_regime}** with an expected average SPY return of **{predicted_return:.2f}%**.")


--- Regime Analysis (Mean of Features) ---
        SPY_Returns  VIX_Change
Regime                         
0          0.107331   -0.184165
1          0.062983   -0.368293
2         -1.013577   14.237379

--- Directional Insight ---
The most **Bullish Regime** (highest positive SPY Return) is Regime **0**
The most **Bearish Regime** (highest negative SPY Return) is Regime **2**

--- Next Day Regime Probability ---
Current Regime (Today's Close): **Regime 0**
Probability of transitioning to each regime tomorrow:
  - Regime 0: 0.97 (Avg SPY Return: 0.11%)
  - Regime 1: 0.00 (Avg SPY Return: 0.06%)
  - Regime 2: 0.03 (Avg SPY Return: -1.01%)

**Prediction:** The most likely regime for tomorrow is **Regime 0** with an expected average SPY return of **0.11%**.
