In [11]:
import numpy as np
import pandas as pd
from hmmlearn.hmm import GaussianHMM
import yfinance as yf
import matplotlib.pyplot as plt

def fetch_stock_data(ticker, start_date, end_date):
    data = yf.download(ticker, start=start_date, end=end_date)
    data['Returns'] = data['Close'].pct_change()
    data['Log Returns'] = np.log(1 + data['Returns'])
    return data.dropna()

def prepare_features(data):
    # Features: Log Returns and Volatility (rolling standard deviation)
    data['Volatility'] = data['Log Returns'].rolling(window=5).std()
    print("Columns after data preprocessing:", data.columns)  # Check columns here

    features = data[['Log Returns', 'Volatility']].dropna().values
    # Drop rows with NaN values in original data for alignment
    return data.dropna(subset=['Log Returns', 'Volatility']), features

def train_hmm(features, n_states):
    model = GaussianHMM(n_components=n_states, covariance_type='diag', n_iter=1000, random_state=42)
    model.fit(features)
    hidden_states = model.predict(features)
    return model, hidden_states

def plot_hidden_states(data, hidden_states, n_states):
    plt.figure(figsize=(15, 8))
    for state in range(n_states):
        state_data = data[hidden_states == state]
        plt.plot(state_data.index, state_data['Close'], '.', label=f"State {state}")
    plt.title("Stock Price by Hidden States")
    plt.xlabel("Date")
    plt.ylabel("Close Price")
    plt.legend()
    plt.show()

def simulate_trading(data, hidden_states):
    data['State'] = hidden_states
    data['Signal'] = data['State'].apply(lambda x: 1 if x == 0 else -1)  # Buy in state 0, sell in others
    data['Strategy Returns'] = data['Signal'].shift(1) * data['Returns']
    cumulative_strategy_returns = (1 + data['Strategy Returns'].dropna()).cumprod()
    cumulative_market_returns = (1 + data['Returns'].dropna()).cumprod()
    
    plt.figure(figsize=(15, 8))
    plt.plot(cumulative_strategy_returns, label="Strategy Returns")
    plt.plot(cumulative_market_returns, label="Market Returns")
    plt.legend()
    plt.title("Trading Strategy vs. Market")
    plt.show()


In [12]:

ticker = "AAPL"
start_date = "2015-01-01"
end_date = "2023-12-31"

# Fetch and preprocess data
stock_data = fetch_stock_data(ticker, start_date, end_date)
features = prepare_features(stock_data)

# Train HMM and predict hidden states
n_states = 2
hmm_model, hidden_states = train_hmm(features, n_states)

# Visualize hidden states
plot_hidden_states(stock_data, hidden_states, n_states)

# Simulate trading strategy
simulate_trading(stock_data, hidden_states)

[*********************100%***********************]  1 of 1 completed

Columns after data preprocessing: MultiIndex([(      'Close', 'AAPL'),
            (       'High', 'AAPL'),
            (        'Low', 'AAPL'),
            (       'Open', 'AAPL'),
            (     'Volume', 'AAPL'),
            (    'Returns',     ''),
            ('Log Returns',     ''),
            ( 'Volatility',     '')],
           names=['Price', 'Ticker'])





KeyError: ['Log Returns', 'Volatility']