In [1]:
import numpy as np
import pandas as pd
from data import Stock
from analysis import Analysis
from hidden_markov_model import Hidden_Markov_Model

In [2]:
# Create the Stock object for ticker 'JTOPI' and attach the raw CSV contents as its series data.
sa40 = Stock('JTOPI')
sa40.series_data = pd.read_csv('South_Africa_Top_40_Historical_Data.csv')
# preprocess() lives in the project's data module; presumably it cleans/parses the raw columns — TODO confirm.
sa40.preprocess()

# add_log_return() is expected to add the 'Log_Return' column that the model cell reads — confirm in the analysis module.
analysis = Analysis()
analysis.add_log_return(sa40)
# No `subset` is given, so a NaN in ANY column removes the row; the frame is mutated in place.
sa40.series_data.dropna(inplace=True)

In [None]:
# Build the model from the 'Log_Return' column converted to a NumPy array.
# NOTE(review): the second argument (4) is presumably the number of hidden states — confirm against Hidden_Markov_Model.
model = Hidden_Markov_Model(sa40.series_data['Log_Return'].to_numpy(), 4)
# NOTE(review): 0.001 and 100 look like a convergence tolerance and an iteration cap — confirm the parameter order.
# The call prints one log-likelihood line per iteration (see the captured output below).
model.baum_welch_algorithm(0.001, 100)

Iteration 1 . . . Log-Likelihood = 3829.6423359948685
Iteration 2 . . . Log-Likelihood = 3829.833415894259
Iteration 3 . . . Log-Likelihood = 3833.085002321147
Iteration 4 . . . Log-Likelihood = 3859.128015648992
Iteration 5 . . . Log-Likelihood = 3907.61710031364
Iteration 6 . . . Log-Likelihood = 3934.924241010514
Iteration 7 . . . Log-Likelihood = 3945.978054219807


In [None]:
# Last expression in the cell, so whatever get_parameters() returns is shown as the cell output.
model.get_parameters()

In [None]:
# Pull two arrays out of the fitted model; presumably one mean and one variance per hidden state — TODO confirm.
mean_array, variance_array = model.get_mean_variance()

# plot_distributions() is defined in the project's analysis module; its exact output is not visible from this notebook.
analysis.plot_distributions(mean_array, variance_array)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

def plot_means_vs_variances(mean_array, variance_array):
    """Scatter each state's mean against its variance, one labelled point per state.

    Parameters
    ----------
    mean_array : sequence of float
        One mean per state; the value at position ``i`` is labelled ``State i``.
    variance_array : sequence of float
        One variance per state, in the same order as ``mean_array``.

    Raises
    ------
    ValueError
        If ``mean_array`` and ``variance_array`` differ in length.
    """
    num_points = len(mean_array)
    if len(variance_array) != num_points:
        raise ValueError(
            f"mean_array has {num_points} entries but variance_array has "
            f"{len(variance_array)}; they must be the same length"
        )

    # tab10 only defines ten colours, so wrap the index for any further states.
    colors = plt.cm.tab10(np.arange(num_points) % 10)

    # Explicit Figure/Axes objects (rather than the implicit pyplot state)
    # keep this function consistent with the notebook's other plotting code.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(mean_array, variance_array, c=colors, s=100, edgecolors='black')

    for i, (mu, var) in enumerate(zip(mean_array, variance_array)):
        ax.text(mu, var, f'State {i}', fontsize=9, ha='right', va='bottom')

    ax.set_title("Means vs Variances of Hidden States")
    ax.set_xlabel("Mean ($\\mu$)")
    ax.set_ylabel("Variance ($\\sigma^2$)")
    ax.grid(True)
    fig.tight_layout()
    plt.show()

# Draw the scatter for the arrays obtained from model.get_mean_variance() in the earlier cell.
plot_means_vs_variances(mean_array, variance_array)

In [None]:
# Decode a state sequence from the fitted model; presumably one state index per observation — TODO confirm.
state_sequence = model.viterbi_algorithm()

In [None]:
# Bare expression: the notebook displays the decoded sequence as this cell's output.
# NOTE(review): this shows the whole sequence; a slice may be preferable if it is long.
state_sequence

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates

def _state_runs(state_sequence):
    """Return ``(first_index, state)`` for every run of equal consecutive states."""
    runs = []
    for t in range(len(state_sequence)):
        if t == 0 or state_sequence[t] != state_sequence[t - 1]:
            runs.append((t, state_sequence[t]))
    return runs


def plot_price_with_states(stock, column, state_sequence):
    """Plot ``column`` against ``Date`` and shade the background by state.

    Consecutive equal entries of ``state_sequence`` are grouped into runs.
    Each run is shaded from the date of its first observation up to the date
    of the first observation of the next run, so adjacent runs tile the axis
    without gaps and a run of a single observation stays visible. The last
    run is shaded up to the final row of the data.

    Parameters
    ----------
    stock : object
        Must expose ``series_data`` (a DataFrame containing ``column`` and a
        ``'Date'`` column) and ``ticker`` (used in the title).
    column : str
        Name of the column drawn as the black line.
    state_sequence : sequence of int
        State index for each row of ``stock.series_data``, in row order.
        An empty sequence draws the line with no shading.

    Raises
    ------
    ValueError
        If ``state_sequence`` has more entries than there are data rows.
    """
    colours = ['red', 'green', 'yellow', 'blue']
    # Re-index to 0..n-1 so positions in state_sequence line up with rows.
    stock_data = stock.series_data[[column, 'Date']].reset_index(drop=True)
    dates = stock_data['Date']

    if len(state_sequence) > len(stock_data):
        raise ValueError(
            f"state_sequence has {len(state_sequence)} entries but only "
            f"{len(stock_data)} rows of data are available"
        )

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(dates, stock_data[column], label=column, color='black')

    runs = _state_runs(state_sequence)
    last_row = len(stock_data) - 1
    labelled_states = set()  # states that already own a legend entry

    for run_number, (first, state) in enumerate(runs):
        # Plain int so float-typed state arrays can still index the colour list.
        state = int(state)
        if run_number + 1 < len(runs):
            # End where the next run begins; this closes the gap between spans.
            span_end = runs[run_number + 1][0]
        else:
            span_end = last_row

        # Label only the first span of each state to avoid duplicate legend rows.
        label = None
        if state not in labelled_states:
            label = f"State {state}"
            labelled_states.add(state)

        # Modulo lets more states than colours reuse the palette.
        ax.axvspan(dates.iloc[first], dates.iloc[span_end],
                   color=colours[state % len(colours)], alpha=0.2, label=label)

    ax.set_title(f"{stock.ticker} {column} with Hidden States")
    ax.set_xlabel("Date")
    ax.set_ylabel(column)
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    plt.show()


In [None]:
# Overlay the decoded state sequence on the 'Price' column of sa40.
plot_price_with_states(sa40, 'Price', state_sequence)