In [None]:
import os
import re
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from hmmlearn.hmm import GaussianHMM
from sklearn.preprocessing import StandardScaler

In [2]:
# Input and output locations, given relative to the current working directory.
DATA_SUGGESTIONS = Path("data/suggestions")
OUTPUT_DIR = Path("output/gaussian_hmm")

# Create both directories (including parents) if they are not there yet.
DATA_SUGGESTIONS.mkdir(parents=True, exist_ok=True)
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

In [3]:
def latest_file_in_directory(directory=DATA_SUGGESTIONS):
    """Return the name of the latest trade-suggestions CSV in ``directory``.

    "Latest" is the lexicographically greatest file name among the entries
    that start with ``trade_suggestions_`` and end with ``.csv``; file
    modification times are not consulted.
    NOTE(review): this presumably relies on the part after the prefix sorting
    chronologically (e.g. a zero-padded date) — confirm against the exporter.

    Parameters
    ----------
    directory : str or path-like
        Directory to scan (non-recursive).

    Returns
    -------
    str
        The bare file name, not a full path.

    Raises
    ------
    ValueError
        If the directory contains no matching file.
    """
    candidates = [
        name for name in os.listdir(directory)
        if name.startswith("trade_suggestions_") and name.endswith(".csv")
    ]
    if not candidates:
        # A bare max() over nothing raises "max() arg is an empty sequence",
        # which says nothing about which directory or pattern was searched.
        raise ValueError(
            f"No 'trade_suggestions_*.csv' files found in {str(directory)!r}"
        )
    return max(candidates)

In [20]:
# Load the file chosen by latest_file_in_directory and preview the first rows.
latest_file = latest_file_in_directory(DATA_SUGGESTIONS)
csv_path = os.path.join(DATA_SUGGESTIONS, latest_file)
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,ticker,price,ma50,mean_ret_fwd_1d,mean_ret_fwd_5d,mean_ret_fwd_20d,appearances,appearances_persistent,avg_HotScore,rule_score
0,AAOI,45.23,31.8185,-0.311142,-10.595525,-5.102458,10,0.0,0.0,2
1,ALGM,34.549999,28.3769,1.319282,-0.56294,19.665489,7,0.0,0.0,2
2,ALKT,22.41,21.7686,-1.175693,-0.111159,-1.110581,4,0.0,0.0,2
3,ALM,12.11,8.2284,1.555871,9.710901,16.216566,12,0.0,0.0,4
4,APLS,23.07,22.9265,-0.166945,-3.795587,-2.074871,5,0.0,0.0,2


In [25]:
# Missing-value count for each column that is later used as an HMM feature.
df.loc[:, ["mean_ret_fwd_1d", "price", "avg_HotScore"]].isna().sum()


mean_ret_fwd_1d    5
price              0
avg_HotScore       0
dtype: int64

In [26]:
# Columns used as the HMM observation vector.
feature_cols = ["mean_ret_fwd_1d", "price", "avg_HotScore"]

# Keep only rows where every feature is present; copy so that columns added
# to df_hmm later do not touch df.
df_hmm = df.dropna(subset=feature_cols).copy()

# Standardize each feature column before fitting.
X = StandardScaler().fit_transform(df_hmm[feature_cols].to_numpy())

In [27]:
# Fit a 3-state Gaussian HMM on the standardized features and label every row
# of df_hmm with its predicted hidden state.
# (GaussianHMM is imported in the notebook's top import cell so a missing
# hmmlearn install fails immediately rather than after the data is loaded.)
#
# NOTE(review): fit/predict are called without `lengths`, so all rows of X are
# treated as a single ordered sequence. The rows shown above look like
# per-ticker records in file order — confirm that this ordering is intended.
hmm = GaussianHMM(
    n_components=3,          # number of hidden states ("regimes")
    covariance_type="full",  # one full covariance matrix per state
    n_iter=300,              # upper bound on EM iterations
    random_state=42,         # fixed seed so repeated runs give the same fit
)

hmm.fit(X)
df_hmm["regime"] = hmm.predict(X)


In [28]:
# Preview the first five rows, now including the `regime` column.
df_hmm.head(5)

Unnamed: 0,ticker,price,ma50,mean_ret_fwd_1d,mean_ret_fwd_5d,mean_ret_fwd_20d,appearances,appearances_persistent,avg_HotScore,rule_score,regime
0,AAOI,45.23,31.8185,-0.311142,-10.595525,-5.102458,10,0.0,0.0,2,1
1,ALGM,34.549999,28.3769,1.319282,-0.56294,19.665489,7,0.0,0.0,2,0
2,ALKT,22.41,21.7686,-1.175693,-0.111159,-1.110581,4,0.0,0.0,2,1
3,ALM,12.11,8.2284,1.555871,9.710901,16.216566,12,0.0,0.0,4,0
4,APLS,23.07,22.9265,-0.166945,-3.795587,-2.074871,5,0.0,0.0,2,1


In [29]:
# Per-regime averages of the return and signal columns, rounded to 3 decimals.
(
    df_hmm
    .groupby("regime")[
        [
            "mean_ret_fwd_1d",
            "mean_ret_fwd_5d",
            "mean_ret_fwd_20d",
            "avg_HotScore",
            "appearances",
            "rule_score",
        ]
    ]
    .mean()
    .round(3)
)


Unnamed: 0_level_0,mean_ret_fwd_1d,mean_ret_fwd_5d,mean_ret_fwd_20d,avg_HotScore,appearances,rule_score
regime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,1.213,1.654,10.631,0.0,7.833,2.889
1,0.302,0.966,14.144,0.0,8.381,3.238
2,1.279,6.977,30.744,0.788,25.3,5.6


In [32]:
# Regime ids are cast to strings so they are coloured as discrete categories
# instead of on a continuous scale.
regime_labels = df_hmm["regime"].astype(str)

fig = px.scatter(
    df_hmm,
    x="avg_HotScore",
    y="mean_ret_fwd_5d",
    color=regime_labels,
    size="appearances",      # marker area follows the appearance count
    hover_name="ticker",
    title="Signal Regimes: HotScore vs 5D Forward Return",
)
fig.show()



In [38]:
# Number of tickers shown in the heatmap.
TOP_N = 30

# Columns that become the heatmap's x-axis.
heat_cols = [
    "mean_ret_fwd_1d",
    "mean_ret_fwd_5d",
    "mean_ret_fwd_20d",
    "avg_HotScore",
    "appearances",
]

# Rank by avg_HotScore (highest first), keep the first TOP_N rows and index
# them by ticker so the ticker symbols can label the heatmap rows.
ranked = df_hmm.sort_values(by="avg_HotScore", ascending=False)
top = ranked.iloc[:TOP_N].set_index("ticker")

# Standardize each column over the selected rows only, so the columns share
# a comparable scale.
heat_scaler = StandardScaler()
Z = heat_scaler.fit_transform(top[heat_cols])


In [39]:
# Heatmap of the standardized signal columns for the TOP_N tickers.
# Z holds standardized (signed, zero-centred) values, so a diverging
# colorscale is paired with zmid=0; the sequential "Blues" scale used before
# made it hard to tell above-average from below-average cells.
fig = go.Figure(
    data=go.Heatmap(
        z=Z,
        x=heat_cols,
        y=top.index.tolist(),
        colorscale="RdBu",
        zmid=0,
        colorbar=dict(title="Standardized Value"),
    )
)

fig.update_layout(
    title=f"Top {TOP_N} Tickers – Signal Heatmap",
    xaxis_title="Features",
    yaxis_title="Ticker",
    # go.Heatmap places the first row of z at the bottom; reverse the axis so
    # the first (highest avg_HotScore) ticker appears at the top.
    yaxis_autorange="reversed",
    template="plotly_white",
    height=800,
)

fig.show()
