In [3]:
import os
import pandas as pd
import numpy as np
import glob
import re
from pathlib import Path

from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import classification_report, roc_auc_score
from xgboost import XGBClassifier

In [4]:
DATA_DAILY = Path("data/daily")
HOT_SCORE_OUT = Path("data/hotscore")
OUTPUT_DIR = Path("output/classification")

for p in (DATA_DAILY, HOT_SCORE_OUT, OUTPUT_DIR):
    p.mkdir(parents=True, exist_ok=True)

In [5]:
def extract_timestamp(filename):
    ts = re.search(r'_(\d{14})', filename).group(1)
    return pd.to_datetime(ts, format='%Y%m%d%H%M%S')

In [None]:
hot_files = sorted(glob.glob(str(DATA_DAILY / "hot_stocks_*.csv")))

hot_dfs = []
for f in hot_files:
    df = pd.read_csv(f)
    df['snapshot_time'] = extract_timestamp(f)
    hot_dfs.append(df)

hot_data = pd.concat(hot_dfs, ignore_index=True)

print(hot_data.shape)

(40172, 13)


Unnamed: 0,symbol,regularMarketPrice,regularMarketChangePercent,regularMarketVolume,averageDailyVolume3Month,marketCap,VolumeSpike,MomentumScore,VolumeScore,VolatilityScore,TrendScore,HotScore,snapshot_time
0,URBN,77.07,12.823895,8497781.0,2098090.0,6913018000.0,4.050246,0.986979,0.994792,0.921875,0.804688,0.958464,2025-11-26 20:46:26
1,SYM,87.51,13.222928,5076274.0,2168859.0,51875260000.0,2.340527,0.989583,0.973958,0.940104,0.820312,0.957292,2025-11-26 20:46:26
2,ARWR,58.675,25.400724,5782060.0,2311350.0,8112262000.0,2.501594,1.0,0.976562,0.955729,0.734375,0.95638,2025-11-26 20:46:26
3,DUOL,188.24,7.100598,2265762.0,1926600.0,8701837000.0,1.176042,0.919271,0.880208,0.960938,0.901042,0.912109,2025-11-26 20:46:26
4,VERA,33.48,13.761466,2616403.0,1539957.0,2140312000.0,1.69901,0.994792,0.942708,0.807292,0.697917,0.909375,2025-11-26 20:46:26


In [None]:
score_files = sorted(glob.glob(str(HOT_SCORE_OUT / "hotscore_*.csv")))

score_dfs = []
for f in score_files:
    df = pd.read_csv(f)
    df['score_time'] = extract_timestamp(f)
    score_dfs.append(df)


score_data = pd.concat(score_dfs, ignore_index=True)


print(score_data.shape)

(12682868, 9)


Unnamed: 0,symbol,date,HotScore,regularMarketPrice,regularMarketChangePercent,VolumeSpike,averageDailyVolume3Month,marketCap,score_time
0,AA,2025-11-26 20:46:26,0.794401,41.845,6.74745,0.940394,6727448.0,10836350000.0,2025-11-26 20:46:38
1,AAUC,2025-11-26 20:46:26,0.846094,19.18,9.788214,1.052893,342331.0,2378320000.0,2025-11-26 20:46:38
2,ALAB,2025-11-26 20:46:26,0.773307,156.16,7.860205,0.638383,6266829.0,26375990000.0,2025-11-26 20:46:38
3,ANF,2025-11-26 20:46:26,0.905599,96.205,6.610155,1.88349,2070173.0,4583525000.0,2025-11-26 20:46:38
4,ARWR,2025-11-26 20:46:26,0.95638,58.675,25.400724,2.501594,2311350.0,8112262000.0,2025-11-26 20:46:38


In [8]:
hot_data = hot_data.sort_values(['snapshot_time', 'symbol']).reset_index(drop=True)
score_data = score_data.sort_values(['score_time', 'symbol']).reset_index(drop=True)

merged = pd.merge_asof(
    hot_data,
    score_data,
    by='symbol',
    left_on='snapshot_time',
    right_on='score_time',
    direction='forward'
)

merged.head()

Unnamed: 0,symbol,regularMarketPrice_x,regularMarketChangePercent_x,regularMarketVolume,averageDailyVolume3Month_x,marketCap_x,VolumeSpike_x,MomentumScore,VolumeScore,VolatilityScore,...,HotScore_x,snapshot_time,date,HotScore_y,regularMarketPrice_y,regularMarketChangePercent_y,VolumeSpike_y,averageDailyVolume3Month_y,marketCap_y,score_time
0,AA,41.845,6.74745,6326454.0,6727448.0,10836350000.0,0.940394,0.903646,0.802083,0.726562,...,0.794401,2025-11-26 20:46:26,2025-11-26 20:46:26,0.794401,41.845,6.74745,0.940394,6727448.0,10836350000.0,2025-11-26 20:46:38
1,AAUC,19.18,9.788214,360438.0,342331.0,2378320000.0,1.052893,0.96875,0.848958,0.622396,...,0.846094,2025-11-26 20:46:26,2025-11-26 20:46:26,0.846094,19.18,9.788214,1.052893,342331.0,2378320000.0,2025-11-26 20:46:38
2,ALAB,156.16,7.860205,4000639.0,6266829.0,26375990000.0,0.638383,0.942708,0.518229,0.947917,...,0.773307,2025-11-26 20:46:26,2025-11-26 20:46:26,0.773307,156.16,7.860205,0.638383,6266829.0,26375990000.0,2025-11-26 20:46:38
3,ANF,96.205,6.610155,3899150.0,2070173.0,4583525000.0,1.88349,0.898438,0.958333,0.864583,...,0.905599,2025-11-26 20:46:26,2025-11-26 20:46:26,0.905599,96.205,6.610155,1.88349,2070173.0,4583525000.0,2025-11-26 20:46:38
4,ARWR,58.675,25.400724,5782060.0,2311350.0,8112262000.0,2.501594,1.0,0.976562,0.955729,...,0.95638,2025-11-26 20:46:26,2025-11-26 20:46:26,0.95638,58.675,25.400724,2.501594,2311350.0,8112262000.0,2025-11-26 20:46:38


In [9]:
# 1. Rename snapshot feature columns (_x ‚Üí clean names)
merged = merged.rename(columns={
    'regularMarketPrice_x': 'regularMarketPrice',
    'regularMarketChangePercent_x': 'regularMarketChangePercent',
    'VolumeSpike_x': 'VolumeSpike',
    'averageDailyVolume3Month_x': 'averageDailyVolume3Month',
    'marketCap_x': 'marketCap',
    'HotScore_y': 'HotScore_future'
})

# 2. Drop leaky / duplicated columns
drop_cols = [
    'HotScore_x',
    'regularMarketPrice_y',
    'regularMarketChangePercent_y',
    'VolumeSpike_y',
    'averageDailyVolume3Month_y',
    'marketCap_y',
    'date'
]

merged = merged.drop(columns=[c for c in drop_cols if c in merged.columns])

# 3. Drop rows without a future label
merged = merged.dropna(subset=['HotScore_future'])
merged.head()

Unnamed: 0,symbol,regularMarketPrice,regularMarketChangePercent,regularMarketVolume,averageDailyVolume3Month,marketCap,VolumeSpike,MomentumScore,VolumeScore,VolatilityScore,TrendScore,snapshot_time,HotScore_future,score_time
0,AA,41.845,6.74745,6326454.0,6727448.0,10836350000.0,0.940394,0.903646,0.802083,0.726562,0.520833,2025-11-26 20:46:26,0.794401,2025-11-26 20:46:38
1,AAUC,19.18,9.788214,360438.0,342331.0,2378320000.0,1.052893,0.96875,0.848958,0.622396,0.854167,2025-11-26 20:46:26,0.846094,2025-11-26 20:46:38
2,ALAB,156.16,7.860205,4000639.0,6266829.0,26375990000.0,0.638383,0.942708,0.518229,0.947917,0.723958,2025-11-26 20:46:26,0.773307,2025-11-26 20:46:38
3,ANF,96.205,6.610155,3899150.0,2070173.0,4583525000.0,1.88349,0.898438,0.958333,0.864583,0.828125,2025-11-26 20:46:26,0.905599,2025-11-26 20:46:38
4,ARWR,58.675,25.400724,5782060.0,2311350.0,8112262000.0,2.501594,1.0,0.976562,0.955729,0.734375,2025-11-26 20:46:26,0.95638,2025-11-26 20:46:38


In [10]:
threshold = merged['HotScore_future'].quantile(0.90)
merged['target_hot'] = (merged['HotScore_future'] >= threshold).astype(int)

merged['target_hot'].value_counts(normalize=True)

features = [
    'regularMarketPrice',
    'regularMarketChangePercent',
    'VolumeSpike',
    'MomentumScore',
    'VolumeScore',
    'VolatilityScore',
    'TrendScore'
]

X = merged[features]
y = merged['target_hot']

In [11]:
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import classification_report, roc_auc_score
from xgboost import XGBClassifier
 
tscv = TimeSeriesSplit(n_splits=5)

model = XGBClassifier(
    n_estimators=300,
    max_depth=5,
    learning_rate=0.05,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric='logloss',
    random_state=42
)

for fold, (train_idx, test_idx) in enumerate(tscv.split(X), 1):
    X_train, X_test = X.iloc[train_idx].copy(), X.iloc[test_idx].copy()
    y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

    # clean infinities / NaNs
    X_train.replace([np.inf, -np.inf], np.nan, inplace=True)
    X_test.replace([np.inf, -np.inf], np.nan, inplace=True)
    X_train.fillna(0, inplace=True)
    X_test.fillna(0, inplace=True)

    # optional log-transform for large features
    for col in ['marketCap', 'regularMarketVolume']:
        if col in X_train.columns:
            X_train[col] = np.log1p(X_train[col])
            X_test[col] = np.log1p(X_test[col])

    # train
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    probs = model.predict_proba(X_test)[:, 1]

    print(f"Fold {fold} Classification Report:")
    print(classification_report(y_test, preds))
    auc = roc_auc_score(y_test, probs)
    print(f"Fold {fold} ROC AUC: {auc:.4f}\n")


Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       0.94      0.79      0.86      5977
           1       0.24      0.58      0.33       681

    accuracy                           0.76      6658
   macro avg       0.59      0.68      0.60      6658
weighted avg       0.87      0.76      0.80      6658

Fold 1 ROC AUC: 0.7635

Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.95      0.93      5908
           1       0.36      0.21      0.27       750

    accuracy                           0.87      6658
   macro avg       0.63      0.58      0.60      6658
weighted avg       0.84      0.87      0.85      6658

Fold 2 ROC AUC: 0.7653

Fold 3 Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.97      0.95      6177
           1       0.19      0.09      0.13       481

    accuracy                           0.91 

In [12]:
# 1Ô∏è‚É£ Compute hot probabilities
merged['hot_probability'] = model.predict_proba(X)[:, 1]

# 2Ô∏è‚É£ Optional: remove duplicate symbols per snapshot if needed
signals = (
    merged.sort_values(['snapshot_time', 'symbol', 'hot_probability'], ascending=[True, True, False])
    .drop_duplicates(subset=['snapshot_time', 'symbol'], keep='first')
)

# 3Ô∏è‚É£ Keep only relevant columns for inspection
all_signals = signals[['snapshot_time', 'symbol', 'hot_probability']]

# 4Ô∏è‚É£ Show all (or first 50 rows for sanity)
all_signals.head()

Unnamed: 0,snapshot_time,symbol,hot_probability
0,2025-11-26 20:46:26,AA,0.002915
1,2025-11-26 20:46:26,AAUC,0.00354
2,2025-11-26 20:46:26,ALAB,0.061764
3,2025-11-26 20:46:26,ANF,0.501452
4,2025-11-26 20:46:26,ARWR,0.973248


In [None]:
import plotly.graph_objects as go
import matplotlib.cm as cm
import matplotlib.colors as colors 
import plotly.express as px

In [None]:
all_signals['snapshot_time'] = pd.to_datetime(all_signals['snapshot_time'])

# Optional: pick top 50 symbols by max probability for readability
top_symbols = (
    all_signals.groupby('symbol')['hot_probability']
    .max()
    .sort_values(ascending=False)
    .head(50)
    .index
)

df_chart = all_signals[all_signals['symbol'].isin(top_symbols)]

heatmap_df = df_chart.pivot(index='symbol', columns='snapshot_time', values='hot_probability')

# Plot interactive heatmap
fig = px.imshow(
    heatmap_df,
    labels=dict(x="Snapshot Time", y="Symbol", color="Hot Probability"),
    aspect="auto",
    color_continuous_scale="YlOrRd",
    text_auto=True
)

fig.update_layout(
    title="AI Hot-Stock Probability Heatmap",
    xaxis_nticks=20,
    yaxis={'categoryorder':'total ascending'}
)


chart_path = os.path.join(OUTPUT_DIR, f"superchart-1.0.html")
fig.write_html(chart_path, include_plotlyjs='cdn')




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
all_signals['snapshot_time'] = pd.to_datetime(all_signals['snapshot_time'])

# Select top 20 symbols by max probability for clarity
top_symbols = (
    all_signals.groupby('symbol')['hot_probability']
    .max()
    .sort_values(ascending=False)
    .head(20)
    .index
)

df_chart = all_signals[all_signals['symbol'].isin(top_symbols)]

# Create figure
fig = go.Figure()

# Add a line for each symbol
for symbol in top_symbols:
    df_sym = df_chart[df_chart['symbol'] == symbol].sort_values('snapshot_time')
    fig.add_trace(go.Scatter(
        x=df_sym['snapshot_time'],
        y=df_sym['hot_probability'],
        mode='lines+markers',
        name=symbol,
        line=dict(width=2),
        marker=dict(size=6),
        hovertemplate='Symbol: %{text}<br>Time: %{x}<br>Prob: %{y:.3f}',
        text=[symbol]*len(df_sym)
    ))

# Highlight top-N per snapshot (e.g., top 3)
top_n = 3
top_per_snapshot = (
    df_chart.sort_values(['snapshot_time', 'hot_probability'], ascending=[True, False])
            .groupby('snapshot_time')
            .head(top_n)
)

fig.add_trace(go.Scatter(
    x=top_per_snapshot['snapshot_time'],
    y=top_per_snapshot['hot_probability'],
    mode='markers',
    marker=dict(size=10, color='red', symbol='star'),
    name='Top Hot',
    hovertemplate='Top Hot Symbol: %{text}<br>Time: %{x}<br>Prob: %{y:.3f}',
    text=top_per_snapshot['symbol']
))

# Layout
fig.update_layout(
    title="AI Hot-Stock Probabilities Over Time",
    xaxis_title="Snapshot Time",
    yaxis_title="Hot Probability",
    yaxis=dict(range=[0, 1]),
    hovermode='closest',
    legend_title="Symbols",
    template="plotly_dark",
    legend=dict(
        x=-0.15,
        y=1,
        xanchor="left",
        yanchor="top"
    )
)


chart_path = os.path.join(OUTPUT_DIR, f"superchart-2.0.html")
fig.write_html(chart_path, include_plotlyjs='cdn')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
import plotly.express as px

df_bar = (
    all_signals
    .groupby('symbol', as_index=False)
    .agg(max_hot_probability=('hot_probability', 'max'))
    .sort_values('max_hot_probability', ascending=False)
    .head(20)
)

fig = px.bar(
    df_bar,
    x="max_hot_probability",
    y="symbol",
    orientation="h",
    color="max_hot_probability",
    color_continuous_scale="Turbo",
    title="üß† AI Hot Stocks ‚Äî Max Probability Ranking",
    template="plotly_dark"
)

fig.update_layout(
    xaxis=dict(range=[0, 1]),
    height=700,
    margin=dict(l=160, r=40, t=60, b=40),
    coloraxis_colorbar=dict(title="Hot Prob")
)

fig.write_html(
    os.path.join(OUTPUT_DIR, "superchart-3.0.html"),
    include_plotlyjs="cdn"
)


In [None]:
import plotly.express as px

all_signals['snapshot_time'] = pd.to_datetime(all_signals['snapshot_time'])

# Take ONE snapshot at a time (latest or selectable)
latest_snapshot = all_signals['snapshot_time'].max()

df_snapshot = (
    all_signals[all_signals['snapshot_time'] == latest_snapshot]
    .sort_values('hot_probability', ascending=False)
    .head(15)
)

fig = px.bar(
    df_snapshot,
    x='symbol',
    y='hot_probability',
    color='hot_probability',
    color_continuous_scale='Turbo',
    title=f"üß† AI Hot Stocks ‚Äî Snapshot {latest_snapshot}",
    template='plotly_dark'
)

fig.update_layout(
    yaxis=dict(range=[0,1]),
    xaxis_title="Symbol",
    yaxis_title="Hot Probability",
    height=700
)

fig.write_html(
    os.path.join(OUTPUT_DIR, "superchart-4.0.html"),
    include_plotlyjs="cdn"
)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [None]:
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import os

df_chart = all_signals.copy()
df_chart['snapshot_time'] = pd.to_datetime(df_chart['snapshot_time'])

# Top 15 symbols by peak probability
top_symbols = (
    df_chart.groupby('symbol')['hot_probability']
    .max()
    .sort_values(ascending=False)
    .head(15)
    .index
)

df_chart = df_chart[df_chart['symbol'].isin(top_symbols)]

# Smooth probabilities
df_chart['hot_probability_smooth'] = (
    df_chart.groupby('symbol')['hot_probability']
    .transform(lambda x: x.rolling(3, min_periods=1).mean())
)

# Pivot for stream chart
pivot_df = (
    df_chart.pivot(
        index='snapshot_time',
        columns='symbol',
        values='hot_probability_smooth'
    )
    .fillna(0)
    .sort_index()
)

pivot_df = pivot_df[top_symbols]

# Plotly-native color palette (modern)
colors = px.colors.qualitative.Bold
color_map = {sym: colors[i % len(colors)] for i, sym in enumerate(top_symbols)}

fig = go.Figure()

# Stream layers
for sym in top_symbols:
    fig.add_trace(go.Scatter(
        x=pivot_df.index,
        y=pivot_df[sym],
        stackgroup='one',
        mode='lines',
        name=sym,
        line=dict(
            width=1.2,
            color=color_map[sym]
        ),
        fillcolor=color_map[sym],
        opacity=0.55,
        hovertemplate=(
            "<b>%{fullData.name}</b><br>"
            "Time: %{x}<br>"
            "Hot Prob: %{y:.3f}<extra></extra>"
        )
    ))

# Highlight top-3 per snapshot
top_n = 3
top_per_snapshot = (
    df_chart.sort_values(
        ['snapshot_time', 'hot_probability_smooth'],
        ascending=[True, False]
    )
    .groupby('snapshot_time')
    .head(top_n)
)

fig.add_trace(go.Scatter(
    x=top_per_snapshot['snapshot_time'],
    y=top_per_snapshot['hot_probability_smooth'],
    mode='markers',
    name='üî• Top AI Picks',
    marker=dict(
        size=12,
        color='gold',
        symbol='star',
        line=dict(width=1.5, color='#ff9900')
    ),
    hovertemplate=(
        "<b>üî• %{text}</b><br>"
        "Time: %{x}<br>"
        "Prob: %{y:.3f}<extra></extra>"
    ),
    text=top_per_snapshot['symbol']
))

# Layout ‚Äî MODERN
fig.update_layout(
    title="üß† AI Market Streamflow",
    template="plotly_dark",
    hovermode="x unified",
    yaxis=dict(
        title="Relative Hot Probability",
        showgrid=False,
        zeroline=False
    ),
    xaxis=dict(
        title="Snapshot Time",
        showgrid=False
    ),
    legend=dict(
        orientation="h",
        y=-0.2,
        x=0,
        font=dict(size=11)
    ),
    margin=dict(l=40, r=40, t=80, b=80),
    height=720
)

chart_path = os.path.join(OUTPUT_DIR, "superchart-5.0.html")
fig.write_html(chart_path, include_plotlyjs="cdn")
fig.show()


In [None]:
import plotly.graph_objects as go

df = all_signals.copy()
df['snapshot_time'] = pd.to_datetime(df['snapshot_time'])

# Select top 15 symbols by max probability
top_symbols = (
    df.groupby('symbol')['hot_probability']
      .max()
      .sort_values(ascending=False)
      .head(15)
      .index.tolist()
)
df = df[df['symbol'].isin(top_symbols)]

# Smooth probabilities
df['hot_smooth'] = df.groupby('symbol')['hot_probability'] \
                     .transform(lambda x: x.rolling(3, min_periods=1).mean())

# Map symbols to y-axis lanes
symbol_to_y = {sym: i for i, sym in enumerate(top_symbols)}
df['y_lane'] = df['symbol'].map(symbol_to_y)

# Create figure
fig = go.Figure()

# Plot ribbon markers + subtle connecting line per symbol
for sym in top_symbols:
    df_sym = df[df['symbol'] == sym].sort_values('snapshot_time')
    fig.add_trace(go.Scatter(
        x=df_sym['snapshot_time'],
        y=[symbol_to_y[sym]]*len(df_sym),  # lane
        mode='markers+lines',
        marker=dict(
            size=14,
            color=df_sym['hot_smooth'],       # numeric array
            colorscale='Turbo',
            cmin=0,
            cmax=1,
            showscale=(sym == top_symbols[-1]),  # only show one colorbar
            colorbar=dict(title="Hot Probability", thickness=12)
        ),
        line=dict(width=2, color='rgba(255,255,255,0.1)'),  # subtle line
        text=[sym]*len(df_sym),
        customdata=df_sym['hot_smooth'],
        hovertemplate='Symbol: %{text}<br>Time: %{x}<br>Prob: %{customdata:.3f}',
        name=sym,
        showlegend=False
    ))

# Highlight top 3 hot per snapshot
top_n = 3
top_hits = (
    df.sort_values(['snapshot_time','hot_smooth'], ascending=[True,False])
      .groupby('snapshot_time')
      .head(top_n)
)

fig.add_trace(go.Scatter(
    x=top_hits['snapshot_time'],
    y=top_hits['y_lane'],
    mode='markers',
    marker=dict(
        size=18,
        color='rgba(255,215,0,0.9)',
        line=dict(width=2, color='orange'),
        symbol='star'
    ),
    text=top_hits['symbol'],
    customdata=top_hits['hot_smooth'],
    hovertemplate='üî• Top Hot<br>Symbol: %{text}<br>Prob: %{customdata:.3f}',
    name='Top Hot'
))

# Layout
fig.update_layout(
    title="AI Probability Ribbon",
    xaxis_title="Snapshot Time",
    yaxis=dict(
        title="Symbols",
        tickmode='array',
        tickvals=list(symbol_to_y.values()),
        ticktext=list(symbol_to_y.keys()),
        automargin=True
    ),
    yaxis_range=[-1, len(top_symbols)],
    hovermode='closest',
    template='plotly_dark',
    margin=dict(l=180, r=60, t=60, b=40)
)

chart_path = os.path.join(OUTPUT_DIR, f"superchart-6.0.html")
fig.write_html(chart_path, include_plotlyjs='cdn')

In [20]:
importance = pd.Series(
model.feature_importances_,
index=features
).sort_values(ascending=False)

importance

VolatilityScore               0.177117
regularMarketPrice            0.165249
MomentumScore                 0.151318
VolumeScore                   0.137674
VolumeSpike                   0.135004
TrendScore                    0.126643
regularMarketChangePercent    0.106995
dtype: float32

In [None]:
import plotly.graph_objects as go

fig = go.Figure()

fig.add_trace(go.Bar(
    x=importance.values,
    y=importance.index,
    orientation='h',
    marker=dict(
        color=importance.values,
        colorscale='Viridis'
    ),
    hovertemplate=
        "<b>Feature:</b> %{y}<br>" +
        "<b>Importance:</b> %{x:.4f}<extra></extra>"
))

fig.update_layout(
    title="üß† AI Feature Importance ‚Äî What the Model Cares About",
    xaxis_title="Importance",
    yaxis_title="Feature",
    template="plotly_dark",
    height=600,
    margin=dict(l=180, r=40, t=60, b=40)
)

chart_path = os.path.join(OUTPUT_DIR, f"ai_feature-1.0.html")
fig.write_html(chart_path, include_plotlyjs='cdn')

In [None]:
from sklearn.metrics import confusion_matrix
import numpy as np

cm = confusion_matrix(y_test, preds)
labels = ["Not Hot", "Hot"]

fig = go.Figure(data=go.Heatmap(
    z=cm,
    x=labels,
    y=labels,
    colorscale="Blues",
    hovertemplate=
        "Predicted: %{x}<br>" +
        "Actual: %{y}<br>" +
        "Count: %{z}<extra></extra>"
))

fig.update_layout(
    title="ü§ñ Confusion Matrix ‚Äî Model Decisions",
    xaxis_title="Predicted Label",
    yaxis_title="Actual Label",
    template="plotly_dark",
    height=500
)

chart_path = os.path.join(OUTPUT_DIR, f"ai_confusion_matrix-1.0.html")
fig.write_html(chart_path, include_plotlyjs='cdn')

In [None]:
fig = go.Figure()

fig.add_trace(go.Histogram(
    x=merged['hot_probability'],
    nbinsx=40,
    marker=dict(color="#00f2ff"),
    hovertemplate=
        "Probability Bin: %{x}<br>" +
        "Count: %{y}<extra></extra>"
))

fig.update_layout(
    title="üìä AI Confidence Distribution ‚Äî Hot Probability",
    xaxis_title="Predicted Hot Probability",
    yaxis_title="Count",
    template="plotly_dark",
    height=500
)

chart_path = os.path.join(OUTPUT_DIR, f"ai_probability_distribution-1.0.html")
fig.write_html(chart_path, include_plotlyjs='cdn')