In [3]:
import pandas as pd
import numpy as np
import os
import glob
import re
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp

from lightgbm import LGBMRanker
from pathlib import Path   
from catboost import CatBoostRegressor, Pool

# Show up to 100 columns when a DataFrame is rendered, so wide frames are not elided.
pd.options.display.max_columns = 100

In [4]:
# Directory layout used by this notebook; all paths are relative to the
# current working directory.
DATA_DAILY = Path("data/daily")
DATA_HOT_SCORE = Path("data/hotscore")
OUTPUT_DIR = Path("output/trend")

# Create any directory (including missing parents) that does not exist yet;
# existing directories are left untouched.
for p in [DATA_DAILY, DATA_HOT_SCORE, OUTPUT_DIR]:
    p.mkdir(exist_ok=True, parents=True)

In [5]:
def latest_file_in_directory(directory=DATA_HOT_SCORE):
    """Return the name of the "latest" ``hotscore_*.csv`` file in ``directory``.

    "Latest" is decided by a plain string comparison of the file names
    (``max``), not by modification time.
    NOTE(review): this equals chronological order only if the file names embed
    a sortable timestamp (e.g. ``hotscore_YYYYMMDD``) — confirm the naming scheme.

    Parameters
    ----------
    directory : str or path-like
        Folder to scan. Defaults to ``DATA_HOT_SCORE``.

    Returns
    -------
    str
        The bare file name (not a full path) of the selected CSV.

    Raises
    ------
    ValueError
        If the directory contains no file matching ``hotscore_*.csv``.
    """
    candidates = [
        f for f in os.listdir(directory)
        if f.startswith("hotscore_") and f.endswith(".csv")
    ]
    # The directory may exist but be empty (e.g. freshly created); fail with
    # an actionable message instead of max()'s generic "empty sequence" error.
    if not candidates:
        raise ValueError(
            f"No 'hotscore_*.csv' files found in directory: {directory}"
        )
    return max(candidates)

In [6]:
# Pick the newest hot-score snapshot and load it into a DataFrame.
latest_file = latest_file_in_directory(DATA_HOT_SCORE)
score_path = os.path.join(DATA_HOT_SCORE, latest_file)
score_data = pd.read_csv(score_path)

# Preview the first rows as a sanity check on the loaded columns.
# NOTE(review): the preview shows an "Unnamed: 0" column, which looks like an
# index saved into the CSV — consider reading with index_col=0 if it is unwanted.
display(score_data.head(4))

Unnamed: 0,symbol,date,HotScore,TrendScore,regularMarketPrice,regularMarketChangePercent,VolumeSpike,averageDailyVolume3Month,MomentumScore,VolumeScore,VolatilityScore,marketCap
0,AA,2025-11-26 20:46:26,0.794401,0.520833,41.845,6.74745,0.940394,6727448.0,0.903646,0.802083,0.726562,10836350000.0
1,AAUC,2025-11-26 20:46:26,0.846094,0.854167,19.18,9.788214,1.052893,342331.0,0.96875,0.848958,0.622396,2378320000.0
2,ALAB,2025-11-26 20:46:26,0.773307,0.723958,156.16,7.860205,0.638383,6266829.0,0.942708,0.518229,0.947917,26375990000.0
3,ANF,2025-11-26 20:46:26,0.905599,0.828125,96.205,6.610155,1.88349,2070173.0,0.898438,0.958333,0.864583,4583525000.0


In [8]:
# Columns fed to the model as predictors.
features = [
    "regularMarketPrice",
    "regularMarketChangePercent",
    "averageDailyVolume3Month",
    "marketCap",
    "VolumeSpike",
    "MomentumScore",
    "VolumeScore",
    "VolatilityScore",
    "TrendScore"
]

# Build the modelling frame in one chain so that every step yields a new
# object; this avoids assigning into a slice of `score_data` (which can raise
# SettingWithCopyWarning) and makes the cell safe to re-run.
data = (
    score_data[features + ["HotScore"]]      # predictors + target together
    .replace([np.inf, -np.inf], np.nan)      # treat infinities as missing
    .dropna(subset=["HotScore"])             # rows without a target are unusable
    .fillna(0)                               # only feature NaNs can remain here
)

# Predictor matrix and target vector.
X = data[features]
y = data["HotScore"]


In [9]:
# Hyperparameters for the CatBoost regressor, gathered in one place.
catboost_params = {
    "iterations": 500,
    "learning_rate": 0.05,
    "depth": 6,
    "eval_metric": "RMSE",
    "random_seed": 42,   # fixed seed for repeatable training
    "verbose": 50,       # print a progress line every 50 iterations
}
cat_model = CatBoostRegressor(**catboost_params)

# Fit on the full prepared dataset (no hold-out split is made in this cell).
cat_model.fit(X, y)

0:	learn: 0.1103573	total: 155ms	remaining: 1m 17s
50:	learn: 0.0189911	total: 441ms	remaining: 3.88s
100:	learn: 0.0079346	total: 705ms	remaining: 2.78s
150:	learn: 0.0057407	total: 947ms	remaining: 2.19s
200:	learn: 0.0046451	total: 1.31s	remaining: 1.94s
250:	learn: 0.0039617	total: 1.65s	remaining: 1.63s
300:	learn: 0.0035286	total: 1.91s	remaining: 1.26s
350:	learn: 0.0032372	total: 2.31s	remaining: 981ms
400:	learn: 0.0030142	total: 2.89s	remaining: 715ms
450:	learn: 0.0028471	total: 3.63s	remaining: 394ms
499:	learn: 0.0027125	total: 4.31s	remaining: 0us


<catboost.core.CatBoostRegressor at 0x20d437b3440>

In [10]:
# Importance scores reported by the fitted model, labelled with the feature
# names and ordered from most to least important.
importance_values = cat_model.get_feature_importance()
feature_importance = pd.Series(importance_values, index=features)
feature_importance = feature_importance.sort_values(ascending=False)
feature_importance

VolumeScore                   34.209188
MomentumScore                 28.656556
VolatilityScore               19.435663
TrendScore                     7.006070
regularMarketChangePercent     6.138482
VolumeSpike                    4.142197
regularMarketPrice             0.205899
averageDailyVolume3Month       0.203724
marketCap                      0.002221
dtype: float64

In [11]:
# Pie chart of each feature's share of the total importance.
slice_pull = [0.05] * len(feature_importance)  # pull every slice out slightly

fig_pie = px.pie(
    values=feature_importance.values,
    names=feature_importance.index,
    title="Feature Importance Contribution (Normalized)",
)
fig_pie.update_traces(pull=slice_pull, textinfo="percent+label")
fig_pie.update_layout(template="plotly_dark")

# Save as a standalone HTML page; plotly.js is loaded from the CDN rather
# than embedded in the file.
chart_path = os.path.join(OUTPUT_DIR, "catboost_feature_importance_pie.html")
fig_pie.write_html(chart_path, include_plotlyjs="cdn")

In [12]:
# Line-and-marker plot of importance per feature, in the Series' sorted order.
importance_trace = go.Scatter(
    x=feature_importance.index,
    y=feature_importance.values,
    mode="lines+markers",
    line={"width": 2, "color": "lime"},
    marker={"size": 8, "color": "cyan"},
)

fig_line = go.Figure()
fig_line.add_trace(importance_trace)
fig_line.update_layout(
    template="plotly_dark",
    title="Feature Importance Trend",
    xaxis_title="Feature",
    yaxis_title="Importance",
)

# Save as a standalone HTML page; plotly.js is loaded from the CDN rather
# than embedded in the file.
chart_path = os.path.join(OUTPUT_DIR, "catboost_feature_importance_line.html")
fig_line.write_html(chart_path, include_plotlyjs="cdn")