In [2]:
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from datetime import datetime, timedelta
import json
from pathlib import Path

sns.set(style="whitegrid")
plt.rcParams['figure.figsize'] = (12,6)

# -------------------------
# Folders
# -------------------------
DATA_DAILY = Path("data/daily")
HOT_SCORE_OUT = Path("data/hotscore")
DATA_TRADE_SUGGESTIONS = Path("data/recommendations")
PERIOD_OUT = Path("output/periods")
FINAL_OUT = Path("output/recommendations")
FINAL_OUT_APPEARANCES = Path("output/appearances")
FINAL_OUT_DAILY = Path("output/daily")

for p in (DATA_DAILY, HOT_SCORE_OUT, PERIOD_OUT, FINAL_OUT,FINAL_OUT_APPEARANCES, FINAL_OUT_DAILY):
    p.mkdir(parents=True, exist_ok=True)

# -------------------------
# Parameters
# -------------------------
MIN_DAYS_HISTORY = 30
PERSISTENT_DAYS = 180
STREAK_THRESHOLD = 3
TOP_N_PERSISTENT = 10
FORWARD_HORIZONS = [1, 5, 20]
TOP_SYMBOLS_PLOT = 60

# -------------------------
# Load daily hot stocks
# -------------------------
files = sorted(glob.glob(str(DATA_DAILY / "hot_stocks_*.csv")))
if not files:
    raise FileNotFoundError(f"No hot stocks files found in {DATA_DAILY}")

dfs = []

for i, f in enumerate(files):
    df = pd.read_csv(f)

    if 'symbol' not in df.columns and 'Symbol' in df.columns:
        df = df.rename(columns={'Symbol':'symbol'})
        
    dfs.append(df)

full_df = pd.concat(dfs, ignore_index=True, sort=False)
full_df.head()



Unnamed: 0,symbol,regularMarketPrice,regularMarketChangePercent,regularMarketVolume,averageDailyVolume3Month,marketCap,VolumeSpike,MomentumScore,VolumeScore,VolatilityScore,TrendScore,HotScore
0,DIOD,78.0,26.41815,1864168.0,354504.0,3622533000.0,5.258525,0.997602,0.971223,0.935252,0.906475,0.966787
1,GNRC,214.99,17.932,4280336.0,1053216.0,12616490000.0,4.064063,0.990408,0.94964,0.976019,0.904077,0.964628
2,THC,226.35,17.2555,3158135.0,926442.0,19998250000.0,3.408886,0.983213,0.930456,0.980815,0.911271,0.957074
3,VRT,248.51,24.4915,19750197.0,6201262.0,95014890000.0,3.184867,0.995204,0.920863,0.990408,0.769784,0.945683
4,TDC,37.88,29.5929,11363211.0,1424086.0,3579660000.0,7.979301,1.0,0.985612,0.877698,0.705036,0.941007


In [5]:

# Ensure numeric columns
cols_num = [
    'regularMarketPrice',
    'regularMarketChangePercent',
    'regularMarketVolume',
    'averageDailyVolume3Month',
    'marketCap',
    'HotScore',
    'VolumeSpike',
    'MomentumScore',
    'VolumeScore',
    'VolatilityScore',
    'TrendScore'
]

for c in cols_num:
    if c in full_df.columns:
        full_df[c] = pd.to_numeric(full_df[c], errors='coerce')


keep_cols = [
    'symbol',
    'HotScore',
    'TrendScore',
    'regularMarketPrice',
    'regularMarketChangePercent',
    'VolumeSpike',
    'averageDailyVolume3Month',
    'MomentumScore',
    'VolumeScore',
    'VolatilityScore',
    'marketCap'
]


for c in keep_cols:
    if c not in full_df.columns:
        full_df[c] = np.nan

full_df = full_df[keep_cols]
full_df['symbol'] = full_df['symbol'].astype(str).str.upper()
full_df = full_df.sort_values(['HotScore','symbol']).reset_index(drop=True)



# -------------------------
# Save HotScore snapshot
# -------------------------


timestamp = datetime.now().strftime("%Y%m%d")
hotscore_file = HOT_SCORE_OUT / f"hotscore_{timestamp}.csv"

if hotscore_file.exists():
    old_df = pd.read_csv(hotscore_file)
    full_df = pd.concat([old_df, full_df], ignore_index=True)

full_df.to_csv(hotscore_file, index=False)
print("Saved HotScore data:", hotscore_file)

# -------------------------
# Heatmap for top symbols
# -------------------------
top_symbols = full_df['symbol'].value_counts().head(TOP_SYMBOLS_PLOT).index.tolist()

pivot = (
    full_df
    .groupby('symbol')['HotScore']
    .mean()
    .sort_values(ascending=False)
    .head(TOP_SYMBOLS_PLOT)
    .to_frame()
)

pivot[:2]

Saved HotScore data: data\hotscore\hotscore_20260212.csv


Unnamed: 0_level_0,HotScore
symbol,Unnamed: 1_level_1
DIOD,0.966787
GNRC,0.964628


In [34]:

timestamp = datetime.now().strftime("%Y%m%d%H%M%S")

heatmap_file = PERIOD_OUT / f"60_periods_{timestamp}.png"
plt.figure(figsize=(14, max(6, 0.15*len(pivot))))
sns.heatmap(pivot, cmap='YlOrRd', linewidths=0.3, linecolor='gray', cbar_kws={'label':'HotScore'})
plt.title(f"HotScore heatmap â€” top {len(pivot)} symbols by appearances")
plt.xlabel("HotScore")
plt.ylabel("Symbol")
plt.tight_layout()
plt.savefig(heatmap_file, dpi=200)
plt.close()
print("Saved heatmap:", heatmap_file)


Saved heatmap: output\periods\60_periods_20260211112921.png


In [35]:
# Compute aggregated statistics per symbol
stats = full_df.groupby('symbol').agg(
    avg_HotScore = ('HotScore','mean'),
    median_HotScore = ('HotScore','median'),
    max_HotScore = ('HotScore','max'),
    avg_TrendScore = ('TrendScore','mean'),
    avg_MomentumScore = ('MomentumScore','mean'),
    avg_VolumeScore = ('VolumeScore','mean'),
    avg_VolatilityScore = ('VolatilityScore','mean'),
    avg_VolumeSpike = ('VolumeSpike','mean'),
    avg_regularMarketChangePercent = ('regularMarketChangePercent','mean'),
    avg_regularMarketPrice = ('regularMarketPrice','mean')
).sort_values('avg_HotScore', ascending=False)

# Select top N symbols based on avg_HotScore
persistent_top = stats.head(TOP_N_PERSISTENT)

# Candidates for technical checks and ranking
candidates = sorted(persistent_top.index.tolist())

print(f"Top {TOP_N_PERSISTENT} symbols selected:")
print(persistent_top)


Top 10 symbols selected:
        avg_HotScore  median_HotScore  max_HotScore  avg_TrendScore  \
symbol                                                                
SPOT        0.980376         0.980233      0.983960        0.906590   
UNF         0.976326         0.976220      0.985551        0.984383   
RACE        0.964932         0.966925      0.974260        0.963609   
DDOG        0.949595         0.951292      0.952941        0.693494   
MAR         0.943844         0.946699      0.952062        0.926752   
TPL         0.930919         0.938501      0.942665        0.979306   
HAS         0.923206         0.928049      0.933120        0.810294   
ENTG        0.922331         0.924289      0.926289        0.772822   
UCTT        0.921108         0.917115      0.945579        0.837490   
PHG         0.920269         0.925194      0.931579        0.775831   

        avg_MomentumScore  avg_VolumeScore  avg_VolatilityScore  \
symbol                                                 

In [36]:
# -------------------------
# Technical checks
# -------------------------

def technical_checks(symbol):
    try:
        hist = yf.download(symbol, period="1y", interval="1d", progress=False, auto_adjust=True)
        if hist.empty:
            return None
        close = hist['Close']
        price = float(close.iloc[-1])
        ma50 = float(close.rolling(50).mean().iloc[-1])
        return {'symbol': symbol, 'price': price, 'ma50': ma50}
    except:
        return None

tech_rows = []
for s in candidates:
    r = technical_checks(s)
    if r: 
        tech_rows.append(r)

tech_df = pd.DataFrame(tech_rows).set_index('symbol')

# -------------------------
# Build aggregated ranking (snapshot-free, no forward returns)
# -------------------------

agg_df = persistent_top.copy()
agg_df['appearances'] = 1  # placeholder for consistency
agg_df = agg_df.sort_values('avg_HotScore', ascending=False)

print("Aggregated ranking (snapshot-free):")
print(agg_df.head(TOP_N_PERSISTENT))

# -------------------------
# Ready for trade suggestions
# -------------------------

all_symbols = sorted(set(tech_df.index) | set(agg_df.index))
rec = pd.DataFrame(index=all_symbols)
rec = rec.join(tech_df, how='left')
rec = rec.join(agg_df, how='left')

# Fill missing numeric values
for col in ['price','ma50','avg_HotScore','appearances']:
    if col not in rec.columns:
        rec[col] = 0
    else:
        rec[col] = pd.to_numeric(rec[col], errors='coerce').fillna(0)

# Rule-based scoring
rec['rule_score'] = 0
rec['rule_score'] += (rec['price'] > rec['ma50']).astype(int) * 2
rec['rule_score'] += (rec['avg_HotScore'] > rec['avg_HotScore'].median()).astype(int) * 1
rec['rule_score'] += (rec['appearances'] >= 1).astype(int) * 1

# Final sorted recommendations
rec = rec.sort_values(['rule_score','avg_HotScore'], ascending=[False, False])

print("Top trade suggestions:")
print(rec.head(20))


  price = float(close.iloc[-1])
  ma50 = float(close.rolling(50).mean().iloc[-1])
  price = float(close.iloc[-1])
  ma50 = float(close.rolling(50).mean().iloc[-1])
  price = float(close.iloc[-1])
  ma50 = float(close.rolling(50).mean().iloc[-1])
  price = float(close.iloc[-1])
  ma50 = float(close.rolling(50).mean().iloc[-1])
  price = float(close.iloc[-1])
  ma50 = float(close.rolling(50).mean().iloc[-1])
  price = float(close.iloc[-1])
  ma50 = float(close.rolling(50).mean().iloc[-1])
  price = float(close.iloc[-1])
  ma50 = float(close.rolling(50).mean().iloc[-1])
  price = float(close.iloc[-1])
  ma50 = float(close.rolling(50).mean().iloc[-1])
  price = float(close.iloc[-1])
  ma50 = float(close.rolling(50).mean().iloc[-1])


Aggregated ranking (snapshot-free):
        avg_HotScore  median_HotScore  max_HotScore  avg_TrendScore  \
symbol                                                                
SPOT        0.980376         0.980233      0.983960        0.906590   
UNF         0.976326         0.976220      0.985551        0.984383   
RACE        0.964932         0.966925      0.974260        0.963609   
DDOG        0.949595         0.951292      0.952941        0.693494   
MAR         0.943844         0.946699      0.952062        0.926752   
TPL         0.930919         0.938501      0.942665        0.979306   
HAS         0.923206         0.928049      0.933120        0.810294   
ENTG        0.922331         0.924289      0.926289        0.772822   
UCTT        0.921108         0.917115      0.945579        0.837490   
PHG         0.920269         0.925194      0.931579        0.775831   

        avg_MomentumScore  avg_VolumeScore  avg_VolatilityScore  \
symbol                                      

  price = float(close.iloc[-1])
  ma50 = float(close.rolling(50).mean().iloc[-1])


In [37]:
# -------------------------
# Build trade suggestions (snapshot-free)
# -------------------------
def build_trade_suggestions(tech_df, agg_df, persistent_top, output_folder=FINAL_OUT):
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    # All symbols to include
    all_symbols = sorted(set(tech_df.index) | set(agg_df.index) | set(persistent_top.index))
    rec = pd.DataFrame(index=all_symbols)

    # Join dataframes
    rec = rec.join(tech_df, how='left')
    rec = rec.join(agg_df, how='left')

    # Include persistent_top info if needed
    rec = rec.join(
        persistent_top[['avg_HotScore']].rename(columns={'avg_HotScore':'avg_HotScore_persistent'}),
        how='left'
    )

    # Ensure numeric columns exist
    for col in ['price','ma50','avg_HotScore','avg_HotScore_persistent']:
        if col not in rec.columns:
            rec[col] = 0
        else:
            rec[col] = pd.to_numeric(rec[col], errors='coerce').fillna(0)

    # Rule-based scoring
    rec['rule_score'] = 0
    rec['rule_score'] += (rec['price'] > rec['ma50']).astype(int) * 2
    rec['rule_score'] += (rec['avg_HotScore'] > rec['avg_HotScore'].median()).astype(int) * 1
    rec['rule_score'] += (rec['avg_HotScore_persistent'] > 0).astype(int) * 1  # use persistent top as 1 point

    # Sort recommendations
    rec = rec.sort_values(['rule_score','avg_HotScore'], ascending=[False, False])

    # Save CSV
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
    rec_file = output_folder / f"trade_suggestions_{timestamp}.csv"
    rec.to_csv(rec_file)

    return rec, rec_file


# Build
rec_df, rec_file = build_trade_suggestions(tech_df, agg_df, persistent_top, DATA_TRADE_SUGGESTIONS)
print("Trade suggestions saved:", rec_file)


Trade suggestions saved: data\recommendations\trade_suggestions_20260211112927.csv


In [38]:
appearance_chart_file = FINAL_OUT_APPEARANCES / f"appearances_{timestamp}.png"

plt.figure(figsize=(12,6))
plt.bar(agg_df.index, agg_df["appearances"])
plt.xticks(rotation=90)
plt.title("Top 20 appearances")
plt.ylabel("Appearances")
plt.tight_layout()
plt.savefig(appearance_chart_file)
plt.close()
