In [16]:
# 1) Imports & config
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
from pathlib import Path
import json
import logging
import os
# Optional Firebase admin
import firebase_admin
from firebase_admin import credentials, storage

# Config - adjust paths if needed
SNAPSHOT_FOLDER = Path("output/runtime")     # Where 05 saves runtime_suggestions_*.csv
BUFFER_FILE     = SNAPSHOT_FOLDER / "hour_buffer.csv"  # rolling buffer file (local)
OUTPUT_FOLDER   = Path("output/hourly")              # where hourly outputs will be written locally
KEEP_MINUTES    = 60                            # rolling window in minutes
SNAPSHOT_PERIOD = 10                            # minutes (how frequently snapshots are taken)
TOP_N = 3                                       # number of top trending symbols to surface
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

# Logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")


In [17]:
def append_snapshot_to_buffer(snapshot_file, buffer_file=BUFFER_FILE):
    df_rec = snapshot_to_records(snapshot_file)
    if df_rec.empty:
        return 0
    
    if "symbol" not in df_rec.columns:
        logging.warning(f"No symbol column in {snapshot_file}, skipping")
        return 0
    
    if buffer_file.exists():
        buffer_df = pd.read_csv(buffer_file)
    else:
        buffer_df = pd.DataFrame()
    
    # append and dedupe by (snapshot_ts, symbol)
    buf = pd.concat([buffer_df, df_rec], ignore_index=True, sort=False)
    buf.drop_duplicates(subset=["snapshot_ts","symbol"], inplace=True)
    
    # keep only last KEEP_MINUTES minutes
    cutoff = datetime.now() - timedelta(minutes=KEEP_MINUTES)
    buf["snapshot_ts"] = pd.to_datetime(buf["snapshot_ts"], errors='coerce')
    buf = buf.loc[buf["snapshot_ts"] >= cutoff].copy()
    
    buf.to_csv(buffer_file, index=False)
    logging.info(f"Appended snapshot {snapshot_file.name} to buffer. Buffer rows: {len(buf)}")
    return len(df_rec)


In [18]:
files = read_latest_snapshot_files()
if not files:
    logging.warning("No snapshot files found in SNAPSHOT_FOLDER.")
else:
    added_rows = sum(append_snapshot_to_buffer(f, BUFFER_FILE) for f in files)
    logging.info(f"Total rows added from snapshots: {added_rows}")


2025-11-24 15:21:59,722 INFO Appended snapshot runtime_suggestions_20251121135205.csv to buffer. Buffer rows: 0


  buf = pd.concat([buffer_df, df_rec], ignore_index=True, sort=False)
2025-11-24 15:21:59,751 INFO Appended snapshot runtime_suggestions_20251124121050.csv to buffer. Buffer rows: 0
  buf = pd.concat([buffer_df, df_rec], ignore_index=True, sort=False)
2025-11-24 15:21:59,761 INFO Appended snapshot runtime_suggestions_20251124143901.csv to buffer. Buffer rows: 10
2025-11-24 15:21:59,788 INFO Appended snapshot runtime_suggestions_20251124143912.csv to buffer. Buffer rows: 20
2025-11-24 15:21:59,817 INFO Appended snapshot runtime_suggestions_20251124143929.csv to buffer. Buffer rows: 30
2025-11-24 15:21:59,840 INFO Appended snapshot runtime_suggestions_20251124143940.csv to buffer. Buffer rows: 40
2025-11-24 15:21:59,868 INFO Appended snapshot runtime_suggestions_20251124143948.csv to buffer. Buffer rows: 50
2025-11-24 15:21:59,869 INFO Total rows added from snapshots: 70


In [19]:
# 4) Read buffer and compute hourly trend scores
if not BUFFER_FILE.exists():
    raise FileNotFoundError(f"Buffer file not found: {BUFFER_FILE}. Run 05 first to create snapshots.")

# Read CSV buffer
buf = pd.read_csv(BUFFER_FILE)

# Ensure 'snapshot_ts' exists and is datetime
if "snapshot_ts" not in buf.columns:
    logging.warning("'snapshot_ts' column missing in buffer CSV, adding current timestamp")
    buf["snapshot_ts"] = datetime.now()
else:
    buf["snapshot_ts"] = pd.to_datetime(buf["snapshot_ts"], errors='coerce')

if buf.empty:
    raise RuntimeError("Buffer is empty after pruning - nothing to compute.")

# normalize column presence
for col in ["HotScore_today","RuntimeScore","regularMarketPrice","regularMarketVolume","ma50"]:
    if col not in buf.columns:
        buf[col] = np.nan

# compute per-symbol stats over the rolling window
grouped = buf.groupby("symbol")
trend_rows = []
for symbol, g in grouped:
    g = g.sort_values("snapshot_ts")
    appearances = g["snapshot_ts"].nunique()
    first_hot = g["HotScore_today"].iloc[0] if not g["HotScore_today"].isna().all() else 0
    last_hot = g["HotScore_today"].iloc[-1] if not g["HotScore_today"].isna().all() else 0
    trend_accel = last_hot - first_hot
    mean_runtime = g["RuntimeScore"].mean(skipna=True)
    last_price = g["regularMarketPrice"].iloc[-1] if not g["regularMarketPrice"].isna().all() else np.nan
    last_vol = g["regularMarketVolume"].iloc[-1] if not g["regularMarketVolume"].isna().all() else np.nan
    mean_vol = g["regularMarketVolume"].mean(skipna=True)
    rvol = (last_vol / mean_vol) if mean_vol and not np.isnan(mean_vol) else 0
    ma50 = g["ma50"].iloc[-1] if "ma50" in g.columns and not g["ma50"].isna().all() else np.nan
    is_above_ma50 = 1 if (not np.isnan(last_price) and not np.isnan(ma50) and last_price > ma50) else 0
    top_counts = (g["RuntimeScore"] >= g["RuntimeScore"].quantile(0.9)).sum() if "RuntimeScore" in g.columns else 0
    rank_score = top_counts
    trend_rows.append({
        "symbol": symbol,
        "appearances": appearances,
        "first_hot": first_hot,
        "last_hot": last_hot,
        "trend_accel": float(trend_accel),
        "mean_runtime": float(mean_runtime) if not np.isnan(mean_runtime) else 0,
        "last_price": float(last_price) if not np.isnan(last_price) else np.nan,
        "rvol": float(rvol),
        "is_above_ma50": int(is_above_ma50),
        "rank_score": int(rank_score)
    })

trend_df = pd.DataFrame(trend_rows).set_index("symbol")

# compute final trend_score with tunable weights
trend_df = trend_df.fillna(0)
trend_df["trend_score"] = (
    2.0 * trend_df["appearances"] + 
    3.0 * trend_df["trend_accel"] +
    1.5 * trend_df["rvol"] +
    2.0 * trend_df["is_above_ma50"] +
    2.0 * trend_df["rank_score"]
)

trend_df = trend_df.sort_values("trend_score", ascending=False)
trend_df.head(10)


Unnamed: 0_level_0,appearances,first_hot,last_hot,trend_accel,mean_runtime,last_price,rvol,is_above_ma50,rank_score,trend_score
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ADPT,5,0.868842,0.868842,0.0,0.959882,18.61,1.0,0,5,21.5
BLTE,5,0.977053,0.977053,0.0,1.01138,120.56,1.0,0,5,21.5
GRAL,5,0.921789,0.921789,0.0,1.011066,93.55,1.0,0,5,21.5
HTFL,5,0.889474,0.889474,0.0,0.969438,30.85,1.0,0,5,21.5
LITE,5,0.935474,0.935474,0.0,0.997892,255.59,1.0,0,5,21.5
MOG-A,5,0.968632,0.968632,0.0,0.984316,214.77,1.0,0,5,21.5
OSIS,5,0.924842,0.924842,0.0,0.962421,251.1,1.0,0,5,21.5
PODD,5,0.898,0.898,0.0,0.979218,331.17,1.0,0,5,21.5
RH,5,0.857263,0.857263,0.0,0.969214,153.08,1.0,0,5,21.5
ROST,5,0.954105,0.954105,0.0,0.977053,174.0,1.0,0,5,21.5


In [20]:
# 5) Save outputs (HTML, PNG, JSON)
OUTPUT_FOLDER.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")

png_out = OUTPUT_FOLDER / f"hour_trend_{timestamp}.png"
html_out = OUTPUT_FOLDER / f"hour_trend_{timestamp}.html"
meta_out = OUTPUT_FOLDER / f"metadata_hour_trend_{timestamp}.json"

# Chart: top TOP_N symbols
topN = trend_df.head(TOP_N).reset_index()
plt.figure(figsize=(8,5))
sns.barplot(x="symbol", y="trend_score", data=topN, palette="viridis")
plt.title(f"Trend of the Hour — Top {TOP_N}")
plt.ylabel("Trend Score")
plt.tight_layout()
plt.savefig(png_out, dpi=200)
plt.close()
logging.info(f"Saved trend PNG: {png_out}")

# Build HTML report
html_parts = [
    f"<h1>Hourly Trend Report — {timestamp}</h1>",
    "<h2>Top Symbols Table</h2>",
    trend_df.head(TOP_N).to_html(index=True, classes='table table-sm', border=0),
    "<h2>Trend Score Chart</h2>",
    f'<img src="{png_out.name}" alt="Trend chart" style="max-width:100%;">'
]

with open(html_out, "w", encoding="utf-8") as fh:
    fh.write("<html><head><meta charset='utf-8'><title>Hourly Trend</title></head><body>")
    fh.write("\n".join(html_parts))
    fh.write("</body></html>")

logging.info(f"Saved HTML report: {html_out}")

# Metadata
meta = {
    "timestamp": timestamp,
    "top_symbols": topN["symbol"].tolist(),
    "counts": len(trend_df),
    "files": {
        "html": html_out.name,
        "png": png_out.name
    }
}

with open(meta_out, "w") as f:
    json.dump(meta, f, indent=2)
logging.info(f"Saved metadata JSON: {meta_out}")



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x="symbol", y="trend_score", data=topN, palette="viridis")
2025-11-24 15:22:14,276 INFO Saved trend PNG: output\hourly\hour_trend_20251124152214.png
2025-11-24 15:22:14,281 INFO Saved HTML report: output\hourly\hour_trend_20251124152214.html
2025-11-24 15:22:14,285 INFO Saved metadata JSON: output\hourly\metadata_hour_trend_20251124152214.json
