In [1]:
import os
from datetime import datetime

# Paths, all derived from the kernel's current working directory.
# NOTE(review): PROJECT_ROOT is taken to be the parent of the working directory,
# so this presumably expects the kernel to start in the notebook's own folder — confirm.
NOTEBOOK_DIR = os.path.abspath(os.getcwd())
PROJECT_ROOT = os.path.abspath(os.path.join(NOTEBOOK_DIR, ".."))
REPORTS_DIR = os.path.join(PROJECT_ROOT, "reports")
# Create the reports folder up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(REPORTS_DIR, exist_ok=True)

def render_report_html(run_ts: str, window_hours: int, tickers: list, sections: dict) -> str:
    """
    Render the chatter report as one self-contained HTML document (inline CSS).

    Parameters
    ----------
    run_ts : timestamp text shown in the header.
    window_hours : size of the rolling window shown in the header.
    tickers : ticker symbols; one <section> is emitted per ticker, in this order.
    sections : dict[ticker] = {
        "narrative": str,
        "themes": list of dicts: [{"label": str, "count": int, "bull": int, "bear": int, "neutral": int, "points": [str,str,str]}],
        "bull_case": str,
        "bear_case": str,
        "flags": [str, ...]
    }

    A ticker missing from `sections`, a missing key, or a None value is rendered
    as empty content (and a single "None" flag chip) instead of raising.

    Every interpolated value is HTML-escaped (&, <, >), including the numeric
    theme counters, because the text ultimately comes from scraped discussions.

    Returns
    -------
    The complete HTML document as a string.
    """
    # very simple inline styling (MVP)
    def esc(s):
        # Falsy input (None, "", 0) renders as an empty string. Anything else is
        # coerced with str() first so a non-string value cannot raise.
        if not s:
            return ""
        return str(s).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    def esc_value(v):
        # Counter-style fields: stringify first so that 0 prints as "0".
        return esc(str(v))

    # Materialise once: the sequence is walked twice (sections + header line),
    # which would silently drop data if a generator were passed.
    tickers = list(tickers)

    ticker_blocks = []
    for t in tickers:
        d = sections.get(t) or {}
        narrative = esc(d.get("narrative", ""))
        bull_case = esc(d.get("bull_case", ""))
        bear_case = esc(d.get("bear_case", ""))
        flags = d.get("flags") or []

        theme_parts = []
        for th in d.get("themes") or []:
            label = esc(th.get("label", "Theme"))
            count = esc_value(th.get("count", 0))
            bull = esc_value(th.get("bull", 0))
            bear = esc_value(th.get("bear", 0))
            neutral = esc_value(th.get("neutral", 0))
            points = th.get("points") or []
            points_html = "".join([f"<li>{esc(p)}</li>" for p in points])
            theme_parts.append(f"""
            <div class="theme">
              <div class="theme-title">{label}</div>
              <div class="meta">
                Comments: {count} |
                Bull: {bull} |
                Bear: {bear} |
                Neutral: {neutral}
              </div>
              <ul class="points">{points_html}</ul>
            </div>
            """)
        themes_html = "".join(theme_parts)

        # An empty flag list still shows one chip so the section never looks broken.
        flags_html = "".join([f"<span class='flag'>{esc(f)}</span>" for f in flags]) or "<span class='flag'>None</span>"

        ticker_heading = esc(t)
        ticker_blocks.append(f"""
        <section class="ticker">
          <h2>{ticker_heading}</h2>
          <p class="narrative">{narrative}</p>

          <h3>Themes</h3>
          {themes_html}

          <div class="cases">
            <div class="case">
              <h3>Bull case</h3>
              <p>{bull_case}</p>
            </div>
            <div class="case">
              <h3>Bear case</h3>
              <p>{bear_case}</p>
            </div>
          </div>

          <h3>Noise / risk flags</h3>
          <div class="flags">{flags_html}</div>
        </section>
        """)

    ticker_blocks_html = "\n".join(ticker_blocks)
    tickers_label = ", ".join([esc(t) for t in tickers])
    window_label = esc_value(window_hours)

    # Doubled braces below are literal CSS braces inside the f-string.
    html = f"""
    <!doctype html>
    <html>
      <head>
        <meta charset="utf-8" />
        <meta name="viewport" content="width=device-width, initial-scale=1" />
        <title>Retail Chatter Report</title>
        <style>
          body {{ font-family: Arial, sans-serif; margin: 24px; line-height: 1.45; }}
          .header {{ margin-bottom: 18px; }}
          .sub {{ color: #444; }}
          .ticker {{ border-top: 1px solid #ddd; padding-top: 18px; margin-top: 18px; }}
          .narrative {{ font-size: 1.05rem; }}
          .theme {{ border: 1px solid #eee; border-radius: 10px; padding: 12px; margin: 10px 0; }}
          .theme-title {{ font-weight: 700; margin-bottom: 4px; }}
          .meta {{ color: #555; font-size: 0.95rem; margin-bottom: 8px; }}
          .points {{ margin: 0; padding-left: 18px; }}
          .cases {{ display: flex; gap: 14px; flex-wrap: wrap; }}
          .case {{ border: 1px solid #eee; border-radius: 10px; padding: 12px; flex: 1; min-width: 260px; }}
          .flags {{ display: flex; gap: 8px; flex-wrap: wrap; }}
          .flag {{ display: inline-block; border: 1px solid #ddd; border-radius: 999px; padding: 4px 10px; background: #fafafa; }}
          .footer {{ margin-top: 22px; color: #666; font-size: 0.95rem; }}
        </style>
      </head>
      <body>
        <div class="header">
          <h1>Retail Chatter Report</h1>
          <div class="sub">Run time: {esc(run_ts)} | Window: rolling {window_label} hours | Tickers: {tickers_label}</div>
        </div>

        {ticker_blocks_html}

        <div class="footer">
          <p><strong>Disclaimer:</strong> This report summarizes public Reddit discussions and may contain rumors, jokes, or misinformation. Not financial advice.</p>
        </div>
      </body>
    </html>
    """
    return html


In [2]:
import os
import sqlite3
import time
from datetime import datetime, timezone

# Project paths, derived from the current working directory
# (same expressions as the path setup in the first cell).
NOTEBOOK_DIR = os.path.abspath(os.getcwd())
PROJECT_ROOT = os.path.abspath(os.path.join(NOTEBOOK_DIR, ".."))
# SQLite database file that get_conn() opens.
DB_PATH = os.path.join(PROJECT_ROOT, "data", "retail_chatter.db")

def get_conn():
    """Open a new SQLite connection to DB_PATH with foreign-key enforcement enabled.

    Each call returns a fresh connection; this function never closes it, so the
    caller has to.
    """
    connection = sqlite3.connect(DB_PATH)
    # The pragma is issued on every newly opened connection.
    connection.execute("PRAGMA foreign_keys = ON;")
    return connection

# Sanity check: show the resolved database path and whether the file is present.
print("DB:", DB_PATH)
print("Exists:", os.path.exists(DB_PATH))


DB: C:\Users\jacks\Downloads\retail-chatter-research\data\retail_chatter.db
Exists: True


In [3]:
def start_run(tickers, window_hours=48, notes=None):
    """Record a new report run and its tickers in the database.

    Parameters
    ----------
    tickers : iterable of ticker strings (a single string is treated as one
        ticker). Entries are stripped, upper-cased, de-duplicated and sorted;
        empty or whitespace-only entries are dropped.
    window_hours : look-back window stored with the run (coerced with int()).
    notes : optional free text stored with the run.

    Returns
    -------
    (run_id, run_ts, tickers) : the new row id in `runs`, the UTC ISO-8601
        timestamp stored for it, and the normalised ticker list.

    Raises
    ------
    ValueError : if no usable ticker remains after normalisation (or if
        window_hours cannot be converted to int).
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(tickers, str):
        tickers = [tickers]
    tickers = sorted({t.strip().upper() for t in tickers if t and t.strip()})
    if not tickers:
        raise ValueError("No tickers provided.")

    # Validate before touching the database so bad input never opens a connection.
    window_hours = int(window_hours)
    run_ts = datetime.now(timezone.utc).isoformat()

    conn = get_conn()
    try:
        # `with conn` commits on success and rolls back if any statement raises,
        # so a run row is never left without its ticker rows.
        with conn:
            cur = conn.execute(
                "INSERT INTO runs (run_ts_utc, window_hours, notes) VALUES (?, ?, ?)",
                (run_ts, window_hours, notes)
            )
            run_id = cur.lastrowid
            conn.executemany(
                "INSERT OR IGNORE INTO run_tickers (run_id, ticker) VALUES (?, ?)",
                [(run_id, t) for t in tickers]
            )
    finally:
        # Always release the connection, including on error paths.
        conn.close()
    return run_id, run_ts, tickers

# Register a run for the two demo tickers. Each execution of this cell inserts
# a new row; run_ts and tickers are reused by later cells.
run_id, run_ts, tickers = start_run(["TSLA", "NVDA"], window_hours=48, notes="db report test")
print("run_id:", run_id)
print("tickers:", tickers)


run_id: 3
tickers: ['NVDA', 'TSLA']


In [4]:
def fetch_ticker_data(ticker, since_epoch, post_limit=50, comment_limit=600):
    """Load the most recent posts and comments stored for one ticker.

    Parameters
    ----------
    ticker : value matched exactly against the `ticker` column of both tables.
    since_epoch : lower bound (inclusive) applied to `created_utc`.
    post_limit : maximum number of post rows returned (default 50).
    comment_limit : maximum number of comment rows returned (default 600).

    Returns
    -------
    (posts, comments) : two lists of row tuples, newest first.
        posts rows:    (post_id, title, selftext, subreddit, score, num_comments, created_utc)
        comments rows: (comment_id, post_id, body, subreddit, score, created_utc, depth)
    """
    conn = get_conn()
    try:
        posts = conn.execute(
            """SELECT post_id, title, selftext, subreddit, score, num_comments, created_utc
               FROM posts
               WHERE ticker=? AND created_utc>=?
               ORDER BY created_utc DESC
               LIMIT ?""",
            (ticker, since_epoch, int(post_limit))
        ).fetchall()

        comments = conn.execute(
            """SELECT comment_id, post_id, body, subreddit, score, created_utc, depth
               FROM comments
               WHERE ticker=? AND created_utc>=?
               ORDER BY created_utc DESC
               LIMIT ?""",
            (ticker, since_epoch, int(comment_limit))
        ).fetchall()
    finally:
        # Release the connection even when a query fails (e.g. missing table).
        conn.close()
    return posts, comments

# Look-back window in hours. simple_narrative() reads this name as a global.
# NOTE(review): start_run() above is also called with window_hours=48 — keep the two in sync.
window_hours = 48
# Cut-off in epoch seconds (now minus the window); it is the lower bound on
# created_utc in fetch_ticker_data's queries.
since_epoch = int(time.time()) - window_hours * 3600
print("since_epoch:", since_epoch)


since_epoch: 1769467296


In [5]:
import re
from collections import Counter

# Runs of three or more ASCII letters; shorter tokens, digits and punctuation are ignored.
WORD_RE = re.compile(r"[A-Za-z]{3,}")

def simple_narrative(ticker, posts, comments, hours=None, top_n=8):
    """Build a one-paragraph plain-text summary of the captured activity for a ticker.

    Parameters
    ----------
    ticker : symbol named in the sentence.
    posts : row tuples where index 1 is the title and index 2 the self text
        (the column order selected by fetch_ticker_data).
    comments : row tuples where index 2 is the comment body.
    hours : window size quoted in the sentence. When omitted, the notebook-level
        `window_hours` global is used, which keeps existing three-argument
        calls working unchanged.
    top_n : how many of the most frequent words to list (default 8).

    Returns
    -------
    str : the narrative sentence(s); the keyword list reads "insufficient data"
        when no word of three or more letters was found.
    """
    if hours is None:
        # Backward-compatible fallback to the notebook global; pass `hours`
        # explicitly to avoid depending on cell execution order.
        hours = window_hours

    post_count = len(posts)
    comment_count = len(comments)

    # None titles/bodies are treated as empty text.
    text_blob = " ".join(
        [(p[1] or "") + " " + (p[2] or "") for p in posts] +
        [(c[2] or "") for c in comments]
    )
    word_counts = Counter(w.lower() for w in WORD_RE.findall(text_blob))
    top_words = [w for w, _ in word_counts.most_common(top_n)]

    return (
        f"In the last {hours} hours, {ticker} has {post_count} captured posts and "
        f"{comment_count} captured comments in the local database. "
        f"Common keywords: {', '.join(top_words) if top_words else 'insufficient data'}."
    )

def build_sections(tickers, since_epoch):
    """Assemble the per-ticker payload consumed by render_report_html.

    For each ticker the stored posts/comments since `since_epoch` are fetched,
    summarised into a narrative, and combined with a single placeholder theme,
    placeholder bull/bear cases, and data-volume flags.

    Returns a dict keyed by ticker with the keys "narrative", "themes",
    "bull_case", "bear_case" and "flags" (["None"] when nothing was flagged).
    """
    def _volume_flags(n_posts, n_comments):
        # Ordered checks: each label is emitted when its condition holds.
        checks = (
            (n_comments < 20, "low data volume"),
            (n_posts == 0, "no posts captured"),
        )
        return [label for triggered, label in checks if triggered]

    report = {}
    for symbol in tickers:
        posts, comments = fetch_ticker_data(symbol, since_epoch)
        summary = simple_narrative(symbol, posts, comments)
        n_comments = len(comments)

        placeholder_theme = {
            "label": "Top keywords (placeholder)",
            "count": n_comments,
            "bull": 0,
            "bear": 0,
            "neutral": 0,
            "points": ["Theme modeling comes in Step 12 (embeddings + clustering)."],
        }

        raised = _volume_flags(len(posts), n_comments)

        report[symbol] = {
            "narrative": summary,
            "themes": [placeholder_theme],
            "bull_case": "Placeholder (added in Step 12).",
            "bear_case": "Placeholder (added in Step 12).",
            "flags": raised if raised else ["None"],
        }
    return report

# Build the per-ticker payload, then echo it as the cell output for inspection.
sections = build_sections(tickers, since_epoch)
sections


{'NVDA': {'narrative': 'In the last 48 hours, NVDA has 0 captured posts and 0 captured comments in the local database. Common keywords: insufficient data.',
  'themes': [{'label': 'Top keywords (placeholder)',
    'count': 0,
    'bull': 0,
    'bear': 0,
    'neutral': 0,
    'points': ['Theme modeling comes in Step 12 (embeddings + clustering).']}],
  'bull_case': 'Placeholder (added in Step 12).',
  'bear_case': 'Placeholder (added in Step 12).',
  'flags': ['low data volume', 'no posts captured']},
 'TSLA': {'narrative': 'In the last 48 hours, TSLA has 1 captured posts and 2 captured comments in the local database. Common keywords: fake, this, post, title, about, tsla, test, body.',
  'themes': [{'label': 'Top keywords (placeholder)',
    'count': 2,
    'bull': 0,
    'bear': 0,
    'neutral': 0,
    'points': ['Theme modeling comes in Step 12 (embeddings + clustering).']}],
  'bull_case': 'Placeholder (added in Step 12).',
  'bear_case': 'Placeholder (added in Step 12).',
  'flag

In [6]:
# Make sure the output folder exists (harmless if it already does).
REPORTS_DIR = os.path.join(PROJECT_ROOT, "reports")
os.makedirs(REPORTS_DIR, exist_ok=True)

html = render_report_html(run_ts, window_hours, tickers, sections)

# datetime.utcnow() is deprecated and returns a naive datetime; an aware UTC
# "now" formats to the same YYYYmmdd_HHMMSS text without the DeprecationWarning.
filename = f"retail_chatter_report_db_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}.html"
out_path = os.path.join(REPORTS_DIR, filename)

# Explicit UTF-8 to match the <meta charset="utf-8"> declared in the document.
with open(out_path, "w", encoding="utf-8") as f:
    f.write(html)

print("Wrote report to:", out_path)


Wrote report to: C:\Users\jacks\Downloads\retail-chatter-research\reports\retail_chatter_report_db_20260128_224158.html


  filename = f"retail_chatter_report_db_{datetime.utcnow().strftime('%Y%m%d_%H%M%S')}.html"
