# 10Q Sentiment & DCF Analysis
This notebook analyzes the DCF of a 10Q as well as the sentiment of the writings within the report for a given ticker and predicts its future price movement.

### Imports & Config

In [None]:
import os, re, sys, time, math, json, warnings, requests
from datetime import datetime, timedelta, timezone, date
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TextClassificationPipeline

import logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(message)s",
    datefmt="%H:%M:%S"
)
log = logging.getLogger("10Q-DCF")

warnings.filterwarnings("ignore")
load_dotenv()

SEC_EMAIL = os.getenv("SEC_EMAIL")
POLYGON_API_KEY = os.getenv("POLYGON_API_KEY")

TRADIER_ACCESS_TOKEN = os.getenv("TRADIER_ACCESS_TOKEN")
TRADIER_ACCOUNT_ID = os.getenv("TRADIER_ACCOUNT_ID")
TRADIER_BASE = os.getenv("TRADIER_BASE")

def _tradier_headers():
    if not TRADIER_ACCESS_TOKEN:
        raise RuntimeError("Missing TRADIER_ACCESS_TOKEN in env.")
    return {
        "Authorization": f"Bearer {TRADIER_ACCESS_TOKEN}",
        "Accept": "application/json",
    }

FINBERT_ID = "yiyanghkust/finbert-tone"
SECTION_PATTERNS = [
    (r"item\s+2\.\s*management[’']?s discussion and analysis.*?(?=item\s+3\.)", "MD&A"),
    (r"item\s+1a\.\s*risk factors.*?(?=item\s+2\.)", "RiskFactors"),
    (r"results of operations.*?(?=liquidity|capital resources|item\s+\d)", "Results"),
]
POS_PHRASES = [r"strong demand", r"margin expansion", r"raised guidance", r"record (revenue|earnings)", r"cost (reductions|optimization)", r"share repurchase", r"cash flow (improved|growth)"]
NEG_PHRASES = [r"decline in (sales|revenue)", r"margin compression", r"impairment charge", r"supply chain disruption", r"adversely affected", r"weaker demand", r"material weakness"]

def SEC_HEADERS():
    return {
        "User-Agent": f"Severin Spagnola (contact: {SEC_EMAIL})",
        "Accept-Encoding": "gzip, deflate",
    }

def cap_bucket(mc):
    if mc is None or not np.isfinite(mc): return "unknown"
    mc_b = mc / 1e9
    if mc_b < 0.3:  return "micro"
    if mc_b < 2:    return "small"
    if mc_b < 10:   return "mid"
    if mc_b < 200:  return "large"
    return "mega"

BACKTEST_CSV = "10q_sentiment_event_returns.csv"
BASELINES_JSON = "sentiment_baselines.json"


### SEC + HTML Helpers

In [15]:
def get_cik(ticker: str) -> str:
    url = "https://www.sec.gov/files/company_tickers.json"
    js = requests.get(url, headers=SEC_HEADERS(), timeout=30).json()
    t = ticker.upper()
    for _, rec in js.items():
        if rec.get("ticker","").upper() == t:
            return str(rec["cik_str"]).zfill(10)
    raise ValueError(f"CIK not found for {ticker}")

def list_10q_with_dates(cik: str, max_n=8):
    url = f"https://data.sec.gov/submissions/CIK{cik}.json"
    r = requests.get(url, headers=SEC_HEADERS(), timeout=30); r.raise_for_status()
    rec = r.json().get("filings",{}).get("recent",{})
    out = []
    for form, acc, prim, fdate in zip(rec.get("form",[]), rec.get("accessionNumber",[]), rec.get("primaryDocument",[]), rec.get("filingDate",[])):
        if form == "10-Q":
            out.append({"accession": acc.replace("-",""), "primary": prim, "filing_date": fdate})
        if len(out) >= max_n: break
    return out

def fetch_filing_html(cik:str, accession:str, primary:str) -> str:
    base = f"https://www.sec.gov/Archives/edgar/data/{int(cik)}/{accession}"
    url  = f"{base}/{primary}"
    r = requests.get(url, headers=SEC_HEADERS(), timeout=60); r.raise_for_status()
    return r.text

def _make_soup(html: str) -> BeautifulSoup:
    for parser in ("lxml", "html5lib", "html.parser"):
        try:
            return BeautifulSoup(html, parser)
        except Exception:
            pass
    return BeautifulSoup(html, "html.parser")

def _lower_clean(txt: str) -> str:
    return re.sub(r"[ \t]+"," ", txt.lower())

def extract_sections(html: str, patterns=SECTION_PATTERNS, fallback_full=True, cap=60000) -> dict:
    soup = _make_soup(html)
    txt  = soup.get_text("\n", strip=True)
    low  = _lower_clean(txt)
    out = {}
    for pat, name in patterns:
        m = re.search(pat, low, flags=re.S)
        if m: out[name] = low[m.start():m.end()][:cap]
    if not out and fallback_full: out["FullDocument"] = low[:cap]
    return out

### FinBERT Loader + Long-Text Analysis

In [16]:
def load_finbert():
    tok = AutoTokenizer.from_pretrained(FINBERT_ID)
    mdl = AutoModelForSequenceClassification.from_pretrained(FINBERT_ID)
    pipe = TextClassificationPipeline(model=mdl, tokenizer=tok, top_k=None, truncation=True)
    return pipe, tok

def _token_chunks(text: str, tokenizer, max_tokens=512, stride=32):
    ids = tokenizer.encode(text, add_special_tokens=False)
    step = max_tokens - stride
    for i in range(0, len(ids), step):
        window = ids[i:i+max_tokens]
        if not window: break
        yield tokenizer.decode(window, skip_special_tokens=True)

def finbert_sent_long(text: str, pipe, tokenizer, max_tokens=512, batch=16):
    if len(text) < 4000:
        rows = pipe([text], truncation=True, max_length=max_tokens)
    else:
        chunks = list(_token_chunks(text, tokenizer, max_tokens=max_tokens))
        rows = []
        for i in range(0, len(chunks), batch):
            rows.extend(pipe(chunks[i:i+batch], truncation=True, max_length=max_tokens))
    pos = neu = neg = 0.0
    for r in rows:
        d = {x["label"].lower(): x["score"] for x in r}
        pos += d.get("positive",0.0); neu += d.get("neutral",0.0); neg += d.get("negative",0.0)
    n = max(1, len(rows))
    return {"pos":pos/n, "neu":neu/n, "neg":neg/n, "sent_score":pos/n - neg/n}

def phrase_boost(text: str, pos_list=POS_PHRASES, neg_list=NEG_PHRASES, w=0.1) -> float:
    boost = 0.0
    for p in pos_list:
        if re.search(p, text, flags=re.I): boost += w
    for n in neg_list:
        if re.search(n, text, flags=re.I): boost -= w
    return boost

def score_sections(sections: dict, pipe_tok=None) -> dict:
    pipe, tok = pipe_tok if pipe_tok else load_finbert()
    feats = {}
    for name, text in sections.items():
        fb = finbert_sent_long(text, pipe, tok, max_tokens=512, batch=16)
        boost = phrase_boost(text)
        feats[f"{name}_pos"] = fb["pos"]; feats[f"{name}_neg"] = fb["neg"]
        base = fb.get("sent_score", 0.0)
        feats[f"{name}_sent"] = base + boost
        feats[f"{name}_boost"] = boost
    sents = [v for k,v in feats.items() if k.endswith("_sent")]
    feats["sent_overall"] = float(np.mean(sents)) if sents else np.nan
    return feats


### Price Data from Polygon

In [17]:
def polygon_daily(ticker: str, start: str, end: str) -> pd.DataFrame:
    url = f"https://api.polygon.io/v2/aggs/ticker/{ticker.upper()}/range/1/day/{start}/{end}"
    params = {"adjusted":"true","sort":"asc","limit":50000,"apiKey": POLYGON_API_KEY}
    r = requests.get(url, params=params, timeout=30); r.raise_for_status()
    rows = r.json().get("results", []) or []
    if not rows: return pd.DataFrame(columns=["date","close"])
    df = pd.DataFrame(rows)[["t","c"]]
    df["date"] = pd.to_datetime(df["t"], unit="ms", utc=True).dt.tz_convert("US/Eastern").dt.date
    return df.drop(columns=["t"]).rename(columns={"c":"close"}).drop_duplicates("date")

def polygon_latest_close(ticker: str, lookback_days: int = 14):
    """Get the most recent daily close from Polygon within the last N days."""
    end = date.today()
    start = end - timedelta(days=lookback_days)
    df = polygon_daily(ticker, start.isoformat(), end.isoformat())
    if df.empty: return None
    return float(df.iloc[-1]["close"])

def next_trading_close(df: pd.DataFrame, target_date: date):
    s = df[df["date"] >= target_date]
    return None if s.empty else float(s.iloc[0]["close"])

def event_closes(ticker: str, filing_date: str) -> dict:
    d0 = datetime.strptime(filing_date, "%Y-%m-%d").date()
    d7 = d0 + timedelta(days=7)
    d28= d0 + timedelta(days=28)
    start = (d0 - timedelta(days=5)).strftime("%Y-%m-%d")
    end   = (d28 + timedelta(days=5)).strftime("%Y-%m-%d")
    df = polygon_daily(ticker, start, end)
    if df.empty: return {"close_0":np.nan,"close_7":np.nan,"close_28":np.nan}
    return {"close_0": next_trading_close(df,d0),
            "close_7": next_trading_close(df,d7),
            "close_28":next_trading_close(df,d28)}

### CSV Backtest Builder

In [18]:
def backtest_10q_sentiment(tickers, max_filings=6, sleep_sec=0.3):
    pipe_tok = load_finbert()
    rows = []
    for t in tickers:
        print(f"[{t}] pulling 10-Qs…")
        cik = get_cik(t)
        pairs = list_10q_with_dates(cik, max_n=max_filings)
        for p in pairs:
            try:
                html = fetch_filing_html(cik, p["accession"], p["primary"])
            except Exception as e:
                print(f"  skip {p['accession']} ({e})"); continue
            secs  = extract_sections(html)
            feats = score_sections(secs, pipe_tok=pipe_tok)
            px    = event_closes(t, p["filing_date"])
            row   = {"ticker":t,"cik":cik, **p, **feats, **px}
            c0,c7,c28 = row["close_0"], row["close_7"], row["close_28"]
            row["ret_7"]  = (c7/c0 - 1.0)*100 if c0 and c7 else np.nan
            row["ret_28"] = (c28/c0 - 1.0)*100 if c0 and c28 else np.nan
            rows.append(row)
            time.sleep(sleep_sec)
    df = pd.DataFrame(rows)
    df.to_csv(BACKTEST_CSV, index=False)
    print(f"Saved {BACKTEST_CSV} with {len(df)} rows.")
    return df


### Backtest Analyzer

In [19]:
from scipy.stats import pearsonr

def run_legitimacy_checks(path=BACKTEST_CSV):
    df = pd.read_csv(path)
    df["filing_date"] = pd.to_datetime(df["filing_date"])
    df = df.dropna(subset=["ret_7","ret_28"])
    sent_cols = [c for c in df.columns if c.endswith("_sent")]
    def winsorize(s, p=0.01):
        lo, hi = s.quantile(p), s.quantile(1-p)
        return s.clip(lo, hi)
    df["ret_7_w"]  = winsorize(df["ret_7"])
    df["ret_28_w"] = winsorize(df["ret_28"])

    def corr_table(y_col):
        rows=[]
        for c in sent_cols + (["sent_overall"] if "sent_overall" in df.columns else []):
            x,y = df[c], df[y_col]
            m = x.notna() & y.notna()
            if m.sum() < 8: rows.append((c, np.nan, np.nan)); continue
            r,p = pearsonr(x[m], y[m])
            rows.append((c,r,p))
        return pd.DataFrame(rows, columns=["feature","pearson_r","p_value"]).sort_values("pearson_r", ascending=False)

    corr7, corr28 = corr_table("ret_7_w"), corr_table("ret_28_w")
    df = df.sort_values(["ticker","filing_date"])
    df["Δsent_overall"] = df.groupby("ticker")["sent_overall"].diff()

    def corr_delta(y_col):
        rows=[]
        x,y = df["Δsent_overall"], df[y_col]
        m = x.notna() & y.notna()
        if m.sum() >= 8:
            r,p = pearsonr(x[m], y[m]); rows.append(("Δsent_overall", r, p))
        return pd.DataFrame(rows, columns=["feature","pearson_r","p_value"]).sort_values("pearson_r", ascending=False)

    dc7, dc28 = corr_delta("ret_7_w"), corr_delta("ret_28_w")
    return {"corr7":corr7, "corr28":corr28, "dc7":dc7, "dc28":dc28}

### Market Cap Baselines

In [20]:
def polygon_market_cap(ticker: str):
    url = f"https://api.polygon.io/v3/reference/tickers/{ticker.upper()}"
    params = {"apiKey": POLYGON_API_KEY}
    r = requests.get(url, params=params, timeout=20)
    try:
        js = r.json().get("results", {})
        return js.get("market_cap", None)
    except Exception:
        return None

def fit_bucket_baselines(path=BACKTEST_CSV, min_per_bucket=8):
    df = pd.read_csv(path)
    df = df.dropna(subset=["ret_7","ret_28","sent_overall"])
    df = df.sort_values(["ticker","filing_date"])
    df["Δsent_overall"] = df.groupby("ticker")["sent_overall"].diff()

    # attach current market cap + bucket
    mcap = {}
    for t in df["ticker"].unique():
        mcap[t] = polygon_market_cap(t)
        time.sleep(0.2)
    df["market_cap"] = df["ticker"].map(mcap)
    df["bucket"] = df["market_cap"].map(cap_bucket)

    out = {}
    for bucket in df["bucket"].dropna().unique():
        d = df[df["bucket"]==bucket].dropna(subset=["Δsent_overall","ret_7","ret_28"])
        if len(d) < min_per_bucket:
            continue
        X = np.c_[np.ones(len(d)), d["Δsent_overall"].values]
        for horizon, ycol in (("ret7","ret_7"), ("ret28","ret_28")):
            y = d[ycol].values
            beta = np.linalg.pinv(X).dot(y)  # [intercept, slope]
            intercept, slope = float(beta[0]), float(beta[1])
            out.setdefault(bucket, {})[horizon] = {"intercept": intercept, "slope": slope, "n": int(len(d))}
    with open(BASELINES_JSON,"w") as f:
        json.dump(out, f, indent=2)
    print(f"Saved {BASELINES_JSON}")
    return out

### Fundamentals + DCF Snapshot

In [21]:
# H) FUNDAMENTALS + DCF SNAPSHOT (TTM from SEC facts)

def get_company_facts(cik: str) -> dict:
    url = f"https://data.sec.gov/api/xbrl/companyfacts/CIK{cik}.json"
    r = requests.get(url, headers=SEC_HEADERS(), timeout=30); r.raise_for_status()
    return r.json()

def _ttm_sum(items, n=4):
    if not items: return None
    vals = [x.get("val") for x in items][-n:]
    vals = [v for v in vals if v is not None]
    return float(np.nansum(vals)) if vals else None

def build_ttm_metrics(facts: dict) -> dict:
    usgaap = facts.get("facts", {}).get("us-gaap", {})
    def get_series(tag):
        return (usgaap.get(tag, {}).get("units", {}).get("USD", []) or
                usgaap.get(tag, {}).get("units", {}).get("USD/shares", []) or
                usgaap.get(tag, {}).get("units", {}).get("shares", []))
    revenue_q   = get_series("Revenues")
    ni_q        = get_series("NetIncomeLoss")
    eps_q       = (usgaap.get("EarningsPerShareDiluted", {}).get("units", {}).get("USD/shares", []) or [])
    dil_sh_q    = get_series("WeightedAverageNumberOfDilutedSharesOutstanding")
    cfo_q       = get_series("NetCashProvidedByUsedInOperatingActivities")
    capex_q     = get_series("PaymentsToAcquirePropertyPlantAndEquipment")

    revenue_ttm     = _ttm_sum(revenue_q)
    net_income_ttm  = _ttm_sum(ni_q)
    eps_ttm         = _ttm_sum(eps_q)
    diluted_sh_ttm  = _ttm_sum(dil_sh_q)
    cfo_ttm         = _ttm_sum(cfo_q)
    capex_ttm       = _ttm_sum(capex_q)
    fcf_ttm         = (cfo_ttm or 0.0) - abs(capex_ttm or 0.0)

    if eps_ttm and diluted_sh_ttm and net_income_ttm:
        approx = eps_ttm * diluted_sh_ttm
        if abs(approx - net_income_ttm)/max(1.0, net_income_ttm) > 0.15:
            print("[warn] EPS*Shares != NetIncome by >15%. Check tags/periods.")

    return dict(
        revenue_ttm=revenue_ttm, net_income_ttm=net_income_ttm,
        eps_diluted_ttm=eps_ttm, diluted_shares_ttm=diluted_sh_ttm,
        cfo_ttm=cfo_ttm, capex_ttm=capex_ttm, fcf_ttm=fcf_ttm,
        rev_per_share=(revenue_ttm / diluted_sh_ttm) if (revenue_ttm and diluted_sh_ttm) else None,
    )

SECTOR_MULTIPLES = {"Technology":{"PE":30.0,"PS":6.8}, "_default":{"PE":18.0,"PS":2.5}}

def multiples_anchor(metrics:dict, sector="Technology"):
    cfg = SECTOR_MULTIPLES.get(sector, SECTOR_MULTIPLES["_default"])
    eps = metrics.get("eps_diluted_ttm")
    rps = metrics.get("rev_per_share")
    pe_anchor = eps * cfg["PE"] if eps else None
    ps_anchor = rps * cfg["PS"] if rps else None
    anchors = [x for x in (pe_anchor, ps_anchor) if x is not None and math.isfinite(x)]
    mid = float(np.mean(anchors)) if anchors else None
    return {"pe_anchor":pe_anchor, "ps_anchor":ps_anchor, "fair_value_mid":mid, "assumptions":cfg}

def dcf_anchor(metrics:dict, years=5, g=0.04, r=0.095, g_term=0.02):
    fcf0 = metrics.get("fcf_ttm")
    sh   = metrics.get("diluted_shares_ttm")
    if not fcf0 or not sh or sh <= 0: return None
    pv, fcf = 0.0, fcf0
    for t in range(1, years+1):
        fcf *= (1+g)
        pv  += fcf / ((1+r)**t)
    terminal = (fcf * (1+g_term)) / (r - g_term)
    pv_term  = terminal / ((1+r)**years)
    return (pv + pv_term) / sh

def blended_fair_value(mult_mid, dcf_val, w=0.5):
    if mult_mid is None and dcf_val is None: return None
    if mult_mid is None: return dcf_val
    if dcf_val  is None: return mult_mid
    return w*mult_mid + (1-w)*dcf_val


### Single Ticker Predictor

In [22]:
def latest_two_10q_delta_sent(ticker: str):
    cik   = get_cik(ticker)
    pairs = list_10q_with_dates(cik, max_n=2)
    if not pairs: return np.nan, np.nan, np.nan
    pipe_tok = load_finbert()
    scores = []
    for p in pairs:
        html = fetch_filing_html(cik, p["accession"], p["primary"])
        secs = extract_sections(html)
        feats= score_sections(secs, pipe_tok=pipe_tok)
        scores.append(feats["sent_overall"])
        time.sleep(0.3)
    if len(scores)==1: return scores[0], np.nan, np.nan
    return scores[0], scores[1], scores[0]-scores[1]

def load_baselines(path=BASELINES_JSON):
    if not os.path.exists(path): raise FileNotFoundError("Run cell G to fit/save sentiment baselines first.")
    with open(path,"r") as f:
        return json.load(f)

def predict_ticker(ticker: str, w_short=0.30, sector="Technology"):
    sent_now, sent_prev, d_sent = latest_two_10q_delta_sent(ticker)
    mc   = polygon_market_cap(ticker)
    bkt  = cap_bucket(mc)
    base = load_baselines().get(bkt, {})
    a7,b7   = base.get("ret7",{}).get("intercept",0.0),  base.get("ret7",{}).get("slope",0.0)
    a28,b28 = base.get("ret28",{}).get("intercept",0.0), base.get("ret28",{}).get("slope",0.0)
    pred7   = a7  + b7  * (d_sent if np.isfinite(d_sent) else 0.0)
    pred28  = a28 + b28 * (d_sent if np.isfinite(d_sent) else 0.0)

    # Valuation
    cik   = get_cik(ticker)
    facts = get_company_facts(cik)
    gaap  = build_ttm_metrics(facts)
    mult  = multiples_anchor(gaap, sector=sector)
    dcfv  = dcf_anchor(gaap, years=5, g=0.04, r=0.095, g_term=0.02)
    blend = blended_fair_value(mult.get("fair_value_mid"), dcfv, w=0.5)

    # Price now from Polygon only
    pnow = polygon_latest_close(ticker)
    price_28d = pnow*(1+pred28/100) if pnow and np.isfinite(pred28) else None
    final_target = (w_short*price_28d + (1-w_short)*blend) if (price_28d and blend) else (price_28d or blend)
    direction = "UP" if (final_target and pnow and final_target > pnow) else "DOWN"

    return {
        "ticker": ticker.upper(),
        "market_cap": mc, "cap_bucket": bkt,
        "sent_overall_now": sent_now, "sent_overall_prev": sent_prev, "Δsent_overall": d_sent,
        "pred_ret_7d_pct": pred7, "pred_ret_28d_pct": pred28,
        "price_now": pnow, "price_28d_target": price_28d,
        "dcf_fair_value": dcfv, "multiples_mid": mult.get("fair_value_mid"),
        "final_blended_target": final_target, "final_direction": direction,
        "baseline_used": base
    }


### Options Spread Creator

In [23]:
def tradier_expirations(ticker: str):
    """List available expiration dates (YYYY-MM-DD) for a symbol."""
    url = f"{TRADIER_BASE}/markets/options/expirations"
    r = requests.get(url, headers=_tradier_headers(),
                     params={"symbol": ticker.upper(), "includeAll": "false"},
                     timeout=20)
    r.raise_for_status()
    data = r.json().get("expirations", {})
    exps = data.get("date", [])
    # API returns a string or list depending on count
    return exps if isinstance(exps, list) else ([exps] if exps else [])

def tradier_chain(ticker: str, expiration: str):
    """Get the option chain for a given expiration."""
    url = f"{TRADIER_BASE}/markets/options/chains"
    r = requests.get(url, headers=_tradier_headers(),
                     params={"symbol": ticker.upper(), "expiration": expiration, "greeks": "false"},
                     timeout=30)
    r.raise_for_status()
    opt = r.json().get("options", {}).get("option", [])
    return opt if isinstance(opt, list) else ([opt] if opt else [])

def best_vertical_by_target(ticker: str, pred_pct: float, polygon_price_fn, horizon_days=28):
    """
    If pred_pct > 0 => bull call vertical. If < 0 => bear put vertical.
    'polygon_price_fn' should be a callable like polygon_latest_close(ticker)->float.
    """
    # pick an expiration ~horizon_days out (nearest available)
    today = date.today()
    target = today + timedelta(days=horizon_days)
    exps = tradier_expirations(ticker)
    if not exps:
        return None
    # choose the expiration closest to 'target' that is >= today
    def to_date(s): 
        y,m,d = map(int, s.split("-")); 
        return date(y,m,d)
    future_exps = [e for e in exps if to_date(e) >= today]
    if not future_exps:
        return None
    exp = min(future_exps, key=lambda s: abs(to_date(s)-target))

    chain = tradier_chain(ticker, exp)
    if not chain:
        return None

    pnow = polygon_price_fn(ticker)
    if not pnow:
        return None
    p_tgt = pnow * (1 + pred_pct/100.0)

    # strikes universe
    strikes = sorted({float(o["strike"]) for o in chain if "strike" in o})
    if not strikes:
        return None
    nearest = lambda x: min(strikes, key=lambda k: abs(k - x))

    if pred_pct >= 0:
        k_buy  = nearest(pnow * 0.99)
        k_sell = nearest(p_tgt * 1.02)
        leg_buy  = [o for o in chain if o.get("option_type")=="call" and float(o["strike"])==k_buy]
        leg_sell = [o for o in chain if o.get("option_type")=="call" and float(o["strike"])==k_sell]
        spread_type = "bull_call"
    else:
        k_buy  = nearest(pnow * 1.01)
        k_sell = nearest(p_tgt * 0.98)
        leg_buy  = [o for o in chain if o.get("option_type")=="put" and float(o["strike"])==k_buy]
        leg_sell = [o for o in chain if o.get("option_type")=="put" and float(o["strike"])==k_sell]
        spread_type = "bear_put"

    if not leg_buy or not leg_sell:
        return None

    def mid(q):
        b = float(q.get("bid", 0.0)); a = float(q.get("ask", 0.0))
        return (b + a)/2 if (a and b) else float(q.get("last", 0.0))

    debit = max(0.01, mid(leg_buy[0]) - mid(leg_sell[0]))
    width = abs(k_sell - k_buy)
    max_profit = max(0.0, width - debit)
    rr = (max_profit / debit) if debit > 0 else None

    return {
        "type": spread_type,
        "expiration": exp,
        "buy_strike": float(k_buy),
        "sell_strike": float(k_sell),
        "debit": round(debit, 2),
        "width": float(width),
        "max_profit": round(max_profit, 2),
        "R_by_Risk": round(rr, 2) if rr else None,
        "price_now": float(pnow),
        "price_target": float(p_tgt),
    }

### Final Functions

In [37]:
def run_full_baseline():
    """
    1. Builds backtest CSV across a mixed-cap universe.
    2. Runs legitimacy checks (optional).
    3. Fits per-cap baselines and saves to sentiment_baselines.json.
    """
    micro = "HZO, TIRX, STRC, GWRS, UPXI, GCTK, VTSI, HCAT, OPRX, AOUT, FCEL, HITI, WKSP".split(",")
    small = "BLKB, HQY, PIPR, HAYW, NVCR, SMPL, MGPI, BE, PRCT, SKYW, AVAV, INMD, VRTS, CNXN, REZI, ASTE, MHO, CELH, ABM, PCT".split(",")
    mid   = "LULU, MAR, EA, FSLR, MLM, TTWO, TDY, ENPH, ALB, DAL, CHRW, WDC, AAP, CZR, CHD, SWKS, COHR, PTC, HOLX, MKTX".split(",")
    large = "AAPL, MSFT, AMZN, NVDA, GOOGL, META, JPM, V, JNJ, PG, XOM, UNH, PEP, KO, COST, ORCL, DIS, HD, BAC, WMT".split(",")
    TICKERS = [x.strip() for s in (micro+small+mid+large) for x in [s] if x.strip()]
    df = backtest_10q_sentiment(TICKERS, max_filings=6)
    print(df.head())
    _ = run_legitimacy_checks(BACKTEST_CSV)
    baselines = fit_bucket_baselines(BACKTEST_CSV)
    return baselines

def run_predict(tickers, w_short=0.30, sector="Technology", suggest_spread=True):
    out = []
    for t in tickers:
        res = predict_ticker(t, w_short=w_short, sector=sector)
        if suggest_spread and res.get("pred_ret_28d_pct") is not None:
            res["options_vertical"] = best_vertical_by_target(
                t, res["pred_ret_28d_pct"], polygon_price_fn=polygon_latest_close
            )
        out.append(res)
    return pd.DataFrame(out)

### Create Baselines

In [38]:
run_full_baseline()

Device set to use cpu


[HZO] pulling 10-Qs…
[TIRX] pulling 10-Qs…
[STRC] pulling 10-Qs…
[GWRS] pulling 10-Qs…
[UPXI] pulling 10-Qs…
[GCTK] pulling 10-Qs…
[VTSI] pulling 10-Qs…
[HCAT] pulling 10-Qs…
[OPRX] pulling 10-Qs…
[AOUT] pulling 10-Qs…
[FCEL] pulling 10-Qs…
[HITI] pulling 10-Qs…
[WKSP] pulling 10-Qs…
[BLKB] pulling 10-Qs…
[HQY] pulling 10-Qs…
[PIPR] pulling 10-Qs…
[HAYW] pulling 10-Qs…
[NVCR] pulling 10-Qs…
[SMPL] pulling 10-Qs…
[MGPI] pulling 10-Qs…
[BE] pulling 10-Qs…
[PRCT] pulling 10-Qs…
[SKYW] pulling 10-Qs…
[AVAV] pulling 10-Qs…
[INMD] pulling 10-Qs…
[VRTS] pulling 10-Qs…
[CNXN] pulling 10-Qs…
[REZI] pulling 10-Qs…
[ASTE] pulling 10-Qs…
[MHO] pulling 10-Qs…
[CELH] pulling 10-Qs…
[ABM] pulling 10-Qs…
[PCT] pulling 10-Qs…
[LULU] pulling 10-Qs…
[MAR] pulling 10-Qs…
[EA] pulling 10-Qs…
[FSLR] pulling 10-Qs…
[MLM] pulling 10-Qs…
[TTWO] pulling 10-Qs…
[TDY] pulling 10-Qs…
[ENPH] pulling 10-Qs…
[ALB] pulling 10-Qs…
[DAL] pulling 10-Qs…
[CHRW] pulling 10-Qs…
[WDC] pulling 10-Qs…
[AAP] pulling 10-Qs…
[CZR

{'mid': {'ret7': {'intercept': 0.6646190032519315,
   'slope': 1.7611971977397842,
   'n': 69},
  'ret28': {'intercept': 2.3405542204124696,
   'slope': 10.91955421084191,
   'n': 69}},
 'mega': {'ret7': {'intercept': 0.6368375467036415,
   'slope': -3.967231884785469,
   'n': 89},
  'ret28': {'intercept': 2.044963250329099,
   'slope': -3.623117976094409,
   'n': 89}},
 'large': {'ret7': {'intercept': 1.34865014417929,
   'slope': 23.122288040311755,
   'n': 93},
  'ret28': {'intercept': 5.919645811842534,
   'slope': 28.298360316565425,
   'n': 93}},
 'micro': {'ret7': {'intercept': -5.021574038998796,
   'slope': 105.3251808168595,
   'n': 40},
  'ret28': {'intercept': -4.520283496135282,
   'slope': 151.7534318627472,
   'n': 40}},
 'small': {'ret7': {'intercept': 2.2229207879115034,
   'slope': -14.617579781703878,
   'n': 40},
  'ret28': {'intercept': 3.7918126913080634,
   'slope': -78.66805774539746,
   'n': 40}}}

### Run Predictions on Given Ticker

In [39]:
# Assuming you already ran the cells that define run_predict() etc.
ticker = "AAPL"  # <-- set your ticker symbol

pred = run_predict([ticker], w_short=0.30, sector="Technology", suggest_spread=True).iloc[0].to_dict()

print("=== Prediction Summary ===")
print(f"Ticker: {pred['ticker']}")
print(f"Cap bucket: {pred['cap_bucket']} | Market cap: {pred['market_cap']}")
print(f"Δsent_overall: {pred['Δsent_overall']:.4f}")
print(f"Pred 7d: {pred['pred_ret_7d_pct']:.2f}% | Pred 28d: {pred['pred_ret_28d_pct']:.2f}%")
print(f"Price now: {pred['price_now']}")
print(f"DCF fair value: {pred['dcf_fair_value']}")
print(f"Multiples mid: {pred['multiples_mid']}")
print(f"28d target: {pred['price_28d_target']}")
print(f"Final blended target: {pred['final_blended_target']}  ->  {pred['final_direction']}")

ov = pred.get("options_vertical")
if ov:
    print("\n=== Suggested Options Vertical ===")
    print(f"Type: {ov['type']}  |  Exp: {ov['expiration']}")
    print(f"Buy {ov['buy_strike']}  /  Sell {ov['sell_strike']}")
    print(f"Debit: {ov['debit']}  |  Width: {ov['width']}  |  Max Profit: {ov['max_profit']}")
    print(f"R:R (profit/risk): {ov['R_by_Risk']}")
else:
    print("\n(No options suggestion – ensure TRADIER_ACCESS_TOKEN is set and chains are available.)")


Device set to use cpu


[warn] EPS*Shares != NetIncome by >15%. Check tags/periods.


JSONDecodeError: Expecting value: line 1 column 1 (char 0)