In [132]:
import yfinance as yf
import os
import pandas as pd
from datetime import datetime, date, timedelta
from dateutil.parser import parse as dtparse
from concurrent.futures import ThreadPoolExecutor, as_completed

# ------------------ USER SETTINGS ------------------
MAX_EXPIRIES_PER_TICKER = 8     # cap on chains fetched per ticker (nearest 7 + one Jan'26 expiry if present)
LAST_PRICE_MAX = 1.50           # keep only contracts whose last price is <= $1.50
VOL_MIN = 300                   # minimum contract volume to be considered
VOL_OI_MIN = 2.0                # minimum volume / open-interest ratio
END_DATE_CUTOFF = "2026-01-31"  # ignore expiries after this ISO date

# Output files are named "<prefix>_<n>.csv"; n grows with every run made on the same day.
base_date = datetime.now().strftime('%Y-%m-%d')
prefix = f"unusual_options_scan_{base_date}"
ext = ".csv"

# Collect the run numbers already used by today's files in the working directory.
existing_indices = []
for fname in os.listdir('.'):
    if not (fname.startswith(prefix) and fname.endswith(ext)):
        continue
    # The run number sits between "<prefix>_" and the extension.
    stem = fname[len(prefix) + 1:-len(ext)]
    try:
        existing_indices.append(int(stem))
    except ValueError:
        # Same prefix but no numeric suffix: not one of our numbered files.
        continue

# First run of the day gets 1; afterwards one past the highest number found.
if existing_indices:
    next_index = max(existing_indices) + 1
else:
    next_index = 1
SAVE_CSV = f"{prefix}_{next_index}{ext}"

print(f"Next file to save: {SAVE_CSV}")
# ---------------------------------------------------

# Scan universe: NASDAQ-100 style names plus extra watch-list symbols.
# The literal contains repeats (e.g. WDAY, PDD, CRWD, QCOM); they are removed below.
# NOTE(review): a few entries (e.g. ATVI, SPLK, ANSS, VIX) look delisted or not
# optionable under that symbol - confirm. Tickers that report no expiries are
# simply skipped by scan_ticker, so stale entries only cost a request.
NASDAQ100 = [
    "AAPL","MSFT","NVDA","AMZN","META","GOOGL","GOOG","TSLA","AVGO","COST","AFRM",
    "NFLX","PEP","ADBE","AMD","LIN","TMUS","CSCO","QCOM","TXN","AMAT",
    "INTU","HON","INTC","BKNG","SBUX","MU","AMGN","PDD","REGN","LRCX",
    "ADP","ISRG","ABNB","MDLZ","VRTX","ASML","GILD","ADI","PANW","KLAC",
    "PYPL","CRWD","CSX","WDAY","CHTR","MAR","NXPI","ROP","AEP","KDP",
    "MELI","FTNT","ORLY","SNPS","CDNS","MNST","CTAS","DXCM","PCAR","LULU",
    "MRVL","MCHP","ROST","EXC","ODFL","ADSK","ATVI","IDXX","EA",
    "PAYX","CTSH","TEAM","XEL","WDAY","DDOG","ZS","SPLK","BKR","ALGN",
    "AZN","CEG","VRSK","SIRI","PDD","LCID","RIVN","BIDU","JD","BMRN",
    "DOCU","VRSN","NTES","MRNA","ANSS","CSGP","CHKP","MTCH","CRWD","OKTA",
    "NEE", "JNJ", "SMCI", "STZ", "TMQ", "PLTR", "XYZ", "HOOD", "ORCL", "UPST",
    "TSM", "SHOP", "SPOT", "LLY", "HIMS", "UNH", "DELL", "COIN", "OSCR", "SNOW",
    "QUBT", "RGTI", "CRWV", "RKLB", "BA", "QCOM", "PANW", "JPM", "GS", "BABA", "BIDU", "USAR", "ONON", "VIX", "OKLO",
    "QS", "CRML", "MP", "QBTS", "JEF", "GKOS", "GSK", "AMGN", "ROKU", "RH", "FCX", "DASH", "CHWY", "CCJ", "FI", "TEAM",
    "SBET", "METC", "AVAV", "MTSR", "NTLA", "ALAB", "ALK", "PINS", "TEM"
]
# Unique symbols in alphabetical order. The result is sorted anyway, so a set
# is enough for de-duplication (no need to preserve first-seen order).
TICKERS = sorted(set(NASDAQ100))

def safe_option_chain(tkr, exp):
    """Fetch the option chain for one expiry and tag both sides.

    Best-effort by design: any exception raised while fetching or tagging the
    chain is swallowed, so a single bad expiry does not abort the caller.

    Args:
        tkr: object exposing ``option_chain(exp)`` whose result has ``calls``
            and ``puts`` DataFrames.
        exp: expiry identifier passed straight to ``option_chain``.

    Returns:
        ``(calls, puts)`` - copies of the chain's frames with a "type" column
        ("CALL" / "PUT") and an "expiration" column (= ``exp``) added - or
        ``(None, None)`` on any failure.
    """
    try:
        chain = tkr.option_chain(exp)
        calls = chain.calls.copy()
        puts = chain.puts.copy()
        # Work on copies so the frames held by the chain object stay untouched.
        for frame, label in ((calls, "CALL"), (puts, "PUT")):
            frame["type"] = label
            frame["expiration"] = exp
    except Exception:
        return None, None
    return calls, puts

def pick_expiries(all_exps, *, offset_days=0, max_expiries=None, cutoff=None,
                  far_month="2026-01"):
    """Choose a practical subset of expiries to scan for one ticker.

    Selection rules:
    - keep only expiries in the window [today + offset_days, cutoff];
    - take the nearest ones, up to ``max_expiries - 1``;
    - additionally include the first in-window expiry whose date starts with
      ``far_month`` (Jan 2026 by default), if it is not already selected.

    Args:
        all_exps: iterable of expiry dates as ISO "YYYY-MM-DD" strings.
        offset_days: start of the window, in days from today (0 = today).
        max_expiries: cap on the number of expiries returned; defaults to
            MAX_EXPIRIES_PER_TICKER.
        cutoff: last acceptable expiry as an ISO date string; defaults to
            END_DATE_CUTOFF.
        far_month: "YYYY-MM" prefix of the extra far-dated expiry to include;
            pass a falsy value to disable it.

    Returns:
        List of expiry strings: the nearest ones in ascending order, followed
        by the far-month pick when one was added.
    """
    if max_expiries is None:
        max_expiries = MAX_EXPIRIES_PER_TICKER
    if cutoff is None:
        cutoff = END_DATE_CUTOFF

    earliest = (date.today() + timedelta(days=offset_days)).isoformat()
    # ISO dates sort chronologically as plain strings, so no parsing is needed.
    in_window = sorted(e for e in all_exps if earliest <= e <= cutoff)

    # Reserve one slot for the far-month expiry.
    chosen = in_window[:max(0, max_expiries - 1)]

    if far_month:
        far_pick = next((e for e in in_window if e.startswith(far_month)), None)
        if far_pick is not None and far_pick not in chosen:
            chosen.append(far_pick)
    return chosen

def scan_ticker(ticker):
    """Scan one ticker's option chains and return the contracts passing the filters.

    For every expiry chosen by ``pick_expiries`` the calls and puts are merged
    and a contract is kept when all of the following hold:
    1) lastPrice <= LAST_PRICE_MAX
    2) volume >= VOL_MIN
    3) volume / openInterest >= VOL_OI_MIN (open interest of 0 counts as 1)

    Args:
        ticker: symbol string handed to ``yf.Ticker``.

    Returns:
        DataFrame with columns contractSymbol, type, strike, lastPrice, volume,
        openInterest, vol_oi, expiration, ticker and score (volume * vol_oi).
        An empty, column-less DataFrame is returned when the expiry list cannot
        be fetched, is empty, or nothing passes the filters.
    """
    keep_cols = ["contractSymbol", "type", "strike", "lastPrice",
                 "volume", "openInterest", "vol_oi", "expiration"]

    tkr = yf.Ticker(ticker)
    try:
        all_exps = tkr.options
    except Exception:
        # Best-effort: a ticker whose expiries cannot be listed is skipped.
        return pd.DataFrame()

    if not all_exps:
        return pd.DataFrame()

    rows = []
    for exp in pick_expiries(all_exps):
        calls, puts = safe_option_chain(tkr, exp)
        if calls is None:
            continue

        # Skip empty sides: concatenating empty frames is deprecated in recent
        # pandas and contributes nothing to the result.
        sides = [side for side in (calls, puts) if not side.empty]
        if not sides:
            continue
        df = pd.concat(sides, ignore_index=True)

        # Clean columns (Yahoo schema can vary slightly)
        for col in ["lastPrice", "volume", "openInterest", "strike"]:
            if col not in df.columns:
                df[col] = 0

        # A missing (NaN) volume or open interest would turn vol_oi into NaN and
        # silently drop the contract; treat missing as 0, like an explicit 0.
        for col in ("volume", "openInterest"):
            df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

        # Zero open interest is replaced by 1 to avoid dividing by zero.
        df["vol_oi"] = df["volume"] / df["openInterest"].replace(0, 1)
        flt = (
            (df["lastPrice"] <= LAST_PRICE_MAX) &
            (df["volume"] >= VOL_MIN) &
            (df["vol_oi"] >= VOL_OI_MIN)
        )
        # .copy() so the column assignments below act on an independent frame
        # (avoids SettingWithCopyWarning on the filtered slice).
        hits = df.loc[flt, keep_cols].copy()
        if hits.empty:
            continue
        hits["ticker"] = ticker

        # Simple ranking score: volume * vol/oi
        hits["score"] = hits["volume"] * hits["vol_oi"]
        rows.append(hits)

    if not rows:
        return pd.DataFrame()
    return pd.concat(rows, ignore_index=True)


def main():
    """Scan all tickers concurrently, rank the hits, save them and print a summary.

    Each ticker is scanned by ``scan_ticker`` on a thread pool. Hits from the
    ten tickers with the highest single-contract score are written to SAVE_CSV
    and echoed to the console.
    """
    total = len(TICKERS)
    # Pool size is a trade-off between speed and rate-limit tolerance.
    pool_size = min(20, total)

    collected = []
    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        # Queue every ticker up front and remember which future belongs to which symbol.
        pending = {}
        for symbol in TICKERS:
            pending[pool.submit(scan_ticker, symbol)] = symbol

        done_count = 0
        for fut in as_completed(pending):
            done_count += 1
            symbol = pending[fut]
            try:
                hits = fut.result()
                if hits.empty:
                    print(f"[{done_count:3d}/{total}] {symbol} — no matches")
                else:
                    collected.append(hits)
                    print(f"[{done_count:3d}/{total}] ✅ {symbol} — found {len(hits)} matches")
            except Exception as e:
                print(f"[{done_count:3d}/{total}] {symbol} — ❌ error: {e}")

    if not collected:
        print("No matches found with current filters. Consider lowering VOL_MIN or VOL_OI_MIN.")
        return

    df = pd.concat(collected, ignore_index=True)

    # Best score per ticker, highest first; the ten leaders define the report.
    best_scores = df.groupby("ticker")["score"].max().reset_index()
    per_ticker = best_scores.sort_values("score", ascending=False)
    top10_tickers = per_ticker["ticker"].head(10).tolist()
    in_top = df["ticker"].isin(top10_tickers)
    final = df[in_top].sort_values(["ticker", "score"], ascending=[True, False])

    # Persist the detailed rows.
    final_cols = ["ticker", "type", "strike", "lastPrice", "volume",
                  "openInterest", "vol_oi", "expiration", "contractSymbol", "score"]
    report = final[final_cols]
    report.to_csv(SAVE_CSV, index=False)
    print(f"\nSaved {len(final)} matches to: {SAVE_CSV}")

    # Console summary: per_ticker is already ranked, so its first ten rows are the leaders.
    print("\n=== Top 10 tickers (by max score) ===")
    leaders = per_ticker.head(10)
    for name, best in zip(leaders["ticker"], leaders["score"]):
        print(f"{name:>5}  score={best:.1f}")

    print("\n=== Sample rows ===")
    print(report.head(25).to_string(index=False))


if __name__ == "__main__":
    main()


Next file to save: unusual_options_scan_2025-11-04_1.csv
[  1/152] ATVI — no matches
[  2/152] ANSS — no matches
[  3/152] AEP — no matches
[  4/152] ALK — no matches
[  5/152] AZN — no matches
[  6/152] ALGN — no matches
[  7/152] ✅ AFRM — found 2 matches
[  8/152] ✅ ABNB — found 1 matches
[  9/152] ✅ ALAB — found 1 matches
[ 10/152] AMGN — no matches
[ 11/152] AMAT — no matches
[ 12/152] ✅ AMZN — found 3 matches
[ 13/152] AVAV — no matches
[ 14/152] ADP — no matches
[ 15/152] ADBE — no matches
[ 16/152] AVGO — no matches
[ 17/152] ADSK — no matches
[ 18/152] ADI — no matches
[ 19/152] ✅ ASML — found 1 matches
[ 20/152] ✅ AAPL — found 3 matches
[ 21/152] ✅ BA — found 1 matches
[ 22/152] ✅ AMD — found 16 matches
[ 23/152] BKR — no matches
[ 24/152] BMRN — no matches
[ 25/152] CHKP — no matches
[ 26/152] ✅ BIDU — found 3 matches
[ 27/152] ✅ BABA — found 1 matches
[ 28/152] ✅ CSGP — found 1 matches
[ 29/152] BKNG — no matches
[ 30/152] CTSH — no matches
[ 31/152] ✅ CDNS — found 1 matches

In [112]:
import yfinance as yf
import os
import pandas as pd
from datetime import datetime, date, timedelta
from dateutil.parser import parse as dtparse
from concurrent.futures import ThreadPoolExecutor, as_completed

# ------------------ USER SETTINGS ------------------
MAX_EXPIRIES_PER_TICKER = 8     # cap on chains fetched per ticker (nearest 7 + one Jan'26 expiry if present)
LAST_PRICE_MAX = 1.50           # keep only contracts whose last price is <= $1.50
VOL_MIN = 300                   # minimum contract volume to be considered
VOL_OI_MIN = 2.0                # minimum volume / open-interest ratio
END_DATE_CUTOFF = "2026-01-31"  # ignore expiries after this ISO date

# Output files are named "<prefix>_<n>.csv"; n grows with every run made on the same day.
base_date = datetime.now().strftime('%Y-%m-%d')
prefix = f"unusual_options_scan_{base_date}"
ext = ".csv"

# Collect the run numbers already used by today's files in the working directory.
existing_indices = []
for fname in os.listdir('.'):
    if not (fname.startswith(prefix) and fname.endswith(ext)):
        continue
    # The run number sits between "<prefix>_" and the extension.
    stem = fname[len(prefix) + 1:-len(ext)]
    try:
        existing_indices.append(int(stem))
    except ValueError:
        # Same prefix but no numeric suffix: not one of our numbered files.
        continue

# First run of the day gets 1; afterwards one past the highest number found.
if existing_indices:
    next_index = max(existing_indices) + 1
else:
    next_index = 1
SAVE_CSV = f"{prefix}_{next_index}{ext}"

# Scan universe: NASDAQ-100 style names plus extra watch-list symbols.
# The literal contains repeats (e.g. WDAY, PDD, CRWD, QCOM); they are removed below.
# NOTE(review): a few entries (e.g. ATVI, SPLK, ANSS, VIX) look delisted or not
# optionable under that symbol - confirm. Tickers that report no expiries are
# simply skipped by scan_ticker, so stale entries only cost a request.
NASDAQ100 = [
    "AAPL","MSFT","NVDA","AMZN","META","GOOGL","GOOG","TSLA","AVGO","COST",
    "NFLX","PEP","ADBE","AMD","LIN","TMUS","CSCO","QCOM","TXN","AMAT",
    "INTU","HON","INTC","BKNG","SBUX","MU","AMGN","PDD","REGN","LRCX",
    "ADP","ISRG","ABNB","MDLZ","VRTX","ASML","GILD","ADI","PANW","KLAC",
    "PYPL","CRWD","CSX","WDAY","CHTR","MAR","NXPI","ROP","AEP","KDP",
    "MELI","FTNT","ORLY","SNPS","CDNS","MNST","CTAS","DXCM","PCAR","LULU",
    "MRVL","MCHP","ROST","EXC","ODFL","ADSK","ATVI","IDXX","EA",
    "PAYX","CTSH","TEAM","XEL","WDAY","DDOG","ZS","SPLK","BKR","ALGN",
    "AZN","CEG","VRSK","SIRI","PDD","LCID","RIVN","BIDU","JD","BMRN",
    "DOCU","VRSN","NTES","MRNA","ANSS","CSGP","CHKP","MTCH","CRWD","OKTA",
    "NEE", "JNJ", "SMCI", "STZ", "TMQ", "PLTR", "XYZ", "HOOD", "ORCL", "UPST",
    "TSM", "SHOP", "SPOT", "LLY", "HIMS", "UNH", "DELL", "COIN", "OSCR", "SNOW",
    "QUBT", "RGTI", "CRWV", "RKLB", "BA", "QCOM", "PANW", "JPM", "GS", "BABA",
    "BIDU", "USAR", "ONON", "VIX", "OKLO", "QS", "CRML", "MP", "QBTS", "JEF",
    "GKOS", "GSK", "AMGN", "ROKU", "RH", "FCX", "DASH", "CHWY", "OPEN", "QQQ"
]
# Unique symbols in alphabetical order. The result is sorted anyway, so a set
# is enough for de-duplication (no need to preserve first-seen order).
TICKERS = sorted(set(NASDAQ100))

def safe_option_chain(tkr, exp):
    """Fetch the option chain for one expiry and tag both sides.

    Best-effort by design: any exception raised while fetching or tagging the
    chain is swallowed, so a single bad expiry does not abort the caller.

    Args:
        tkr: object exposing ``option_chain(exp)`` whose result has ``calls``
            and ``puts`` DataFrames.
        exp: expiry identifier passed straight to ``option_chain``.

    Returns:
        ``(calls, puts)`` - copies of the chain's frames with a "type" column
        ("CALL" / "PUT") and an "expiration" column (= ``exp``) added - or
        ``(None, None)`` on any failure.
    """
    try:
        chain = tkr.option_chain(exp)
        # Copies keep the frames held by the chain object untouched.
        calls = chain.calls.copy()
        puts = chain.puts.copy()
        calls["type"], puts["type"] = "CALL", "PUT"
        calls["expiration"] = puts["expiration"] = exp
    except Exception:
        return None, None
    return calls, puts

def pick_expiries(all_exps, *, offset_days=0, max_expiries=None, cutoff=None,
                  far_month="2026-01"):
    """Choose a practical subset of expiries to scan for one ticker.

    Selection rules:
    - keep only expiries in the window [today + offset_days, cutoff];
    - take the nearest ones, up to ``max_expiries - 1``;
    - additionally include the first in-window expiry whose date starts with
      ``far_month`` (Jan 2026 by default), if it is not already selected.

    Args:
        all_exps: iterable of expiry dates as ISO "YYYY-MM-DD" strings.
        offset_days: start of the window, in days from today (0 = today).
        max_expiries: cap on the number of expiries returned; defaults to
            MAX_EXPIRIES_PER_TICKER.
        cutoff: last acceptable expiry as an ISO date string; defaults to
            END_DATE_CUTOFF.
        far_month: "YYYY-MM" prefix of the extra far-dated expiry to include;
            pass a falsy value to disable it.

    Returns:
        List of expiry strings: the nearest ones in ascending order, followed
        by the far-month pick when one was added.
    """
    if max_expiries is None:
        max_expiries = MAX_EXPIRIES_PER_TICKER
    if cutoff is None:
        cutoff = END_DATE_CUTOFF

    earliest = (date.today() + timedelta(days=offset_days)).isoformat()
    # ISO dates sort chronologically as plain strings, so no parsing is needed.
    in_window = sorted(e for e in all_exps if earliest <= e <= cutoff)

    # Reserve one slot for the far-month expiry.
    chosen = in_window[:max(0, max_expiries - 1)]

    if far_month:
        far_pick = next((e for e in in_window if e.startswith(far_month)), None)
        if far_pick is not None and far_pick not in chosen:
            chosen.append(far_pick)
    return chosen

def scan_ticker(ticker):
    """Scan one ticker's option chains and return the contracts passing the filters.

    For every expiry chosen by ``pick_expiries`` the calls and puts are merged
    and a contract is kept when all of the following hold:
    1) lastPrice <= LAST_PRICE_MAX
    2) volume >= VOL_MIN
    3) volume / openInterest >= VOL_OI_MIN (open interest of 0 counts as 1)

    Args:
        ticker: symbol string handed to ``yf.Ticker``.

    Returns:
        DataFrame with columns contractSymbol, type, strike, lastPrice, volume,
        openInterest, vol_oi, expiration, ticker and score (volume * vol_oi).
        An empty, column-less DataFrame is returned when the expiry list cannot
        be fetched, is empty, or nothing passes the filters.
    """
    keep_cols = ["contractSymbol", "type", "strike", "lastPrice",
                 "volume", "openInterest", "vol_oi", "expiration"]

    tkr = yf.Ticker(ticker)
    try:
        all_exps = tkr.options
    except Exception:
        # Best-effort: a ticker whose expiries cannot be listed is skipped.
        return pd.DataFrame()

    if not all_exps:
        return pd.DataFrame()

    rows = []
    for exp in pick_expiries(all_exps):
        calls, puts = safe_option_chain(tkr, exp)
        if calls is None:
            continue

        # Skip empty sides: concatenating empty frames is deprecated in recent
        # pandas and contributes nothing to the result.
        sides = [side for side in (calls, puts) if not side.empty]
        if not sides:
            continue
        df = pd.concat(sides, ignore_index=True)

        # Clean columns that sometimes go missing
        for col in ["lastPrice", "volume", "openInterest", "strike"]:
            if col not in df.columns:
                df[col] = 0

        # A missing (NaN) volume or open interest would turn vol_oi into NaN and
        # silently drop the contract; treat missing as 0, like an explicit 0.
        for col in ("volume", "openInterest"):
            df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0)

        # Zero open interest is replaced by 1 to avoid dividing by zero.
        df["vol_oi"] = df["volume"] / df["openInterest"].replace(0, 1)
        flt = (
            (df["lastPrice"] <= LAST_PRICE_MAX) &
            (df["volume"] >= VOL_MIN) &
            (df["vol_oi"] >= VOL_OI_MIN)
        )
        # .copy() so the column assignments below act on an independent frame
        # (avoids SettingWithCopyWarning on the filtered slice).
        hits = df.loc[flt, keep_cols].copy()
        if hits.empty:
            continue
        hits["ticker"] = ticker
        # Simple ranking score: volume * vol/oi
        hits["score"] = hits["volume"] * hits["vol_oi"]
        rows.append(hits)

    if not rows:
        return pd.DataFrame()
    return pd.concat(rows, ignore_index=True)

def build_strike_ranges_for_group(strikes_sorted, max_gap=5.0):
    """Collapse an ascending sequence of strikes into (low, high) runs.

    Neighbouring strikes that are at most ``max_gap`` apart belong to the same
    run; a larger jump closes the current run and starts a new one. Repeated
    strikes are ignored.

    The function only sees the strikes it is given, not the full strike ladder
    of the chain, so "adjacent" is judged by distance rather than by position
    in the ladder. Tune ``max_gap`` for underlyings with wider or narrower
    strike spacing.

    Example (max_gap=5.0):
        Input:  [425.0, 427.5, 430.0, 440.0, 445.0]
        Output: [(425.0, 430.0), (440.0, 445.0)]

    Args:
        strikes_sorted: strikes in ascending order.
        max_gap: largest difference between neighbouring strikes that still
            counts as the same run.

    Returns:
        List of (low, high) tuples; a lone strike yields (strike, strike).
        Empty input yields an empty list.
    """
    ranges = []
    # len() rather than truthiness so array-like inputs work too.
    if len(strikes_sorted) == 0:
        return ranges

    start = prev = strikes_sorted[0]
    for s in strikes_sorted[1:]:
        if s == prev:
            # duplicate strike: nothing to extend or close
            continue
        if s - prev > max_gap:
            # jump too large: close the current run and open a new one at s
            ranges.append((start, prev))
            start = s
        prev = s

    # close last range
    ranges.append((start, prev))
    return ranges

def summarize_strike_ranges(df_final):
    """Build one compact strike summary per (ticker, type, expiration) group.

    Args:
        df_final: DataFrame providing at least the columns 'ticker', 'type',
            'strike', 'expiration' and 'score'.

    Returns:
        List of dicts with the keys "ticker", "expiration", "summary" (text
        such as "TSLA 425-430 C exp 2025-11-01") and "group_score" (highest
        score in the group), ordered by group_score, highest first.
        An empty frame yields an empty list.
    """
    if df_final.empty:
        return []

    # Only these columns are needed for grouping and ranking.
    needed = df_final[["ticker", "type", "strike", "expiration", "score"]].copy()

    summaries = []
    for (ticker, opt_type, exp), g in needed.groupby(["ticker", "type", "expiration"]):
        # Ranges are built from the distinct strikes in numeric order.
        spans = build_strike_ranges_for_group(sorted(g["strike"].unique()))

        # The best single-contract score represents the whole group.
        group_score = g["score"].max()

        # A run with equal ends is a single strike; otherwise show "lo-hi".
        labels = [
            f"{fmt_strike(lo)}" if lo == hi else f"{fmt_strike(lo)}-{fmt_strike(hi)}"
            for lo, hi in spans
        ]

        # One-letter side marker; anything that is not "CALL" is shown as a put.
        if opt_type.upper() == "CALL":
            short_type = "C"
        else:
            short_type = "P"

        if not labels:
            continue

        joined = ", ".join(f"{label} {short_type}" for label in labels)
        summaries.append({
            "ticker": ticker,
            "expiration": exp,
            "summary": f"{ticker} {joined} exp {exp}",
            "group_score": group_score
        })

    # Highest-scoring groups first.
    summaries.sort(key=lambda item: item["group_score"], reverse=True)
    return summaries

def fmt_strike(x):
    """Render a strike for display, dropping a redundant '.0'.

    Whole-number strikes are shown as integers, everything else as-is:
        425.0  -> '425'
        427.5  -> '427.5'
        430.25 -> '430.25'
    """
    return str(int(x)) if float(x).is_integer() else str(x)

def main():
    """Scan all tickers concurrently, rank the hits, print summaries and save a CSV.

    Each ticker is scanned by ``scan_ticker`` on a thread pool. Hits from the
    ten tickers with the highest single-contract score are summarized as
    grouped strike ranges, written to SAVE_CSV and echoed to the console.
    """
    total = len(TICKERS)
    pool_size = min(20, total)

    collected = []
    with ThreadPoolExecutor(max_workers=pool_size) as pool:
        # Queue every ticker up front and remember which future belongs to which symbol.
        pending = {}
        for symbol in TICKERS:
            pending[pool.submit(scan_ticker, symbol)] = symbol

        done_count = 0
        for fut in as_completed(pending):
            done_count += 1
            symbol = pending[fut]
            try:
                hits = fut.result()
                if hits.empty:
                    print(f"[{done_count:3d}/{total}] {symbol} — no matches")
                else:
                    collected.append(hits)
                    print(f"[{done_count:3d}/{total}] ✅ {symbol} — found {len(hits)} matches")
            except Exception as e:
                print(f"[{done_count:3d}/{total}] {symbol} — ❌ error: {e}")

    if not collected:
        print("No matches found with current filters. Consider lowering VOL_MIN or VOL_OI_MIN.")
        return

    df = pd.concat(collected, ignore_index=True)

    # Best score per ticker, highest first; the ten leaders define the report.
    best_scores = df.groupby("ticker")["score"].max().reset_index()
    per_ticker = best_scores.sort_values("score", ascending=False)
    top10_tickers = per_ticker["ticker"].head(10).tolist()
    in_top = df["ticker"].isin(top10_tickers)
    final = df[in_top].sort_values(["ticker", "score"], ascending=[True, False])

    # Grouped strike ranges, one line per (ticker, type, expiration) group.
    summaries = summarize_strike_ranges(final)

    print("\n=== GROUPED STRIKE RANGES (top tickers) ===")
    for entry in summaries:
        print(f"{entry['summary']}  [score={entry['group_score']:.1f}]")

    # Persist the detailed rows.
    final_cols = ["ticker", "type", "strike", "lastPrice", "volume",
                  "openInterest", "vol_oi", "expiration", "contractSymbol", "score"]
    report = final[final_cols]
    report.to_csv(SAVE_CSV, index=False)
    print(f"\nSaved {len(final)} matches to: {SAVE_CSV}")

    # Console summary: per_ticker is already ranked, so its first ten rows are the leaders.
    print("\n=== Top 10 tickers (by max score) ===")
    leaders = per_ticker.head(10)
    for name, best in zip(leaders["ticker"], leaders["score"]):
        print(f"{name:>5}  score={best:.1f}")

    print("\n=== Sample rows ===")
    print(report.head(25).to_string(index=False))


if __name__ == "__main__":
    main()


[  1/142] ATVI — no matches
[  2/142] ANSS — no matches
[  3/142] AEP — no matches
[  4/142] ✅ AZN — found 1 matches
[  5/142] BKR — no matches
[  6/142] BMRN — no matches
[  7/142] ADI — no matches
[  8/142] ✅ ADP — found 1 matches
[  9/142] ✅ ABNB — found 2 matches
[ 10/142] ✅ AMZN — found 3 matches
[ 11/142] ✅ AMGN — found 1 matches
[ 12/142] ✅ BA — found 4 matches
[ 13/142] ALGN — no matches
[ 14/142] AMAT — no matches
[ 15/142] ✅ BIDU — found 1 matches
[ 16/142] ✅ AMD — found 5 matches
[ 17/142] ADSK — no matches
[ 18/142] ✅ AVGO — found 3 matches
[ 19/142] ✅ ASML — found 1 matches
[ 20/142] ✅ AAPL — found 3 matches
[ 21/142] ✅ BABA — found 2 matches
[ 22/142] ADBE — no matches
[ 23/142] CHKP — no matches
[ 24/142] BKNG — no matches
[ 25/142] CSGP — no matches
[ 26/142] CDNS — no matches
[ 27/142] CTSH — no matches
[ 28/142] CEG — no matches
[ 29/142] ✅ CSCO — found 2 matches
[ 30/142] CHTR — no matches
[ 31/142] EXC — no matches
[ 32/142] CRML — no matches
[ 33/142] CSX — no matc