In [1]:
# Import required libraries
import os
import json
import time
import datetime as dt
import csv
import pathlib
from typing import Dict, List
import requests
import pandas as pd
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Setup data directories
DATA_RAW = pathlib.Path("../data/raw")
DATA_RAW.mkdir(parents=True, exist_ok=True)

print(f"Data directory: {DATA_RAW.absolute()}")

# Load environment variables
load_dotenv()
ALPHA_KEY = os.getenv("ALPHAVANTAGE_API_KEY")
print("Loaded ALPHAVANTAGE_API_KEY?", bool(ALPHA_KEY))



Data directory: /Users/aman/Desktop/Bootcamp/bootcamp_aman_dhillon/project/notebooks/../data/raw
Loaded ALPHAVANTAGE_API_KEY? True


In [2]:
# Simple SPY stock data extraction with adjusted close
import requests
import pandas as pd
from pathlib import Path
import os
from dotenv import load_dotenv
from datetime import datetime

# Load API key
load_dotenv()
POLYGON_KEY = os.getenv("POLYGON_API_KEY")

# Setup
SYMBOL = "SPY"
DATA_RAW = Path("../data/raw")
DATA_RAW.mkdir(parents=True, exist_ok=True)

# Get 1 year of stock data
end_date = datetime.now().strftime("%Y-%m-%d")
start_date = (datetime.now() - pd.DateOffset(years=1)).strftime("%Y-%m-%d")

url = f"https://api.polygon.io/v2/aggs/ticker/{SYMBOL}/range/1/day/{start_date}/{end_date}"
params = {"apiKey": POLYGON_KEY}

try:
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()
    
    if data.get("status") in ["OK", "DELAYED"] and data.get("results"):
        df = pd.DataFrame(data["results"])
        df['date'] = pd.to_datetime(df['t'], unit='ms')
        
        # Include adjusted close (vw = volume weighted average price)
        df = df[['date', 'o', 'h', 'l', 'c', 'v', 'vw']].rename(columns={
            'o': 'open', 'h': 'high', 'l': 'low', 'c': 'close', 'v': 'volume', 'vw': 'adj_close'
        })
        
        # Save data
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"spy_stock_1year_{timestamp}.csv"
        filepath = DATA_RAW / filename
        df.to_csv(filepath, index=False)
        
        print(f"✅ Saved {len(df)} days of SPY data to: {filename}")
        print(f"Date range: {df['date'].min().date()} to {df['date'].max().date()}")
        print(f"Latest close: ${df['close'].iloc[-1]:.2f}")
        print(f"Latest adj_close: ${df['adj_close'].iloc[-1]:.2f}")
        
    else:
        print("❌ No data received")
        
except Exception as e:
    print(f"❌ Error: {e}")

✅ Saved 249 days of SPY data to: spy_stock_1year_20250825_162252.csv
Date range: 2024-08-26 to 2025-08-22
Latest close: $645.31
Latest adj_close: $644.08


In [3]:
print(df)

                   date    open     high     low   close      volume  \
0   2024-08-26 04:00:00  563.18  563.910  559.05  560.79  35788609.0   
1   2024-08-27 04:00:00  559.49  562.060  558.32  561.56  32693898.0   
2   2024-08-28 04:00:00  561.21  561.650  555.04  558.30  41066024.0   
3   2024-08-29 04:00:00  560.31  563.680  557.18  558.35  38715176.0   
4   2024-08-30 04:00:00  560.77  564.200  557.14  563.68  62700110.0   
..                  ...     ...      ...     ...     ...         ...   
244 2025-08-18 04:00:00  642.86  644.000  642.18  643.30  43804914.0   
245 2025-08-19 04:00:00  643.12  644.105  638.48  639.81  69750731.0   
246 2025-08-20 04:00:00  639.40  639.660  632.95  638.11  88890298.0   
247 2025-08-21 04:00:00  636.28  637.970  633.81  635.55  54805775.0   
248 2025-08-22 04:00:00  637.76  646.500  637.25  645.31  84083214.0   

     adj_close  
0     561.1744  
1     560.9928  
2     558.4289  
3     560.1538  
4     561.5801  
..         ...  
244   643.1665  

In [13]:
import yfinance as yf
import pandas as pd
from typing import List,Dict
def get_european_options_data() -> pd.DataFrame:
    underlying = [
    # Major Index & Market ETFs
    "SPY", "QQQ", "IWM", "DIA", "EFA", "EEM", "VTI", "VOO", "VEA", "VWO",

    # Tech Megacaps
    "AAPL", "MSFT", "AMZN", "GOOGL", "META", "TSLA", "NVDA", "NFLX", "AMD",
    "INTC", "ORCL", "CSCO", "IBM", "ADBE", "CRM", "SHOP", "SQ", "PYPL",

    # Semiconductors & Chips
    "MU", "QCOM", "AVGO", "TXN", "SMH", "ON", "ASML", "LRCX", "KLAC",

    # Financials & Banks
    "JPM", "BAC", "WFC", "GS", "MS", "C", "SCHW", "AXP", "COF", "XLF", "BLK",

    # Energy & Oil
    "XOM", "CVX", "COP", "SLB", "XLE", "OXY", "BP", "MPC", "PSX", "HAL",

    # Healthcare & Pharma
    "JNJ", "PFE", "MRK", "UNH", "LLY", "ABBV", "BMY", "AMGN", "VRTX", "GILD",
    "XLV", "CVS", "CI",

    # Consumer Discretionary
    "MCD", "SBUX", "NKE", "DIS", "KO", "PEP", "PG", "COST", "WMT", "TGT",
    "HD", "LOW", "XLY", "XLP",

    # Industrials & Transport
    "CAT", "BA", "GE", "HON", "MMM", "UPS", "FDX", "DE", "NOC", "LMT", "RTX",
    "XLI", "DAL", "UAL", "AAL", "CSX", "UNP",

    # Utilities & Real Estate
    "XLU", "XLRE", "NEE", "DUK", "SO", "PLD", "AMT",

    # Materials & Mining
    "XLB", "FCX", "NEM", "LIN", "APD", "DD",

    # Telecom & Communication
    "XLC", "TMUS", "VZ", "T", "CHTR", "CMCSA",

    # Meme & High Retail Flow
    "GME", "AMC", "BBBYQ", "HOOD", "RIVN", "LCID", "PLTR", "SNAP", "ROKU", "TWLO",

    # ARK & Growth ETFs
    "ARKK", "ARKG", "ARKF", "HYG", "LQD", "SMH", "XBI", "KRE", "TLT", "IEF",

    # Commodities ETFs
    "GLD", "SLV", "USO", "UNG", "DBC", "XME", "URA",

    # Volatility Products
    "VIX", "VXX", "UVXY", "SVXY",

    # Other popular movers
    "SHOP", "ZM", "DOCU", "UBER", "LYFT", "ABNB", "ETSY", "BIDU", "BABA", "JD", "PDD",
    "RIO", "SHEL", "RBLX", "COIN", "MARA", "RIOT"]

    all_options_data = []
    for ticker_symbol in underlying:
        print(f"Fetching data for {ticker_symbol}") 
        ticker = yf.Ticker(ticker_symbol)
        expirations = ticker.options
        for exp in expirations:
            options_chain = ticker.option_chain(exp)
            
        if options_chain.calls is not None:
            options_chain.calls['contract_type'] = 'call'
            options_chain.calls['underlying_ticker'] = ticker_symbol
            options_chain.calls['expiration'] = exp
            all_options_data.append(options_chain.calls)
            
        if options_chain.puts is not None:
                options_chain.puts['contract_type'] = 'put'
                options_chain.puts['underlying_ticker'] = ticker_symbol
                options_chain.puts['expiration'] = exp
                all_options_data.append(options_chain.puts)
    
    df = pd.concat(all_options_data,ignore_index=True)
    df.rename(columns={'strike':'strike_price', 'lastPrice':'last_price'},inplace=True)
    return df



In [14]:
df = get_european_options_data()

Fetching data for SPY
Fetching data for QQQ
Fetching data for IWM
Fetching data for DIA
Fetching data for EFA
Fetching data for EEM
Fetching data for VTI
Fetching data for VOO
Fetching data for VEA
Fetching data for VWO
Fetching data for AAPL
Fetching data for MSFT
Fetching data for AMZN
Fetching data for GOOGL
Fetching data for META
Fetching data for TSLA
Fetching data for NVDA
Fetching data for NFLX
Fetching data for AMD
Fetching data for INTC
Fetching data for ORCL
Fetching data for CSCO
Fetching data for IBM
Fetching data for ADBE
Fetching data for CRM
Fetching data for SHOP
Fetching data for SQ
Fetching data for PYPL
Fetching data for MU
Fetching data for QCOM
Fetching data for AVGO
Fetching data for TXN
Fetching data for SMH
Fetching data for ON
Fetching data for ASML
Fetching data for LRCX
Fetching data for KLAC
Fetching data for JPM
Fetching data for BAC
Fetching data for WFC
Fetching data for GS
Fetching data for MS
Fetching data for C
Fetching data for SCHW
Fetching data for 

In [10]:
df.head(10)

Unnamed: 0,contractSymbol,lastTradeDate,strike_price,last_price,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,contract_type,underlying_ticker,expiration
0,SPY271217C00200000,2025-08-25 15:09:53+00:00,200.0,451.5,450.0,452.5,-1.0,-0.220994,2.0,311,0.526174,True,REGULAR,USD,call,SPY,2027-12-17
1,SPY271217C00205000,2025-08-22 16:05:27+00:00,205.0,447.62,443.52,448.0,0.0,0.0,1.0,5,0.508824,True,REGULAR,USD,call,SPY,2027-12-17
2,SPY271217C00210000,2025-08-19 14:59:10+00:00,210.0,439.81,439.5,443.96,0.0,0.0,99.0,108,0.512364,True,REGULAR,USD,call,SPY,2027-12-17
3,SPY271217C00215000,2025-07-18 14:34:00+00:00,215.0,423.44,436.5,441.0,0.0,0.0,1.0,3,0.528111,True,REGULAR,USD,call,SPY,2027-12-17
4,SPY271217C00220000,2025-08-13 19:52:32+00:00,220.0,434.0,430.5,434.5,0.0,0.0,10.0,25,0.502691,True,REGULAR,USD,call,SPY,2027-12-17
5,SPY271217C00225000,2025-04-14 17:53:14+00:00,225.0,327.0,370.75,375.5,0.0,0.0,,1,1e-05,True,REGULAR,USD,call,SPY,2027-12-17
6,SPY271217C00230000,2025-04-10 14:47:27+00:00,230.0,314.0,344.0,348.94,0.0,0.0,2.0,3,1e-05,True,REGULAR,USD,call,SPY,2027-12-17
7,SPY271217C00235000,2025-02-03 14:55:35+00:00,235.0,373.97,355.0,359.79,0.0,0.0,,1,1e-05,True,REGULAR,USD,call,SPY,2027-12-17
8,SPY271217C00240000,2025-04-07 13:53:40+00:00,240.0,267.91,0.0,0.0,0.0,0.0,2.0,0,1e-05,True,REGULAR,USD,call,SPY,2027-12-17
9,SPY271217C00250000,2025-08-14 19:47:16+00:00,250.0,407.34,403.15,408.0,0.0,0.0,1.0,27,0.504842,True,REGULAR,USD,call,SPY,2027-12-17


In [15]:
df.shape

(12024, 17)

In [11]:
count_le_0 = (df['impliedVolatility'] < 0).sum()       # strictly less than 0
count_eq_0 = (df['impliedVolatility'] == 0).sum()      # exactly equal to 0
count_gt_3 = (df['impliedVolatility'] > 3).sum()       # strictly greater than 3
count = (df['impliedVolatility']).sum() - count_le_0 - count_eq_0 - count_gt_3

print("<= 0:", count_le_0 + count_eq_0)   # if you want combined <= 0
print("= 0 :", count_eq_0)
print("> 3 :", count_gt_3)
print("Rest :", count)

<= 0: 0
= 0 : 0
> 3 : 0
Rest : 67.23878082660676


In [None]:
print(df)

         contractSymbol             lastTradeDate  strike_price  last_price  \
0    SPY271217C00200000 2025-08-22 16:05:27+00:00         200.0      452.50   
1    SPY271217C00205000 2025-08-22 16:05:27+00:00         205.0      447.62   
2    SPY271217C00210000 2025-08-19 14:59:10+00:00         210.0      439.81   
3    SPY271217C00215000 2025-07-18 14:34:00+00:00         215.0      423.44   
4    SPY271217C00220000 2025-08-13 19:52:31+00:00         220.0      434.00   
..                  ...                       ...           ...         ...   
283  SPY271217P00910000 2025-06-25 19:40:36+00:00         910.0      303.38   
284  SPY271217P00915000 2025-08-22 20:10:39+00:00         915.0      270.06   
285  SPY271217P00920000 2025-08-22 19:29:10+00:00         920.0      272.50   
286  SPY271217P00935000 2025-07-08 13:35:47+00:00         935.0      312.10   
287  SPY271217P01000000 2025-08-13 13:55:07+00:00        1000.0      355.06   

        bid     ask     change  percentChange  volu

In [None]:
df.columns

Index(['contractSymbol', 'lastTradeDate', 'strike_price', 'last_price', 'bid',
       'ask', 'change', 'percentChange', 'volume', 'openInterest',
       'impliedVolatility', 'inTheMoney', 'contractSize', 'currency',
       'contract_type', 'underlying_ticker', 'expiration'],
      dtype='object')