In [22]:
# Import required libraries
import os
import json
import time
import datetime as dt
import csv
import pathlib
from typing import Dict, List
import requests
import pandas as pd
from bs4 import BeautifulSoup
from dotenv import load_dotenv

# Setup data directories
DATA_RAW = pathlib.Path("../data/raw")
DATA_RAW.mkdir(parents=True, exist_ok=True)

print(f"Data directory: {DATA_RAW.absolute()}")

# Load environment variables
load_dotenv()
ALPHA_KEY = os.getenv("ALPHAVANTAGE_API_KEY")
print("Loaded ALPHAVANTAGE_API_KEY?", bool(ALPHA_KEY))



Data directory: /Users/aman/Desktop/Bootcamp/bootcamp_aman_dhillon/project/notebooks/../data/raw
Loaded ALPHAVANTAGE_API_KEY? True


In [23]:
# Simple SPY stock data extraction with adjusted close
import requests
import pandas as pd
from pathlib import Path
import os
from dotenv import load_dotenv
from datetime import datetime

# Load API key
load_dotenv()
POLYGON_KEY = os.getenv("POLYGON_API_KEY")

# Setup
SYMBOL = "SPY"
DATA_RAW = Path("../data/raw")
DATA_RAW.mkdir(parents=True, exist_ok=True)

# Get 1 year of stock data
end_date = datetime.now().strftime("%Y-%m-%d")
start_date = (datetime.now() - pd.DateOffset(years=1)).strftime("%Y-%m-%d")

url = f"https://api.polygon.io/v2/aggs/ticker/{SYMBOL}/range/1/day/{start_date}/{end_date}"
params = {"apiKey": POLYGON_KEY}

try:
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    data = response.json()
    
    if data.get("status") in ["OK", "DELAYED"] and data.get("results"):
        df = pd.DataFrame(data["results"])
        df['date'] = pd.to_datetime(df['t'], unit='ms')
        
        # Include adjusted close (vw = volume weighted average price)
        df = df[['date', 'o', 'h', 'l', 'c', 'v', 'vw']].rename(columns={
            'o': 'open', 'h': 'high', 'l': 'low', 'c': 'close', 'v': 'volume', 'vw': 'adj_close'
        })
        
        # Save data
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"spy_stock_1year_{timestamp}.csv"
        filepath = DATA_RAW / filename
        df.to_csv(filepath, index=False)
        
        print(f"✅ Saved {len(df)} days of SPY data to: {filename}")
        print(f"Date range: {df['date'].min().date()} to {df['date'].max().date()}")
        print(f"Latest close: ${df['close'].iloc[-1]:.2f}")
        print(f"Latest adj_close: ${df['adj_close'].iloc[-1]:.2f}")
        
    else:
        print("❌ No data received")
        
except Exception as e:
    print(f"❌ Error: {e}")

✅ Saved 250 days of SPY data to: spy_stock_1year_20250823_202340.csv
Date range: 2024-08-23 to 2025-08-22
Latest close: $645.31
Latest adj_close: $644.08


In [24]:
print(df)

                   date    open     high     low   close      volume  \
0   2024-08-23 04:00:00  559.53  563.090  557.29  562.13  50639393.0   
1   2024-08-26 04:00:00  563.18  563.910  559.05  560.79  35788609.0   
2   2024-08-27 04:00:00  559.49  562.060  558.32  561.56  32693898.0   
3   2024-08-28 04:00:00  561.21  561.650  555.04  558.30  41066024.0   
4   2024-08-29 04:00:00  560.31  563.680  557.18  558.35  38715176.0   
..                  ...     ...      ...     ...     ...         ...   
245 2025-08-18 04:00:00  642.86  644.000  642.18  643.30  43804914.0   
246 2025-08-19 04:00:00  643.12  644.105  638.48  639.81  69750731.0   
247 2025-08-20 04:00:00  639.40  639.660  632.95  638.11  88890298.0   
248 2025-08-21 04:00:00  636.28  637.970  633.81  635.55  54805775.0   
249 2025-08-22 04:00:00  637.76  646.500  637.25  645.31  84083214.0   

     adj_close  
0     560.7801  
1     561.1744  
2     560.9928  
3     558.4289  
4     560.1538  
..         ...  
245   643.1665  

In [25]:
import yfinance as yf
import pandas as pd
from typing import List,Dict
def get_european_options_data() -> pd.DataFrame:
    underlying = ['SPY',]
    all_options_data = []
    for ticker_symbol in underlying:
        print(f"Fetching data for {ticker_symbol}") 
        ticker = yf.Ticker(ticker_symbol)
        expirations = ticker.options
        for exp in expirations:
            options_chain = ticker.option_chain(exp)
            
        if options_chain.calls is not None:
            options_chain.calls['contract_type'] = 'call'
            options_chain.calls['underlying_ticker'] = ticker_symbol
            options_chain.calls['expiration'] = exp
            all_options_data.append(options_chain.calls)
            
        if options_chain.puts is not None:
                options_chain.puts['contract_type'] = 'put'
                options_chain.puts['underlying_ticker'] = ticker_symbol
                options_chain.puts['expiration'] = exp
                all_options_data.append(options_chain.puts)
    
    df = pd.concat(all_options_data,ignore_index=True)
    df.rename(columns={'strike':'strike_price', 'lastPrice':'last_price'},inplace=True)
    return df



In [26]:
df = get_european_options_data()

Fetching data for SPY


In [27]:
print(df)

         contractSymbol             lastTradeDate  strike_price  last_price  \
0    SPY271217C00200000 2025-08-22 16:05:27+00:00         200.0      452.50   
1    SPY271217C00205000 2025-08-22 16:05:27+00:00         205.0      447.62   
2    SPY271217C00210000 2025-08-19 14:59:10+00:00         210.0      439.81   
3    SPY271217C00215000 2025-07-18 14:34:00+00:00         215.0      423.44   
4    SPY271217C00220000 2025-08-13 19:52:31+00:00         220.0      434.00   
..                  ...                       ...           ...         ...   
283  SPY271217P00910000 2025-06-25 19:40:36+00:00         910.0      303.38   
284  SPY271217P00915000 2025-08-22 20:10:39+00:00         915.0      270.06   
285  SPY271217P00920000 2025-08-22 19:29:10+00:00         920.0      272.50   
286  SPY271217P00935000 2025-07-08 13:35:47+00:00         935.0      312.10   
287  SPY271217P01000000 2025-08-13 13:55:07+00:00        1000.0      355.06   

        bid     ask     change  percentChange  volu

In [28]:
df.columns

Index(['contractSymbol', 'lastTradeDate', 'strike_price', 'last_price', 'bid',
       'ask', 'change', 'percentChange', 'volume', 'openInterest',
       'impliedVolatility', 'inTheMoney', 'contractSize', 'currency',
       'contract_type', 'underlying_ticker', 'expiration'],
      dtype='object')