In [None]:
#imports
import yfinance as yf
import pandas as pd
import numpy as np
import os
from datetime import datetime, timedelta

In [None]:
from pathlib import Path

# The project root is taken to be the parent of the current working
# directory (i.e. the notebook is expected to run from the notebooks folder).
PROJECT_ROOT = Path.cwd().parent
DATA_DIR = PROJECT_ROOT.joinpath("data")

DATA_DIR


PosixPath('/Users/aadihuria/Projects/options-trading-ml-project/data')

In [None]:
# A regular file named "data" would block directory creation, so remove it
# first. is_file() is already False when the path does not exist.
if DATA_DIR.is_file():
    DATA_DIR.unlink()

# Create the directory (and any missing parents); a no-op if it is already there.
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Sanity check, displayed as (exists, is_dir, is_file).
(DATA_DIR.exists(), DATA_DIR.is_dir(), DATA_DIR.is_file())


(True, True, False)

In [None]:
# Download historical price data for the underlying and cache it as CSV.
ticker = "SPY"
PRICE_START = "2015-01-01"
PRICE_END = "2025-01-01"

stock = yf.download(
    ticker,
    start=PRICE_START,
    end=PRICE_END,
    progress=False,
)

# A download can come back empty (bad ticker, failed request). Stop here with
# a clear message instead of writing an empty CSV and failing later on
# stock["Close"].iloc[-1].
if stock.empty:
    raise RuntimeError(
        f"No price data returned for {ticker} between {PRICE_START} and {PRICE_END}"
    )

# Flatten columns if Yahoo returns a MultiIndex (keep only the first level).
if isinstance(stock.columns, pd.MultiIndex):
    stock.columns = stock.columns.get_level_values(0)

stock_csv = DATA_DIR / "stock_prices.csv"
stock.to_csv(stock_csv)
stock_csv.exists()


True

In [None]:
# Ticker handle for the underlying; later cells reuse `opt` to pull the chain.
opt = yf.Ticker(ticker)

# Expiration dates currently listed for the ticker, as "YYYY-MM-DD" strings.
expirations = opt.options

# Preview the first ten expirations.
expirations[0:10]


('2025-12-29',
 '2025-12-30',
 '2025-12-31',
 '2026-01-02',
 '2026-01-05',
 '2026-01-06',
 '2026-01-07',
 '2026-01-08',
 '2026-01-09',
 '2026-01-16')

In [None]:
# Choose the listed expiration closest to TARGET_DAYS_TO_EXPIRY calendar days
# from today.
TARGET_DAYS_TO_EXPIRY = 30

# min() on an empty sequence raises a generic ValueError; fail with a message
# that names the actual problem instead.
if not expirations:
    raise ValueError(f"No option expirations available for {ticker}")

today = datetime.now().date()
target_date = today + timedelta(days=TARGET_DAYS_TO_EXPIRY)

# Parse the "YYYY-MM-DD" strings into date objects so they can be compared.
exp_dates = [datetime.strptime(e, "%Y-%m-%d").date() for e in expirations]

# Smallest absolute distance to the target date wins.
chosen_exp = min(exp_dates, key=lambda d: abs(d - target_date))
chosen_exp


datetime.date(2026, 1, 30)

In [None]:
# Download the option chain for the chosen expiration and save both sides raw.
chain = opt.option_chain(chosen_exp.strftime("%Y-%m-%d"))

calls = chain.calls
puts = chain.puts

# Write through DATA_DIR (the directory this notebook resolves and creates)
# rather than a hard-coded "../data" path, so the files land next to
# stock_prices.csv regardless of how the relative path would resolve.
calls.to_csv(DATA_DIR / "calls_raw.csv", index=False)
puts.to_csv(DATA_DIR / "puts_raw.csv", index=False)

calls.head()


Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency
0,SPY260130C00415000,2025-12-22 15:19:13+00:00,415.0,271.88,272.24,275.63,0.0,0.0,,2,0.963379,True,REGULAR,USD
1,SPY260130C00420000,2025-12-18 14:52:08+00:00,420.0,257.27,267.21,270.45,0.0,0.0,1.0,2,0.936036,True,REGULAR,USD
2,SPY260130C00425000,2025-12-24 14:53:41+00:00,425.0,265.47,262.24,265.67,0.0,0.0,,1,0.925416,True,REGULAR,USD
3,SPY260130C00435000,2025-10-10 19:12:24+00:00,435.0,227.0,239.15,242.05,0.0,0.0,,0,1e-05,True,REGULAR,USD
4,SPY260130C00450000,2025-12-26 16:58:50+00:00,450.0,242.04,237.35,240.77,0.0,0.0,2.0,50,0.837038,True,REGULAR,USD


In [None]:
# Add the spot price of the underlying to both sides of the chain.
#
# The option chain is a snapshot of currently listed contracts, so the spot
# must be the latest available close. `stock` cannot be used for this: its
# download window has a fixed end date, so its last close can be far out of
# date relative to the chain, and "ATM" selection would then pick a strike
# that is deep in or out of the money.
recent_close = opt.history(period="5d")["Close"].dropna()
if recent_close.empty:
    raise RuntimeError(f"No recent price data returned for {ticker}")

spot_price = recent_close.iloc[-1]

calls["spot"] = spot_price
puts["spot"] = spot_price

spot_price


np.float64(579.2774047851562)

In [None]:
# Select the at-the-money call and put: for each side of the chain, rank the
# contracts by absolute distance between strike and spot, then keep the
# single nearest one as a one-row frame.
call_distance = (calls["strike"] - spot_price).abs()
put_distance = (puts["strike"] - spot_price).abs()

atm_call = calls.iloc[call_distance.argsort().iloc[:1]]
atm_put = puts.iloc[put_distance.argsort().iloc[:1]]

atm_call, atm_put


(        contractSymbol             lastTradeDate  strike  lastPrice     bid  \
 36  SPY260130C00579000 2025-12-26 17:25:41+00:00   579.0     113.25  109.21   
 
        ask  change  percentChange  volume  openInterest  impliedVolatility  \
 36  112.44     0.0            0.0     1.0             0           0.473089   
 
     inTheMoney contractSize currency        spot  
 36        True      REGULAR      USD  579.277405  ,
         contractSymbol             lastTradeDate  strike  lastPrice   bid  \
 39  SPY260130P00579000 2025-12-26 18:45:43+00:00   579.0       0.45  0.41   
 
      ask  change  percentChange  volume  openInterest  impliedVolatility  \
 39  0.42     0.0            0.0     2.0           456           0.279304   
 
     inTheMoney contractSize currency        spot  
 39       False      REGULAR      USD  579.277405  )

In [None]:
# Save the selected ATM contracts. Use DATA_DIR (resolved and created earlier
# in the notebook) instead of a hard-coded "../data" path so every output
# file ends up in the same directory.
atm_call.to_csv(DATA_DIR / "atm_call.csv", index=False)
atm_put.to_csv(DATA_DIR / "atm_put.csv", index=False)
