In [59]:
import yfinance as yf
from datetime import datetime
import pandas as pd
from typing import Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed

In [60]:
def get_option_chains_all(ticker: str,
                                  max_workers: int = 8) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Fetches option chains (calls and puts) for every available expiry of a given ticker,
    performing API requests in parallel to reduce total fetch time.

    Parameters
    ----------
    ticker : str
        Stock ticker symbol (e.g., 'AAPL').
    max_workers : int, optional
        Maximum number of threads to use for concurrent fetching (default is 8).

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame]
        - calls_df: DataFrame containing all calls across expiries, with added columns:
            * 'option_type' = 'call'
            * 'expiration'  = expiry date string 'YYYY-MM-DD'
            * 'TTM'         = time to maturity in years
        - puts_df: DataFrame containing all puts with the same added columns.
    """
    stock = yf.Ticker(ticker)
    expiries = stock.options  # list of expiry date strings
    today = datetime.now().date()

    calls_accum = []
    puts_accum  = []

    def fetch_chain(expiry: str):
        """Fetch calls/puts for a single expiry and return (expiry, calls_df, puts_df)."""
        try:
            chain = stock.option_chain(expiry)
            calls = chain.calls.copy()
            puts  = chain.puts.copy()
        except Exception as e:
            # Return None on error so we can skip later
            return expiry, None, None

        # Tag each row with type and expiration
        calls['option_type']  = 'call'
        puts ['option_type']  = 'put'
        calls['expiration']   = expiry
        puts ['expiration']   = expiry

        # Compute time-to-maturity once
        exp_date = datetime.strptime(expiry, "%Y-%m-%d").date()
        ttm = max((exp_date - today).days / 365.0, 0.0)
        calls['TTM'] = ttm
        puts ['TTM'] = ttm

        return expiry, calls, puts

    # Fetch in parallel
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(fetch_chain, exp) for exp in expiries]
        for future in as_completed(futures):
            expiry, calls_df, puts_df = future.result()
            if calls_df is not None and not calls_df.empty:
                calls_accum.append(calls_df)
            if puts_df  is not None and not puts_df.empty:
                puts_accum.append(puts_df)

    # Concatenate results
    all_calls = pd.concat(calls_accum, ignore_index=True) if calls_accum else pd.DataFrame()
    all_puts  = pd.concat(puts_accum,  ignore_index=True) if puts_accum  else pd.DataFrame()

    return all_calls, all_puts


In [61]:
calls, puts = get_option_chains_all("AAPL")

In [62]:
calls

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,option_type,expiration,TTM
0,AAPL251107C00110000,2025-10-31 14:00:20+00:00,110.0,160.35,160.00,161.35,11.920013,8.030731,10.0,6,3.142580,True,REGULAR,USD,call,2025-11-07,0.013699
1,AAPL251107C00120000,2025-10-31 15:33:07+00:00,120.0,151.10,150.05,151.35,14.820007,10.874675,15.0,1,2.890628,True,REGULAR,USD,call,2025-11-07,0.013699
2,AAPL251107C00140000,2025-10-31 13:56:39+00:00,140.0,130.55,130.00,131.35,14.250000,12.252794,5.0,1,2.359379,True,REGULAR,USD,call,2025-11-07,0.013699
3,AAPL251107C00145000,2025-10-24 15:06:06+00:00,145.0,131.95,124.95,126.40,14.879997,12.710341,2.0,1,2.244145,True,REGULAR,USD,call,2025-11-07,0.013699
4,AAPL251107C00150000,2025-10-31 18:07:29+00:00,150.0,121.85,120.00,121.35,0.360001,0.296321,13.0,24,2.134770,True,REGULAR,USD,call,2025-11-07,0.013699
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1005,AAPL261218C00420000,2025-10-30 19:59:21+00:00,420.0,2.82,2.50,2.64,0.150000,5.617972,1.0,454,0.271064,False,REGULAR,USD,call,2026-12-18,1.126027
1006,AAPL261218C00430000,2025-10-30 13:31:37+00:00,430.0,2.38,2.11,2.23,0.000000,0.000000,1.0,324,0.271614,False,REGULAR,USD,call,2026-12-18,1.126027
1007,AAPL261218C00440000,2025-10-28 16:12:40+00:00,440.0,1.76,1.80,1.90,0.000000,0.000000,2.0,106,0.272651,False,REGULAR,USD,call,2026-12-18,1.126027
1008,AAPL261218C00450000,2025-10-31 19:07:37+00:00,450.0,1.61,1.54,1.65,-0.040000,-2.424240,24.0,1927,0.274604,False,REGULAR,USD,call,2026-12-18,1.126027


In [63]:
puts

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,option_type,expiration,TTM
0,AAPL251107P00110000,2025-10-16 18:43:47+00:00,110.0,0.02,0.00,0.01,0.0,0.0,2.0,27,2.125005,False,REGULAR,USD,put,2025-11-07,0.013699
1,AAPL251107P00125000,2025-10-23 14:44:51+00:00,125.0,0.01,0.00,0.01,0.0,0.0,100.0,136,1.812501,False,REGULAR,USD,put,2025-11-07,0.013699
2,AAPL251107P00130000,2025-10-20 17:40:07+00:00,130.0,0.01,0.00,0.01,0.0,0.0,,1,1.718751,False,REGULAR,USD,put,2025-11-07,0.013699
3,AAPL251107P00135000,2025-10-24 17:49:14+00:00,135.0,0.01,0.00,0.01,0.0,0.0,128.0,119,1.625002,False,REGULAR,USD,put,2025-11-07,0.013699
4,AAPL251107P00140000,2025-10-24 19:52:48+00:00,140.0,0.01,0.00,0.20,0.0,0.0,30.0,945,2.046880,False,REGULAR,USD,put,2025-11-07,0.013699
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
933,AAPL261218P00390000,2025-08-07 13:34:41+00:00,390.0,172.00,149.35,151.70,0.0,0.0,,0,0.559300,True,REGULAR,USD,put,2026-12-18,1.126027
934,AAPL261218P00400000,2025-09-22 14:47:29+00:00,400.0,147.00,128.50,130.80,0.0,0.0,20.0,0,0.211617,True,REGULAR,USD,put,2026-12-18,1.126027
935,AAPL261218P00410000,2025-08-08 15:57:51+00:00,410.0,184.00,169.35,171.70,0.0,0.0,20.0,0,0.591084,True,REGULAR,USD,put,2026-12-18,1.126027
936,AAPL261218P00440000,2025-01-03 15:21:54+00:00,440.0,197.30,0.00,0.00,0.0,0.0,4.0,0,0.000010,True,REGULAR,USD,put,2026-12-18,1.126027


In [64]:
calls.describe()

Unnamed: 0,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,TTM
count,1010.0,1010.0,1010.0,1010.0,1010.0,1010.0,998.0,1010.0,1010.0,1010.0
mean,218.418317,77.462713,78.106515,79.596406,0.556624,-3.835492,665.815631,3071.575248,0.550852,0.753726
std,104.948095,68.083842,70.563517,71.623083,3.103591,13.521139,3412.941106,7337.074586,0.527113,0.702951
min,5.0,0.01,0.0,0.0,-2.769999,-81.333336,1.0,0.0,1e-05,0.013699
25%,140.0,12.955,11.7875,12.375,-0.18,-1.925982,2.0,100.0,0.281086,0.128767
50%,220.0,64.885,61.825,63.05,0.0,0.0,16.0,527.0,0.398939,0.531507
75%,290.0,126.705,131.2,133.9875,0.1275,0.402787,114.75,2400.0,0.612858,1.183562
max,500.0,268.42,264.1,268.0,32.100006,28.139534,43140.0,87750.0,6.363283,2.219178


In [65]:
def initial_dataset_clean(dataset : pd.DataFrame):
    # Removing columns that will not be used for training or filtering
    cols_to_drop = ["contractSymbol", "lastTradeDate", "change", "percentChange", "expiration", "inTheMoney"]
    dataset.drop(columns=[c for c in cols_to_drop if c in dataset.columns], inplace=True)

    # Removing NA values for stale options
    dataset.dropna(axis=0,subset=["volume"], inplace=True)

    
    return dataset

In [66]:
calls_clean = initial_dataset_clean(calls)
calls_clean

Unnamed: 0,strike,lastPrice,bid,ask,volume,openInterest,impliedVolatility,contractSize,currency,option_type,TTM
0,110.0,160.35,160.00,161.35,10.0,6,3.142580,REGULAR,USD,call,0.013699
1,120.0,151.10,150.05,151.35,15.0,1,2.890628,REGULAR,USD,call,0.013699
2,140.0,130.55,130.00,131.35,5.0,1,2.359379,REGULAR,USD,call,0.013699
3,145.0,131.95,124.95,126.40,2.0,1,2.244145,REGULAR,USD,call,0.013699
4,150.0,121.85,120.00,121.35,13.0,24,2.134770,REGULAR,USD,call,0.013699
...,...,...,...,...,...,...,...,...,...,...,...
1005,420.0,2.82,2.50,2.64,1.0,454,0.271064,REGULAR,USD,call,1.126027
1006,430.0,2.38,2.11,2.23,1.0,324,0.271614,REGULAR,USD,call,1.126027
1007,440.0,1.76,1.80,1.90,2.0,106,0.272651,REGULAR,USD,call,1.126027
1008,450.0,1.61,1.54,1.65,24.0,1927,0.274604,REGULAR,USD,call,1.126027


In [67]:
calls_clean.describe()

Unnamed: 0,strike,lastPrice,bid,ask,volume,openInterest,impliedVolatility,TTM
count,998.0,998.0,998.0,998.0,998.0,998.0,998.0,998.0
mean,219.706914,76.754539,77.496022,78.978527,665.815631,3108.360721,0.547305,0.756052
std,104.753833,67.96564,70.265856,71.322579,3412.941106,7373.369719,0.524761,0.704266
min,5.0,0.01,0.0,0.0,1.0,0.0,1e-05,0.013699
25%,140.0,12.3625,11.675,12.2375,2.0,105.0,0.28088,0.128767
50%,220.0,63.295,60.95,62.2,16.0,550.5,0.39472,0.531507
75%,295.0,126.4,130.4125,133.0,114.75,2444.75,0.603687,1.20274
max,500.0,268.42,264.1,268.0,43140.0,87750.0,6.363283,2.219178


In [68]:
puts_clean = initial_dataset_clean(puts)
puts_clean

Unnamed: 0,strike,lastPrice,bid,ask,volume,openInterest,impliedVolatility,contractSize,currency,option_type,TTM
0,110.0,0.02,0.00,0.01,2.0,27,2.125005,REGULAR,USD,put,0.013699
1,125.0,0.01,0.00,0.01,100.0,136,1.812501,REGULAR,USD,put,0.013699
3,135.0,0.01,0.00,0.01,128.0,119,1.625002,REGULAR,USD,put,0.013699
4,140.0,0.01,0.00,0.20,30.0,945,2.046880,REGULAR,USD,put,0.013699
5,145.0,0.02,0.00,0.03,1.0,7,1.609377,REGULAR,USD,put,0.013699
...,...,...,...,...,...,...,...,...,...,...,...
932,380.0,153.50,139.35,141.70,20.0,0,0.542332,REGULAR,USD,put,1.126027
934,400.0,147.00,128.50,130.80,20.0,0,0.211617,REGULAR,USD,put,1.126027
935,410.0,184.00,169.35,171.70,20.0,0,0.591084,REGULAR,USD,put,1.126027
936,440.0,197.30,0.00,0.00,4.0,0,0.000010,REGULAR,USD,put,1.126027


In [69]:
r = .5