**Imports**

In [283]:
import yfinance as yf
from datetime import datetime
import pandas as pd
from typing import Tuple
from concurrent.futures import ThreadPoolExecutor, as_completed
import numpy as np
import math
import QuantLib as ql

**Define price fetcher**

In [284]:
def get_spot_price(ticker):
    """
    Fetches the current spot price for a stock ticker. Falls back to the most recent
    close if a live price is not available.

    Parameters
    ----------
    ticker : str
        Stock ticker symbol (e.g., 'AAPL').

    Returns
    -------
    float or None
        Spot price (live if available, else last close). Returns None if unavailable.
    """

    try:
        tk = yf.Ticker(ticker)
        
        # Attempt to fetch live price
        live_price = tk.fast_info.get("last_price", None)
        if live_price and live_price > 0:
            return live_price

        # Fallback: most recent close
        hist = tk.history(period="1d")
        if not hist.empty:
            fallback_price = hist["Close"].iloc[-1]
            print(f"[{ticker}] Live price unavailable — using last close: {fallback_price:.2f}")
            return fallback_price

        print(f"[{ticker}] No live or historical data available.")
        return None

    except Exception as e:
        print(f"[{ticker}] Spot price fetch failed: {e}")
        return None

**Define Option Chain Fetcher**

In [285]:
def get_option_chains_all(ticker: str,
                                  max_workers: int = 8) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Fetches option chains (calls and puts) for every available expiry of a given ticker,
    performing API requests in parallel to reduce total fetch time.

    Parameters
    ----------
    ticker : str
        Stock ticker symbol (e.g., 'AAPL').
    max_workers : int, optional
        Maximum number of threads to use for concurrent fetching (default is 8).

    Returns
    -------
    Tuple[pd.DataFrame, pd.DataFrame]
        - calls_df: DataFrame containing all calls across expiries, with added columns:
            * 'option_type' = 'call'
            * 'expiration'  = expiry date string 'YYYY-MM-DD'
            * 'TTM'         = time to maturity in years
        - puts_df: DataFrame containing all puts with the same added columns.
    """
    stock = yf.Ticker(ticker)
    expiries = stock.options  # list of expiry date strings
    today = datetime.now().date()

    calls_accum = []
    puts_accum  = []

    def fetch_chain(expiry: str):
        """Fetch calls/puts for a single expiry and return (expiry, calls_df, puts_df)."""
        try:
            chain = stock.option_chain(expiry)
            calls = chain.calls.copy()
            puts  = chain.puts.copy()
        except Exception as e:
            # Return None on error so we can skip later
            return expiry, None, None

        # Tag each row with type and expiration
        calls['option_type']  = 'call'
        puts ['option_type']  = 'put'
        calls['expiration']   = expiry
        puts ['expiration']   = expiry

        # Compute time-to-maturity once
        exp_date = datetime.strptime(expiry, "%Y-%m-%d").date()
        ttm = max((exp_date - today).days / 365.0, 0.0)
        calls['TTM'] = ttm
        puts ['TTM'] = ttm

        return expiry, calls, puts

    # Fetch in parallel
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(fetch_chain, exp) for exp in expiries]
        for future in as_completed(futures):
            expiry, calls_df, puts_df = future.result()
            if calls_df is not None and not calls_df.empty:
                calls_accum.append(calls_df)
            if puts_df  is not None and not puts_df.empty:
                puts_accum.append(puts_df)

    # Concatenate results
    all_calls = pd.concat(calls_accum, ignore_index=True) if calls_accum else pd.DataFrame()
    all_puts  = pd.concat(puts_accum,  ignore_index=True) if puts_accum  else pd.DataFrame()

    # Fetch dividend yield for the company
    dividendYield = stock.info.get("dividendYield")/100 # percentages on decimal basis
    all_calls["dividendYield"] = dividendYield
    all_puts["dividendYield"] = dividendYield

    all_calls["ticker"] = ticker
    all_puts["ticker"] = ticker

    spot_price = get_spot_price(ticker)
    all_calls["spot_price"] = spot_price
    all_puts["spot_price"] = spot_price

    return all_calls, all_puts


Get option chain and dividend yield

In [286]:
calls, puts = get_option_chains_all("JPM")

[JPM] Live price unavailable — using last close: 313.49


In [287]:
calls

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,option_type,expiration,TTM,dividendYield,ticker,spot_price
0,JPM251128C00180000,2025-11-03 18:02:15+00:00,180.0,130.43,132.20,136.15,0.00,0.000000,20.0,10,1.166996,True,REGULAR,USD,call,2025-11-28,0.057534,0.0191,JPM,313.494995
1,JPM251128C00185000,2025-10-22 16:30:49+00:00,185.0,107.25,127.25,130.85,0.00,0.000000,,1,1.080083,True,REGULAR,USD,call,2025-11-28,0.057534,0.0191,JPM,313.494995
2,JPM251128C00225000,2025-10-10 18:50:55+00:00,225.0,79.80,87.40,90.50,0.00,0.000000,,1,0.697269,True,REGULAR,USD,call,2025-11-28,0.057534,0.0191,JPM,313.494995
3,JPM251128C00230000,2025-10-14 13:35:01+00:00,230.0,71.05,82.40,85.85,0.00,0.000000,,0,0.695316,True,REGULAR,USD,call,2025-11-28,0.057534,0.0191,JPM,313.494995
4,JPM251128C00250000,2025-10-20 17:42:50+00:00,250.0,54.50,62.50,66.20,0.00,0.000000,2.0,2,0.567387,True,REGULAR,USD,call,2025-11-28,0.057534,0.0191,JPM,313.494995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
879,JPM280121C00430000,2025-10-21 19:58:31+00:00,430.0,9.85,13.70,15.10,0.00,0.000000,1.0,2,0.251716,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995
880,JPM280121C00440000,2025-10-31 16:59:23+00:00,440.0,11.60,11.75,13.45,0.00,0.000000,4.0,14,0.250259,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995
881,JPM280121C00450000,2025-10-30 17:44:38+00:00,450.0,9.90,10.50,12.45,0.00,0.000000,4.0,17,0.252434,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995
882,JPM280121C00460000,2025-10-22 16:30:37+00:00,460.0,6.03,9.85,10.45,0.00,0.000000,2.0,6,0.246193,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995


In [288]:
puts

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,option_type,expiration,TTM,dividendYield,ticker,spot_price
0,JPM251128P00175000,2025-10-22 19:47:28+00:00,175.0,0.03,0.00,2.13,0.000000,0.000000,,11,1.317875,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
1,JPM251128P00220000,2025-10-17 14:28:32+00:00,220.0,0.39,0.00,2.14,0.000000,0.000000,3.0,2,0.860597,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
2,JPM251128P00235000,2025-10-23 18:37:04+00:00,235.0,0.48,0.00,2.17,0.000000,0.000000,,13,0.727054,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
3,JPM251128P00240000,2025-10-17 19:24:44+00:00,240.0,0.60,0.00,2.18,0.000000,0.000000,10.0,11,0.683841,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
4,JPM251128P00245000,2025-10-27 19:48:25+00:00,245.0,0.29,0.00,0.88,0.000000,0.000000,10.0,82,0.538091,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
778,JPM280121P00400000,2025-11-07 19:10:43+00:00,400.0,95.10,92.00,96.00,-3.849998,-3.890852,10.0,3,0.182168,True,REGULAR,USD,put,2028-01-21,2.205479,0.0191,JPM,313.494995
779,JPM280121P00410000,2025-09-29 16:42:36+00:00,410.0,99.85,100.00,104.00,0.000000,0.000000,,1,0.177468,True,REGULAR,USD,put,2028-01-21,2.205479,0.0191,JPM,313.494995
780,JPM280121P00420000,2025-10-09 18:48:52+00:00,420.0,116.05,108.75,112.00,0.000000,0.000000,,2,0.170037,True,REGULAR,USD,put,2028-01-21,2.205479,0.0191,JPM,313.494995
781,JPM280121P00460000,2025-09-30 15:35:29+00:00,460.0,149.81,144.00,148.50,0.000000,0.000000,,0,0.161080,True,REGULAR,USD,put,2028-01-21,2.205479,0.0191,JPM,313.494995


In [289]:
calls.describe()

Unnamed: 0,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,TTM,dividendYield,spot_price
count,884.0,884.0,884.0,884.0,884.0,884.0,837.0,884.0,884.0,884.0,884.0,884.0
mean,279.736991,59.781833,61.372285,63.410181,-0.095419,-0.074743,58.354839,436.348416,0.431763,0.635917,0.0191,313.494995
std,94.121347,59.334156,61.290232,62.445407,1.310191,32.013839,309.940926,993.66058,0.346565,0.652016,0.0,0.0
min,65.0,0.01,0.0,0.0,-5.450005,-97.77778,1.0,0.0,1e-05,0.0,0.0191,313.494995
25%,200.0,6.9125,6.6375,7.25,0.0,0.0,2.0,12.0,0.258034,0.115068,0.0191,313.494995
50%,287.5,38.29,38.425,40.65,0.0,0.0,4.0,72.0,0.330253,0.364384,0.0191,313.494995
75%,350.0,107.34,111.3375,114.4125,0.0,0.0,20.0,349.75,0.497609,1.112329,0.0191,313.494995
max,470.0,240.2,242.45,245.95,13.110001,800.00024,5309.0,8789.0,4.437504,2.205479,0.0191,313.494995


Initial Dataset Clean

In [290]:
def initial_dataset_clean(dataset : pd.DataFrame):
    # Removing columns that will not be used for training or filtering
    #cols_to_drop = ["contractSymbol", "lastTradeDate", "change", "percentChange", "expiration", "inTheMoney"]
    #dataset.drop(columns=[c for c in cols_to_drop if c in dataset.columns], inplace=True)

    # Removing NA values for stale options
    dataset.dropna(axis=0,subset=["volume"], inplace=True)

    
    return dataset

In [291]:
calls_clean = initial_dataset_clean(calls)
calls_clean

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,option_type,expiration,TTM,dividendYield,ticker,spot_price
0,JPM251128C00180000,2025-11-03 18:02:15+00:00,180.0,130.43,132.20,136.15,0.00,0.000000,20.0,10,1.166996,True,REGULAR,USD,call,2025-11-28,0.057534,0.0191,JPM,313.494995
4,JPM251128C00250000,2025-10-20 17:42:50+00:00,250.0,54.50,62.50,66.20,0.00,0.000000,2.0,2,0.567387,True,REGULAR,USD,call,2025-11-28,0.057534,0.0191,JPM,313.494995
6,JPM251128C00270000,2025-10-30 18:20:08+00:00,270.0,43.28,43.05,45.30,0.00,0.000000,2.0,2,0.490239,True,REGULAR,USD,call,2025-11-28,0.057534,0.0191,JPM,313.494995
7,JPM251128C00275000,2025-10-29 15:03:05+00:00,275.0,33.00,38.55,40.75,0.00,0.000000,1.0,6,0.475835,True,REGULAR,USD,call,2025-11-28,0.057534,0.0191,JPM,313.494995
8,JPM251128C00280000,2025-11-03 18:24:42+00:00,280.0,31.15,33.70,35.60,0.00,0.000000,2.0,4,0.419439,True,REGULAR,USD,call,2025-11-28,0.057534,0.0191,JPM,313.494995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
879,JPM280121C00430000,2025-10-21 19:58:31+00:00,430.0,9.85,13.70,15.10,0.00,0.000000,1.0,2,0.251716,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995
880,JPM280121C00440000,2025-10-31 16:59:23+00:00,440.0,11.60,11.75,13.45,0.00,0.000000,4.0,14,0.250259,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995
881,JPM280121C00450000,2025-10-30 17:44:38+00:00,450.0,9.90,10.50,12.45,0.00,0.000000,4.0,17,0.252434,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995
882,JPM280121C00460000,2025-10-22 16:30:37+00:00,460.0,6.03,9.85,10.45,0.00,0.000000,2.0,6,0.246193,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995


In [292]:
calls_clean.describe()

Unnamed: 0,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,TTM,dividendYield,spot_price
count,837.0,837.0,837.0,837.0,837.0,837.0,837.0,837.0,837.0,837.0,837.0,837.0
mean,280.400239,59.557395,60.962605,62.977228,-0.100777,-0.07894,58.354839,460.659498,0.428877,0.644006,0.0191,313.494995
std,93.288559,59.765612,61.600593,62.765599,1.346317,32.90144,309.940926,1015.74573,0.348388,0.653501,3.471521e-18,0.0
min,65.0,0.01,0.0,0.0,-5.450005,-97.77778,1.0,0.0,1e-05,0.0,0.0191,313.494995
25%,205.0,6.8,6.65,7.3,-0.05,-0.2214,2.0,18.0,0.257942,0.115068,0.0191,313.494995
50%,290.0,37.25,37.25,39.4,0.0,0.0,4.0,82.0,0.328132,0.364384,0.0191,313.494995
75%,350.0,107.01,110.1,113.6,0.0,0.0,20.0,384.0,0.485235,1.112329,0.0191,313.494995
max,470.0,240.2,242.45,245.95,13.110001,800.00024,5309.0,8789.0,4.437504,2.205479,0.0191,313.494995


In [293]:
puts_clean = initial_dataset_clean(puts)
puts_clean

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,option_type,expiration,TTM,dividendYield,ticker,spot_price
1,JPM251128P00220000,2025-10-17 14:28:32+00:00,220.0,0.39,0.00,2.14,0.000000,0.000000,3.0,2,0.860597,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
3,JPM251128P00240000,2025-10-17 19:24:44+00:00,240.0,0.60,0.00,2.18,0.000000,0.000000,10.0,11,0.683841,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
4,JPM251128P00245000,2025-10-27 19:48:25+00:00,245.0,0.29,0.00,0.88,0.000000,0.000000,10.0,82,0.538091,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
5,JPM251128P00250000,2025-10-24 15:22:43+00:00,250.0,0.48,0.00,2.23,0.000000,0.000000,2.0,3,0.600590,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
6,JPM251128P00255000,2025-10-28 19:46:32+00:00,255.0,0.41,0.00,0.70,0.000000,0.000000,27.0,66,0.505620,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
774,JPM280121P00360000,2025-11-07 16:57:38+00:00,360.0,68.12,65.60,68.05,-0.649994,-0.945171,1.0,16,0.204979,True,REGULAR,USD,put,2028-01-21,2.205479,0.0191,JPM,313.494995
775,JPM280121P00370000,2025-11-07 17:20:14+00:00,370.0,75.25,71.40,74.35,-3.919998,-4.951368,11.0,3,0.198433,True,REGULAR,USD,put,2028-01-21,2.205479,0.0191,JPM,313.494995
776,JPM280121P00380000,2025-10-09 18:48:03+00:00,380.0,83.30,78.45,80.35,0.000000,0.000000,14.0,8,0.187661,True,REGULAR,USD,put,2028-01-21,2.205479,0.0191,JPM,313.494995
777,JPM280121P00390000,2025-11-07 19:10:43+00:00,390.0,88.60,84.95,88.50,-5.419998,-5.764729,10.0,4,0.188050,True,REGULAR,USD,put,2028-01-21,2.205479,0.0191,JPM,313.494995


**Join Calls and Puts**

In [294]:
df = pd.concat([puts_clean, calls_clean], ignore_index=True)
df

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,impliedVolatility,inTheMoney,contractSize,currency,option_type,expiration,TTM,dividendYield,ticker,spot_price
0,JPM251128P00220000,2025-10-17 14:28:32+00:00,220.0,0.39,0.00,2.14,0.00,0.000000,3.0,2,0.860597,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
1,JPM251128P00240000,2025-10-17 19:24:44+00:00,240.0,0.60,0.00,2.18,0.00,0.000000,10.0,11,0.683841,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
2,JPM251128P00245000,2025-10-27 19:48:25+00:00,245.0,0.29,0.00,0.88,0.00,0.000000,10.0,82,0.538091,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
3,JPM251128P00250000,2025-10-24 15:22:43+00:00,250.0,0.48,0.00,2.23,0.00,0.000000,2.0,3,0.600590,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
4,JPM251128P00255000,2025-10-28 19:46:32+00:00,255.0,0.41,0.00,0.70,0.00,0.000000,27.0,66,0.505620,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1569,JPM280121C00430000,2025-10-21 19:58:31+00:00,430.0,9.85,13.70,15.10,0.00,0.000000,1.0,2,0.251716,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995
1570,JPM280121C00440000,2025-10-31 16:59:23+00:00,440.0,11.60,11.75,13.45,0.00,0.000000,4.0,14,0.250259,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995
1571,JPM280121C00450000,2025-10-30 17:44:38+00:00,450.0,9.90,10.50,12.45,0.00,0.000000,4.0,17,0.252434,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995
1572,JPM280121C00460000,2025-10-22 16:30:37+00:00,460.0,6.03,9.85,10.45,0.00,0.000000,2.0,6,0.246193,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995


In [295]:
def optionType(OPTtype):
    # Puts are type 1
    if OPTtype == "put":
        return "put"
    else:
    # Calls are type 0
        return "call"

In [296]:
df["optionType"] = df["option_type"].apply(optionType)
#df.drop(columns="option_type", inplace=True)
df

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,...,inTheMoney,contractSize,currency,option_type,expiration,TTM,dividendYield,ticker,spot_price,optionType
0,JPM251128P00220000,2025-10-17 14:28:32+00:00,220.0,0.39,0.00,2.14,0.00,0.000000,3.0,2,...,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put
1,JPM251128P00240000,2025-10-17 19:24:44+00:00,240.0,0.60,0.00,2.18,0.00,0.000000,10.0,11,...,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put
2,JPM251128P00245000,2025-10-27 19:48:25+00:00,245.0,0.29,0.00,0.88,0.00,0.000000,10.0,82,...,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put
3,JPM251128P00250000,2025-10-24 15:22:43+00:00,250.0,0.48,0.00,2.23,0.00,0.000000,2.0,3,...,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put
4,JPM251128P00255000,2025-10-28 19:46:32+00:00,255.0,0.41,0.00,0.70,0.00,0.000000,27.0,66,...,False,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1569,JPM280121C00430000,2025-10-21 19:58:31+00:00,430.0,9.85,13.70,15.10,0.00,0.000000,1.0,2,...,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call
1570,JPM280121C00440000,2025-10-31 16:59:23+00:00,440.0,11.60,11.75,13.45,0.00,0.000000,4.0,14,...,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call
1571,JPM280121C00450000,2025-10-30 17:44:38+00:00,450.0,9.90,10.50,12.45,0.00,0.000000,4.0,17,...,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call
1572,JPM280121C00460000,2025-10-22 16:30:37+00:00,460.0,6.03,9.85,10.45,0.00,0.000000,2.0,6,...,False,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call


**Interest Rate Interpolation**

In [297]:
# Needs to figure out if we're using a interpolation structure to match each of them, also considering zero bond rates for the interpolation
def interest_rate(row):
    # Need to change this to interpolation calculation
    return (0.04)

In [298]:
df["r"] = .04
df

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,...,contractSize,currency,option_type,expiration,TTM,dividendYield,ticker,spot_price,optionType,r
0,JPM251128P00220000,2025-10-17 14:28:32+00:00,220.0,0.39,0.00,2.14,0.00,0.000000,3.0,2,...,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04
1,JPM251128P00240000,2025-10-17 19:24:44+00:00,240.0,0.60,0.00,2.18,0.00,0.000000,10.0,11,...,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04
2,JPM251128P00245000,2025-10-27 19:48:25+00:00,245.0,0.29,0.00,0.88,0.00,0.000000,10.0,82,...,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04
3,JPM251128P00250000,2025-10-24 15:22:43+00:00,250.0,0.48,0.00,2.23,0.00,0.000000,2.0,3,...,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04
4,JPM251128P00255000,2025-10-28 19:46:32+00:00,255.0,0.41,0.00,0.70,0.00,0.000000,27.0,66,...,REGULAR,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1569,JPM280121C00430000,2025-10-21 19:58:31+00:00,430.0,9.85,13.70,15.10,0.00,0.000000,1.0,2,...,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04
1570,JPM280121C00440000,2025-10-31 16:59:23+00:00,440.0,11.60,11.75,13.45,0.00,0.000000,4.0,14,...,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04
1571,JPM280121C00450000,2025-10-30 17:44:38+00:00,450.0,9.90,10.50,12.45,0.00,0.000000,4.0,17,...,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04
1572,JPM280121C00460000,2025-10-22 16:30:37+00:00,460.0,6.03,9.85,10.45,0.00,0.000000,2.0,6,...,REGULAR,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04


**Forward log-Moneyness**

We decided to use forward log-moneyness because its adjusted for r and q which are crucial in pricing the EEP

In [299]:

def forward_log_moneyness(S: float, r:float, q:float, T:float, K:float):
    """
    Calculates the log_moneyness in relation to the Forward Price of the stock

    Parameters
    ----------
    S : float
        Current spot_price of the stock.
    r : float
        Risk-free rate associated with that particular option maturity
    q : float
        Dividend-yield rate associated with that particular option maturity
    T : float
        Maturity of the option contract
    K : float
        Strike price of the option contract

    Returns
    -------
    Float
        - Forward log moneyness 
    """
    forward_price = S * np.exp((r - q)*T)
    
    return np.log(K/forward_price)

In [300]:
df["forward_log_moneyness"] = forward_log_moneyness(
    S=df["spot_price"].to_numpy(),
    r=df["r"].to_numpy(),
    q=df["dividendYield"].to_numpy(),
    T=df["TTM"].to_numpy(),
    K=df["strike"].to_numpy(),
)

df

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,...,currency,option_type,expiration,TTM,dividendYield,ticker,spot_price,optionType,r,forward_log_moneyness
0,JPM251128P00220000,2025-10-17 14:28:32+00:00,220.0,0.39,0.00,2.14,0.00,0.000000,3.0,2,...,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04,-0.355358
1,JPM251128P00240000,2025-10-17 19:24:44+00:00,240.0,0.60,0.00,2.18,0.00,0.000000,10.0,11,...,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04,-0.268347
2,JPM251128P00245000,2025-10-27 19:48:25+00:00,245.0,0.29,0.00,0.88,0.00,0.000000,10.0,82,...,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04,-0.247728
3,JPM251128P00250000,2025-10-24 15:22:43+00:00,250.0,0.48,0.00,2.23,0.00,0.000000,2.0,3,...,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04,-0.227525
4,JPM251128P00255000,2025-10-28 19:46:32+00:00,255.0,0.41,0.00,0.70,0.00,0.000000,27.0,66,...,USD,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04,-0.207722
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1569,JPM280121C00430000,2025-10-21 19:58:31+00:00,430.0,9.85,13.70,15.10,0.00,0.000000,1.0,2,...,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04,0.269907
1570,JPM280121C00440000,2025-10-31 16:59:23+00:00,440.0,11.60,11.75,13.45,0.00,0.000000,4.0,14,...,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04,0.292897
1571,JPM280121C00450000,2025-10-30 17:44:38+00:00,450.0,9.90,10.50,12.45,0.00,0.000000,4.0,17,...,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04,0.315370
1572,JPM280121C00460000,2025-10-22 16:30:37+00:00,460.0,6.03,9.85,10.45,0.00,0.000000,2.0,6,...,USD,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04,0.337349


Calculate Mid Price

In [301]:
def mid_price(bid: float, ask: float):
    """
    Calcualtes the mid-price of the option based on bid and ask prices

    Parameters
    ----------
    bid : float
        Current bid price of the option
    ask : float
        Current ask price of the option

    Returns
    -------
    Float
        - Mid price
    """

    return ((bid+ask)/2)
    

In [302]:
df["midPrice"] = mid_price(df["bid"], df["ask"])
df

Unnamed: 0,contractSymbol,lastTradeDate,strike,lastPrice,bid,ask,change,percentChange,volume,openInterest,...,option_type,expiration,TTM,dividendYield,ticker,spot_price,optionType,r,forward_log_moneyness,midPrice
0,JPM251128P00220000,2025-10-17 14:28:32+00:00,220.0,0.39,0.00,2.14,0.00,0.000000,3.0,2,...,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04,-0.355358,1.070
1,JPM251128P00240000,2025-10-17 19:24:44+00:00,240.0,0.60,0.00,2.18,0.00,0.000000,10.0,11,...,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04,-0.268347,1.090
2,JPM251128P00245000,2025-10-27 19:48:25+00:00,245.0,0.29,0.00,0.88,0.00,0.000000,10.0,82,...,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04,-0.247728,0.440
3,JPM251128P00250000,2025-10-24 15:22:43+00:00,250.0,0.48,0.00,2.23,0.00,0.000000,2.0,3,...,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04,-0.227525,1.115
4,JPM251128P00255000,2025-10-28 19:46:32+00:00,255.0,0.41,0.00,0.70,0.00,0.000000,27.0,66,...,put,2025-11-28,0.057534,0.0191,JPM,313.494995,put,0.04,-0.207722,0.350
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1569,JPM280121C00430000,2025-10-21 19:58:31+00:00,430.0,9.85,13.70,15.10,0.00,0.000000,1.0,2,...,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04,0.269907,14.400
1570,JPM280121C00440000,2025-10-31 16:59:23+00:00,440.0,11.60,11.75,13.45,0.00,0.000000,4.0,14,...,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04,0.292897,12.600
1571,JPM280121C00450000,2025-10-30 17:44:38+00:00,450.0,9.90,10.50,12.45,0.00,0.000000,4.0,17,...,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04,0.315370,11.475
1572,JPM280121C00460000,2025-10-22 16:30:37+00:00,460.0,6.03,9.85,10.45,0.00,0.000000,2.0,6,...,call,2028-01-21,2.205479,0.0191,JPM,313.494995,call,0.04,0.337349,10.150


QuantLib helpers

In [303]:
def _setup_ts(eval_date, r, q):
    """
    Set up flat (continuous-compounded) risk-free and dividend term structures for QuantLib.

    Parameters
    ----------
    eval_date : ql.Date
        QuantLib evaluation date to use for all curve objects.
    r : float
        Continuously compounded risk-free rate.
    q : float
        Continuously compounded dividend yield.

    Returns
    -------
    (dc, r_ts, q_ts) : tuple
        dc   : ql.Actual365Fixed day counter.
        r_ts : ql.YieldTermStructureHandle (FlatForward at rate r).
        q_ts : ql.YieldTermStructureHandle (FlatForward at yield q).
    """

    dc = ql.Actual365Fixed()
    r_ts = ql.YieldTermStructureHandle(
        ql.FlatForward(eval_date, ql.QuoteHandle(ql.SimpleQuote(float(r))),
                       dc, ql.Continuous, ql.NoFrequency))
    q_ts = ql.YieldTermStructureHandle(
        ql.FlatForward(eval_date, ql.QuoteHandle(ql.SimpleQuote(float(q))),
                       dc, ql.Continuous, ql.NoFrequency))
    return dc, r_ts, q_ts

def _to_maturity(eval_date, T_years):
    """
    Convert a year fraction to a QuantLib maturity date using an Actual/365 convention.

    Parameters
    ----------
    eval_date : ql.Date
        Reference QuantLib date.
    T_years : float
        Time to maturity in years.

    Returns
    -------
    ql.Date
        Maturity date equal to eval_date + round(T_years * 365) days.
    """
    days = max(1, int(round(float(T_years) * 365.0)))
    return eval_date + days

Binomial de-americanization formula from quantlib

In [304]:
def _deamericanize_price_binomial(row, eval_date=None, steps=400, tree="lr"):
    """
    Convert an American mid price to a European-equivalent price using a QuantLib binomial tree.

    Steps:
      1) Solve sigma* s.t. American_binomial(S,K,r,q,T,sigma*) = mid_price
      2) Return European_binomial(S,K,r,q,T,sigma*)

    Returns European price (float) or None if bracketing/inversion failed.
    """
    S = float(row['spot_price']); K = float(row['strike']); T = float(row['TTM'])
    r = float(row['r']); q = float(row['dividendYield']); P = float(row['mid_price'])
    if T <= 0 or not np.isfinite(P) or P <= 0 or S <= 0 or K <= 0:
        return None

    # No-arbitrage guard
    df_r = np.exp(-r*T); df_q = np.exp(-q*T)
    if str(row['optionType']).lower() == 'call':
        lb, ub = max(0.0, S*df_q - K*df_r), S*df_q
    else:
        lb, ub = max(0.0, K*df_r - S*df_q), K*df_r
    if not (lb - 1e-8 <= P <= ub + 1e-8):
        return None

    # American call, ~zero dividend ⇒ Am = Eu
    if str(row['optionType']).lower() == 'call' and abs(q) <= 1e-8:
        return float(P)

    if eval_date is None:
        eval_date = ql.Date.todaysDate()
    ql.Settings.instance().evaluationDate = eval_date

    dc, r_ts, q_ts = _setup_ts(eval_date, r, q)
    spot = ql.QuoteHandle(ql.SimpleQuote(S))
    vol_q = ql.QuoteHandle(ql.SimpleQuote(0.30))
    vol_ts = ql.BlackVolTermStructureHandle(ql.BlackConstantVol(eval_date, ql.NullCalendar(), vol_q, dc))
    process = ql.BlackScholesMertonProcess(spot, q_ts, r_ts, vol_ts)

    maturity = _to_maturity(eval_date, T)
    ql_type = ql.Option.Call if str(row['optionType']).lower() == 'call' else ql.Option.Put
    am_opt = ql.VanillaOption(ql.PlainVanillaPayoff(ql_type, K), ql.AmericanExercise(eval_date, maturity))
    eu_opt = ql.VanillaOption(ql.PlainVanillaPayoff(ql_type, K), ql.EuropeanExercise(maturity))

    am_opt.setPricingEngine(ql.BinomialVanillaEngine(process, tree, int(steps)))

    class _Res:
        def __call__(self, sigma):
            vol_q.setValue(max(float(sigma), 1e-10))
            return am_opt.NPV() - P

    # robust bracket
    f = _Res()
    lo, hi = 1e-6, 5.0
    f_lo, f_hi = f(lo), f(hi)
    expands = 0
    while f_lo * f_hi > 0 and expands < 8:
        lo *= 0.5
        hi *= 1.5
        f_lo, f_hi = f(lo), f(hi)
        expands += 1
    if f_lo * f_hi > 0:
        return None

    sigma_star = float(ql.Brent().solve(f, 1e-8, 0.3, lo, hi))

    # price European analytically with sigma*
    vol_q.setValue(max(sigma_star, 1e-10))
    eu_opt.setPricingEngine(ql.AnalyticEuropeanEngine(process))
    return float(eu_opt.NPV())

**Implied Volatility**

In [305]:
def _row_bs_iv_from_price(row, eval_date=None, iv_guess=0.25, use_deam=True):
    """
    Compute Black–Scholes implied vol for a single row.

    If `use_deam=True`, first de-Americanizes the row's mid price via a binomial model,
    then inverts BS-IV from the **European** price. This prevents "price out of range" failures.

    Returns float IV or None.
    """
    S = float(row['spot_price']); K = float(row['strike']); T = float(row['TTM'])
    r = float(row['r']); q = float(row['dividendYield']); P = float(row['mid_price'])
    if T <= 0 or not np.isfinite(P) or P <= 0:
        return None

    if eval_date is None:
        eval_date = ql.Date.todaysDate()
    ql.Settings.instance().evaluationDate = eval_date

    price_for_iv = P
    if use_deam:
        P_eu = _deamericanize_price_binomial(row, eval_date=eval_date)
        if P_eu is None or not np.isfinite(P_eu) or P_eu <= 0:
            return None
        price_for_iv = float(P_eu)

    dc, r_ts, q_ts = _setup_ts(eval_date, r, q)
    cal = ql.NullCalendar()
    maturity = _to_maturity(eval_date, T)
    spot = ql.QuoteHandle(ql.SimpleQuote(S))
    vol_ts = ql.BlackVolTermStructureHandle(ql.BlackConstantVol(eval_date, cal, iv_guess, dc))
    proc = ql.BlackScholesMertonProcess(spot, q_ts, r_ts, vol_ts)
    ql_type = ql.Option.Call if str(row['optionType']).lower() == 'call' else ql.Option.Put
    opt = ql.VanillaOption(ql.PlainVanillaPayoff(ql_type, K), ql.EuropeanExercise(maturity))
    try:
        iv = opt.impliedVolatility(price_for_iv, proc, 1e-8, 1000, 1e-7, 5.0)
        return float(iv) if np.isfinite(iv) and iv > 0 else None
    except Exception:
        return None

Heston Calibration

In [306]:
def _calibrate_heston(group_df: pd.DataFrame, eval_date=None, init=None):
    """
    Calibrate a Heston model to one group (same instrument/day), using IVs
    inferred from each row’s mid price.

    Assumes the group has constant:
      'spot_price', 'r', 'dividendYield'.
    Each row must have:
      'strike','TTM','mid_price','optionType'.

    Returns
    -------
    ql.HestonModel
        Calibrated model (parameters stored internally).
    """
    if eval_date is None:
        eval_date = ql.Date.todaysDate()
    ql.Settings.instance().evaluationDate = eval_date

    S = float(group_df['spot_price'].iloc[0])
    r = float(group_df['r'].iloc[0])
    q = float(group_df['dividendYield'].iloc[0])

    _, r_ts, q_ts = _setup_ts(eval_date, r, q)
    cal = ql.NullCalendar()
    spot_h = ql.QuoteHandle(ql.SimpleQuote(S))

    helpers = []
    for _, row in group_df.iterrows():
        iv = _row_bs_iv_from_price(row, eval_date=eval_date, use_deam=True)
        if iv is None:
            continue
        K = float(row['strike']); T = float(row['TTM'])
        tenor = ql.Period((_to_maturity(eval_date, T) - eval_date), ql.Days)
        helpers.append(ql.HestonModelHelper(tenor, cal, spot_h, K,
                                            ql.QuoteHandle(ql.SimpleQuote(iv)),
                                            q_ts, r_ts))
    if len(helpers) < 5:
        raise ValueError("Not enough valid options to calibrate Heston (need ≥ ~5 across strikes/maturities).")

    p = dict(v0=0.04, kappa=1.5, theta=0.04, sigma=0.3, rho=-0.7)
    if init: p.update(init)

    process = ql.HestonProcess(r_ts, q_ts, spot_h, p['v0'], p['kappa'], p['theta'], p['sigma'], p['rho'])
    model = ql.HestonModel(process)
    engine = ql.AnalyticHestonEngine(model)
    for h in helpers:
        h.setPricingEngine(engine)

    om = ql.LevenbergMarquardt(1e-8, 1e-8, 1e-8)
    model.calibrate(helpers, om, ql.EndCriteria(500, 50, 1e-8, 1e-8, 1e-8))
    return model

**European Option Equivalent Price**

In [307]:
def _price_eu_heston(row, model: ql.HestonModel, eval_date=None):
    """
    Price a single European option under a calibrated Heston model.

    Requires in `row`:
      'spot_price','strike','TTM','r','dividendYield','optionType'.

    Returns
    -------
    float
        European-equivalent option price under Heston.
    """
    if eval_date is None:
        eval_date = ql.Date.todaysDate()
    ql.Settings.instance().evaluationDate = eval_date

    K = float(row['strike'])
    T = float(row['TTM'])
    maturity = _to_maturity(eval_date, T)
    ql_type = ql.Option.Call if str(row['optionType']).lower() == 'call' else ql.Option.Put

    opt = ql.VanillaOption(ql.PlainVanillaPayoff(ql_type, K), ql.EuropeanExercise(maturity))
    opt.setPricingEngine(ql.AnalyticHestonEngine(model))
    return float(opt.NPV())

In [308]:
def calibrate_and_price_heston_european(df: pd.DataFrame,
                                           group_cols=('ticker',),
                                           eval_date: ql.Date | None = None,
                                           init: dict | None = None) -> pd.Series:
    """
    Calibrate Heston per group from mid prices, then return ONLY the European-equivalent
    price for each row. No parameters or diagnostics are attached to the output.

    Assumes `df` already has:
      - 'mid_price' (clean market mid)
      - 'optionType' in {'call','put'}

    Parameters
    ----------
    df : pandas.DataFrame
        Input option chain with at least:
        ['strike','TTM','dividendYield','ticker','spot_price','optionType','r','mid_price'].
    group_cols : tuple[str, ...]
        Columns to group by for separate calibrations (e.g., ('ticker',) or ('ticker','date')).
    eval_date : ql.Date or None
        Evaluation date (defaults to today).
    init : dict or None
        Optional Heston initial guesses: {'v0','kappa','theta','sigma','rho'}.

    Returns
    -------
    pandas.Series
        A Series named 'V_EU_Heston' containing the European-equivalent price for each row,
        aligned to df.index.
    """
    if eval_date is None:
        eval_date = ql.Date.todaysDate()

    results = pd.Series(index=df.index, dtype=float, name='V_EU_Heston')

    for _, grp in df.groupby(list(group_cols), dropna=False):
        g = grp.copy()
        try:
            model = _calibrate_heston(g, eval_date=eval_date, init=init)
        except Exception:
            # leave NaNs for this group's rows if calibration failed
            continue

        # Price each row with the calibrated model
        prices = g.apply(lambda r: _price_eu_heston(r, model, eval_date), axis=1)
        results.loc[g.index] = prices.values

    return results

V_eu = calibrate_and_price_heston_european(df, group_cols=("ticker", ))
df = df.join(V_eu)
df

**American Option Price**

In [310]:
df["V_EU_Heston"].describe()

count    0.0
mean     NaN
std      NaN
min      NaN
25%      NaN
50%      NaN
75%      NaN
max      NaN
Name: V_EU_Heston, dtype: float64

In [311]:
df.columns

Index(['contractSymbol', 'lastTradeDate', 'strike', 'lastPrice', 'bid', 'ask',
       'change', 'percentChange', 'volume', 'openInterest',
       'impliedVolatility', 'inTheMoney', 'contractSize', 'currency',
       'option_type', 'expiration', 'TTM', 'dividendYield', 'ticker',
       'spot_price', 'optionType', 'r', 'forward_log_moneyness', 'midPrice',
       'V_EU_Heston'],
      dtype='object')