In [1]:
%pip install QuantLib-Python


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import glob
import polars as pl
import numpy as np
import QuantLib as ql
from enhance_parquet import CleanParquetUtility

# Load and clean the option day aggregates. The commented-out line is the alternative
# source: every parquet file under the monthly aggregate directory.
#option_df = pl.read_parquet(glob.glob(os.path.join("test_data/option_agg_month", "**", "*.parquet"), recursive=True))
option_df = pl.read_parquet("test_data/current_options_day_aggs.parquet")
option_df = CleanParquetUtility.option_aggs_parquet(option_df)

# Load and clean the stock day aggregates (same alternative source pattern as above).
#stock_df = pl.read_parquet(glob.glob(os.path.join("test_data/stock_agg_month", "**", "*.parquet"), recursive=True))
stock_df = pl.read_parquet("test_data/current_stocks_day_aggs.parquet")
stock_df = CleanParquetUtility.stock_aggs_parquet(stock_df)

# Add current underlying price to options df
stock_df = stock_df.select('ticker', 'price', 'timestamp')
stock_df = stock_df.rename({'ticker': 'underlying_ticker'})

# join_asof requires both frames to be sorted by the `on` key, and Polars cannot verify
# that when `by` groups are supplied. Sort explicitly so the match is always well defined;
# maintain_order=True keeps the existing row order among equal timestamps.
combined_df = option_df.sort('timestamp', maintain_order=True).join_asof(
    stock_df.sort('timestamp', maintain_order=True),
    on='timestamp',
    by='underlying_ticker',
    strategy='nearest',
    tolerance='1m'  # Polars duration string: one minute ('1mo' would be one month)
)

# Convert timestamp to just date
combined_df = combined_df.with_columns(
    pl.col('timestamp').dt.date().alias('date'),
)
# Whole calendar days between the observation date and the expiration date
combined_df = combined_df.with_columns(
    (pl.col('expiration_date').dt.date() - pl.col('date')).dt.total_days().alias('days_to_expiration')
)

# Add r: the 3-month treasury yield is used as the risk-free rate
treasury_df = pl.read_parquet("test_data/treasury_yields.parquet")
treasury_df = CleanParquetUtility.treasury_parquet(treasury_df)
treasury_df = treasury_df.select('date', '3m')
treasury_df = treasury_df.rename({'3m': 'risk_free_rate'})
treasury_df = treasury_df.with_columns(
    pl.col('date').dt.date().alias('date')
)
# Default (inner) join: option rows whose date has no treasury row are dropped here
combined_df = combined_df.join(treasury_df, on='date')

# Rename columns to be consistent with option pricing engine
combined_df = combined_df.rename({'date': 'evaluation_date',
                                  'option_price': 'option_market_price',
                                  'price': 'spot'})

# Convert some types for coordination with the QL method
combined_df = combined_df.with_columns(
    pl.col('spot', 'strike_price').cast(pl.Float64)
)

# There are no spot prices for indexes in the data set so we drop the index options, in total there are 31 and they are
# all of the nulls. Note that drop_nulls() removes a row when ANY column is null, not only `spot`.
combined_df = combined_df.drop_nulls()

combined_df.head()


  combined_df = option_df.join_asof(


option_ticker,volume,option_market_price,timestamp,underlying_ticker,expiration_date,option_type,strike_price,spot,evaluation_date,days_to_expiration,risk_free_rate
str,i64,"decimal[*,2]",datetime[ns],str,datetime[ns],enum,f64,f64,date,i64,f64
"""O:A250815C00095000""",1,23.78,2025-08-14 04:00:00,"""A""",2025-08-15 00:00:00,"""CALL""",95.0,119.23,2025-08-14,1,0.043
"""O:A250815C00100000""",1,19.0,2025-08-14 04:00:00,"""A""",2025-08-15 00:00:00,"""CALL""",100.0,119.23,2025-08-14,1,0.043
"""O:A250815C00110000""",7,8.5,2025-08-14 04:00:00,"""A""",2025-08-15 00:00:00,"""CALL""",110.0,119.23,2025-08-14,1,0.043
"""O:A250815C00115000""",31,3.75,2025-08-14 04:00:00,"""A""",2025-08-15 00:00:00,"""CALL""",115.0,119.23,2025-08-14,1,0.043
"""O:A250815C00120000""",181,0.65,2025-08-14 04:00:00,"""A""",2025-08-15 00:00:00,"""CALL""",120.0,119.23,2025-08-14,1,0.043


In [3]:
# Using QuantLib, we compute the IV and Greeks for an option with Black-Scholes-Merton (BSM) to get a
# market-adjusted view of the option's price. While this is fast and relatively accurate, more precise
# results can be had by using other methods in the QuantLib library. Most people use BSM for the basics and
# more advanced methods for risk control, price discovery, etc. For my purposes, BSM is more than sufficient right now.

from QuantLib import (Date, Settings, Actual365Fixed, NullCalendar, QuoteHandle, SimpleQuote,
                        YieldTermStructureHandle, FlatForward, RelinkableBlackVolTermStructureHandle,
                        BlackConstantVol, GeneralizedBlackScholesProcess, PlainVanillaPayoff,
                        Option, EuropeanExercise, VanillaOption, AnalyticEuropeanEngine)

def ql_bsm_iv_and_greeks(evaluation_date: "datetime.date",
                         option_market_price: float,
                         spot: float,
                         strike_price: float,
                         days_to_expiration: int,
                         risk_free_rate: float,
                         option_type: str,
                         dividend=0.0):
    """Solve the Black-Scholes-Merton implied volatility of one option and its Greeks.

    The contract is modelled as a European vanilla option priced with QuantLib's
    ``AnalyticEuropeanEngine`` on flat risk-free, dividend and volatility curves
    (Actual/365 Fixed day count, null calendar). The implied volatility is solved
    from ``option_market_price``; the volatility handle is then relinked to that
    value and the price and Greeks are read from the option.

    Side effect: sets the global ``Settings.instance().evaluationDate``.

    Args:
        evaluation_date: Valuation date. Only ``.day``, ``.month`` and ``.year`` are
            read, so a ``datetime.date`` is expected (not a QuantLib ``Date``).
        option_market_price: Observed option price; converted with ``float()``.
        spot: Price of the underlying.
        strike_price: Strike of the option.
        days_to_expiration: Number of days added to the evaluation date to get maturity.
        risk_free_rate: Flat risk-free rate handed to ``FlatForward``.
        option_type: ``"CALL"`` selects a call; any other value is treated as a put.
        dividend: Flat dividend yield handed to ``FlatForward`` (default 0.0).

    Returns:
        A dict with the keys ``bsm-iv``, ``bsm-price``, ``bsm-delta``, ``bsm-gamma``,
        ``bsm-vega``, ``bsm-theta`` (QuantLib's theta divided by 365) and ``bsm-rho``.
        Every value is NaN when the solver or the Greek evaluation raises.
    """
    result_keys = ("bsm-iv", "bsm-price", "bsm-delta", "bsm-gamma",
                   "bsm-vega", "bsm-theta", "bsm-rho")

    ql_evaluationDate = Date(evaluation_date.day, evaluation_date.month, evaluation_date.year)
    Settings.instance().evaluationDate = ql_evaluationDate
    dc = Actual365Fixed()
    cal = NullCalendar()
    maturity = ql_evaluationDate + days_to_expiration

    spot_quote = QuoteHandle(SimpleQuote(spot))
    rf   = YieldTermStructureHandle(FlatForward(ql_evaluationDate, risk_free_rate, dc))
    div  = YieldTermStructureHandle(FlatForward(ql_evaluationDate, dividend, dc))
    # Seed volatility only; the handle is relinked to the solved IV below
    volh = RelinkableBlackVolTermStructureHandle(BlackConstantVol(ql_evaluationDate, cal, 0.20, dc))
    proc = GeneralizedBlackScholesProcess(spot_quote, div, rf, volh)

    ql_option_type = Option.Call if option_type == "CALL" else Option.Put
    payoff   = PlainVanillaPayoff(ql_option_type, strike_price)
    exercise = EuropeanExercise(maturity)
    opt      = VanillaOption(payoff, exercise)
    opt.setPricingEngine(AnalyticEuropeanEngine(proc))

    try:
        # Arguments: target price, process, accuracy, max evaluations, min vol, max vol
        iv = opt.impliedVolatility(float(option_market_price), proc, 1e-10, 1000, 1e-8, 5.0)
        volh.linkTo(BlackConstantVol(ql_evaluationDate, cal, iv, dc))  # lock at IV
        return {
            "bsm-iv": iv,
            "bsm-price": opt.NPV(),
            "bsm-delta": opt.delta(),
            "bsm-gamma": opt.gamma(),
            "bsm-vega":  opt.vega(),
            "bsm-theta": opt.theta() / 365.0,  # scale theta down to a per-day figure
            "bsm-rho":   opt.rho(),
        }
    except Exception:
        # Unable to find a root, option is likely deep OTM or has no volume to get a relative
        # market price, either way we don't want it. Deliberately best-effort: the caller
        # filters these NaN rows out afterwards.
        return dict.fromkeys(result_keys, float('nan'))


In [4]:
# Create an adapter that plugs the pricing function into the Polars DataFrame, processing one batch of columns per call
from typing import List, Dict
import datetime

def ql_bsm_iv_and_greeks_connector(batch) -> pl.Series:
    """Run ``ql_bsm_iv_and_greeks`` on every row of a batch of aligned columns.

    Args:
        batch: Indexable collection of seven equally long, iterable columns in this
            order: evaluation date, option market price, spot, strike price, days to
            expiration, risk-free rate, option type. Presumably the list of Polars
            Series handed over by ``pl.map_batches``.

    Returns:
        A ``pl.Series`` built from one result dict per input row, in input order.
    """
    dividend: float = 0.0 # if adding dividends in the future adjust per batch

    # Walk the seven columns in lockstep rather than indexing each one per row
    rows = zip(batch[0], batch[1], batch[2], batch[3], batch[4], batch[5], batch[6])

    bsm_results: List[Dict[str, float]] = [
        ql_bsm_iv_and_greeks(evaluation_date=evaluation_date,
                             option_market_price=option_market_price,
                             spot=spot,
                             strike_price=strike_price,
                             days_to_expiration=days_to_expiration,
                             risk_free_rate=risk_free_rate,
                             option_type=option_type,
                             dividend=dividend)
        for (evaluation_date, option_market_price, spot, strike_price,
             days_to_expiration, risk_free_rate, option_type) in rows
    ]

    return pl.Series(bsm_results)



In [5]:
# Plug the option pricing method into the dataframe to retrieve IV and Greeks from the QL method.
# The connector returns one struct per row; unnest() spreads its fields into ordinary columns.

bsm_df = combined_df.with_columns([
    pl.map_batches(
        # Order matters: the connector reads these columns by position
        exprs=[
            pl.col('evaluation_date'),
            pl.col('option_market_price'),
            pl.col('spot'),
            pl.col('strike_price'),
            pl.col('days_to_expiration'),
            pl.col('risk_free_rate'),
            pl.col('option_type')
        ],
        function=ql_bsm_iv_and_greeks_connector,
        return_dtype=pl.Struct({
            "bsm-iv": pl.Float64,
            "bsm-price": pl.Float64,
            "bsm-delta": pl.Float64,
            "bsm-gamma": pl.Float64,
            "bsm-vega": pl.Float64,
            "bsm-theta": pl.Float64,
            "bsm-rho": pl.Float64
        })
    ).alias('ql_results')
]).unnest('ql_results')

# Filter out contracts whose Greeks could not be calculated (bad pricing or deep OTM).
# is_not_nan() states the intent directly instead of relying on how `!=` treats NaN.
full_df = bsm_df.filter(pl.col("bsm-iv").is_not_nan())

full_df.head()

option_ticker,volume,option_market_price,timestamp,underlying_ticker,expiration_date,option_type,strike_price,spot,evaluation_date,days_to_expiration,risk_free_rate,bsm-iv,bsm-price,bsm-delta,bsm-gamma,bsm-vega,bsm-theta,bsm-rho
str,i64,"decimal[*,2]",datetime[ns],str,datetime[ns],enum,f64,f64,date,i64,f64,f64,f64,f64,f64,f64,f64,f64
"""O:A250815C00120000""",181,0.65,2025-08-14 04:00:00,"""A""",2025-08-15 00:00:00,"""CALL""",120.0,119.23,2025-08-14,1,0.043,0.393182,0.65,0.383317,0.155579,2.382447,-0.473676,0.123433
"""O:A250815C00125000""",26,0.05,2025-08-14 04:00:00,"""A""",2025-08-15 00:00:00,"""CALL""",125.0,119.23,2025-08-14,1,0.043,0.508774,0.05,0.039468,0.026846,0.531965,-0.135873,0.012755
"""O:A250815C00130000""",12,0.05,2025-08-14 04:00:00,"""A""",2025-08-15 00:00:00,"""CALL""",130.0,119.23,2025-08-14,1,0.043,0.837031,0.05,0.025631,0.011424,0.372435,-0.156224,0.008235
"""O:A250815P00120000""",1,2.1,2025-08-14 04:00:00,"""A""",2025-08-15 00:00:00,"""PUT""",120.0,119.23,2025-08-14,1,0.043,0.678817,2.1,-0.563597,0.092972,2.45801,-0.826105,-0.189857
"""O:A250919C00105000""",5,15.34,2025-08-14 04:00:00,"""A""",2025-09-19 00:00:00,"""CALL""",105.0,119.23,2025-08-14,36,0.043,0.345646,15.34,0.896914,0.013863,6.718567,-0.043045,9.034427


In [6]:
# With BSM IV available, take the IV of the at-the-money call closest to 30 days out
# for each underlying as a proxy for that equity's IV.

iv_stock_df = (
    CleanParquetUtility.stock_aggs_parquet(pl.read_parquet("test_data/current_stocks_day_aggs.parquet"))
    .select('ticker', 'price', 'timestamp')
    .rename({'ticker': 'underlying_ticker'})
)

# Every selection below is made per underlying and per evaluation date
atm_group_keys = ["underlying_ticker", "evaluation_date"]

# Distance of each contract from the 30-day target and from the spot price
dte_distance = (pl.col("days_to_expiration") - 30).abs().alias("dte_diff")
strike_distance = (pl.col("strike_price") - pl.col("spot")).abs().alias("strike_diff")

# Step 1: calls only, keeping the expiration(s) closest to 30 days in each group
call_option_df = full_df.filter(pl.col("option_type") == "CALL")

closest_dte_df = (
    call_option_df
    .with_columns(dte_distance)
    .group_by(atm_group_keys)
    .agg(pl.col("dte_diff").min().alias("min_dte_diff"))
)

iv_option_df = (
    call_option_df
    .with_columns(dte_distance)
    .join(closest_dte_df, on=atm_group_keys)
    .filter(pl.col("dte_diff") == pl.col("min_dte_diff"))
    .drop(["dte_diff", "min_dte_diff"])
)

# Step 2: among those, keep the strike(s) closest to the current spot price
closest_strike_df = (
    iv_option_df
    .with_columns(strike_distance)
    .group_by(atm_group_keys)
    .agg(pl.col("strike_diff").min().alias("min_strike_diff"))
)

atm_iv_df = (
    iv_option_df
    .with_columns(strike_distance)
    .join(closest_strike_df, on=atm_group_keys)
    .filter(pl.col("strike_diff") == pl.col("min_strike_diff"))
    .drop(["strike_diff", "min_strike_diff"])
    .select(["underlying_ticker", "evaluation_date", "strike_price", "spot", "days_to_expiration", "bsm-iv"])
)

atm_iv_df.head()

underlying_ticker,evaluation_date,strike_price,spot,days_to_expiration,bsm-iv
str,date,f64,f64,i64,f64
"""A""",2025-08-14,120.0,119.23,36,0.325789
"""AA""",2025-08-14,31.0,31.08,29,0.403482
"""AAAU""",2025-08-14,33.0,33.01,36,0.158129
"""AACT""",2025-08-14,12.5,11.37,36,0.632987
"""AAL""",2025-08-14,13.0,12.92,29,0.486809


In [7]:
# Screen the ATM IV proxies: IV below 1.2, expiration strictly between 20 and 40 days,
# spot of at least 100; then keep the 20 highest IVs, displayed in ascending order.
stock_iv_df = (
    atm_iv_df
    .filter(
        pl.col("bsm-iv") < 1.2,
        pl.col("days_to_expiration") > 20,
        pl.col("days_to_expiration") < 40,
    )
    .filter(pl.col("spot") >= 100.0)
    .top_k(20, by="bsm-iv")
    .sort(by="bsm-iv")
)

stock_iv_df.head(20)

underlying_ticker,evaluation_date,strike_price,spot,days_to_expiration,bsm-iv
str,date,f64,f64,i64,f64
"""FUTU""",2025-08-14,172.5,173.3,29,0.6135
"""WGS""",2025-08-14,119.0,120.23,29,0.615638
"""BLD""",2025-08-14,340.0,435.32,36,0.623994
"""APP""",2025-08-14,435.0,434.58,29,0.624979
"""POWL""",2025-08-14,250.0,250.38,36,0.642807
…,…,…,…,…,…
"""UTHR""",2025-08-14,300.0,308.83,36,0.912697
"""NVDU""",2025-08-14,127.0,125.64,36,0.925024
"""CRCL""",2025-08-14,152.5,152.0,29,0.926479
"""CRDO""",2025-08-14,120.0,119.45,36,1.013115


In [8]:
# Next up: realized volatility for an equity. Start by loading every parquet file found
# (recursively) under the yearly stock aggregate directory.

stock_year_pattern = os.path.join("test_data/stock_agg_year", "**", "*.parquet")
stock_year_files = glob.glob(stock_year_pattern, recursive=True)
stock_year_df_source = pl.read_parquet(stock_year_files)

stock_year_df_source.head()

ticker,volume,open,close,high,low,window_start,transactions
str,i64,f64,f64,f64,f64,i64,i64
"""A""",1192289,147.61,147.84,149.475,147.242,1710216000000000000,25263
"""AA""",6140114,30.6,29.79,30.75,29.21,1710216000000000000,47739
"""AAA""",10922,25.06,25.06,25.1203,25.06,1710216000000000000,39
"""AAAU""",2245730,21.435,21.355,21.45,21.29,1710216000000000000,2552
"""AACG""",15825,1.63,1.59,1.63,1.53,1710216000000000000,164


In [9]:
# Working name for the yearly stock aggregates. This binds the same object as
# `stock_year_df_source`; it is not a copy.
stock_year_df = stock_year_df_source


