In [2]:
import pandas as pd
import datetime
pd.set_option('display.max_columns', None)

import pandas_market_calendars as mcal
# Exchange calendar used to look up the trading sessions to process.
cboe = mcal.get_calendar('CBOE_Equity_Options')

# Start and end dates handed to the calendar's schedule lookup.
date_from = datetime.date(year=2022, month=9, day=28)
date_to = datetime.date(year=2023, month=2, day=16)

def strict_dom_check_trade(r: pd.Series, oi_df, top_df, day=None) -> bool:
    """Return True when trade ``r`` passes every filter below.

    A trade is kept only if ALL of these hold:

    * its expiry is at least 15 calendar days after the session date;
    * ``abs(delta)`` is at least 0.1;
    * ``side`` is non-zero;
    * ``cond`` does not contain ``'SPRD'``;
    * ``oi_df`` has a row whose ``Symbol`` equals the trade's ``usymbol`` and
      the first such row's ``OI`` is strictly smaller than the trade size;
    * among ``top_df`` rows for the same ``usymbol`` with ``date_time`` at or
      before the trade, the first row equals the trade on every column the
      two share, and the second row (if any) has a strictly smaller ``size``.

    Parameters
    ----------
    r : pd.Series
        One trade row. Reads ``expiry_date``, ``delta``, ``side``, ``cond``,
        ``usymbol``, ``size`` and ``date_time``.
    oi_df : pd.DataFrame
        Open-interest table with ``Symbol`` and ``OI`` columns.
    top_df : pd.DataFrame
        Table with at least ``usymbol``, ``date_time`` and ``size`` columns.
        Row order matters (positions 0 and 1 are inspected).
        NOTE(review): presumably pre-sorted so the dominant print comes
        first -- confirm against whatever produces the ``.top.parquet`` files.
    day : date-like, optional
        Session date used for the days-to-expiry check. When omitted, a
        module-level ``day`` is used if one exists (this is how the original
        notebook loop supplies it); otherwise the date of ``r['date_time']``.

    Returns
    -------
    bool
        True if the trade passes all filters, False otherwise. A symbol with
        no open-interest row, or no matching ``top_df`` row, yields False.
    """
    if day is None:
        # Backward compatibility: existing call sites pass only
        # (oi_df, top_df) and rely on a module-level `day` set by the loop.
        day = globals().get('day')
    if day is None:
        day = r['date_time']
    # Accept date / datetime / Timestamp alike and reduce to a plain date.
    session_date = pd.Timestamp(day).date()

    if (pd.to_datetime(r['expiry_date']).date() - session_date).days < 15:
        return False
    if abs(r['delta']) < 0.1:
        return False
    if r['side'] == 0:
        return False
    if 'SPRD' in r['cond']:
        return False

    # Open interest must exist for the symbol and be below the trade size.
    poi = oi_df[oi_df['Symbol'] == r['usymbol']]
    if poi.empty or poi.iloc[0]['OI'] >= r['size']:
        return False

    ptop = top_df[(top_df['usymbol'] == r['usymbol']) & (top_df['date_time'] <= r['date_time'])]
    if ptop.empty:
        # Previously an unguarded ``ptop.iloc[0]`` raised IndexError here;
        # treat "no top row" like "no open-interest row": not a signal.
        return False

    # The trade must itself be the first top row: compare every shared column.
    # Dict comparison is kept on purpose so value equality semantics
    # (including how missing values compare) stay exactly as before.
    topdic = ptop.iloc[0].to_dict()
    rdic = r.to_dict()
    shared = set(topdic) & set(rdic)
    if {k: topdic[k] for k in shared} != {k: rdic[k] for k in shared}:
        return False

    # ...and the runner-up, when there is one, must be strictly smaller.
    return len(ptop) <= 1 or bool(ptop.iloc[1]['size'] < r['size'])

# Scan every session returned by the CBOE calendar for the configured window
# and keep the trades that pass strict_dom_check_trade.
daily_signals = []
for day in cboe.schedule(date_from, date_to).index:
    # Keep the name `day` at module level as a datetime.date:
    # strict_dom_check_trade relies on the module-level `day`.
    day = day.date()
    stamp = day.strftime("%Y%m%d")
    trades_df = pd.read_parquet(f'../data/{stamp}.trades.parquet')
    oi_df = pd.read_parquet(f'../data/{stamp}.oi.parquet')
    top_df = pd.read_parquet(f'../data/{stamp}.top.parquet')

    # Flag sessions whose open-interest file is empty (every trade that
    # reaches the OI check on such a day is rejected).
    if len(oi_df) <= 0:
        print(day)

    # On an empty frame, apply(axis=1) returns a DataFrame instead of a
    # boolean Series, which cannot be used as a row mask -- skip such days.
    if trades_df.empty:
        continue

    passed = trades_df.apply(strict_dom_check_trade, axis=1, args=(oi_df, top_df)).astype(bool)
    daily_signals.append(trades_df[passed].reset_index(drop=True))

all_trades = pd.concat(daily_signals, ignore_index=True, axis=0).sort_values(by='date_time')
all_trades

Unnamed: 0,size,usymbol,expiry_date,strike,pc,price,theo,side,exch,date_time,volume,cond,ivol,ivol_chg,delta,spot,spot_chg,vega_dollar,cond_extra,hilo,events,bid,ask,open_int,bid_exch,bid_size,ask_size,ask_exch
0,1438,SAIA,2022-10-21,190.0,P,7.70000,8.344700,-2,BOX,2022-09-28 12:12:00.250000-04:00,1500,FLR,0.587407,-0.028093,-0.357175,197.940,5.680,26783.0,OPENING,,,7.70,10.20,32,AMEX,26.0,17.0,BOX
1,2609,CNI,2022-10-21,110.0,C,4.66603,4.432300,3,[MULTI],2022-09-28 12:46:33.826000-04:00,2619,ISO,0.336441,0.019525,0.584749,111.375,2.685,28552.0,OPENING,,,4.30,4.60,44,PHLX,72.0,95.0,PHLX
2,566,OSG,2022-11-18,2.5,C,0.70000,0.699999,-2,[MULTI],2022-09-28 14:59:29.275000-04:00,1000,,1.158380,0.411377,0.711910,2.990,0.040,204.0,OPENING,,,0.70,0.75,15,CBOE,113.0,171.0,BZX
3,1000,SILK,2022-11-18,50.0,C,2.75000,3.066950,-1,BOX,2022-09-29 10:14:23.870000-04:00,1000,FLR,0.568433,0.024939,0.430177,46.595,-1.345,6822.0,OPENING,,,1.50,4.90,3,BOX,37.0,50.0,BOX
4,540,KROS,2022-10-21,30.0,C,8.25000,8.364420,1,[AMEX],2022-09-29 12:19:57.570000-04:00,540,FLR,1.155940,0.099390,0.815830,36.500,-0.150,1341.0,OPENING,,,5.70,10.00,0,AMEX,42.0,20.0,PHLX
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
396,2690,AON,2023-03-17,330.0,C,1.00000,1.296140,-2,AMEX,2023-02-16 11:57:05.318000-05:00,2695,FLR,0.178344,-0.002303,0.132154,310.280,-4.790,51854.0,OPENING,,,1.00,1.60,606,GEMX,6.0,13.0,BOX
397,14390,COF,2023-03-17,125.0,C,0.60000,0.675698,-2,AMEX,2023-02-16 12:43:46.075000-05:00,14392,FLR,0.283778,-0.023438,0.136844,113.630,-1.000,102641.0,OPENING,,,0.60,0.75,1051,PHLX,565.0,1.0,BZX
398,3000,TGNA,2023-03-17,17.0,P,0.25000,0.174325,-1,CBOE,2023-02-16 13:43:28.616000-05:00,3012,CROSS,0.659497,-0.100780,-0.123895,20.615,0.145,3570.0,OPENING,,,0.05,0.60,271,PHLX,30.0,1.0,ARCA
399,600,SOVO,2023-05-19,15.0,C,0.70000,0.724481,-2,[ARCA],2023-02-16 15:23:08.322000-05:00,737,,0.485551,-0.146412,0.367224,13.100,-0.710,1495.0,OPENING,,,0.70,0.75,111,ARCA,863.0,53.0,C2


In [4]:
# Write the combined signals to disk, leaving the DataFrame index out of the file.
all_trades.to_csv(path_or_buf='signals.csv', index=False)

In [4]:
# Show the dtype of every column in the combined signals frame.
all_trades.dtypes

size                                int64
usymbol                            object
expiry_date                datetime64[ns]
strike                            float64
pc                                 object
price                             float64
theo                              float64
side                                 int8
exch                               object
date_time      datetime64[ns, US/Eastern]
volume                              int64
cond                               object
ivol                              float64
ivol_chg                          float64
delta                             float64
spot                              float64
spot_chg                          float64
vega_dollar                       float64
cond_extra                         object
hilo                               object
events                             object
bid                               float64
ask                               float64
open_int                          