In [2]:
import datetime
import logging
import math
import os
import time
from string import Template

import numpy as np
import pandas as pd
import pytz
import requests
from pandasql import sqldf

In [3]:
# Notebook rendering limits: show at most 50 rows and 100 columns of any DataFrame.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 100)

In [4]:
# --- Polygon API configuration ---
# The key is read from the environment so the credential is never stored in the notebook,
# its saved outputs, or version control. Export POLYGON_API_KEY before starting the kernel.
# NOTE: a key that was previously committed in this cell should be treated as leaked and rotated.
API_KEY = os.environ.get('POLYGON_API_KEY', '')
OPTIONS_URL = Template('https://api.polygon.io/v2/aggs/ticker/$ticker/range/$multiplier/minute/$start_date/$end_date?adjusted=true&sort=asc&limit=50000')
EQUITY_URL =  Template('https://api.polygon.io/v2/aggs/ticker/$ticker/range/$multiplier/minute/$start_date/$end_date?adjusted=true&sort=asc&limit=50000')

# --- logging ---
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')


def configure_logger(name, make_handler):
    """Return the DEBUG-level logger `name`, attaching a handler only if it has none.

    `logging.getLogger` returns the same object for the same name for the whole
    kernel session, so calling `addHandler` unconditionally would add one more
    handler (and one more copy of every log line) each time this cell is re-run.

    Parameters
    ----------
    name : str
        Logger name.
    make_handler : callable
        Zero-argument factory returning a `logging.Handler`; only called when
        the logger has no handler yet, so no file is opened needlessly.

    Returns
    -------
    logging.Logger
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.DEBUG)
    if not logger.handlers:
        handler = make_handler()
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger


console_logger = configure_logger('console_logger', logging.StreamHandler)
file_logger = configure_logger(
    'file_logger', lambda: logging.FileHandler('research_options_05-11-24.log'))
# Kept as module-level names for any code that refers to the handlers directly.
file_handler = file_logger.handlers[0]
console_handler = console_logger.handlers[0]

if not API_KEY:
    console_logger.warning('POLYGON_API_KEY is not set; Polygon requests will be sent without a valid key.')

# pandasql helper: run a SQL query against the DataFrames living in the notebook's global namespace.
mysqldf = lambda q: sqldf(q, globals())

In [5]:
# Study window.
startDate, endDate = datetime.date(2023, 3, 1), datetime.date(2023, 5, 30)
# Underlying equity symbol.
equity_symbol = 'SPY'

# Option-universe limits.
max_expiry_dte = 2                  # largest days-to-expiry to include
max_strikes_from_open_price = 0.03  # strike band, as a fraction of the equity open price
strike_step = 1                     # spacing between consecutive strikes


In [6]:
def generateOptionsAPIRequestQuotes(optionstring:str, startDate: datetime.date, endDate: datetime.date, minutes_freq:int,
                                    as_csv: bool = False, timeout: float = 30):
    """Request minute-aggregate bars for one option contract from the Polygon API.

    Parameters
    ----------
    optionstring : str
        Contract ticker substituted into the URL, e.g. "O:SPY230519C00417000".
    startDate, endDate : datetime.date
        Range bounds; formatted as YYYY-MM-DD via `strftime`, so `datetime` and
        `pandas.Timestamp` values work as well.
    minutes_freq : int
        Bar size in minutes (the `multiplier` of the URL template).
    as_csv : bool, default False
        When True an `Accept: text/csv` header is sent. NOTE(review): whether the
        endpoint honours it is not verified here.
    timeout : float, default 30
        Seconds passed to `requests.get`, so a stalled connection raises instead
        of blocking the notebook indefinitely.

    Returns
    -------
    requests.Response
        Returned as-is; the caller is responsible for checking `status_code`.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within `timeout` seconds.
    """
    headers = {'Authorization': f"Bearer {API_KEY}"}
    if as_csv:
        headers['Accept'] = 'text/csv'
    url = OPTIONS_URL.substitute(
        ticker=optionstring,
        multiplier=minutes_freq,
        start_date=startDate.strftime('%Y-%m-%d'),
        end_date=endDate.strftime('%Y-%m-%d'),
    )
    return requests.get(url, headers=headers, timeout=timeout)
def generateEquityAPIRequestQuotes(ticker: str,startDate: datetime.date,endDate: datetime.date, minutes_freq:int,
                                   as_csv: bool = False, timeout: float = 30):
    """Request minute-aggregate bars for an equity ticker from the Polygon API.

    Parameters
    ----------
    ticker : str
        Equity symbol substituted into the URL, e.g. "SPY".
    startDate, endDate : datetime.date
        Range bounds. Anything with a `strftime` method (date, datetime,
        pandas.Timestamp) is rendered as YYYY-MM-DD; other values, such as an
        already formatted string, are substituted unchanged.
    minutes_freq : int
        Bar size in minutes (the `multiplier` of the URL template).
    as_csv : bool, default False
        When True an `Accept: text/csv` header is sent. NOTE(review): whether the
        endpoint honours it is not verified here.
    timeout : float, default 30
        Seconds passed to `requests.get`, so a stalled connection raises instead
        of blocking the notebook indefinitely.

    Returns
    -------
    requests.Response
        Returned as-is; the caller is responsible for checking `status_code`.

    Raises
    ------
    requests.exceptions.Timeout
        If the server does not answer within `timeout` seconds.
    """
    def _as_url_date(value):
        # str(datetime) / str(Timestamp) would put "YYYY-MM-DD HH:MM:SS" into the URL path,
        # so format explicitly whenever the value supports it.
        return value.strftime('%Y-%m-%d') if hasattr(value, 'strftime') else value

    headers = {'Authorization': f"Bearer {API_KEY}"}
    if as_csv:
        headers['Accept'] = 'text/csv'
    url = EQUITY_URL.substitute(
        ticker=ticker,
        multiplier=minutes_freq,
        start_date=_as_url_date(startDate),
        end_date=_as_url_date(endDate),
    )
    return requests.get(url, headers=headers, timeout=timeout)
def getOptionsData(ticker,startDate,endDate,minutes_freq):
    """Download intraday equity bars and the matching near-dated option bars.

    One equity request covers the whole [startDate, endDate] range. Then, for
    every calendar day in that range that has an equity bar after 09:30
    (US/Eastern) and is a business day according to `np.is_busday`:

    * the `open` of the first bar after 09:30 is taken as the day's central price;
    * integer strikes from floor(price * (1 - max_strikes_from_open_price)) up to,
      but not including, ceil(price * (1 + max_strikes_from_open_price)) are
      enumerated in steps of `strike_step`;
    * for every expiry 0..max_expiry_dte business days ahead, the call and the
      put of each strike are requested for that single day.

    Reads the module-level settings `max_strikes_from_open_price`,
    `max_expiry_dte` and `strike_step`, and logs through `file_logger` /
    `console_logger`.

    Parameters
    ----------
    ticker : str
        Underlying symbol; used for the equity request and to build the option
        contract tickers ("O:<ticker><yymmdd><C|P><strike*1000, 8 digits>").
    startDate, endDate : datetime.date
        Inclusive date range.
    minutes_freq : int
        Bar size in minutes for both equity and option requests.

    Returns
    -------
    dict
        'options_quotes': list of DataFrames, one per contract/day that returned
        bars (raw Polygon columns plus ticker, full_name, type, strike, expiry as
        a 'yymmdd' string, and equity_start_price);
        'equity_quotes': DataFrame of equity bars with renamed columns and a
        naive US/Eastern `time_converted` column.

    Raises
    ------
    RuntimeError
        If the equity request or any option request does not return HTTP 200, or
        if the equity response contains no bars.
    """
    def enhancePulledDataFromPolygon(df:pd.DataFrame, optionticker:str,
                                     option_full_name: str,
                                     optiontype: str, option_strike:float,
                                     option_expiry_date: str):
        """Add the contract attributes used to query Polygon as columns of `df`.

        The bars returned by the API do not carry the underlying ticker, contract
        name, option type, strike or expiry, so the caller passes them in.
        Mutates and returns `df`.
        """
        df['ticker'] = optionticker
        df['full_name'] = option_full_name
        df['type'] = optiontype
        df['strike'] = option_strike
        df['expiry'] = option_expiry_date
        return df

    def fetch_contract_bars(contract_ticker, optiontype, day, strike, contract_date, central_price):
        """Request one contract for one day; return its enriched frame, or None when the API has no bars."""
        file_logger.info(f'Pulling {optiontype}: ' + contract_ticker)
        response = generateOptionsAPIRequestQuotes(contract_ticker, day, day, minutes_freq)
        if response.status_code != 200:
            for logger in (file_logger, console_logger):
                logger.error(contract_ticker)
                logger.error(response.text)
            raise RuntimeError(f"API returned not success when trying to get {optiontype} options")

        payload = response.json()  # parse the body once
        # Zero results can happen on extra holidays or for contracts that were never listed: skip them.
        if int(payload.get('resultsCount', 0)) > 0:
            frame = enhancePulledDataFromPolygon(
                df=pd.DataFrame(payload['results']), optionticker=ticker,
                option_full_name=contract_ticker, optiontype=optiontype,
                option_strike=strike, option_expiry_date=contract_date,
            )
            frame['equity_start_price'] = central_price
            return frame

        label = optiontype.capitalize()
        file_logger.debug(f'!!!!**********Missing {label} contract from API return******')
        file_logger.debug(contract_ticker)
        console_logger.debug(f'!!!!**********Missing {label} contract******')
        console_logger.debug(contract_ticker)
        return None

    # ---- equity bars for the whole range ----
    equity_quotes = generateEquityAPIRequestQuotes(ticker,startDate,endDate,minutes_freq)
    if equity_quotes.status_code != 200:
        for logger in (file_logger, console_logger):
            logger.error(equity_quotes.text)
        raise RuntimeError("Error when processing API call to equity prices")

    equity_payload = equity_quotes.json()
    equity_results = equity_payload.get('results') or []
    if not equity_results:
        raise RuntimeError("Equity API call succeeded but returned no price bars")
    if equity_payload.get('next_url'):
        # The response is paginated and only the first page is consumed here,
        # so later days of the range may be reported as missing below.
        for logger in (file_logger, console_logger):
            logger.warning('Equity response is truncated (next_url present); only the first page is used')

    equity_df = pd.DataFrame(equity_results).rename(
        columns={"v": "volume", "vw": "volume_weighted", "o": "open", "c": "close",
                 "h": "high", "l": "low", "t": "timestamp", "n": "number of trades",
                 "a": "trade conditions"})
    # Epoch milliseconds (UTC) -> naive US/Eastern wall-clock time.
    equity_df['time_converted'] = pd.to_datetime(
        equity_df["timestamp"], unit="ms").dt.tz_localize(
            "UTC").dt.tz_convert("US/Eastern").dt.tz_localize(None)

    # Loop-invariant pieces of the per-day filter.
    candle_dates = equity_df['time_converted'].dt.date
    # NOTE(review): strict '>' excludes a bar stamped exactly 09:30 — confirm this is intended.
    after_open = equity_df['time_converted'].dt.time > datetime.time(9, 30)

    df_quotes = []
    for day in pd.date_range(start=startDate, end=endDate, freq='D'):
        trade_date = day.date()
        day_opens = equity_df.loc[after_open & (candle_dates == trade_date), 'open']
        if day_opens.empty:
            file_logger.warning('Could not find equity date for date: ' + str(trade_date))
            console_logger.warning('Could not find equity date for date: ' + str(trade_date))
            continue
        central_price = day_opens.iloc[0]

        strike_upper_limit = math.ceil(central_price*(1+max_strikes_from_open_price))
        strike_lower_limit = math.floor(central_price*(1-max_strikes_from_open_price))
        # NOTE(review): range() excludes strike_upper_limit itself — confirm this is intended.
        strike_range = range(strike_lower_limit, strike_upper_limit, strike_step)

        if np.is_busday(trade_date):
            for DTE in range(0, max_expiry_dte + 1):
                expiry = day + pd.tseries.offsets.BDay(n=DTE)
                # Defensive guard: a business-day offset is not expected to land on Sat/Sun.
                if expiry.isoweekday() in (6, 7):
                    file_logger.info(f'Contract expiry date {expiry} is on expiring on Weekend for options trading on {trade_date}. Skipping this expiry date ')
                    console_logger.debug(f'Contract expiry date {expiry} is on expiring on Weekend for options trading on {trade_date}. Skipping this expiry date ')
                    continue
                contract_date = expiry.strftime('%y%m%d')

                for strike in strike_range:
                    # Contract ticker example: "O:SPY230519C00417000"
                    # (strike * 1000, zero-padded to 8 digits, so 2- and 4-digit strikes are formatted correctly too).
                    strike_code = f'{int(strike) * 1000:08d}'
                    call_contract_ticker = f'O:{ticker}{contract_date}C{strike_code}'
                    put_contract_ticker = f'O:{ticker}{contract_date}P{strike_code}'
                    for contract_ticker, optiontype in ((call_contract_ticker, 'call'),
                                                        (put_contract_ticker, 'put')):
                        frame = fetch_contract_bars(contract_ticker, optiontype, day,
                                                    strike, contract_date, central_price)
                        if frame is not None:
                            df_quotes.append(frame)
                    file_logger.info(f"""Appended these file contracts for day {trade_date}:{call_contract_ticker} and  {put_contract_ticker}""")
                    time.sleep(0.5)  # pause between strikes to throttle the request rate
                console_logger.debug(f'Processed expiry date {DTE} for day {trade_date}')
        console_logger.debug(f'************Finished processing day: {trade_date}***************')
    console_logger.debug(f'************Finished running function***************')
    file_logger.debug(f'************Finished running function***************')
    return {
        'options_quotes': df_quotes,
        'equity_quotes' : equity_df
    }


In [7]:
## main body
file_logger.info('*****************************New Run***********************************')
# response is dict with options data in ['options_quotes] (list of options dfs) and equity data in ['equity_quotes] - df
data = getOptionsData('SPY',datetime.date(2023,9,1), datetime.date(2023,12,31
                                                                   ),15)

2024-05-12 11:16:26,873 - console_logger - DEBUG - Processed expiry date 0 for day 2023-09-01
2024-05-12 11:16:27,273 - console_logger - DEBUG - !!!!**********Missing Call contract******
2024-05-12 11:16:27,273 - console_logger - DEBUG - O:SPY230904C00438000
2024-05-12 11:16:27,776 - console_logger - DEBUG - !!!!**********Missing Put contract******
2024-05-12 11:16:27,777 - console_logger - DEBUG - O:SPY230904P00438000
2024-05-12 11:16:28,672 - console_logger - DEBUG - !!!!**********Missing Call contract******
2024-05-12 11:16:28,672 - console_logger - DEBUG - O:SPY230904C00439000
2024-05-12 11:16:29,059 - console_logger - DEBUG - !!!!**********Missing Put contract******
2024-05-12 11:16:29,060 - console_logger - DEBUG - O:SPY230904P00439000
2024-05-12 11:16:29,941 - console_logger - DEBUG - !!!!**********Missing Call contract******
2024-05-12 11:16:29,941 - console_logger - DEBUG - O:SPY230904C00440000
2024-05-12 11:16:30,314 - console_logger - DEBUG - !!!!**********Missing Put contra

In [8]:


# Build one frame of option bars joined to the equity bar carrying the same timestamp.
# `data` is the dict returned by getOptionsData.
bar_column_names = {"v": "volume", "vw": "volume_weighted", "o": "open", "c": "close",
                    "h": "high", "l": "low", "t": "timestamp", "n": "number of trades",
                    "a": "trade conditions"}


def relabel_merge_suffix(col):
    """Map pandas' merge suffixes to their source: '_x' -> '_options', '_y' -> '_equity'."""
    if col.endswith('_x'):
        return col[:-2] + '_options'
    if col.endswith('_y'):
        return col[:-2] + '_equity'
    return col


options_df = pd.concat(data['options_quotes'])
# 'expiry' arrives as a 'yymmdd' string; convert it to a date.
options_df['expiry'] = pd.to_datetime(options_df['expiry'], format="%y%m%d").dt.date
options_df = options_df.rename(columns=bar_column_names)
# Epoch milliseconds (UTC) -> naive US/Eastern wall-clock time, same as the equity frame.
options_df['time_converted'] = pd.to_datetime(
            options_df["timestamp"], unit="ms").dt.tz_localize(
                pytz.UTC).dt.tz_convert("US/Eastern").dt.tz_localize(None)
options_df = (
    options_df
    .merge(data['equity_quotes'], how='left', on='time_converted', indicator=True)
    .rename(columns=relabel_merge_suffix)
    .drop(columns=['volume_equity', 'volume_weighted_equity'])
)

# Keep only bars strictly between 09:30 and 16:00: some option quotes start at 09:00,
# which is not usable for this analysis.
bar_time = options_df['time_converted'].dt.time
options_df = options_df[
    (bar_time > datetime.time(9, 30)) & (bar_time < datetime.time(16, 0))
].reset_index(drop=True)

# Equity open of each bar relative to the day's reference price (a ratio: 1.0 means unchanged).
options_df['equity_pct_change'] = options_df['open_equity'] / options_df['equity_start_price']

# Option open of each bar relative to that contract's first remaining bar of the same day.
# One groupby replaces a Python loop that rebuilt a full-frame mask per (contract, day),
# and the result columns are numeric instead of object dtype.
# 'first' takes the first non-null open in the current row order.
trade_date = options_df['time_converted'].dt.date.rename('date')
earliest_open = (
    options_df
    .groupby(['full_name', trade_date], sort=False)['open_options']
    .transform('first')
)
options_df['options_pct_change'] = options_df['open_options'] / earliest_open
options_df['options_earliest_open'] = earliest_open

# Distinct (contract, day) pairs, kept under the original name; `date` is a 'YYYY-MM-DD' string.
date_options_comb = (
    pd.DataFrame({'full_name': options_df['full_name'], 'date': trade_date.astype(str)})
    .drop_duplicates()
    .reset_index(drop=True)
)



In [9]:
# Persist the assembled frame twice: CSV for inspection, pickle to keep the dtypes.
csv_path = '0-2DTE_spy_options_01Sep23-31Dec23.csv'
pickle_path = '0-2DTE_spy_options_01Sep23-31Dec23.pkl'
options_df.to_csv(csv_path)
options_df.to_pickle(pickle_path)
#options_df['time_converted'].max()

In [1]:
# Display the assembled frame. `options_df` only exists after the cell that builds it
# has run in the current kernel session; on a fresh kernel this cell raises NameError.
options_df

NameError: name 'options_df' is not defined