## Various ways to get Options expiration dates

In [1]:
import os,sys
import numpy as np
import pandas as pd
from pandas.tseries.offsets import BDay
import pandas_datareader.data as pdr
import datetime
import pytz
import pathlib
from dateutil.relativedelta import *
import pandas_market_calendars as pmc
import pathlib
from dashapp import dashapp2 as dashapp
import os,sys
import requests
import io
import subprocess
import re
import xml.etree.ElementTree as ET
import xmltodict
import zipfile
import chrome_driver_upload as cdu
from pandas.tseries.holiday import USFederalHolidayCalendar
import sel_scrape as sc
from tqdm import tqdm,tqdm_notebook

  from pandas.util.testing import assert_frame_equal


In [2]:
# Notebook-wide switches and constants.
DOWNLOAD_CHROMEDRIVER = False   # set to True to let the next cell fetch chromedriver
CHROME_DOWNLOADED = False       # flipped to True once the download has run
TIMEZONE = 'US/Eastern'

# Futures month codes in calendar order (F = January ... Z = December),
# and the lookup from month code to month number (1-12).
MONTH_CODES = 'FGHJKMNQUVXZ'
DICT_MONTH_CODE = {code: number for number, code in enumerate(MONTH_CODES, start=1)}

# Tens digit of the current year, times ten (e.g. 2024 -> 20).
THIS_DECADE = (datetime.datetime.now().year // 10 % 10) * 10

In [5]:
# Download chromedriver only when the DOWNLOAD_CHROMEDRIVER switch is on and
# the download has not already happened in this kernel session.
if DOWNLOAD_CHROMEDRIVER and not CHROME_DOWNLOADED:
    cdu.download_chromedriver()
    CHROME_DOWNLOADED=True  # remember the download so re-running this cell skips it
# Disabled: opening the CME options calendar page through the sel_scrape helper.
# sac = sc.SelScrape(headless=False,driver_name='chrome')#,profile_path=pp)
# sac.goto('https://www.cmegroup.com/tools-information/quikstrike/options-calendar.html')


### Methods to extract all current CME expirations from CME website

In [6]:
def get_full_underlying_symbol(r,decade=2,underlying_col='Underlying Symbol'):
    """
    Return the underlying symbol of row ``r`` with a two-digit year suffix.

    :param r: row-like object (DataFrame row, dict, ...) indexable by ``underlying_col``
    :param decade: tens digit used to expand a single-digit year (2 -> 202x)
    :param underlying_col: key under which ``r`` stores the underlying symbol
    :return: the symbol unchanged when its trailing digits parse to a value above 9;
        otherwise the symbol with its last character replaced by ``decade*10 + year``
        (e.g. ``CLQ0`` -> ``CLQ20``)
    """
    symbol = r[underlying_col]
    trailing_digits = re.findall('[0-9]{1,2}$', symbol)[0]
    year = int(trailing_digits)
    if year <= 9:
        # single-digit year: drop it and append the expanded two-digit year
        return symbol[:-1] + str(decade*10 + year)
    return symbol

def get_full_option_symbol(r,decade=2,option_col='Option Symbol',
                           underlying_col='Underlying Symbol'):
    """
    Return the option symbol of row ``r`` with a two-digit year suffix.

    :param r: row-like object indexable by ``option_col`` and ``underlying_col``
    :param decade: tens digit used to expand a single-digit year (2 -> 202x)
    :param option_col: key under which ``r`` stores the option symbol
    :param underlying_col: key under which ``r`` stores the underlying symbol
    :return: the option symbol unchanged when its trailing digits parse to a value
        above 9. Otherwise its last character is replaced by the underlying's year
        when that is above 9, or else by ``decade*10 + option year``.
    """
    year_suffix = '[0-9]{1,2}$'
    option_symbol = r[option_col]
    option_year = int(re.findall(year_suffix, option_symbol)[0])
    if option_year > 9:
        return option_symbol

    # The underlying is only consulted for options carrying a single-digit year.
    underlying_year = int(re.findall(year_suffix, r[underlying_col])[0])
    if underlying_year > 9:
        full_year = underlying_year
    else:
        full_year = decade*10 + option_year
    return option_symbol[:-1] + str(full_year)



In [10]:
def get_cme_expiries(group_ids=range(1,7)):
    """
    Download the CME/QuikStrike expiration calendars and combine them into one DataFrame.

    One CSV is fetched per product group id and the results are concatenated with
    a fresh 0..n-1 index. Four columns are then added:

    * ``option_expiry``: datetime parsed from the first 10 characters
      (``mm/dd/YYYY``) of 'Option Expiration Date (CT)'
    * ``underlying_expiry``: same for 'Underlying Expiration Date (CT)';
      missing values become the sentinel date 1900-01-01
    * ``underlying_symbol`` / ``option_symbol``: symbols expanded to a two-digit
      year via get_full_underlying_symbol / get_full_option_symbol

    :param group_ids: iterable of QuikStrike ``GroupId`` values to download
        (defaults to groups 1 through 6)
    :return: the combined DataFrame
    """
    # Collect every group first and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the whole frame on each iteration.
    group_frames = []
    for i in tqdm(group_ids):
        url = f'https://cmegroup-tools.quikstrike.net/User/Export/CME/ExpirationCalendar.aspx?GroupId={i}'
        group_frames.append(pd.read_csv(url))
    df_eee_all = pd.concat(group_frames, ignore_index=True)

    date_format = '%m/%d/%Y'
    # Only the date part (first 10 characters) of the 'mm/dd/YYYY HH:MM:SS' text is used.
    option_dates = df_eee_all['Option Expiration Date (CT)'].values
    df_eee_all['option_expiry'] = [
        datetime.datetime.strptime(s[:10], date_format) for s in option_dates
    ]

    # Rows without an underlying expiration get a recognisable sentinel date.
    missing_date = '01/01/1900'
    underlying_dates = df_eee_all['Underlying Expiration Date (CT)'].values
    df_eee_all['underlying_expiry'] = [
        datetime.datetime.strptime(missing_date if pd.isna(s) else s[:10], date_format)
        for s in underlying_dates
    ]

    df_eee_all['underlying_symbol'] = df_eee_all.apply(get_full_underlying_symbol,axis=1)
    # The option symbol is derived from the already expanded underlying symbol.
    opsym_lambda = lambda r:get_full_option_symbol(r,underlying_col='underlying_symbol')
    df_eee_all['option_symbol'] = df_eee_all.apply(opsym_lambda,axis=1)
    return df_eee_all


In [11]:
# Fetch the CME expiration calendars (downloads one CSV per product group).
df_cme_expiry = get_cme_expiries()

100%|██████████| 6/6 [00:05<00:00,  1.03it/s]


In [13]:
# The '{2}' quantifier lives in its own variable so the f-string below does not
# try to interpret the braces as a replacement field.
yy = '{2}'
lo_mask = df_cme_expiry['option_symbol'].str.contains(f'LO[{MONTH_CODES}][0-9]{yy}$')
df_cme_expiry.loc[lo_mask]


Unnamed: 0,Option First Trade Date,Option Expiration Date (CT),Option Product,Option Symbol,Underlying Symbol,Underlying Expiration Date (CT),option_expiry,underlying_expiry,underlying_symbol,option_symbol
455,11/20/2014,07/16/2020 13:30:00,Crude Oil Options,LOQ0,CLQ0,07/21/2020 13:30:00,2020-07-16,2020-07-21,CLQ20,LOQ20
461,11/20/2014,08/17/2020 13:30:00,Crude Oil Options,LOU0,CLU0,08/20/2020 13:30:00,2020-08-17,2020-08-20,CLU20,LOU20
466,11/20/2014,09/17/2020 13:30:00,Crude Oil Options,LOV0,CLV0,09/22/2020 13:30:00,2020-09-17,2020-09-22,CLV20,LOV20
471,11/20/2014,10/15/2020 13:30:00,Crude Oil Options,LOX0,CLX0,10/20/2020 13:30:00,2020-10-15,2020-10-20,CLX20,LOX20
477,11/18/2011,11/17/2020 13:30:00,Crude Oil Options,LOZ0,CLZ0,11/20/2020 13:30:00,2020-11-17,2020-11-20,CLZ20,LOZ20
...,...,...,...,...,...,...,...,...,...,...
626,11/23/2020,09/17/2031 13:30:00,Crude Oil Options,LOV31,CLV31,,2031-09-17,1900-01-01,CLV31,LOV31
627,11/23/2020,10/16/2031 13:30:00,Crude Oil Options,LOX31,CLX31,,2031-10-16,1900-01-01,CLX31,LOX31
628,11/23/2020,11/17/2031 12:30:00,Crude Oil Options,LOZ31,CLZ31,,2031-11-17,1900-01-01,CLZ31,LOZ31
629,11/23/2020,12/16/2031 12:30:00,Crude Oil Options,LOF32,CLF32,,2031-12-16,1900-01-01,CLF32,LOF32


In [21]:
# Load the UK holiday table. The CSV is read directly: the previous version left
# the file handle open and round-tripped the unchanged lines through a StringIO.
# Columns whose header contains '20' are later parsed as '<Month> <day>' strings
# for that year.
df_ukh = pd.read_csv('expiration_data/uk_holidays.csv')
def ukh_to_yyyymmdd(month_day,year):
    """
    Combine a '<Month name> <day>' string (e.g. 'January 2') with a year.

    :param month_day: month name and day of month; surrounding whitespace is ignored
    :param year: four-digit year (anything that formats as one)
    :return: datetime.datetime at midnight of that day. Despite the function
        name, no yyyymmdd string or integer is produced.
    """
    date_text = f'{month_day.strip()} {year}'
    return datetime.datetime.strptime(date_text, '%B %d %Y')

# Year columns are the ones whose header contains '20' (e.g. '2020', '2021').
year_cols = [col for col in df_ukh.columns.values if '20' in str(col)]
# Turn each '<Month> <day>' cell into a datetime, using the column header as the year.
for c in year_cols:
    df_ukh[c] = df_ukh[c].apply(ukh_to_yyyymmdd, args=(c,))
# Flatten all year columns into a single chronologically sorted list of holidays.
all_holiday_dates = df_ukh[year_cols].values.reshape(-1)
uk_holidays = sorted(all_holiday_dates)
uk_holidays

[numpy.datetime64('2020-01-02T00:00:00.000000000'),
 numpy.datetime64('2020-04-10T00:00:00.000000000'),
 numpy.datetime64('2020-04-13T00:00:00.000000000'),
 numpy.datetime64('2020-05-04T00:00:00.000000000'),
 numpy.datetime64('2020-05-25T00:00:00.000000000'),
 numpy.datetime64('2020-08-31T00:00:00.000000000'),
 numpy.datetime64('2020-11-30T00:00:00.000000000'),
 numpy.datetime64('2020-12-25T00:00:00.000000000'),
 numpy.datetime64('2020-12-28T00:00:00.000000000'),
 numpy.datetime64('2021-01-01T00:00:00.000000000'),
 numpy.datetime64('2021-04-02T00:00:00.000000000'),
 numpy.datetime64('2021-04-05T00:00:00.000000000'),
 numpy.datetime64('2021-05-03T00:00:00.000000000'),
 numpy.datetime64('2021-05-31T00:00:00.000000000'),
 numpy.datetime64('2021-08-30T00:00:00.000000000'),
 numpy.datetime64('2021-11-30T00:00:00.000000000'),
 numpy.datetime64('2021-12-27T00:00:00.000000000'),
 numpy.datetime64('2021-12-28T00:00:00.000000000'),
 numpy.datetime64('2022-01-03T00:00:00.000000000'),
 numpy.datet

In [82]:
# NOTE: this federal-holiday based bday_us is overwritten at the end of this cell,
# where bday_us is rebuilt from USTradingCalendar.
bday_us = pd.offsets.CustomBusinessDay(calendar=USFederalHolidayCalendar())
# Disabled alternative: take the UK holidays from pandas_market_calendars' ICE calendar.
# bday_uk = pd.offsets.CustomBusinessDay(calendar=pmc.exchange_calendar_ice.ICEExchangeCalendar().regular_holidays)
# Business-day offset that treats the dates in uk_holidays as non-business days.
bday_uk = pd.offsets.CustomBusinessDay(holidays=uk_holidays)

from pandas.tseries.holiday import AbstractHolidayCalendar, Holiday, nearest_workday, \
    USMartinLutherKingJr, USPresidentsDay, GoodFriday, USMemorialDay, \
    USLaborDay, USThanksgivingDay

def saturday_christmas_observance(d):
    """
    Observance rule: move a date that falls on a Monday to the following day.

    Written for the (currently disabled) 'SaturdayChristmas' rule on December 27:
    when December 27 is a Monday, Christmas Day fell on the preceding Saturday.

    :param d: datetime-like object exposing ``weekday()``
    :return: ``d + 1 day`` when ``d`` is a Monday (weekday 0), otherwise ``d`` unchanged
    """
    # weekday must be *called*; comparing the bound method itself to 0 is always
    # False, which made the original a no-op.
    if d.weekday() == 0:
        return d + datetime.timedelta(1)
    return d
    
class USTradingCalendar(AbstractHolidayCalendar):
    """
    Holiday calendar used to build the ``bday_us`` business-day offset.

    Combines pandas' predefined US holiday rules with three fixed-date holidays
    (New Year's Day, Independence Day, Christmas). The fixed-date holidays use
    the ``nearest_workday`` observance, so a weekend date is observed on the
    closest weekday.
    """
    rules = [
        Holiday('NewYearsDay', month=1, day=1, observance=nearest_workday),
        USMartinLutherKingJr,
        USPresidentsDay,
        GoodFriday,
        USMemorialDay,
        Holiday('USIndependenceDay', month=7, day=4, observance=nearest_workday),
        USLaborDay,
        USThanksgivingDay,
        Holiday('Christmas', month=12, day=25, observance=nearest_workday),
        # Disabled: extra observance for years in which Christmas falls on a Saturday.
#         Holiday('SaturdayChristmas', month=12, day=27, observance=saturday_christmas_observance)
    ]
# US business-day offset used by the expiry helpers; holidays come from USTradingCalendar.
bday_us = pd.offsets.CustomBusinessDay(calendar=USTradingCalendar())


In [83]:
# Sanity check: one US business day after Friday 2021-12-24.
datetime.datetime(2021,12,24)+bday_us

Timestamp('2021-12-27 00:00:00')

In [48]:

def get_nth_weekday(year,month,target_weekday,nth_occurrence):
    '''
    Return the date of the nth occurrence of a weekday within a month.

    Weekdays follow datetime's numbering: 0 = Monday ... 6 = Sunday.

    :param year: calendar year
    :param month: calendar month (1-12)
    :param target_weekday: weekday number to look for
    :param nth_occurrence: 1 for the first occurrence in the month, 2 for the second, ...
    :return: datetime.datetime at midnight of that day; the datetime constructor
        raises ValueError when the computed day does not exist in the month
    '''
    first_day_weekday = datetime.datetime(year,month,1).weekday()
    offset = target_weekday - first_day_weekday
    # When the target weekday precedes the weekday of the 1st, its first
    # occurrence only falls in the following week, hence one extra week.
    weeks_to_add = nth_occurrence - 1 if offset >= 0 else nth_occurrence
    day_of_month = offset + 1 + weeks_to_add * 7
    return datetime.datetime(year,month,day_of_month)




def get_ES_expiry(symbol):
    '''
    Third Friday of the contract month encoded in ``symbol``.

    :param symbol: two-character product code, then a month code, then a
        two-digit year (e.g. ``ESH21``)
    :return: datetime.datetime of the third Friday of that month
    '''
    contract_month = DICT_MONTH_CODE[symbol[2]]
    contract_year = 2000 + int(symbol[3:])
    friday = 4  # datetime weekday number for Friday
    return get_nth_weekday(contract_year, contract_month, friday, 3)

def get_E6_expiry(symbol):
    '''
    Seven US business days (``bday_us``) before the first day of the month
    that follows the contract month encoded in ``symbol``.

    :param symbol: two-character product code, then a month code, then a two-digit year
    :return: the first of the following month minus 7 ``bday_us`` offsets
    '''
    contract_month = DICT_MONTH_CODE[symbol[2]]
    contract_year = 2000 + int(symbol[3:])
    if contract_month == 12:
        # December contract: the following month is January of the next year
        first_of_next_month = datetime.datetime(contract_year + 1, 1, 1)
    else:
        first_of_next_month = datetime.datetime(contract_year, contract_month + 1, 1)
    return first_of_next_month - 7*bday_us

def get_CL_expiry(symbol):
    '''
    Seven US business days (``bday_us``) before the 26th calendar day of the
    month prior to the contract month encoded in ``symbol``.

    :param symbol: two-character product code, then a month code, then a two-digit year
    :return: the 26th of the prior month minus 7 ``bday_us`` offsets
    '''
    contract_month = DICT_MONTH_CODE[symbol[2]]
    contract_year = 2000 + int(symbol[3:])
    if contract_month == 1:
        # January contract: the prior month is December of the previous year
        prior_year, prior_month = contract_year - 1, 12
    else:
        prior_year, prior_month = contract_year, contract_month - 1
    return datetime.datetime(prior_year, prior_month, 26) - 7*bday_us

def get_NG_expiry(symbol):
    '''
    Four US business days (``bday_us``) before the first day of the contract
    month encoded in ``symbol``.

    :param symbol: two-character product code, then a month code, then a two-digit year
    :return: the first of the contract month minus 4 ``bday_us`` offsets
    '''
    contract_month = DICT_MONTH_CODE[symbol[2]]
    contract_year = 2000 + int(symbol[3:])
    first_of_contract_month = datetime.datetime(contract_year, contract_month, 1)
    return first_of_contract_month - 4*bday_us

def get_CB_expiry(symbol):
    '''
    Brent expiry, following the CME Brent specification (which matches ICE):

    Trading terminates the 4th last London business day of the month,
    2 months prior to the contract month, except for the February
    contract month which terminates the 5th last London business day
    of the month, 2 months prior to the contract month.

    The date is computed by stepping back that many ``bday_uk`` business days
    from the first day of the month *before* the contract month. In addition
    to the February (G) exception, this implementation steps back only 3 days
    for January (F) contracts.

    :param symbol: two-character product code, then a month code, then a two-digit year
    :return: the first of the prior month minus the applicable number of ``bday_uk`` offsets
    '''
    month_code = symbol[2]
    contract_month = DICT_MONTH_CODE[month_code]
    contract_year = 2000 + int(symbol[3:])
    if contract_month == 1:
        # January contract: the prior month is December of the previous year
        anchor_year, anchor_month = contract_year - 1, 12
    else:
        anchor_year, anchor_month = contract_year, contract_month - 1

    # Business days to step back; every month code not listed here uses 4.
    special_days = {'G': 5, 'F': 3}
    days_to_subtract = special_days.get(month_code, 4)
#     elif monthcode_yy == 'N22':
#         days_to_subtract = 7
    return datetime.datetime(anchor_year, anchor_month, 1) - days_to_subtract * bday_uk

# Dispatch table: two-character product code -> function that computes the
# expiry date from a full symbol (product code + month code + two-digit year).
DICT_PRODUCT = {
    'E6':get_E6_expiry,
    'ES':get_ES_expiry,
    'CL':get_CL_expiry,
    'NG':get_NG_expiry,
    'CB':get_CB_expiry,
}

    
def get_expiry(symbol):
    """
    Compute the theoretical expiry for ``symbol`` with the rule registered for
    its product in DICT_PRODUCT.

    :param symbol: product code (first two characters), then a month code, then a two-digit year
    :return: whatever the product-specific expiry function returns
    :raises KeyError: when the product code has no entry in DICT_PRODUCT
    """
    expiry_rule = DICT_PRODUCT[symbol[:2]]
    return expiry_rule(symbol)


def dt_from_yyyymmdd(yyyymmdd,hour=0,minute=0,timezone=TIMEZONE):
    """
    Build a timezone-aware datetime from a yyyymmdd value.

    :param yyyymmdd: date as an int or string of the form yyyymmdd
    :param hour: hour of day
    :param minute: minute of hour
    :param timezone: pytz timezone name
    :return: aware datetime.datetime localized to ``timezone``
    """
    digits = str(yyyymmdd)
    naive = datetime.datetime(int(digits[0:4]), int(digits[4:6]), int(digits[6:8]), hour, minute)
    # A pytz zone has to be attached with localize(). Passing it through the
    # datetime constructor's tzinfo= argument silently uses the zone's historical
    # local-mean-time offset (several minutes off) instead of the real offset.
    return pytz.timezone(timezone).localize(naive)

def yyyymmdd_from_dt(dt):
    """
    Encode the date part of ``dt`` as the integer yyyymmdd.

    :param dt: object exposing ``year``, ``month`` and ``day``
    :return: ``year*10000 + month*100 + day`` as an int
    """
    return int(dt.year)*100*100 + int(dt.month)*100 + int(dt.day)

def get_dte_pct(trade_yyyymmdd,expiry_yyyymmdd):
    """
    Days to expiry as a fraction of a 365-day year.

    :param trade_yyyymmdd: trade date as yyyymmdd
    :param expiry_yyyymmdd: expiry date as yyyymmdd
    :return: (calendar days from trade date to expiry date, plus one) / 365
    """
    # Difference the calendar dates, not the aware datetimes, so the day count
    # does not depend on the UTC offset (e.g. DST) in force on either date.
    trade_date = dt_from_yyyymmdd(trade_yyyymmdd).date()
    expiry_date = dt_from_yyyymmdd(expiry_yyyymmdd).date()
    return ((expiry_date - trade_date).days + 1)/365


In [49]:
# get options expirations from barchart using curl
# c = 'CB'
# ybeg = 20
# yend = 23
# for y in range(ybeg,yend+1):
#     !bash barchart_expirations.sh {c} {y}

In [50]:
# # get energy expirations from the cme group
# def create_energy_expiry():
#     cme_expiry_url="https://cmegroup-tools.quikstrike.net/CME/Export/Expirations.aspx?insid=36653504"
#     with requests.Session() as s:
#         download = s.get(cme_expiry_url)
#         decoded_content = download.content.decode('utf-8')
#     s = io.StringIO(decoded_content)
#     lines = s.readlines()

#     # cme_expiry_url=f"{pathlib.Path.home()}/downloads/Expirations.csv"
#     # lines = open(cme_expiry_url,'r').readlines()

#     lines2 = [l.strip().split(',') for l in lines]
#     lfix = lambda l: l[0:1] + [l[1] + ' ' + l[2]] + l[3:]
#     lines2 = [l if len(l)==15 else lfix(l) for l in lines2]
#     # df_current_expirations = pd.read_csv(cme_expiry_url)
#     df_cme_expirations = pd.DataFrame(lines2[1:],columns=lines2[0])

#     df_expiry = df_cme_expirations.copy()
#     df_expiry['symbol'] = df_expiry.Symbol
#     df_expiry['expiry'] = [datetime.datetime.strptime(str_date,'%m/%d/%Y %H:%M') for str_date in df_expiry.Expiration]
#     syms_search = df_expiry.Symbol.str.contains('[FGHJKMNQUVXZ][0-9]$',regex=True)
#     syms = df_expiry.loc[syms_search,'Symbol'].values
#     sym_ys = [int(s[-1]) for s in syms ]
#     expiries = df_expiry.loc[syms_search,'Expiration'].values
#     expiries_yys = [int(expiry[6:10])-2000 for expiry in expiries]
#     this_decade = int(str(int(datetime.datetime.now().year))[2])*10
#     symbol_decades = [this_decade + (10 if this_decade<expiries_yys[i] else 0) for i in range(len(syms))]
#     new_syms = [syms[i][:-1] + str(symbol_decades[i]+sym_ys[i]) for i in range(len(syms))]
#     df_expiry.loc[syms_search,'symbol'] = new_syms
#     df_expiry[df_expiry.Symbol.str.contains('^LO[A-Z][0-9]$',regex=True)].sort_values('expiry')

# len(df_expiry)


In [51]:
# df_expiry = create_energy_expiry()

In [52]:
# df_expiry.to_csv('cme_energy_expirations.csv',index=False)

In [36]:
# Rows whose underlying is a Natural Gas (NG) future with a two-digit year in the 20s-40s.
search = df_cme_expiry['underlying_symbol'].str.contains('^NG[FGHJKMNQUVXZ][2-4][0-9]')
expiry_columns = ['option_symbol','underlying_symbol','option_expiry','underlying_expiry']
df_cme_expiry.loc[search, expiry_columns]
                                                                         

Unnamed: 0,option_symbol,underlying_symbol,option_expiry,underlying_expiry
734,LN1N20,NGQ20,2020-07-02,2020-07-29
735,LN2N20,NGQ20,2020-07-10,2020-07-29
736,LN3N20,NGQ20,2020-07-17,2020-07-29
737,LN4N20,NGQ20,2020-07-24,2020-07-29
738,LNEQ20,NGQ20,2020-07-28,2020-07-29
...,...,...,...,...
928,LNEQ33,NGQ33,2033-07-26,1900-01-01
929,LNEU33,NGU33,2033-08-26,1900-01-01
930,LNEV33,NGV33,2033-09-27,1900-01-01
931,LNEX33,NGX33,2033-10-26,1900-01-01


In [94]:
# Compare the rule-based ("theo") expiry against the CME-published option expiry
# for the LNE options and show only the rows where they disagree.
lne_mask = df_cme_expiry['option_symbol'].str.contains('^LNE[FGHJKMNQUVXZ][2-4][0-9]')
# Take an explicit copy: columns are added below, and assigning onto a
# chained-indexing slice of df_cme_expiry triggers SettingWithCopyWarning.
df_lo = df_cme_expiry.loc[
    lne_mask,
    ['option_symbol','underlying_symbol','option_expiry','underlying_expiry']
].copy()
if df_lo.empty:
    print('no matches')
else:
    # The theoretical expiry is derived from the underlying futures symbol.
    df_lo['theo'] = [get_expiry(s) for s in df_lo['underlying_symbol']]
    df_lo['actual'] = df_lo['option_expiry']
    df_lo['theo_vs_actual'] = df_lo['actual'] == df_lo['theo']
    display(df_lo[~df_lo['theo_vs_actual']])


Unnamed: 0,option_symbol,underlying_symbol,option_expiry,underlying_expiry,theo,actual,theo_vs_actual
789,LNEF22,NGF22,2021-12-28,2021-12-29,2021-12-27,2021-12-28,False
861,LNEF28,NGF28,2027-12-28,2027-12-29,2027-12-27,2027-12-28,False
921,LNEF33,NGF33,2032-12-28,1900-01-01,2032-12-27,2032-12-28,False


In [8]:
# Load the per-year expiration files for one commodity and stack them.
c = "CL"
ybeg = 20
yend = 23
# Read every file first and concatenate once: DataFrame.append was removed in
# pandas 2.0. Each file keeps its own row index, as before.
yearly_frames = []
for y in range(ybeg,yend+1):
    yearly_frames.append(pd.read_csv(f'expiration_data/commod_expirations_{c}{y}.csv'))
df_exp = pd.concat(yearly_frames)
def mmddyy_to_yyyymmdd(mmddyy):
    """
    Convert an 'mm/dd/yy' string into the integer yyyymmdd (years are 2000 + yy).

    :param mmddyy: date text such as '03/17/20', or a NaN / 'nan' placeholder
    :return: yyyymmdd as an int, or the sentinel 99999999 for a NaN placeholder
    """
    if str(mmddyy).lower() == 'nan':
        return 99999999
    parts = mmddyy.split('/')
    year, month, day = 2000 + int(parts[2]), int(parts[0]), int(parts[1])
    return year*10000 + month*100 + day
def yyyymmdd_from_symbol(s):
    """
    Theoretical expiry of symbol ``s`` (via get_expiry) encoded as the integer yyyymmdd.

    :param s: contract symbol accepted by get_expiry
    :return: ``year*10000 + month*100 + day`` of the computed expiry, as an int
    """
    expiry = get_expiry(s)
    return int(expiry.year*10000 + expiry.month*100 + expiry.day)
# Normalise the published expiry to an integer yyyymmdd (99999999 marks a missing date)
# and compute the rule-based expiry for every symbol.
df_exp['expiry'] = df_exp['expiry'].apply(mmddyy_to_yyyymmdd)
df_exp['theo_expiry'] = df_exp['symbol'].apply(yyyymmdd_from_symbol)
# The rule is "good" only where a published expiry exists and equals the computed one.
has_expiry = df_exp['expiry'] != 99999999
df_exp['theo_is_good'] = has_expiry & (df_exp['expiry'] == df_exp['theo_expiry'])
# Drop the rows without a published expiry before displaying.
df_exp = df_exp[has_expiry]
df_exp

Unnamed: 0,symbol,expiry,theo_expiry,theo_is_good
3,CLJ20,20200317,20200317,True
4,CLK20,20200416,20200416,True
5,CLM20,20200514,20200514,True
6,CLN20,20200617,20200617,True
7,CLQ20,20200716,20200716,True
8,CLU20,20200817,20200817,True
9,CLV20,20200917,20200917,True
10,CLX20,20201015,20201015,True
11,CLZ20,20201117,20201117,True
0,CLF21,20201216,20201216,True
