## Research various skew related topics

In [1]:
import zipfile
import glob
import pandas as pd
import numpy as np

from argparse import ArgumentParser
from argparse import RawDescriptionHelpFormatter
import sys
import os
if  not './' in sys.path:
    sys.path.append('./')
if  not '../' in sys.path:
    sys.path.append('../')

from barchartacs import build_db
from barchartacs import db_info
import plotly.graph_objs as go
from plotly.offline import  init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.tools as tls
from plotly.graph_objs.layout import Font,Margin
from IPython import display

import datetime
import io
from tqdm import tqdm,tqdm_notebook
from barchartacs import pg_pandas as pg
import mibian
import py_vollib
import importlib
from py_vollib import black
from py_vollib.black import implied_volatility
import ipdb
import traceback
import pandas_datareader.data as pdr

# importlib.reload(build_db)

In [2]:
# import warnings
# warnings.filterwarnings("ignore")

### important global variables

In [3]:

DEBUG_IT=False
opttab = 'sec_schema.options_table'
futtab = 'sec_schema.underlying_table'


In [4]:
def plotly_plot(df_in,x_column,plot_title=None,
                y_left_label=None,y_right_label=None,
                bar_plot=False,figsize=(16,10),
                number_of_ticks_display=20,
                yaxis2_cols=None,
                x_value_labels=None):
    ya2c = [] if yaxis2_cols is None else yaxis2_cols
    ycols = [c for c in df_in.columns.values if c != x_column]
    # create tdvals, which will have x axis labels
    td = list(df_in[x_column]) 
    nt = len(df_in)-1 if number_of_ticks_display > len(df_in) else number_of_ticks_display
    spacing = len(td)//nt
    tdvals = td[::spacing]
    tdtext = tdvals
    if x_value_labels is not None:
        tdtext = [x_value_labels[i] for i in tdvals]
    
    # create data for graph
    data = []
    # iterate through all ycols to append to data that gets passed to go.Figure
    for ycol in ycols:
        if bar_plot:
            b = go.Bar(x=td,y=df_in[ycol],name=ycol,yaxis='y' if ycol not in ya2c else 'y2')
        else:
            b = go.Scatter(x=td,y=df_in[ycol],name=ycol,yaxis='y' if ycol not in ya2c else 'y2')
        data.append(b)

    # create a layout
    layout = go.Layout(
        title=plot_title,
        xaxis=dict(
            ticktext=tdtext,
            tickvals=tdvals,
            tickangle=45,
            type='category'),
        yaxis=dict(
            title='y main' if y_left_label is None else y_left_label
        ),
        yaxis2=dict(
            title='y alt' if y_right_label is None else y_right_label,
            overlaying='y',
            side='right'),
        margin=Margin(
            b=100
        )        
    )

    fig = go.Figure(data=data,layout=layout)
#     fig.update_layout(
#         title={
#             'text': plot_title,
#             'y':0.9,
#             'x':0.5,
#             'xanchor': 'center',
#             'yanchor': 'top'})
    return fig

def plotly_shaded_rectangles(beg_end_date_tuple_list,fig):
    ld_shapes = []
    for beg_end_date_tuple in beg_end_date_tuple_list:
        ld_beg = beg_end_date_tuple[0]
        ld_end = beg_end_date_tuple[1]
        ld_shape = dict(
            type="rect",
            # x-reference is assigned to the x-values
            xref="x",
            # y-reference is assigned to the plot paper [0,1]
            yref="paper",
            x0=ld_beg[i],
            y0=0,
            x1=ld_end[i],
            y1=1,
            fillcolor="LightSalmon",
            opacity=0.5,
            layer="below",
            line_width=0,
        )
        ld_shapes.append(ld_shape)

    fig.update_layout(shapes=ld_shapes)
    return fig

#### get all contracts in the options database

In [5]:
pga = db_info.get_db_info()
print(f"futtab max date: {pga.get_sql(f'select max(settle_date) from {futtab}')}")
print(f"opttab max date: {pga.get_sql(f'select max(settle_date) from {opttab}')}")


  sec_db



The psycopg2 wheel package will be renamed from release 2.8; in order to keep installing from binary please use "pip install psycopg2-binary" instead. For details see: <http://initd.org/psycopg/docs/install.html#binary-install-from-pypi>.



futtab max date:         max
0  20200306
opttab max date:         max
0  20200306


In [6]:
df_iv_final = pd.read_csv('./temp_folder/df_iv_final_ES.csv')
df_iv_skew = pd.read_csv('./temp_folder/df_iv_skew_ES.csv')

### graph skew

In [8]:
def create_skew_per_date_df(df):
    '''
    Find the first settle_date whose count of rows is equal to max count of rows.
    '''
    # get the first symbol (which should be the only symbol)
    contract = df.symbol.unique()[0]
    # get just that symbol's data
    df12 = df[df.symbol==contract]
    df_counts = df12[['settle_date','moneyness']].groupby('settle_date',as_index=False).count()
    max_count = df_counts.moneyness.max()
    first_max_count_settle_date = df_counts[df_counts.moneyness==max_count].iloc[0].settle_date
    
    df_ret = df12[df12.settle_date==first_max_count_settle_date][['moneyness']]
    all_settle_dates = sorted(df_counts.settle_date.unique())
    for settle_date in all_settle_dates:
        df_temp = df12[df12.settle_date==settle_date][['moneyness','vol_skew']]
        df_ret = df_ret.merge(df_temp,on='moneyness',how='outer')
        df_ret = df_ret.rename(columns={'vol_skew':str(settle_date)})
    df_ret = df_ret.sort_values('moneyness')
    df_ret.moneyness = df_ret.moneyness.round(4)
    return df_ret


In [9]:
def graph_skew(df_iv_final,do_plot=False):
    '''
    Graph skew for ONLY ONE symbol.
    If df_iv_final contains more than one symbol, we will only graph the first symbol in the DataFrames    
    '''
    # get the first symbol (which should be the only symbol)
    contract = df_iv_final.symbol.unique()[0]
    # get just that symbol's data and only days that have sufficient skew data
    dft = df_iv_final[df_iv_final.symbol==contract]
    dft_count = dft[['settle_date','symbol']].groupby('settle_date',as_index=False).count()
    valid_settle_dates = dft_count[dft_count.symbol>2].settle_date.unique()
    dft = dft[dft.settle_date.isin(valid_settle_dates)]
    dfp = create_skew_per_date_df(dft)
    
    settle_dates = sorted([c for c in dfp.columns.values if c != 'moneyness'])
    splits = list(np.arange(5,len(settle_dates),5))
    settle_date_groups = np.split(np.array(settle_dates),splits)
    ret_figs = []
    for sdg in settle_date_groups:
        sdg_sorted = [str(c) for c in sorted(sdg)]
        cols = ['moneyness']+list(sdg_sorted)
        dfp_sub = dfp[cols]
        t = f'{contract} {sdg[0]} - {sdg[-1]}' 
        f = plotly_plot(dfp_sub,x_column='moneyness',plot_title=t,y_left_label='vol skew')
        ret_figs.append(f)
        if do_plot:
            iplot(f)
    return ret_figs

#### Do the same plots as above, but using a grid

In [28]:
def graph_skew_subplots(df,rows=2,cols=2):
    fig_list = graph_skew(df)
    n = rows*cols   
    # using list comprehension 
    fig_groups = [fig_list[i*n:(i + 1)*n] for i in range((len(fig_list) + n - 1) // n )]  
    for fig_group in fig_groups:
        iplot(graph_skew_subplot_quad(fig_group,rows=rows,cols=cols))
    
def graph_skew_subplot_quad(fig_group,rows=2,cols=2):
    '''
    Use subplots to output the results of the method graph_skew above
    '''
    rfs = fig_group # rfs stands for return_figs
#     rows = 2#len(rfs)//2
    f1 = tls.make_subplots(rows=rows, cols=cols,  
        shared_yaxes=False, 
        subplot_titles=[rfs[i]['layout'].title for i in range(len(rfs))],
        horizontal_spacing=0.09,
        vertical_spacing=0.11,                       
        print_grid=False)

    pl_width=450*cols #900
    pl_height=400*rows #800 
    title = 'Skew plots<br>'

    f1['layout'].update(title=title,                                 
        font= Font(family="Open Sans, sans-serif"),
        showlegend=True,     
        hovermode='x',  
        autosize=True,       
        width=pl_width,       
        height=pl_height,
        plot_bgcolor='#EFECEA', 
        bargap=0.05,
        margin=Margin(
                      l=45,
                      r=15,
                      b=55,
                      t=50
        )
    )    

    for i in range(len(rfs)):
        x = int(i/2) + 1
        y = i % 2 + 1
        f = rfs[i]
        l = f.layout
        
        try:
            yaxis = f'yaxis{i+1}'
            xaxis = f'xaxis{i+1}'
            if i < 10:
                yaxis = yaxis.replace('1','') 
                xaxis = xaxis.replace('1','') 
            f1['layout'].update({xaxis:l.xaxis})
            f1['layout'].update({yaxis:l.yaxis})
            gname = f'{x,y}'#rfs[i]['layout'].title
            for d in f.data:
                data_y = f'y{i+1}'.replace('1','') 
                d['yaxis']=data_y
#                 d['legendgroup'] =  gname
#                 d['name'] = f"{gname} {d.name}"
                d['legendgroup'] =  l.title
                d['name'] = f"{d.name}"
                f1.append_trace(d,x,y)
        except Exception as e:
            print(f'graph_skew_subplots ERRORS: {str(e)}')
    return f1
# iplot(f1)


### Graph skew changes historically, per day

In [15]:
SYMBOL_TO_RESEARCH = 'ES'
all_contracts = pga.get_sql(f"select symbol from {opttab} where substring(symbol,1,2)='{SYMBOL_TO_RESEARCH}'").symbol.unique()
all_contracts 

array(['ESU11', 'ESF11', 'ESG11', 'ESH11', 'ESJ11', 'ESM11', 'ESZ11',
       'ESK11', 'ESN11', 'ESH12', 'ESQ11', 'ESV11', 'ESM12', 'ESX11',
       'ESF12', 'ESU12', 'ESG12', 'ESJ12', 'ESZ12', 'ESK12', 'ESN12',
       'ESH13', 'ESQ12', 'ESV12', 'ESM13', 'ESX12', 'ESF13', 'ESU13',
       'ESG13', 'ESJ13', 'ESZ13', 'ESK13', 'ESN13', 'ESH14', 'ESQ13',
       'ESV13', 'ESM14', 'ESX13', 'ESF14', 'ESU14', 'ESG14', 'ESJ14',
       'ESZ14', 'ESK14', 'ESN14', 'ESH15', 'ESQ14', 'ESV14', 'ESM15',
       'ESX14', 'ESF15', 'ESU15', 'ESG15', 'ESJ15', 'ESZ15', 'ESK15',
       'ESN15', 'ESH16', 'ESQ15', 'ESV15', 'ESM16', 'ESX15', 'ESF16',
       'ESU16', 'ESG16', 'ESJ16', 'ESZ16', 'ESK16', 'ESH17', 'ESM17',
       'ESU17', 'ESZ17', 'ESH18', 'ESM18', 'ESU18', 'ESZ18', 'ESH19',
       'ESM19', 'ESU19', 'ESZ19', 'ESH20', 'ESM20', 'ESU20', 'ESZ20'],
      dtype=object)

In [29]:
grid_plot=True
clist = [c for c in all_contracts if (c[:2]==f'{SYMBOL_TO_RESEARCH}') & (c[-2:]=='19')]
volcols = ['symbol','settle_date','moneyness','vol_skew']
dfivf = df_iv_final[volcols].groupby(['symbol','settle_date','moneyness'],as_index=False).mean()

for c in clist:
    dft = dfivf[dfivf.symbol==c]
    if len(dft)<=0:
        print(f'no data for symbol {c}')
        continue
    if grid_plot:
        graph_skew_subplots(dfivf[dfivf.symbol==c],rows=1,cols=2)
    else:
        rls = graph_skew(dfivf[dfivf.symbol==c],do_plot=True)
        

### Use the implied vol skew dataframes to graph changes in vol skew vs price

In [33]:
y = 19
c = SYMBOL_TO_RESEARCH
df_year = df_iv_skew[(df_iv_skew.symbol.str.slice(0,2)==c) & (df_iv_skew.symbol.str.slice(-2,)==str(y))]
df_year['diff'] = df_year['-0.2'] - df_year['0.2']
df_year = df_year[['settle_date','diff']]
df_year = df_year.merge(df_etf[['settle_date','close']],on='settle_date',how='inner')
iplot(plotly_plot(df_year,x_column='settle_date',y_left_label='put skew',yaxis2_cols=['close'],y_right_label=f'{etf} close'))





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [30]:
df_nan = df_iv_skew[(df_iv_skew['0.35'].isna()) & (df_iv_skew.settle_date>=20181020)]
print(f'len: {len(df_iv_skew)} nans:{len(df_nan)}')
df_nan.head(20)

len: 2311 nans:229


Unnamed: 0,-0.3,-0.25,-0.2,-0.15,-0.1,-0.05,0.0,0.05,0.1,0.15,0.2,0.25,0.3,0.35,symbol,settle_date
479,0.1646,0.1382,0.1114,0.0842,0.0565,0.0286,0.0,-0.0249,-0.036,-0.0292,-0.0175,-0.0044,0.0093,,ESH19,20181107
480,0.1694,0.1414,0.1134,0.0859,0.058,0.0293,0.0,-0.027,-0.0366,-0.0284,-0.0158,-0.0025,0.0128,,ESH19,20181108
510,0.1288,0.111,0.0921,0.0732,0.0522,0.028,0.0,-0.0282,-0.0461,-0.0446,-0.0354,-0.0272,-0.0187,,ESH20,20190923
511,0.128,0.1093,0.0911,0.0722,0.0518,0.0277,0.0,-0.0294,-0.05,-0.0504,-0.0425,-0.0325,-0.0254,,ESH20,20190924
512,0.129,0.1108,0.0921,0.0731,0.0524,0.0283,0.0,-0.0293,-0.0483,-0.0477,-0.0386,-0.0296,-0.0226,,ESH20,20190925
513,0.1301,0.111,0.0927,0.0735,0.0524,0.0286,0.0,-0.0295,-0.0486,-0.048,-0.0389,-0.0301,-0.0224,,ESH20,20190926
514,0.1285,0.1097,0.091,0.0726,0.052,0.0278,0.0,-0.03,-0.0507,-0.0514,-0.0426,-0.033,-0.0255,,ESH20,20190927
515,0.1296,0.1108,0.0927,0.0738,0.0531,0.0288,0.0,-0.0305,-0.0503,-0.0485,-0.0387,-0.028,-0.0186,,ESH20,20190930
519,0.1273,0.1088,0.09,0.0719,0.0521,0.0285,0.0,-0.0316,-0.053,-0.0525,-0.0427,-0.0329,-0.022,,ESH20,20191004
524,0.1318,0.112,0.0934,0.0748,0.0535,0.0297,0.0,-0.0306,-0.0466,-0.0433,-0.0323,-0.0221,-0.0127,,ESH20,20191011


In [31]:
df_atm_vol = df_iv_final[df_iv_final.settle_date>=20181020][['settle_date','atm_iv']].drop_duplicates()
df_atm_vol.index = range(len(df_atm_vol))
df_atm_vol[df_atm_vol.settle_date==20181025]


Unnamed: 0,settle_date,atm_iv
3,20181025,0.182198


In [33]:
df_iv_skew.tail()

Unnamed: 0,-0.3,-0.25,-0.2,-0.15,-0.1,-0.05,0.0,0.05,0.1,0.15,0.2,0.25,0.3,0.35,symbol,settle_date
2306,0.1721,0.1428,0.1179,0.0931,0.0654,0.0351,0.0,-0.0304,-0.0357,-0.0266,-0.0178,-0.0059,0.0162,,ESZ19,20190916
2307,0.1699,0.1416,0.117,0.0912,0.0656,0.0355,0.0,-0.0299,-0.0343,-0.0254,-0.0117,-0.0008,0.0162,,ESZ19,20190917
2308,0.1707,0.1404,0.1139,0.0905,0.0647,0.0349,0.0,-0.0299,-0.0339,-0.0245,-0.0142,-0.0025,0.0198,,ESZ19,20190918
2309,0.1695,0.1395,0.1135,0.09,0.0638,0.0345,0.0,-0.0287,-0.0332,-0.0234,-0.0132,-0.0012,0.021,,ESZ19,20190919
2310,0.1679,0.1379,0.1136,0.0896,0.0641,0.0352,0.0,-0.0334,-0.0413,-0.0326,-0.0194,-0.005,0.0131,,ESZ19,20190920


In [35]:
y = 19
c = SYMBOL_TO_RESEARCH
df_fut = pga.get_sql(f"select symbol,settle_date,close from {futtab} where symbol like '{c}%%{y}'")
df_year = df_iv_skew[(df_iv_skew.symbol.str.slice(0,2)==c) & (df_iv_skew.symbol.str.slice(-2,)==str(y))]
df_year = df_year[['settle_date','-0.2','0.2']]
df_year = df_year.merge(df_fut[['settle_date','close']],on='settle_date',how='inner')
iplot(plotly_plot(df_year,x_column='settle_date',yaxis2_cols=['close'],y_right_label='uso close'))

In [48]:
def get_postgres_data(contract,pga):
    osql = f"select * from {opttab} where symbol='{contract}';"
    dfo = pga.get_sql(osql)
    usql = f"select * from {futtab} where symbol='{contract}';"
    dfu = pga.get_sql(usql)
    # Merge options and futures data
    df = dfo.merge(dfu,how='inner',on=['symbol','settle_date'])
    # Get options expiration dates
    df_expiry_dates = dfo[['symbol','settle_date']].groupby('symbol',as_index=False).max()
    return df,df_expiry_dates

In [53]:
USE_PYVOL = True
def lam_pyvol(r):
    try:
        return implied_volatility.implied_volatility(r.close_x,r.close_y,r.strike,.02,r.dte/365, r.pc.lower())
    except:
        return -1
# lam_pyvol = lambda r:implied_volatility.implied_volatility(r.close_x,r.close_y,r.strike,.02,r.dte/365, r.pc.lower())
lam_mibian = lambda r:mibian.BS([r.close_y,r.strike,2,r.dte], callPrice=r.close_x).impliedVolatility

def get_implieds(df,df_expiry_dates,contract):
    df2 = df[['symbol','contract_num','pc','settle_date','strike','close_x','close_y']]
    df2 = df2[(((df2.pc=='C' )& (df2.strike>=df2.close_y)) | ((df2.pc=='P' ) & (df2.strike<df2.close_y)))  & (df2.symbol.str.contains(contract))]
    df2 = df2[df2.contract_num==2]
    phigh = df2.close_y.max()
    plow = df2.close_y.min()
    high_strike = round(phigh * 1.3)
    low_strike = round(plow * .7)
    df2 = df2[(df2.strike>=low_strike) & (df2.strike<=high_strike)]

    df9 = df2[df2.symbol==contract]
    df9 = df9.merge(df_expiry_dates.rename(columns={'settle_date':'expiry'}),on='symbol',how='inner')
    df9['syear'] = df9.settle_date.astype(str).str.slice(0,4).astype(int)
    df9['smon'] = df9.settle_date.astype(str).str.slice(4,6).astype(int)
    df9['sday'] = df9.settle_date.astype(str).str.slice(6,8).astype(int)
    df9['eyear'] = df9.expiry.astype(str).str.slice(0,4).astype(int)
    df9['emon'] = df9.expiry.astype(str).str.slice(4,6).astype(int)
    df9['eday'] = df9.expiry.astype(str).str.slice(6,8).astype(int)
    df9['sdatetime'] = df9.apply(lambda r:datetime.datetime(r.syear,r.smon,r.sday),axis=1)
    df9['edatetime'] = df9.apply(lambda r:datetime.datetime(r.eyear,r.emon,r.eday),axis=1)
    df9['dte'] = df9.edatetime - df9.sdatetime
    df9.dte = df9.dte.dt.days
    df9 = df9[['symbol','settle_date','pc','contract_num','strike','close_x','close_y','dte']]
    df10 = df9.iloc[:len(df9)].copy()
    df10.index = list(range(len(df10)))
    if USE_PYVOL:
        df10['iv'] = df10.apply(lam_pyvol,axis=1)
    else:
        n = 100
        for i in tqdm_notebook(np.arange(0,len(df10)-n,n)):
                df10.loc[i:i+n,'iv'] = df10.loc[i:i+n].apply(lam_mibian,axis=1)
        print(f'doing remaining {datetime.datetime.now()}')
        i = df10[df10.iv.isna()].index[0]
        df10.loc[i:,'iv'] = df10.loc[i:].apply(lam_mbian,axis=1)
        print(f'done with remaining {datetime.datetime.now()}')
    return df10



In [54]:
def get_even_moneyness_strikes(df10):
    # define amounts around the money which will help create strikes to add
    moneyness = np.arange(.7,1.4,.05).round(6)
    # define columns on which to execute groupby
#     gb_cols = ['symbol','settle_date','pc','contract_num','dte','close_y']
    gb_cols = ['symbol','settle_date','contract_num','dte','close_y']
    # define function used in groupby.apply to create strikes and iv's at those strikes
    #   where the strikes are an even amount from the money 
    #   (like .7, .8, ... 1, 1.1, 1.2, etc)
    def _add_even_moneyness_strikes(df):
        # get underlying from first row (the groupby makes them all the same)
        r = df.iloc[0]
        underlying = r.close_y
        # create new rows to append to df, using only the gb_cols
        df_ret1 = df.iloc[:len(moneyness)][gb_cols].copy()
        # add nan iv's !!!! MUST BE np.nan - NOT None
        df_ret1['iv'] = np.nan
        # add new strikes
        df_ret1['strike'] = moneyness * underlying
        # append the new strikes
        dfa = df.append(df_ret1,ignore_index=True,sort=True).copy()
        df_ret2 = dfa.sort_values(['symbol','settle_date','pc','strike'])
        df_ret2 = df_ret2.drop_duplicates(subset='strike')
        # set the index to the strike so that interpolate works
        df_ret2.index = df_ret2.strike
        # create interpolated iv's
        df_ret2['iv'] = df_ret2.iv.interpolate(method='polynomial', order=2)
        # reset the index
        df_ret2.index = list(range(len(df_ret2)))
        return df_ret2

    # start here
    df11 = df10.groupby(gb_cols).apply(_add_even_moneyness_strikes).copy()
    df11.index = list(range(len(df11)))
    df11['moneyness'] = df11.strike / df11.close_y
    df11.moneyness = df11.moneyness.round(4)

    df12 = df11[(df11.moneyness.isin(moneyness)) & (~df11.iv.isna())].copy()
    df12.moneyness  = df12.moneyness - 1
    df12.index = list(range(len(df12)))
    df12_atm = df12[df12.moneyness==0][['symbol','settle_date','pc','iv']]
    df12_atm = df12_atm.rename(columns={'iv':'atm_iv'})
    
    df12_atm = df12_atm.drop_duplicates()
    df12 = df12.merge(df12_atm,on=['symbol','settle_date','pc'],how='inner')
    df12.moneyness = df12.moneyness.round(4)
    df12['vol_skew'] = (df12.iv - df12.atm_iv).round(4)
    return df12



## END