In [None]:
import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): this also hides pandas deprecation / chained-assignment warnings
# raised by later cells - consider narrowing the filter.
warnings.filterwarnings("ignore")

## Research various skew related topics
* **This requires plotly 4.0 or greater** *

### Analyse options volatility in futures contracts, creating graphs that show vertical skew historically over time and skew vs. futures prices over time


In [None]:
import zipfile
import glob
import pandas as pd
import numpy as np

from argparse import ArgumentParser
from argparse import RawDescriptionHelpFormatter
import sys
import os
if  not './' in sys.path:
    sys.path.append('./')
if  not '../' in sys.path:
    sys.path.append('../')

from barchartacs import build_db
from barchartacs import db_info
import cme_expirations as cmeexp
import plotly.graph_objs as go
from plotly.offline import  init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.tools as tls
from plotly.subplots import make_subplots
from plotly.graph_objs.layout import Font,Margin
from IPython import display

import datetime
import io
from tqdm import tqdm,tqdm_notebook
from barchartacs import pg_pandas as pg
import mibian
import py_vollib
import importlib
from py_vollib import black
from py_vollib.black import implied_volatility
import ipdb
import traceback
import pandas_datareader.data as pdr


### important global variables

In [None]:
#bash barchartacs_step_03_specify_months.sh 'sep' 2020 2020 True local

In [None]:

# Debug switch; not referenced by any cell visible in this notebook.
DEBUG_IT=False
# Fully qualified table names interpolated into the SQL strings below.
opttab = 'sec_schema.options_table'
futtab = 'sec_schema.underlying_table'
# Database helper; every query in this notebook goes through pga.get_sql(sql).
pga = db_info.get_db_info()


In [None]:
# Sanity check: pull the CLF21 rows from the underlying (futures) table since 2020-01-01 ...
sql = f"""
select * from {futtab} where symbol='CLF21' and settle_date>='20200101'
"""
df_clf21 = pga.get_sql(sql)
# ... and display the rows settled between 2020-08-01 and 2020-10-01 (inclusive).
df_clf21[(df_clf21.settle_date>=20200801) & (df_clf21.settle_date<=20201001)]

In [None]:
# Sanity check: same date window for the CLF21 50.0-strike put in the options table.
# NOTE: this rebinds df_clf21, replacing the futures rows loaded by the previous cell.
sql = f"""
select * from {opttab} where symbol='CLF21' and settle_date>='20200101' 
and strike = 50.0 and pc='P'
"""
df_clf21 = pga.get_sql(sql)
df_clf21[(df_clf21.settle_date>=20200801) & (df_clf21.settle_date<=20201001)]

## Define methods to graph skew, and create subplots with separate legends.  

#### This only works with plotly 4.0 and above 

In [None]:
def dt_to_yyyymmdd(datetime_value):
    '''
    Encode a date-like object as the integer yyyymmdd.

    :param datetime_value: object exposing ``year``, ``month`` and ``day``,
        e.g. datetime.datetime(2020,11,16)
    :return: int, e.g. 20201116
    '''
    year, month, day = (
        int(part)
        for part in (datetime_value.year, datetime_value.month, datetime_value.day)
    )
    return year * 10000 + month * 100 + day

def yyyymmdd_to_dt(yyyymmdd):
    '''
    Decode a yyyymmdd value (int or str) into a timezone-naive datetime.datetime.

    :param yyyymmdd: e.g. 20201116 or '20201116'
    :return: datetime.datetime at midnight, e.g. datetime.datetime(2020,11,16)
    '''
    text = str(yyyymmdd)
    # fixed-width slices: 4 digits of year, 2 of month, 2 of day
    return datetime.datetime(int(text[0:4]), int(text[4:6]), int(text[6:8]))

def yyyymmdd_diff(yyyymmdd_low,yyyymmdd_high):
    '''
    Number of calendar days from yyyymmdd_low to yyyymmdd_high.

    :param yyyymmdd_low: earlier date as yyyymmdd (int or str)
    :param yyyymmdd_high: later date as yyyymmdd (int or str)
    :return: int day count (negative when yyyymmdd_high precedes yyyymmdd_low)
    '''
    start = yyyymmdd_to_dt(yyyymmdd_low)
    end = yyyymmdd_to_dt(yyyymmdd_high)
    elapsed = end - start
    return elapsed.days

def sub_days_from_yyyymmdd(yyyymmdd,days):
    '''
    Move a yyyymmdd date back by a number of calendar days.

    :param yyyymmdd: starting date as yyyymmdd (int or str)
    :param days: number of days to subtract (passed to datetime.timedelta)
    :return: the shifted date as an integer yyyymmdd
    '''
    shifted = yyyymmdd_to_dt(yyyymmdd) - datetime.timedelta(days)
    return dt_to_yyyymmdd(shifted)

def yyyymmdd_dayofweek(yyyymmdd):
    '''
    Day of the week of a yyyymmdd date.

    :param yyyymmdd: date as yyyymmdd (int or str)
    :return: int from datetime.weekday() (Monday is 0, Sunday is 6)
    '''
    as_datetime = yyyymmdd_to_dt(yyyymmdd)
    return as_datetime.weekday()

In [None]:
def figure_crosshairs(fig):
    '''
    Turn on crosshair-style spike lines for both axes of a figure.

    :param fig: figure whose ``fig['layout']`` exposes ``xaxis`` and ``yaxis``
        (as built by plotly_plot); it is modified in place
    :return: the same fig object
    '''
    layout = fig['layout']
    layout.hovermode = 'x'
    # identical spike settings on the y axis and then the x axis
    for axis in (layout.yaxis, layout.xaxis):
        axis.showspikes = True
        axis.spikemode = "toaxis+across"
        axis.spikedash = "solid"
        axis.spikethickness = 1
    layout.spikedistance = 1000
    return fig


def plotly_plot(df_in,x_column,plot_title=None,
                y_left_label=None,y_right_label=None,
                bar_plot=False,width=800,height=400,
                number_of_ticks_display=20,
                yaxis2_cols=None,
                x_value_labels=None,
                modebar_orientation='v',modebar_color='grey',
                legend_x=None,legend_y=None,
                title_y_pos = 0.9,
                title_x_pos = 0.5):
    '''
    Build a plotly figure with one trace per non-x column of df_in.

    :param df_in: DataFrame holding the x column plus one column per series
    :param x_column: name of the column used for the (categorical) x axis
    :param plot_title: figure title
    :param y_left_label: left y-axis title (default 'y main')
    :param y_right_label: right y-axis title (default 'y alt')
    :param bar_plot: draw go.Bar traces instead of go.Scatter
    :param width: currently unused (the layout is autosized)
    :param height: currently unused (the layout is autosized)
    :param number_of_ticks_display: approximate number of x tick labels to show
    :param yaxis2_cols: column names plotted against the right-hand y axis
    :param x_value_labels: optional mapping/sequence indexed by x value that
        supplies the tick text
    :param modebar_orientation: plotly modebar orientation
    :param modebar_color: plotly modebar background colour
    :param legend_x: legend x position; applied only when legend_y is also given
    :param legend_y: legend y position; applied only when legend_x is also given
    :param title_y_pos: title y position
    :param title_x_pos: title x position
    :return: go.Figure with crosshairs enabled
    '''

    ya2c = [] if yaxis2_cols is None else yaxis2_cols
    ycols = [c for c in df_in.columns.values if c != x_column]
    # create tdvals, which will have x axis labels
    td = list(df_in[x_column])
    nt = len(df_in)-1 if number_of_ticks_display > len(df_in) else number_of_ticks_display
    # With 0 or 1 rows (or a non-positive tick count) nt is <= 0: fall back to
    # labelling every point instead of dividing by zero / slicing with step 0.
    spacing = max(1, len(td)//nt) if nt > 0 else 1
    tdvals = td[::spacing]
    tdtext = tdvals
    if x_value_labels is not None:
        tdtext = [x_value_labels[i] for i in tdvals]

    # one trace per y column, bound to the left ('y') or right ('y2') axis
    trace_type = go.Bar if bar_plot else go.Scatter
    data = [
        trace_type(x=td,y=df_in[ycol],name=ycol,yaxis='y' if ycol not in ya2c else 'y2')
        for ycol in ycols
    ]

    # create a layout
    layout = go.Layout(
        title=plot_title,
        xaxis=dict(
            ticktext=tdtext,
            tickvals=tdvals,
            tickangle=45,
            type='category'),
        yaxis=dict(
            title='y main' if y_left_label is None else y_left_label
        ),
        yaxis2=dict(
            title='y alt' if y_right_label is None else y_right_label,
            overlaying='y',
            side='right'),
        autosize=True,
        margin=Margin(
            b=100
        ),
        modebar={'orientation': modebar_orientation,'bgcolor':modebar_color}
    )

    fig = go.Figure(data=data,layout=layout)
    # position the title
    fig.update_layout(
        title={
            'text': plot_title,
            'y':title_y_pos,
            'x':title_x_pos,
            'xanchor': 'center',
            'yanchor': 'top'})
    if (legend_x is not None) and (legend_y is not None):
        fig.update_layout(legend=dict(x=legend_x, y=legend_y))
    fig = figure_crosshairs(fig)
    return fig


def plotly_shaded_rectangles(beg_end_date_tuple_list,fig):
    '''
    Add shaded rectangles that highlight x-ranges of an x/y graph.

    :param beg_end_date_tuple_list: iterable of (begin, end) pairs expressed in
        the figure's x-axis values
    :param fig: figure exposing ``update_layout``; its ``shapes`` are replaced
    :return: the same fig object
    '''
    ld_shapes = []
    for beg_end_date_tuple in beg_end_date_tuple_list:
        # each tuple already holds the scalar begin/end x values for one rectangle
        ld_beg = beg_end_date_tuple[0]
        ld_end = beg_end_date_tuple[1]
        ld_shape = dict(
            type="rect",
            # x-reference is assigned to the x-values
            xref="x",
            # y-reference is assigned to the plot paper [0,1]
            yref="paper",
            x0=ld_beg,
            y0=0,
            x1=ld_end,
            y1=1,
            fillcolor="LightSalmon",
            opacity=0.5,
            layer="below",
            line_width=0,
        )
        ld_shapes.append(ld_shape)

    fig.update_layout(shapes=ld_shapes)
    return fig

In [None]:
# Demo of plotly_plot: three synthetic series sharing one x column.
xvals = list(range(1,101))
y1vals = np.arange(101,201,1)
# descending float range; NOTE(review): must have the same length as xvals for the DataFrame below
y2vals = np.arange(11,1,-.1)
y3vals = y2vals * .5
             
df = pd.DataFrame({'my_x_vals':xvals, 
                    'yvals_1':y1vals,
                    'yvals_2':y2vals,
                    'yvals_3':y3vals
                  })
# yvals_2 / yvals_3 go on the right-hand axis, yvals_1 stays on the left.
fig = plotly_plot(df_in=df,x_column='my_x_vals',
                    plot_title = "example graph",
                    y_left_label='main y vals',
                    y_right_label='alt y vals',
                    yaxis2_cols = ['yvals_2','yvals_3'],
                    number_of_ticks_display=25)
iplot(fig)



In [None]:
def create_skew_per_date_df(df):
    '''
    Pivot one contract's skew rows into a wide frame.

    Only the first symbol found in df is used. The result has a 'moneyness'
    column (rounded to 4 places, sorted ascending) plus one column per
    settle_date, named str(settle_date), holding that day's vol_skew.

    :param df: DataFrame with columns symbol, settle_date, moneyness, vol_skew
    :return: wide DataFrame as described above
    '''
    # get the first symbol (which should be the only symbol)
    symbol = df.symbol.unique()[0]
    rows = df[df.symbol == symbol]

    # seed the moneyness column from the first settle_date that has the most rows
    per_date = rows[['settle_date', 'moneyness']].groupby('settle_date', as_index=False).count()
    fullest = per_date[per_date.moneyness == per_date.moneyness.max()]
    seed_date = fullest.iloc[0].settle_date
    wide = rows[rows.settle_date == seed_date][['moneyness']]

    # outer-merge each day's skew in, so moneyness values missing from the seed day are kept
    for date in sorted(per_date.settle_date.unique()):
        one_day = rows[rows.settle_date == date][['moneyness', 'vol_skew']]
        wide = wide.merge(one_day, on='moneyness', how='outer')
        wide = wide.rename(columns={'vol_skew': str(date)})

    wide = wide.sort_values('moneyness')
    wide.moneyness = wide.moneyness.round(4)
    return wide


In [None]:
def graph_skew(df_iv_final,do_plot=False):
    '''
    Build skew-curve figures (vol_skew vs moneyness) for ONE symbol.

    Only the first symbol in df_iv_final is graphed. Settle dates with 2 or
    fewer rows are dropped, and the remaining dates are plotted five per figure.

    :param df_iv_final: DataFrame with symbol, settle_date, moneyness, vol_skew
    :param do_plot: when True, also display each figure with iplot
    :return: list of figures, one per group of up to five settle dates
    '''
    # get the first symbol (which should be the only symbol)
    contract = df_iv_final.symbol.unique()[0]
    rows = df_iv_final[df_iv_final.symbol == contract]

    # keep only the days that have sufficient skew data
    per_date = rows[['settle_date', 'symbol']].groupby('settle_date', as_index=False).count()
    keep_dates = per_date[per_date.symbol > 2].settle_date.unique()
    rows = rows[rows.settle_date.isin(keep_dates)]
    wide = create_skew_per_date_df(rows)

    # split the per-date columns into consecutive groups of five
    date_cols = sorted(c for c in wide.columns.values if c != 'moneyness')
    cut_points = list(np.arange(5, len(date_cols), 5))
    figures = []
    for group in np.split(np.array(date_cols), cut_points):
        ordered = [str(c) for c in sorted(group)]
        frame = wide[['moneyness'] + ordered]
        title = f'{contract} {group[0]} - {group[-1]}' 
        figure = plotly_plot(frame, x_column='moneyness', plot_title=title, y_left_label='vol skew')
        figures.append(figure)
        if do_plot:
            iplot(figure)
    return figures

#### Do the same plots as above, but using a grid

In [None]:

def graph_skew_subplot_quad(fig_group,rows=1,cols=2):
    '''
    Lay out the figures produced by graph_skew on a rows x cols subplot grid.

    Figures are placed row by row. Each source figure's traces share one legend
    group, and its x/y axis settings are copied onto the matching subplot.

    :param fig_group: list of figures (at most rows*cols of them)
    :param rows: number of grid rows
    :param cols: number of grid columns
    :return: the combined subplot figure
    '''
    f1 = make_subplots(rows=rows, cols=cols,  
        shared_yaxes=False,
        shared_xaxes=False,                       
        subplot_titles=[fig_group[i]['layout'].title.text for i in range(len(fig_group))],
        horizontal_spacing=0.05,
        vertical_spacing=0.11,                       
        print_grid=False)

    pl_width=450*cols 
    pl_height=400*rows
    title = 'Skew plots<br>'

    f1.update_layout(title=title,                                 
        font= Font(family="Open Sans, sans-serif"),
        showlegend=True,     
        hovermode='x',  
        autosize=True,       
        width=pl_width,       
        height=pl_height,
        plot_bgcolor='#EFECEA', 
        bargap=0.05,
        margin=Margin(
                      l=5,
                      r=5,
                      b=55,
                      t=50
        )
    )    

    for i, f in enumerate(fig_group):
        # 1-based grid position, filled row by row using the real column count
        x = i // cols + 1
        y = i % cols + 1
        l = f.layout
        if y > 1:
            # only the first column keeps its y-axis title
            l.yaxis.title=''
        try:
            gname = f'{x,y}'
            for d in f.data:
                d['legendgroup'] =  gname
                d['name'] = f"{d.name}"
                # add_trace binds the trace to the subplot's own axes via row/col
                f1.add_trace(d,x,y)
            # axis settings are per subplot, so apply them once per figure
            f1.update_xaxes(patch=l.xaxis,row=x,col=y)
            f1.update_yaxes(patch=l.yaxis,row=x,col=y)
        except Exception as e:
            # best effort: report the failing subplot and carry on with the rest
            print(f'graph_skew_subplots ERRORS: {str(e)}')
    return f1


def graph_skew_subplots(df,rows=2,cols=2):
    '''
    Display the graph_skew figures for df in grids of rows x cols subplots.

    :param df: DataFrame accepted by graph_skew
    :param rows: grid rows per displayed figure
    :param cols: grid columns per displayed figure
    '''
    figures = graph_skew(df)
    per_grid = rows*cols
    # ceiling division: the last grid may be only partially filled
    grid_count = (len(figures) + per_grid - 1) // per_grid
    grids = []
    for k in range(grid_count):
        grids.append(figures[k*per_grid:(k + 1)*per_grid])
    for grid in grids:
        iplot(graph_skew_subplot_quad(grid,rows=rows,cols=cols))


### Graph skew changes historically, per day

In [None]:
# Two-letter product prefix used to pick the CSV files and filter contract symbols.
SYMBOL_TO_RESEARCH = 'CL'
# Per-product CSV extracts read from ./temp_folder (not written by any cell visible in this notebook).
df_iv_final = pd.read_csv(f'./temp_folder/df_iv_final_{SYMBOL_TO_RESEARCH}.csv')
df_iv_skew = pd.read_csv(f'./temp_folder/df_iv_skew_{SYMBOL_TO_RESEARCH}.csv')
# all_contracts = pga.get_sql(f"select symbol from {opttab} where substring(symbol,1,2)='{SYMBOL_TO_RESEARCH}'").symbol.unique()
# Distinct contract symbols in the CSV whose first two characters match the product.
all_contracts = df_iv_final[df_iv_final.symbol.str.slice(0,2)==SYMBOL_TO_RESEARCH].symbol.unique()
all_contracts 

In [None]:
# Plot daily skew curves for every contract of the product whose symbol ends in '21'.
grid_plot=True
volcols = ['symbol','settle_date','moneyness','vol_skew']
# one mean vol_skew per (symbol, settle_date, moneyness)
dfivf = df_iv_final[volcols].groupby(['symbol','settle_date','moneyness'],as_index=False).mean()
clist = [
    c for c in all_contracts
    if c[:2]==f'{SYMBOL_TO_RESEARCH}' and c[-2:]=='21'
]

for c in clist:
    dft = dfivf[dfivf.symbol==c]
    if len(dft)<=0:
        print(f'no data for symbol {c}')
        continue
    if not grid_plot:
        # one figure per group of settle dates
        rls = graph_skew(dft,do_plot=True)
    else:
        # same figures, two per row
        graph_skew_subplots(dft,rows=1,cols=2)
        

### Use the implied vol skew dataframes to graph changes in vol skew vs price

In [None]:
# For every contract expiring in year >= y, plot the (-20% minus +20%) skew against the futures close.
y = 20
syms = [s for s in all_contracts if  (int(s[-2:]) >= y)]

for sym in syms:
    df_fut = pga.get_sql(f"select symbol,settle_date,close from {futtab} where symbol = '{sym}'")
    # explicit copy so the new column is not written to a slice of df_iv_skew
    df_year = df_iv_skew[df_iv_skew.symbol==sym].copy()
    df_year['2dn_2up'] = df_year['-0.2'] - df_year['0.2']
    df_year = df_year[['settle_date','2dn_2up']]
    df_year = df_year.merge(df_fut[['settle_date','close']],on='settle_date',how='inner')
    iplot(plotly_plot(df_year,x_column='settle_date',
            yaxis2_cols=['close'],y_left_label='skew',y_right_label=sym,
                     plot_title=f'{sym} close vs skew -0.2, 0.2',
                     number_of_ticks_display=15))
    

In [None]:
# For every contract expiring in year >= y, plot the daily mean at-the-money implied vol against the futures close.
y = 20
syms = [s for s in all_contracts if  (int(s[-2:]) >= y)]

for sym in syms:
    df_fut = pga.get_sql(f"select symbol,settle_date,close from {futtab} where symbol = '{sym}'")
    df_year = df_iv_final[['settle_date','atm_iv']][df_iv_final.symbol==sym]
    # collapse to one mean atm_iv per settle_date
    df_year = df_year.groupby(['settle_date'],as_index=False).mean()
    df_year.atm_iv = df_year.atm_iv.round(4)
    df_year = df_year.merge(df_fut[['settle_date','close']],on='settle_date',how='inner')
    iplot(plotly_plot(df_year,x_column='settle_date',
            yaxis2_cols=['close'],y_left_label='atm_iv',y_right_label=sym,
                     plot_title=f'{sym} close vs atm implied vol',
                     number_of_ticks_display=15))
    

### Create a constant maturity contract, and show its skew over time

In [None]:
# Stack the (-20% minus +20%) skew of every contract expiring in year >= y and
# plot it against the '<product>Z99' series from the underlying table.
y = 20
start_yyyymmdd=20200101
syms = [s for s in all_contracts if  (int(s[-2:]) >= y)]
skew_frames = []
for sym in syms:
    # explicit copy so the new column is not written to a slice of df_iv_skew
    df_year = df_iv_skew[df_iv_skew.symbol==sym].copy()
    df_year['2dn_2up'] = df_year['-0.2'] - df_year['0.2']
    df_year = df_year[['settle_date','2dn_2up']]
    skew_frames.append(df_year)
# DataFrame.append was removed in pandas 2.0; concatenate once instead.
# Raises ValueError when no contract matched (syms is empty).
df_all_years = pd.concat(skew_frames,ignore_index=True)

first_settle_date = df_all_years.settle_date.min()
last_settle_date = df_all_years.settle_date.max()

prod = syms[0][0:2]
sql_99 = f"""
select symbol,settle_date,close from {futtab} 
where symbol = '{prod}Z99' 
and settle_date>= {first_settle_date}
and settle_date<={last_settle_date} 
"""

df_fut = pga.get_sql(sql_99)
df_all_years = df_all_years.merge(df_fut,on='settle_date',how='inner')
df_all_years_save = df_all_years.copy()
# only look from start_yyyymmdd on, and change column titles
df_all_years = df_all_years[df_all_years.settle_date>=start_yyyymmdd]
df_all_years = df_all_years.sort_values('settle_date')
df_all_years = df_all_years.rename(columns={'2dn_2up':'20% DN/UP','close':'CL Cash Price'})
df_all_years = df_all_years[['settle_date','20% DN/UP','CL Cash Price']]
# reformat yyyymmdd as yyyy-mm-dd for the x axis labels
years = df_all_years.settle_date.astype(str).str.slice(0,4)
months = df_all_years.settle_date.astype(str).str.slice(4,6)
days = df_all_years.settle_date.astype(str).str.slice(6,8)
df_all_years.settle_date = years + '-' + months + '-' + days
# create the figure
fig = plotly_plot(df_all_years,x_column='settle_date',
        yaxis2_cols=['CL Cash Price'],y_left_label='Skew Difference',y_right_label='CL Cash Price',
                 plot_title=f'CL Cash Close VS the Vol Skew Difference between the<br>20% OTM Put and 20% OTM Call',
                 number_of_ticks_display=15)
fig.update_layout(yaxis_tickformat = '.2%')
# display it
iplot(fig)
# write it to an html file
fig.write_html('../docs/cl_cash_vs_volskew.html')

In [None]:
def get_postgres_data(contract,pga):
    '''
    Load one contract's option rows joined to its futures rows, plus its expiry.

    :param contract: contract symbol used in both table queries
    :param pga: database helper exposing get_sql(sql) -> DataFrame
    :return: (df, df_expiry_dates) where df is the options/futures inner join on
        symbol and settle_date, and df_expiry_dates has one row per symbol whose
        settle_date column holds the yyyymmdd expiry from cmeexp.get_expiry
    '''
    options = pga.get_sql(f"select * from {opttab} where symbol='{contract}';")
    futures = pga.get_sql(f"select * from {futtab} where symbol='{contract}';")
    # columns present in both tables get the _x (options) / _y (futures) suffixes
    merged = options.merge(futures, how='inner', on=['symbol','settle_date'])
    # one row per symbol; the max settle_date is only a placeholder and is
    # overwritten with the CME expiry date
    expiries = options[['symbol','settle_date']].groupby('symbol',as_index=False).max()
    expiries.settle_date = expiries.symbol.apply(
        lambda s: dt_to_yyyymmdd(cmeexp.get_expiry(s))
    )
    return merged, expiries

In [None]:
# When True, get_implieds uses py_vollib (lam_pyvol); otherwise mibian (lam_mibian).
USE_PYVOL = True
def lam_pyvol(r):
    '''
    Implied vol of one option row via py_vollib's Black model.

    :param r: row with close_x (option price), close_y (futures price), strike,
        dte (days to expiry) and pc ('C'/'P'); the rate is fixed at .02
    :return: implied volatility, or -1 when py_vollib cannot solve for it
    '''
    try:
        return implied_volatility.implied_volatility(r.close_x,r.close_y,r.strike,.02,r.dte/365, r.pc.lower())
    except Exception:
        # sentinel for rows where no implied vol exists; KeyboardInterrupt is no longer swallowed
        return -1

def lam_mibian(r):
    '''
    Implied vol of one option row via mibian.BS (rate fixed at 2 percent).

    Puts are priced through putPrice and everything else through callPrice, so
    a put price is no longer treated as a call price.
    '''
    bs_inputs = [r.close_y,r.strike,2,r.dte]
    if str(r.pc).upper() == 'P':
        return mibian.BS(bs_inputs, putPrice=r.close_x).impliedVolatility
    return mibian.BS(bs_inputs, callPrice=r.close_x).impliedVolatility

def get_implieds(df,df_expiry_dates,contract,contract_num=2):
    '''
    Compute an implied vol for every out-of-the-money option row of one contract.

    :param df: options/futures join from get_postgres_data (close_x is the
        option close, close_y the futures close)
    :param df_expiry_dates: frame with symbol and settle_date (the yyyymmdd expiry)
    :param contract: contract symbol to keep
    :param contract_num: keep only rows with this contract_num; None keeps all
    :return: DataFrame with symbol, settle_date, pc, contract_num, strike,
        close_x, close_y, dte and iv, indexed 0..n-1
    '''
    df2 = df[['symbol','contract_num','pc','settle_date','strike','close_x','close_y']]
    # keep calls struck at/above the futures price and puts struck below it
    otm_call = (df2.pc=='C') & (df2.strike>=df2.close_y)
    otm_put = (df2.pc=='P') & (df2.strike<df2.close_y)
    df2 = df2[(otm_call | otm_put) & (df2.symbol.str.contains(contract))]
    if contract_num is not None:
        df2 = df2[df2.contract_num==contract_num]
    # limit strikes to 70%..130% of the lowest/highest futures close seen
    phigh = df2.close_y.max()
    plow = df2.close_y.min()
    high_strike = round(phigh * 1.3)
    low_strike = round(plow * .7)
    df2 = df2[(df2.strike>=low_strike) & (df2.strike<=high_strike)]

    df9 = df2[df2.symbol==contract]
    df9 = df9.merge(df_expiry_dates.rename(columns={'settle_date':'expiry'}),on='symbol',how='inner')
    # days to expiry, computed vectorised from the yyyymmdd values
    settle_dt = pd.to_datetime(df9.settle_date.astype(str).str.slice(0,8),format='%Y%m%d')
    expiry_dt = pd.to_datetime(df9.expiry.astype(str).str.slice(0,8),format='%Y%m%d')
    df9['dte'] = (expiry_dt - settle_dt).dt.days
    df10 = df9[['symbol','settle_date','pc','contract_num','strike','close_x','close_y','dte']].reset_index(drop=True)
    if USE_PYVOL:
        df10['iv'] = df10.apply(lam_pyvol,axis=1)
    else:
        # mibian is slow: work in chunks of n rows so the progress bar advances
        n = 100
        df10['iv'] = np.nan
        for start in tqdm_notebook(range(0,len(df10),n)):
            chunk = df10.iloc[start:start+n]
            df10.loc[chunk.index,'iv'] = chunk.apply(lam_mibian,axis=1)
    return df10



In [None]:
def get_even_moneyness_strikes(df10):
    '''
    Interpolate implied vols onto 'pseudo' strikes at even moneyness levels.

    For each (symbol, settle_date, contract_num, dte, close_y) group, strikes at
    moneyness .70, .75, ... (step .05) times the underlying are added and their
    iv is filled by order-2 polynomial interpolation over strike.

    :param df10: output of get_implieds (needs symbol, settle_date, pc,
        contract_num, dte, close_y, strike and iv)
    :return: DataFrame restricted to the even-moneyness rows that received an
        iv, with moneyness re-centred on 0 (e.g. -0.2, 0, 0.2), atm_iv (the iv
        at moneyness 0) and vol_skew = iv - atm_iv
    :raises ValueError: when df10 has no groups (nothing to concatenate)
    '''
    # define amounts around the money which will help create strikes to add
    moneyness = np.arange(.7,1.4,.05).round(6)
    # define columns on which to execute groupby ('pc' is deliberately left out)
    gb_cols = ['symbol','settle_date','contract_num','dte','close_y']
    # define function applied to each group to create strikes and iv's at those strikes
    #   where the strikes are an even amount from the money 
    #   (like .7, .8, ... 1, 1.1, 1.2, etc)
    def _add_even_moneyness_strikes(df):
        # get underlying from first row (the groupby makes them all the same)
        underlying = df.iloc[0].close_y
        # one template row per moneyness level, built by repeating the first row
        # so that groups with fewer rows than moneyness levels also work
        df_ret1 = df.iloc[[0] * len(moneyness)][gb_cols].copy()
        # add nan iv's !!!! MUST BE np.nan - NOT None
        df_ret1['iv'] = np.nan
        # add new strikes
        df_ret1['strike'] = moneyness * underlying
        # append the new strikes (DataFrame.append was removed in pandas 2.0)
        dfa = pd.concat([df,df_ret1],ignore_index=True,sort=True)
        df_ret2 = dfa.sort_values(['symbol','settle_date','pc','strike'])
        df_ret2 = df_ret2.drop_duplicates(subset='strike')
        # set the index to the strike so that interpolate works
        df_ret2.index = df_ret2.strike
        # create interpolated iv's
        df_ret2['iv'] = df_ret2.iv.interpolate(method='polynomial', order=2)
        # reset the index
        df_ret2.index = list(range(len(df_ret2)))
        return df_ret2

    # start here: iterate the groups explicitly so every group frame still
    # carries the grouping columns, whatever groupby.apply does with them
    pieces = [_add_even_moneyness_strikes(group) for _, group in df10.groupby(gb_cols)]
    df11 = pd.concat(pieces,ignore_index=True)
    df11['moneyness'] = df11.strike / df11.close_y
    df11.moneyness = df11.moneyness.round(4)

    # keep only the even-moneyness rows that received an iv
    df12 = df11[(df11.moneyness.isin(moneyness)) & (~df11.iv.isna())].copy()
    df12.moneyness  = df12.moneyness - 1
    df12.index = list(range(len(df12)))
    # the at-the-money iv per symbol / settle_date / pc
    df12_atm = df12[df12.moneyness==0][['symbol','settle_date','pc','iv']]
    df12_atm = df12_atm.rename(columns={'iv':'atm_iv'})

    df12_atm = df12_atm.drop_duplicates()
    df12 = df12.merge(df12_atm,on=['symbol','settle_date','pc'],how='inner')
    df12.moneyness = df12.moneyness.round(4)
    df12['vol_skew'] = (df12.iv - df12.atm_iv).round(4)
    return df12



In [None]:
import pdb

In [None]:
def skew_per_symbol(symbol,pga,contract_num=2,strike_divisor=None):
    '''
    Build the implied-vol and skew frames for one contract such as CLM16 or EZH19.

    :param symbol: contract symbol
    :param pga: database helper passed through to get_postgres_data
    :param contract_num: only proceed when rows with this contract_num exist;
        None skips that check
    :param strike_divisor: accepted but not used by this function
    :return: (df_iv, df_skew, exception_text, stacktrace_text)
        df_iv   - implied vols at the 'pseudo' strikes that sit an even percent
                  from the money, per settle_date
        df_skew - one row per settle_date, one column per moneyness level, plus
                  symbol and settle_date columns
        Both frames are None when nothing was computed; the last two items are
        None unless an exception was caught.
    '''
    iv_frame = None
    skew_frame = None
    error_text = None
    trace_text = None
    try:
        merged, expiries = get_postgres_data(symbol,pga)
        has_rows = (contract_num is None) or len(merged[merged.contract_num==contract_num])>0
        if has_rows:
            implieds = get_implieds(merged,expiries,symbol,contract_num=contract_num)
            even = get_even_moneyness_strikes(implieds)
            wide = create_skew_per_date_df(even)
            wide.index = list(range(len(wide)))
            # transpose so each settle_date becomes a row and each moneyness a column
            by_date = wide.T
            by_date.columns = by_date.loc['moneyness']
            by_date = by_date.iloc[1:].copy()
            by_date['symbol'] = symbol
            by_date['settle_date'] = by_date.index
            iv_frame = even.copy()
            skew_frame = by_date.copy()
    except Exception as e:
        # report the failure to the caller instead of raising
        error_text = str(e)
        trace_text = traceback.format_exc()
    return iv_frame,skew_frame,error_text,trace_text




def cash_futures_to_csv(sym,pga):
    '''
    Fetch the '<sym>Z99' rows from the underlying table and save them as CSV.

    :param sym: product prefix, e.g. 'CL'
    :param pga: database helper exposing get_sql(sql) -> DataFrame
    :return: the fetched DataFrame (also written to
        ./temp_folder/df_cash_futures_<sym>.csv; its row count is printed)
    '''
    frame = pga.get_sql(f"select * from sec_schema.underlying_table where symbol='{sym}Z99';")
    print(len(frame))
    csv_path = f'./temp_folder/df_cash_futures_{sym}.csv'
    frame.to_csv(csv_path,index=False)
    return frame


### Use the methods above to compute volskew info and charts for one CL contract

In [None]:
# Compute the iv / skew frames for one contract; errors come back as text instead of raising.
contract='CLG21'
contract_num=3
# pass the variable (not a second literal) so later cells that read `contract` stay in sync
df_iv,df_skew,_exception,_stacktrace = skew_per_symbol(contract,pga,contract_num=contract_num,strike_divisor=None)

In [None]:
# Prints None when skew_per_symbol succeeded, otherwise the captured traceback.
print(_stacktrace)

In [None]:
# Compare the iv of the -20% and +20% moneyness pseudo strikes, day by day.
moneyness = .2
# rows 20% below the money
df_iv_neg20 = df_iv[df_iv.moneyness==-moneyness][['settle_date','iv','strike','atm_iv']]
df_iv_neg20 = df_iv_neg20.rename(columns={'iv':'ivneg20','strike':'strikeneg20'})
# rows 20% above the money
df_iv_pos20 = df_iv[df_iv.moneyness==moneyness][['settle_date','iv','strike']]
df_iv_pos20 = df_iv_pos20.rename(columns={'iv':'ivpos20','strike':'strikepos20'})
df_iv_20 = df_iv_pos20.merge(df_iv_neg20,on='settle_date',how='inner')
# attach the '<product>Z99' close (this call also writes a CSV to ./temp_folder)
df_cash = cash_futures_to_csv(contract[0:2],pga)
df_iv_20 = df_iv_20.merge(df_cash[['settle_date','close']], on='settle_date')
df_iv_20['20dn_20up'] = df_iv_20.ivneg20 - df_iv_20.ivpos20
df_iv_20['strike_diff'] = df_iv_20.strikeneg20 - df_iv_20.strikepos20
# skew (left axis) vs close (right axis)
iplot(
    plotly_plot(
        df_iv_20[['settle_date','close','20dn_20up']],
        x_column='settle_date',
        yaxis2_cols=['close'],
        y_left_label="Vol Skew",
        y_right_label="Closing Price",
        plot_title="Close vs Skew"
    )
)

# skew (left axis) vs at-the-money vol (right axis)
iplot(
    plotly_plot(
        df_iv_20[['settle_date','atm_iv','20dn_20up']],
        x_column='settle_date',
        yaxis2_cols=['atm_iv'],
        y_left_label="Vol Skew",
        y_right_label="ATM Vol",
        plot_title="ATM Vol vs Skew"        
    )
)             

# day-over-day percentage changes; the first row is NaN because of shift(1)
df_iv_20['atm_iv_perc_chg'] = df_iv_20.atm_iv/df_iv_20.atm_iv.shift(1) - 1
df_iv_20['20dn_20up_perc_chg'] = df_iv_20['20dn_20up']/df_iv_20['20dn_20up'].shift(1) - 1
df_iv_20['perc_chg_diff'] = df_iv_20['atm_iv_perc_chg'] - df_iv_20['20dn_20up_perc_chg']

iplot(
    plotly_plot(
        df_iv_20[['settle_date','perc_chg_diff','atm_iv']],
        x_column='settle_date',
        yaxis2_cols=['atm_iv'],
        y_left_label="%Chg in ATM vol minus %Chg in Skew",
        y_right_label="ATM Vol",
        plot_title="ATM Vol vs Difference in % changes of Atm vol and Skew"        
    )
)             


In [None]:
# Plot the close column of df_cash for settle dates from 2019-01-01 onward.
iplot(plotly_plot(df_cash[df_cash.settle_date>=20190101][['settle_date','close']],x_column='settle_date'))

### Show average prices per year

In [None]:
# Mean close per calendar year, 2011 through 2020.
years = range(2011,2021)
averages = [
    # settle dates from Jan 1 of year y (y0101) up to, but excluding, Jan 1 of y+1
    df_cash[(df_cash.settle_date>=int(y*100*100+101)) & (df_cash.settle_date<int((y+1)*100*100+101))].close.mean()
    for y in years
]
pd.DataFrame({'year':years,'average_price':averages})

### Track the historical path of a specific "close strike" option spread

In [None]:
# Pick the two strikes (0.5 apart) that bracket the futures close on beg_yyyymmdd
# and read both option closes for that day.
symbol='CLQ20'
pc = 'P'
beg_yyyymmdd = 20200629
futsql = f"select * from {futtab} where symbol = '{symbol}' and settle_date={str(beg_yyyymmdd)}";
# .values[0] raises IndexError when the query returns no rows
fut_close = pga.get_sql(futsql).close.values[0]
# round the futures close down to the nearest multiple of .5
low_strike = int(fut_close/.5)*.5
high_strike = low_strike + .5
low_strike_close_sql = f"""
select close from {opttab} 
where symbol = '{symbol}' 
and settle_date={beg_yyyymmdd} 
and pc='{pc}'
and strike = {low_strike}
"""
low_strike_close = pga.get_sql(low_strike_close_sql).close.values[0]

high_strike_close_sql = f"""
select close from {opttab} 
where symbol = '{symbol}' 
and settle_date={beg_yyyymmdd} 
and pc='{pc}'
and strike = {high_strike}
"""
high_strike_close = pga.get_sql(high_strike_close_sql).close.values[0]

# displayed tuple; NOTE(review): fut_close appears twice
fut_close,low_strike,high_strike,fut_close,low_strike_close-high_strike_close

In [None]:
# Display the lower strike chosen in the previous cell.
low_strike

In [None]:
# Daily closes of the two bracketing strikes from beg_yyyymmdd onward, and the spread between them.
# NOTE: the columns are prefixed 'call_' even though pc selects which option type is queried.
low_strike_close_sql = f"""
select settle_date,close from {opttab} 
where symbol = '{symbol}' 
and settle_date>={beg_yyyymmdd} 
and pc='{pc}'
and strike = {low_strike}
"""
df_low_strike_close = pga.get_sql(low_strike_close_sql)
df_low_strike_close = df_low_strike_close.rename(columns={'close':f'call_{low_strike}'})

high_strike_close_sql = f"""
select settle_date,close from {opttab} 
where symbol = '{symbol}' 
and settle_date>={beg_yyyymmdd} 
and pc='{pc}'
and strike = {high_strike}
"""
df_high_strike_close = pga.get_sql(high_strike_close_sql)
df_high_strike_close = df_high_strike_close.rename(columns={'close':f'call_{high_strike}'})

df_call_spread = df_low_strike_close.merge(df_high_strike_close,on='settle_date',how='inner')
df_call_spread['spread'] = df_call_spread[f'call_{low_strike}'] - df_call_spread[f'call_{high_strike}']
# attach the close from df_cash for the same settle dates
df_call_spread = df_call_spread.merge(df_cash[['settle_date','close']],on='settle_date',how='inner')

# (the CLF21 query that used to run here was discarded, so it has been dropped)
iplot(
    plotly_plot(
        df_call_spread[['settle_date','close','spread']],
        x_column='settle_date',
        yaxis2_cols=['spread'],
        plot_title="Cash futures from 2020-01-02",
        y_left_label='CLY00 price',
        y_right_label=f'{low_strike}/{high_strike}_spread'
    )
)

### Read the CME's Volume report for APO options for a specific day to see which APOs trade.

In [None]:
import requests
from io import BytesIO
import pdb 
import xlrd
# Download the CME volume report for product 4707 on trade_date and save it as an .xls file.
trade_date = 20201207
# apo_url = f"https://www.cmegroup.com/CmeWS/exp/voiProductDetailsViewExport.ctl?media=xls&tradeDate={trade_date}&reportType=F&productId=4707"
apo_url = f"https://www.cmegroup.com/CmeWS/exp/voiProductDetailsViewExport.ctl?media=xls&tradeDate={trade_date}&reportType=P&productId=4707"
r = requests.get( apo_url) # 
# fail here rather than saving an HTTP error page as a workbook
r.raise_for_status()
# the context manager flushes and closes the file before xlrd opens it in the next cell
with open('./temp_folder/cme_apo.xls','wb') as f:
    f.write(r.content)


In [None]:
# Open the workbook saved by the download cell.
book = xlrd.open_workbook('./temp_folder/cme_apo.xls')

In [None]:
# Only the first sheet is parsed; show its name and row count.
first_sheet = book.sheet_by_index(0)
print(first_sheet.name,first_sheet.nrows)


In [None]:
# Find the first row (within the first 100) whose first cell mentions "average";
# beg_row stays -1 when there is none.
beg_row = -1
# never look past the end of the sheet
for row in range(min(100,first_sheet.nrows)):
    try:
        cells = first_sheet.row_slice(rowx=row)        
        cell0 = str(cells[0].value).lower()
        if "average" in cell0:
            beg_row = row
            break
    except Exception as e:
        # e.g. a row without any cells; report it and keep scanning
        print(e)

In [None]:
# Display the row index found above (-1 means no "average" row was found).
beg_row

In [None]:
# Collect, per "call"/"put" section title, the 'strike' header row followed by its data rows.
valid_rows = {}
header = []
for row in tqdm_notebook(range(beg_row,first_sheet.nrows)):
    cells = first_sheet.row_slice(rowx=row)
    cell0 = str(cells[0].value).lower()
    if ("call" in cell0) or ("put" in cell0):
        new_contract = cell0
        valid_rows[new_contract] = []
        # stop at the end of the sheet instead of indexing past it
        for data_row in range(row+1,first_sheet.nrows):
            cells = first_sheet.row_slice(rowx=data_row)
            cell0 = str(cells[0].value).lower()
            if 'strike' in cell0:
                valid_rows[new_contract].append([c.value for c in cells])
                continue
            try:
                # data rows are the ones whose first cell parses as an integer
                # NOTE(review): a numeric cell rendered like '45.0' fails int() and ends the section - confirm the sheet stores strikes as text
                int(cell0)
                valid_rows[new_contract].append([c.value for c in cells])
            except ValueError:
                # first non-numeric row ends this section; the outer loop simply
                # walks over the rows already consumed, which match neither keyword
                break

In [None]:
# One frame per section (first collected row = header), tagged with contract and put/call, stacked into df_all.
section_frames = []
# loop names chosen so the notebook globals `pc` and `df` set by earlier cells are not overwritten
for section_title, section_rows in valid_rows.items():
    df_section = pd.DataFrame(section_rows[1:],columns=section_rows[0])
    # the section title carries the contract in characters 0-5 and the put/call letter at position 7
    df_section['contract'] = section_title[0:6]
    df_section['pc'] = section_title[7]
    section_frames.append(df_section)
# DataFrame.append was removed in pandas 2.0; concatenate once instead.
# Raises ValueError when valid_rows is empty.
df_all = pd.concat(section_frames,ignore_index=True)
df_all = df_all.rename(columns = {c:c.replace(' ','_').lower() for c in df_all.columns.values})

In [None]:
# Show only the rows that traded (total_volume > 0).
df_all[df_all['total_volume'].astype(int)>0]

In [None]:
# Traded rows only, with total_volume as int, ranked by volume (top 60).
# .copy() makes df_all2 an independent frame, so the column assignment is not a write to a slice of df_all
df_all2 = df_all[
    df_all['total_volume'].astype(int)>0
].copy()
df_all2['total_volume'] = df_all2['total_volume'].astype(int)
df_all2[
    ['contract','strike','pc','total_volume']
].sort_values(['total_volume','contract','strike'],ascending=False).head(60)


In [None]:
# Scratch arithmetic. NOTE(review): the meaning of these constants is not documented anywhere in the notebook - TODO confirm or delete.
a = 660*1000000
c = 15000
a/c-38000,c*10000


In [None]:
# Scrape every HTML table on the etfdb industrial-metals page and display each one.
# NOTE: the loop rebinds the notebook-level name `df`.
metals_etf_dfs = pd.read_html("https://etfdb.com/etfs/natural-resources/industrial-metals/")
for df in metals_etf_dfs:
    display.display(df)

## END