##  Energy Spread Analysis

### Imports

In [1]:
import sys
import os
if  not './' in sys.path:
    sys.path.append('./')
if  not '../' in sys.path:
    sys.path.append('../')

from barchartacs import build_db
from barchartacs import db_info
import plotly.graph_objs as go
from plotly.offline import  init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.tools as tls
from plotly.graph_objs.layout import Font,Margin
from IPython import display

import pandas as pd
import numpy as np
import datetime
import pytz
import io
from tqdm import tqdm,tqdm_notebook
from barchartacs import pg_pandas as pg
import traceback
import pandas_datareader.data as pdr
from pandas.tseries.holiday import USFederalHolidayCalendar
BUS_DAY_US = pd.offsets.CustomBusinessDay(calendar=USFederalHolidayCalendar())
TIMEZONE = 'US/Eastern'


### Run this cell if you want to suppress warnings.  This is useful if the warnings print local information that you don't want to show the public but you still want to save this notebook with its output.

In [2]:
import warnings
warnings.filterwarnings("ignore")

### Instantiate an instance of pg_pandas

In [3]:
pga = db_info.get_db_info()
opttab = 'sec_schema.options_table'
futtab = 'sec_schema.underlying_table'


  sec_db


### general second order linear regression

In [4]:
def regress_2nd(x,y):
    x1 = np.array([[1 for _ in  range(len(x))],x,x**2]).T
    xm = np.matrix(x1)
    xmt = xm.T
    rgr = (np.linalg.inv(xmt @ xm) @ xmt @ np.matrix(y).T)
    
    return {f'b{i}':np.asarray(rgr[i]).reshape(-1)[0] for i in range(len(rgr))}


### Define a generalized plotly chart method

In [5]:
def plotly_plot(df_in,x_column,plot_title=None,
                y_left_label=None,y_right_label=None,
                bar_plot=False,figsize=(16,10),
                number_of_ticks_display=20,
                yaxis2_cols=None):
    ya2c = [] if yaxis2_cols is None else yaxis2_cols
    ycols = [c for c in df_in.columns.values if c != x_column]
    # create tdvals, which will have x axis labels
    td = list(df_in[x_column]) 
    nt = len(df_in)-1 if number_of_ticks_display > len(df_in) else number_of_ticks_display
    spacing = len(td)//nt
    tdvals = td[::spacing]
    
    # create data for graph
    data = []
    # iterate through all ycols to append to data that gets passed to go.Figure
    for ycol in ycols:
        if bar_plot:
            b = go.Bar(x=td,y=df_in[ycol],name=ycol,yaxis='y' if ycol not in ya2c else 'y2')
        else:
            b = go.Scatter(x=td,y=df_in[ycol],name=ycol,yaxis='y' if ycol not in ya2c else 'y2')
        data.append(b)

    # create a layout
    layout = go.Layout(
        title=plot_title,
        xaxis=dict(
            ticktext=tdvals,
            tickvals=tdvals,
            tickangle=45,
            type='category'),
        yaxis=dict(
            title='y main' if y_left_label is None else y_left_label
        ),
        yaxis2=dict(
            title='y alt' if y_right_label is None else y_right_label,
            overlaying='y',
            side='right'),
        margin=Margin(
            b=100
        )        
    )

    fig = go.Figure(data=data,layout=layout)
    return fig


### Define some Date conversion methods

In [6]:
def yyyymmdd_to_datetime(yyyymmdd,tzone = 'US/Eastern'):
    tz = pytz.timezone(tzone)
    return datetime.datetime(int(str(yyyymmdd)[0:4]),int(str(yyyymmdd)[4:6]),int(str(yyyymmdd)[6:8]),tzinfo=tz)

def datetime_to_yyyymmdd(dt):
    y = int(dt.year)
    m = int(dt.month)
    d = int(dt.day)
    return y*100*100 + m*100 + d

ymd = yyyymmdd_to_datetime(20180403)
dt_ymd = datetime_to_yyyymmdd(ymd)
print(ymd,dt_ymd)

2018-04-03 00:00:00-04:56 20180403


### Define ```CommoditySpreadInfo```, a class that helps visualized changes in backwardation and contango

In [7]:
class CommoditySpreadInfo():
    def __init__(self,commod,num_contracts):
        self.commod = commod
        self.num_contracts = num_contracts

    def consecutive_contracts(self,first_settle_date_yyyymmdd):
        sql = f"""
        select distinct symbol from {futtab} 
        where settle_date>={first_settle_date_yyyymmdd} and 
        symbol ~* '^{self.commod}';
        """
        symbols = pga.get_sql(sql).symbol.values
        symbols_ppyym = sorted([f'{c[:(len(c)-3)]}' + f'{c[-2:]}' + f'{c[2]}' for c in symbols])[:self.num_contracts]
        symbols_ppmyy = [f'{c[:(len(c)-3)]}' + f'{c[-1]}' + f'{c[-3:-1]}' for c in symbols_ppyym]
        symbol_dict = {symbols_ppmyy[i]:{'index':i,'ppyym':symbols_ppyym[i]} for i in range(len(symbols_ppmyy))}
        return symbol_dict    
    
    def create_spread_matrix(self,calendar_days_forward,first_settle_date_yyyymmdd,price_col='close'):
        # Step 01: generate yyyymmdd's
        yyyymmdds = []
        front_contracts = []
        price_arrays = []
        current_dt = yyyymmdd_to_datetime(first_settle_date_yyyymmdd)
        end_yyyymmdd = datetime_to_yyyymmdd(current_dt + datetime.timedelta(calendar_days_forward))

        sql = f"""
        select symbol,settle_date,{price_col} from {futtab} where symbol ~* '^{self.commod}' and
         settle_date >= {first_settle_date_yyyymmdd} and settle_date <= {end_yyyymmdd};
        """
        df_all_days = pga.get_sql(sql)
        all_days = sorted(df_all_days.settle_date.unique())
    
        for i in range(len(all_days)):
            current_yyyymmdd = all_days[i]
            df_prices_per_contract_this_day = df_all_days[df_all_days.settle_date==current_yyyymmdd]
            symbols = df_prices_per_contract_this_day.symbol.unique()
            symbols_ppyym = sorted([f'{c[:(len(c)-3)]}' + f'{c[-2:]}' + f'{c[2]}' for c in symbols])[:self.num_contracts]
            symbols_sorted_by_date = [f'{c[:(len(c)-3)]}' + f'{c[-1]}' + f'{c[-3:-1]}' for c in symbols_ppyym]
            df_prices_per_contract_this_day = df_prices_per_contract_this_day[df_prices_per_contract_this_day.symbol.isin(symbols_sorted_by_date)]
            front_symbol = symbols_sorted_by_date[0]
            front_contracts.append(front_symbol)
            price_array = df_prices_per_contract_this_day[price_col].values
            price_arrays.append(price_array)
            yyyymmdds.append(current_yyyymmdd)
#             current_dt = current_dt + 1*BUS_DAY_US
        d = {'yyyymmdd':yyyymmdds,'front':front_contracts}
        m = np.array(price_arrays).T
        for i in range(len(m)):
            d[i+1] = m[i].tolist()
        df = pd.DataFrame(d)
        return df                    

ncontracts = 12
csi = CommoditySpreadInfo('CL',ncontracts)
syms = list(csi.consecutive_contracts(20180401).keys())

beg_yyyymmdd = 20140101
num_bdays_forward = 252*4
beg_dt = yyyymmdd_to_datetime(beg_yyyymmdd)
cdays = ((beg_dt + num_bdays_forward * BUS_DAY_US) - beg_dt).days

df_csm_close = csi.create_spread_matrix(cdays,beg_yyyymmdd)
df_csm_high = csi.create_spread_matrix(cdays,beg_yyyymmdd,price_col='high')



In [8]:
def price_to_percent(df_csm):
    # turn prices of forward months into percentages of first month
    df_csm2 = df_csm.copy()
    # price cols, which are just numbers relating the each symbols position 
    #  in the forward curve, where 1 = price of front month
    price_cols = [c for c in df_csm2 if c in list(range(num_bdays_forward*2))]
    df_csm2[price_cols] = df_csm2[price_cols].apply(lambda r:pd.Series({i+1:r[i+1]/r[1] for i in range(len(r))}),axis=1)
    lambda_regress = lambda r: pd.Series(regress_2nd(np.array(price_cols),np.array([r[i] for i in price_cols])))
    df_csm2[['b0','b1','b2']] = df_csm2[price_cols].apply(lambda_regress,axis=1)
    return df_csm2


df_csm_close_percent = price_to_percent(df_csm_close)
price_cols = [c for c in df_csm_close if c in list(range(num_bdays_forward*2))]

new_cols = df_csm_close_percent[['yyyymmdd','front']].apply(lambda r:f'{r.front}_{str(r.yyyymmdd)}',axis=1)
df_skew_per_day = pd.DataFrame(df_csm_close_percent[price_cols].as_matrix().T,columns=new_cols)

graph_group_size = 25
num_groups = int(len(new_cols)/graph_group_size) + (1 if (len(new_cols) % graph_group_size) > 0 else 0)

for i in range(num_groups):
    beg_index = i * graph_group_size
    end_index = min(len(new_cols)+1,beg_index + graph_group_size)
    curr_cols = new_cols[beg_index:end_index]
    df_temp = df_skew_per_day[curr_cols]
    df_temp['x'] = df_temp.index.values
    iplot(plotly_plot(df_temp,x_column='x'))
                          

### display the *```contango_percent```* historically, which is the percent difference between the front contract and the back contract

In [9]:
df_temp = df_csm_close_percent[['yyyymmdd','front',1,12]].copy()
df_temp['x'] = df_temp.apply(lambda r: f'{r.front}_{r.yyyymmdd}',axis=1)
df_temp['contango_percent'] = df_temp.apply(lambda r: r[12] - r[1],axis=1)
df_temp['front_close'] = df_csm_close[1].values
iplot(plotly_plot(df_temp[['x','front_close','contango_percent']],x_column='x',yaxis2_cols=['contango_percent']))


### Display the *```b1 coefficient```*, which discribes the average slope of the forward curve.  This coefficient changes very similarly to changes in the ```*coefficient percent```* above.

In [10]:
df_temp = df_csm_close_percent[['yyyymmdd','front','b1']].copy()
df_temp['x'] = df_temp.apply(lambda r: f'{r.front}_{r.yyyymmdd}',axis=1)
df_temp['front_close'] = df_csm_close[1].values
iplot(plotly_plot(df_temp[['x','front_close','b1']],x_column='x',yaxis2_cols=['b1']))


### Display the **b2 coefficient** which discribes the curvature of the forward curve

In [11]:
df_temp = df_csm_close_percent[['yyyymmdd','front','b2']].copy()
df_temp['x'] = df_temp.apply(lambda r: f'{r.front}_{r.yyyymmdd}',axis=1)
# df_temp['contango_coefficent'] = df_temp.apply(lambda r: (r.b1 + r.b2)*1000,axis=1)
b2_magnify = 1000 #df_temp.iloc[0].b1/df_temp.iloc[0].b2
df_temp.b2 = df_temp.b2.apply(lambda v: v*b2_magnify)
df_temp['front_close'] = df_csm_close[1].values
# iplot(plotly_plot(df_temp[['x','front_close','contango_coefficent']],x_column='x',yaxis2_cols=['contango_coefficent']))
iplot(plotly_plot(df_temp[['x','front_close','b2']],x_column='x',yaxis2_cols=['b2']))
