In [1]:
from datetime import date

import numpy as np
import pandas as pd
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
from plotly.graph_objs import Scatter, Figure, Layout, Histogram, Heatmap
from plotly import tools
init_notebook_mode(connected=True)

from utils.plot_utils import plot, plot_subplot, data_from_table, engine, gen_datetime_col
# Load the 'cl_data' table through the project helper. The second argument
# ('index') is presumably the column used as the DataFrame index -- TODO confirm
# against utils.plot_utils.data_from_table.
df_query = data_from_table('cl_data', 'index')
# Combine the separate year / month / day columns into a single 'datetime' column.
df_query['datetime'] = gen_datetime_col(df_query, 'year', 'month', 'day')
# Preview the first rows to check the load.
df_query.head()

Unnamed: 0_level_0,open,high,low,close,volume,openint,contract_name,year,month,day,contract_symbol,contract_year,contract_month,datetime
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,29.01,29.56,29.01,29.4,949,470,CL1983-06,1983,3,30,CL,1983,6,1983-03-30
1,29.4,29.6,29.25,29.29,521,523,CL1983-06,1983,3,31,CL,1983,6,1983-03-31
2,29.3,29.7,29.29,29.44,156,583,CL1983-06,1983,4,4,CL,1983,6,1983-04-04
3,29.5,29.8,29.5,29.71,175,623,CL1983-06,1983,4,5,CL,1983,6,1983-04-05
4,29.9,29.92,29.65,29.9,392,640,CL1983-06,1983,4,6,CL,1983,6,1983-04-06


## Open Interest

Open interest acts like a weight for how much "interest" there is in a contract. This has some consequences:

1. Open Interest will start and end at "0"
2. Open Interest indicates the number of contracts that are currently held
3. Decreasing Open Interest indicates close-outs. 
4. Volume measures activity

According to Investopedia (http://www.investopedia.com/articles/technical/02/112002.asp), the strength of the market can be gauged from price, volume and open interest using this chart:

| Price | Volume | Open Interest | Market |
| ----- | ------ | ------------- | ------ |
| Rising|   Up   | Up            | Strong |
| Rising|   Down | Down          | Weak   |
| Declining|Up   | Up            | Weak   |
| Declining|Down | Down          | Strong |

In [2]:
# Open interest and traded volume over time for the January 2001 contract.
df = df_query[df_query['contract_name'] == 'CL2001-01']

# (column to plot, legend label) for each line in the figure.
series_to_plot = (
    ('openint', 'Date vs Open Interest'),
    ('volume', 'Date vs Volume'),
)
traces = [
    Scatter(x=df['datetime'], y=df[column], mode='lines', name=label)
    for column, label in series_to_plot
]
plot(
    traces,
    title='January 2001 Contract: date vs open interest and date vs volume')

There will probably be times when we want to know when the Open Interest starts to decrease for a given contract. This will most likely indicate when contract holders were interested in closing out.

In [57]:
df = df_query[df_query['contract_name'] == 'CL2001-01']
# Simple method: take the overall maximum without checking how steep the
# decline after it is. Different local maxima would tell us different things
# about the market.
# idxmax() returns the index *label* of the peak row. Series.argmax() returns an
# integer position in current pandas, which would not match the labels held in
# df.index. Wrapping the label in a list keeps the result a one-row DataFrame.
df.loc[[df['openint'].idxmax()]]

Unnamed: 0_level_0,open,high,low,close,volume,openint,contract_name,year,month,day,contract_symbol,contract_year,contract_month,datetime
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
81465,34.15,35.09,34.15,35.03,79730,151220,CL2001-01,2000,11,17,CL,2001,1,2000-11-17


Above we can see that for January 2001 the drop-off started occurring about a month and a half before the contract expired. What is the median number of days before expiration at which this happens, year by year?

In [108]:
# For every contract keep the trading day(s) on which open interest peaked.
# NOTE: if a contract reaches the same peak on several days, each tied day is kept.
is_peak_day = (df_query.groupby('contract_name')['openint'].transform('max')
               == df_query['openint'])

# Only contracts with contract_year before 2017 are analysed.
# .copy() gives an independent frame so the column assignments below do not
# write into a slice of df_query.
open_int_maxes = df_query[is_peak_day
                          & (df_query['contract_year'] < 2017)].copy()

# The first calendar day of the contract month is used as the reference date
# for "expiry". Built vectorised as datetime64, so no datetime.date import is needed.
open_int_maxes['contract_date'] = pd.to_datetime(
    pd.DataFrame({
        'year': open_int_maxes['contract_year'],
        'month': open_int_maxes['contract_month'],
        'day': 1,
    }))

# Whole days between the open-interest peak and the reference date.
# pd.to_datetime() on the 'datetime' column makes the subtraction work whether
# gen_datetime_col produced datetime64 values or plain date objects.
open_int_maxes['days_to_expire'] = (
    open_int_maxes['contract_date']
    - pd.to_datetime(open_int_maxes['datetime'])).dt.days

In [109]:
# Mean and median lead time (days between the open-interest peak and the
# contract date), aggregated per contract year.
days_by_year = open_int_maxes.groupby('contract_year')['days_to_expire']
df_mean, df_median = days_by_year.mean(), days_by_year.median()

traces = [
    Scatter(x=stat.index, y=stat, name=label)
    for label, stat in (('Mean', df_mean), ('Median', df_median))
]
plot(traces, title='Days from expirey for Open Interest drop-off ')

Nothing noteworthy here. Contracts are generally well behaved, staying within about 2 days of the 40-day mark. The mean suggests there are no significant outliers. What about month by month?

In [111]:
# Same statistics as the yearly view, but aggregated per contract month
# across all years.
days_by_month = open_int_maxes.groupby('contract_month')['days_to_expire']
df_mean, df_median = days_by_month.mean(), days_by_month.median()

traces = [
    Scatter(x=stat.index, y=stat, name=label)
    for label, stat in (('Mean', df_mean), ('Median', df_median))
]
plot(traces, title='Days from expirey for Open Interest drop-off ')

This time we see something interesting: March and April dip much lower than all the other months. Is this true for more recent years?

In [144]:
# Monthly mean/median restricted to recent contracts.
# NOTE: the filter is strict (> 2000), so contracts with contract_year == 2000
# are excluded even though the plot title reads "2000-2017".
df = open_int_maxes[open_int_maxes['contract_year'] > 2000]
recent_by_month = df.groupby('contract_month')['days_to_expire']
df_mean, df_median = recent_by_month.mean(), recent_by_month.median()

traces = [
    Scatter(x=stat.index, y=stat, name=label)
    for label, stat in (('Mean', df_mean), ('Median', df_median))
]
plot(
    traces,
    title='Days from expirey for Open Interest drop-off (2000-2017)')

Even more interesting. Remember: lower numbers mean Open Interest drops off LATER.

In [143]:
# Month abbreviations indexed by month number; slot 0 is padding so that
# months[1] == 'Jan'.
months = [
    '', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct',
    'Nov', 'Dec'
]

# Apply the year filter once, then split by contract month.
recent = open_int_maxes[open_int_maxes['contract_year'] > 2000]

# One trace per contract month: mean days_to_expire for each contract year.
data_mean = []
for m, month_label in enumerate(months[1:], start=1):
    df = recent[recent['contract_month'] == m]
    df_mean = df.groupby('contract_year')['days_to_expire'].mean()
    data_mean.append(Scatter(x=df_mean.index, y=df_mean, name=month_label))

plot(
    data_mean,
    title='Days since open interest drop off; monthly analysis',
    interactive=True)

This raises the question: is there a significant correlation between contract months in how close to expiration their open interest drops off?

In [254]:
def heatmap_days_to_expire(
        year_start=None,
        year_end=None,
        title_prefix='Correlation between open interest drop-off and month'):
    """Build a month-vs-month correlation heatmap of ``days_to_expire``.

    Reads the notebook-level ``open_int_maxes`` frame, optionally restricts it
    to a range of contract years, arranges ``days_to_expire`` as one row per
    contract year and one column per contract month, and correlates the
    month columns with each other.

    Parameters
    ----------
    year_start : int or None
        Inclusive lower bound on ``contract_year``; ``None`` means no lower bound.
    year_end : int or None
        Exclusive upper bound on ``contract_year``; ``None`` means no upper bound.
    title_prefix : str
        Text placed in front of the ``(first_year-last_year)`` suffix.

    Returns
    -------
    (Heatmap, str)
        The plotly ``Heatmap`` trace of the 12x12 correlation matrix and the
        title string. The years in the title are the min/max contract years
        actually present after filtering, not the requested bounds.
    """
    years = open_int_maxes['contract_year']

    # Boolean row mask. It must be bool, not int: an integer array used as a
    # DataFrame key is interpreted as column labels rather than a row filter.
    mask = np.ones(len(open_int_maxes), dtype=bool)
    if year_start is not None:
        mask &= (years >= year_start).values
    if year_end is not None:
        mask &= (years < year_end).values

    df_int = open_int_maxes[mask]

    # Rows = contract year, columns = contract month, so values are paired by
    # year explicitly instead of by row position. Duplicate (year, month) rows
    # (tied open-interest peaks) are averaged, and months absent from the
    # selection become all-NaN columns instead of raising a length mismatch.
    month_numbers = list(range(1, 13))
    by_month = df_int.pivot_table(
        index='contract_year',
        columns='contract_month',
        values='days_to_expire',
        aggfunc='mean').reindex(columns=month_numbers)

    c = df_int['contract_year']
    y_start, y_end = c.min(), c.max()
    corr_mat = by_month.corr().values
    title = '{} ({}-{})'.format(title_prefix, y_start, y_end)
    # Label both axes with the month numbers 1-12 rather than the default 0-11.
    return Heatmap(z=corr_mat, x=month_numbers, y=month_numbers), title


# Month-vs-month correlation for every contract from 2000 onward (no upper bound).
data, title = heatmap_days_to_expire(year_start=2000)
plot([data], title=title, interactive=True)

In [258]:
# One heatmap per window: start years 1995, 2000, 2005, 2010, each covering
# contract years [start, start + 6).
data, titles = [], []
for window_start in range(1995, 2015, 5):
    heatmap, label = heatmap_days_to_expire(
        window_start, window_start + 6, title_prefix='')
    # Pin the colour scale to 0..1 so the panels are directly comparable.
    heatmap.zmin, heatmap.zmax = 0, 1
    data.append(heatmap)
    titles.append(label)

plot_subplot(
    data,
    titles=titles,
    main_title='Multiple year correlation plots',
    rows=2,
    cols=2)

This is the format of your plot grid:
[ (1,1) x1,y1 ]  [ (1,2) x2,y2 ]
[ (2,1) x3,y3 ]  [ (2,2) x4,y4 ]



This is a bit hard to read, but we can see that there are some vague correlation trends making an "X" pattern after 2005. 

In [273]:
# Rolling windows: one heatmap per start year 2003..2014, each covering
# contract years [start, start + 4).
data, titles = [], []
for window_start in range(2003, 2015):
    heatmap, label = heatmap_days_to_expire(
        window_start, window_start + 4, title_prefix='')
    # Pin the colour scale to 0..1 so the panels are directly comparable.
    heatmap.zmin, heatmap.zmax = 0, 1
    data.append(heatmap)
    titles.append(label)

# Show the number of panels, which has to fit the rows x cols grid below.
print(len(data))
plot_subplot(
    data,
    titles=titles,
    main_title='Multiple year correlation plots',
    rows=3,
    cols=4)

12
This is the format of your plot grid:
[ (1,1) x1,y1 ]    [ (1,2) x2,y2 ]    [ (1,3) x3,y3 ]    [ (1,4) x4,y4 ]  
[ (2,1) x5,y5 ]    [ (2,2) x6,y6 ]    [ (2,3) x7,y7 ]    [ (2,4) x8,y8 ]  
[ (3,1) x9,y9 ]    [ (3,2) x10,y10 ]  [ (3,3) x11,y11 ]  [ (3,4) x12,y12 ]

