In [1]:
import sys
# Put the sibling ../utils/ directory on the import path so modules stored
# there can be imported below (presumably `query` lives there - confirm).
sys.path.append('../utils/')

from query import query_dataframe_f, pprint_query, nbprint_and_query
# NOTE(review): use_latex is not referenced by any cell visible in this
# notebook - confirm it is still consumed somewhere before keeping it.
use_latex = False

# Question 2

In [None]:
# Fetch the Question 2 data through the project helper nbprint_and_query
# (presumably prints the SQL in Q2_atm.sql and returns the result as a
# DataFrame - confirm) and write it to q2_data.csv, which the next cell
# reads back.
q2_data = nbprint_and_query('Q2_atm.sql')
q2_data.to_csv('q2_data.csv')

In [None]:
import pandas as pd
from datetime import datetime, timedelta

from trading_days import TradingDay

# q2_data.csv is produced by the query cell above
# (nbprint_and_query('Q2_atm.sql') followed by to_csv('q2_data.csv')).
# NOTE(review): this comment used to reference Q2a.sql / q2a_data.csv, which
# does not match the file actually read here - confirm the provenance.

# Reload the cached query result; column 0 of the CSV is the index that
# to_csv wrote out, and the two date columns are parsed as datetimes.
q2_data = pd.read_csv('q2_data.csv',
                      parse_dates=['Date', 'Expiration'],
                      index_col=0)
# Strike rescaled by 1/1000 (presumably the source stores strike * 1000 -
# TODO confirm against the data dictionary).
q2_data['AdjStrike'] = q2_data['Strike'] / 1000.
# One calendar day before the listed Expiration (presumably the listed date
# is the day after the last trading day - TODO confirm).
q2_data['ActualExpiration'] = q2_data['Expiration'] - timedelta(days=1)

In [None]:
from datetime import datetime, timedelta
from itertools import chain
# Now, we've actually retrieved too much data.
# We first want to filter down to things that are actually +/- 10 trading days
# of an expiration. First, get all trading days, then build out all possible
# days, then filter where date is in that list of possible days.
def flatmap(f, items):
    """Map ``f`` over ``items`` and flatten the results by one level.

    ``f`` must return an iterable for every element; the elements of those
    iterables are collected, in order, into a single list.
    """
    flattened = []
    for item in items:
        flattened.extend(f(item))
    return flattened

def trading_window(date, days=10):
    """Return the trading days within ``days`` trading days of ``date``.

    Parameters
    ----------
    date : datetime-like
        Centre of the window (an expiration date in this notebook).
    days : int, default 10
        Half-width of the window, counted in ``TradingDay`` offsets.

    Returns
    -------
    list
        ``TradingDay(n) + date`` for every ``n`` in ``[-days, +days]``,
        i.e. ``2 * days + 1`` entries.

    Notes
    -----
    The previous implementation iterated ``range(-11, 11)``, which produced
    the asymmetric window -11..+10 rather than the +/- 10 trading days the
    surrounding cell describes.
    """
    return [TradingDay(offset) + date
            for offset in range(-days, days + 1)]

# Every date that falls inside the trading window of at least one expiration.
trading_days = flatmap(trading_window, q2_data['ActualExpiration'].unique())

# Keep only the rows whose quote date is one of those days, then preview them.
# NOTE(review): the later visible cells keep working on q2_data rather than
# q2_valid_data - confirm whether the filtered frame was meant to be used.
q2_valid_data = q2_data.loc[q2_data['Date'].isin(trading_days)]
q2_valid_data.head()

In [None]:
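# Now we need to build out the 'score' we will be binning on.
# The 'score' is described here as Open Interest / Avg. daily volume for
# the five days before expiration.
# NOTE(review): the score() function defined in a later cell divides Open
# Interest by the standard deviation of ClosePrice over that window instead -
# confirm which definition is intended.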
# The way we go about doing this is to first calculate for each row
# how many business days there are before expiration. We will re-use
# this data later in actually building the chart. Once this is done,
# we can define the score column in a vectorized form, which will make
# it much faster to calculate relative to the filtering we've done so far.
def bus_days_between(start, end):
    """Signed count of trading days from ``start`` up to (not including) ``end``.

    Parameters
    ----------
    start, end : datetime-like
        Must expose ``.date()`` and be comparable with ``<``; only the
        calendar dates are used for counting.

    Returns
    -------
    int
        ``0`` when both fall on the same calendar date; the number of
        ``TradingDay`` steps in the half-open interval ``[start, end)`` when
        ``start < end``; otherwise minus the number of steps in
        ``[end, start)``.

    Notes
    -----
    The half-open interval used to be requested with
    ``pd.date_range(..., closed='left')``. That keyword was removed in
    pandas 2.0, so the right endpoint is now trimmed by hand, which behaves
    the same on every pandas version.
    """
    first, last = start.date(), end.date()
    if first == last:
        return 0

    # Always count over an ascending interval and restore the sign afterwards.
    if start < end:
        lo, hi, sign = first, last, 1
    else:
        lo, hi, sign = last, first, -1

    days = pd.date_range(start=lo, end=hi, freq=TradingDay(1))
    # date_range includes ``hi`` when it lands on the offset; drop it so the
    # interval is closed on the left only.
    if len(days) and days[-1] == pd.Timestamp(hi):
        days = days[:-1]
    return sign * len(days)
    
# Row by row, the signed trading-day distance from the quote date to the
# actual expiration (positive while the quote precedes expiration).
# NOTE(review): this runs on the full q2_data, not the q2_valid_data frame
# filtered in the previous cell - confirm that is intended.
q2_data['DeltaExpiration'] = q2_data.apply(
    lambda row: bus_days_between(row['Date'], row['ActualExpiration']),
    axis=1)

In [None]:
import numpy as np

def score(group):
    """Return a copy of ``group`` with a ``Score`` column added.

    ``Score`` is each row's ``OpenInterest`` divided by the sample standard
    deviation of ``ClosePrice`` over the rows whose ``DeltaExpiration`` is
    between 1 and 5 inclusive. The divisor is a single scalar per group; it
    is NaN when fewer than two rows fall in that window, which makes every
    ``Score`` in the group NaN.

    The input frame is left untouched: pandas documents mutating the object
    passed to a ``groupby(...).apply`` function as unsupported, so a new
    frame is returned instead of assigning into ``group``.
    """
    in_window = group['DeltaExpiration'].between(1, 5)
    close_std = group.loc[in_window, 'ClosePrice'].std()
    return group.assign(Score=group['OpenInterest'] / close_std)

# Score every (Ticker, Expiration) series independently.
# group_keys=False keeps the original row index: on pandas >= 2.0 the default
# would prepend Ticker/Expiration as index levels, and a later groupby on the
# same names would then be ambiguous between index level and column.
q2_data_scored = q2_data.groupby(['Ticker', 'Expiration'],
                                 group_keys=False).apply(score)

def did_pin(group, tolerance=.125):
    """Return a copy of ``group`` with a boolean ``Pinned`` column added.

    A row counts as pinned when ``ClosePrice`` is strictly closer than
    ``tolerance`` to ``AdjStrike``.

    Parameters
    ----------
    group : pandas.DataFrame
        Needs ``ClosePrice`` and ``AdjStrike`` columns.
    tolerance : float, default 0.125
        Maximum absolute distance (exclusive) between close and strike.

    The input frame is left untouched: pandas documents mutating the object
    passed to a ``groupby(...).apply`` function as unsupported, so a new
    frame is returned instead of assigning into ``group``.
    """
    distance = (group['ClosePrice'] - group['AdjStrike']).abs()
    return group.assign(Pinned=distance < tolerance)

# Flag pinned rows per (Ticker, Expiration) series and cache the result.
# group_keys=False keeps the original row index instead of prepending the
# group keys as index levels (the pandas >= 2.0 default).
q2_pinned = q2_data_scored.groupby(['Ticker', 'Expiration'],
                                   group_keys=False).apply(did_pin)
q2_pinned.to_csv('q2_pinned.csv')

In [None]:
# Quartile cut points of Score. nanpercentile ignores NaN scores (produced
# when the divisor in score() is NaN); plain np.percentile would return NaN
# for every cut point as soon as one NaN is present, emptying every bin.
bins = np.nanpercentile(q2_pinned['Score'], [25, 50, 75])

In [None]:
# Display the first 400 rows of the scored, pin-flagged frame for inspection.
q2_pinned.head(400)

In [None]:
# Split the rows into four Score quartiles. Each interval is open on the left
# and closed on the right, so a Score exactly equal to a cut point lands in
# exactly one bin (previously values equal to bins[1] or bins[2] were dropped
# from every bin).
ppn_1 = q2_pinned[q2_pinned['Score'] <= bins[0]]
ppn_2 = q2_pinned[
    (q2_pinned['Score'] > bins[0]) & (q2_pinned['Score'] <= bins[1])
]
ppn_3 = q2_pinned[
    (q2_pinned['Score'] > bins[1]) & (q2_pinned['Score'] <= bins[2])
]
ppn_4 = q2_pinned[q2_pinned['Score'] > bins[2]]

In [None]:
%matplotlib inline
ppn_1.groupby(['DeltaExpiration']).mean().head(40)#['Pinned'].plot(kind='bar', by='DeltaExpiration')

In [None]:
# Share of pinned rows at each DeltaExpiration for the second Score quartile.
# 'Pinned' is selected before averaging so non-numeric columns never reach
# mean() (an error in pandas >= 2.0); the former `by=` keyword had no effect
# on a Series bar plot and is dropped.
ppn_2.groupby('DeltaExpiration')['Pinned'].mean().plot(kind='bar')

In [None]:
# Share of pinned rows at each DeltaExpiration for the third Score quartile.
# 'Pinned' is selected before averaging so non-numeric columns never reach
# mean() (an error in pandas >= 2.0); the former `by=` keyword had no effect
# on a Series bar plot and is dropped.
ppn_3.groupby('DeltaExpiration')['Pinned'].mean().plot(kind='bar')

In [None]:
# Share of pinned rows at each DeltaExpiration for the top Score quartile.
# 'Pinned' is selected before averaging so non-numeric columns never reach
# mean() (an error in pandas >= 2.0); the former `by=` keyword had no effect
# on a Series bar plot and is dropped.
ppn_4.groupby('DeltaExpiration')['Pinned'].mean().plot(kind='bar')

# Problem 2b.

We now repeat the same analysis for the entire marketplace (all optionable securities).

In [None]:
def score(group):
    """Return a copy of ``group`` with a ``Score2`` column added.

    ``Score2`` is each row's ``OpenInterest`` divided by the sample standard
    deviation of ``ClosePrice`` over the rows whose ``DeltaExpiration`` is
    between 1 and 5 inclusive, and then divided by that row's
    ``ImpliedVolatility``.

    NOTE: this reuses the name ``score`` from the Question 2 cell above, so
    once this cell has run, re-running the earlier groupby would add
    ``Score2`` rather than ``Score``.

    The input frame is left untouched: pandas documents mutating the object
    passed to a ``groupby(...).apply`` function as unsupported, so a new
    frame is returned instead of assigning into ``group``.
    """
    in_window = group['DeltaExpiration'].between(1, 5)
    close_std = group.loc[in_window, 'ClosePrice'].std()
    return group.assign(
        Score2=group['OpenInterest'] / close_std / group['ImpliedVolatility']
    )

# Score every (Ticker, Expiration) series of the market-wide data.
# NOTE(review): q2_atm is not defined in any cell visible in this notebook -
# confirm where it is loaded so the notebook survives Restart & Run All.
# group_keys=False keeps the original row index instead of prepending the
# group keys as index levels (the pandas >= 2.0 default).
q2_atm_scored = q2_atm.groupby(['Ticker', 'Expiration'],
                               group_keys=False).apply(score)

In [None]:
import numpy as np
# Quartile cut points of Score2. nanpercentile ignores NaN scores; plain
# np.percentile would return NaN for every cut point as soon as one NaN is
# present, emptying every bin.
bins = np.nanpercentile(q2_atm_scored['Score2'], [25, 50, 75])

In [None]:
# Split the market-wide rows into four Score2 quartiles. Every mask is now
# built from the frame being filtered (the first and last bins used to take
# their mask from q2_atm_scored, which only works if both frames share an
# identical index). Intervals are open on the left and closed on the right so
# a Score2 exactly equal to a cut point lands in exactly one bin.
# NOTE(review): q2_atm_pinned is not defined in any cell visible in this
# notebook - confirm where it is created.
ppn_1 = q2_atm_pinned[q2_atm_pinned['Score2'] <= bins[0]]
ppn_2 = q2_atm_pinned[
    (q2_atm_pinned['Score2'] > bins[0]) & (q2_atm_pinned['Score2'] <= bins[1])
]
ppn_3 = q2_atm_pinned[
    (q2_atm_pinned['Score2'] > bins[1]) & (q2_atm_pinned['Score2'] <= bins[2])
]
ppn_4 = q2_atm_pinned[q2_atm_pinned['Score2'] > bins[2]]

In [None]:
# Share of pinned rows at each DeltaExpiration for the lowest Score2 quartile.
# 'Pinned' is selected before averaging so non-numeric columns never reach
# mean() (an error in pandas >= 2.0); the former `by=` keyword had no effect
# on a Series bar plot and is dropped.
ppn_1.groupby('DeltaExpiration')['Pinned'].mean().plot(kind='bar')

In [None]:
# Share of pinned rows at each DeltaExpiration for the second Score2 quartile.
# 'Pinned' is selected before averaging so non-numeric columns never reach
# mean() (an error in pandas >= 2.0); the former `by=` keyword had no effect
# on a Series bar plot and is dropped.
ppn_2.groupby('DeltaExpiration')['Pinned'].mean().plot(kind='bar')

In [None]:
# Share of pinned rows at each DeltaExpiration for the third Score2 quartile.
# 'Pinned' is selected before averaging so non-numeric columns never reach
# mean() (an error in pandas >= 2.0); the former `by=` keyword had no effect
# on a Series bar plot and is dropped.
ppn_3.groupby('DeltaExpiration')['Pinned'].mean().plot(kind='bar')

In [None]:
# Share of pinned rows at each DeltaExpiration for the top Score2 quartile.
# 'Pinned' is selected before averaging so non-numeric columns never reach
# mean() (an error in pandas >= 2.0); the former `by=` keyword had no effect
# on a Series bar plot and is dropped.
ppn_4.groupby('DeltaExpiration')['Pinned'].mean().plot(kind='bar')