In [1]:
import sys
sys.path.append('../utils/')

from query import query_dataframe_f, pprint_query, nbprint_and_query
use_latex = False

# Question 2

In [2]:
q2a_data = nbprint_and_query('Q2a.sql')
q2a_data.to_csv('q2a_data.csv')

In [2]:
import pandas as pd

from trading_days import TradingDay

# Code run before this to retrieve data:
#q2a_data = nbprint_and_query('Q2a.sql')
#q2a_data.to_csv('q2a_data.csv')

q2a_data = pd.read_csv('q2a_data.csv',
                       parse_dates=['Date', 'Expiration'],
                       index_col=0)
q2a_data['AdjStrike'] = q2a_data['Strike'] / 1000.

In [3]:
from datetime import datetime, timedelta
from itertools import chain
# Now, we've actually retrieved too much data.
# We first want to filter down to things that are actually +/- 10 trading days
# of an expiration. First, get all trading days, then build out all possible
# days, then filter where date is in that list of possible days.
def flatmap(f, items):
    return list(chain.from_iterable(map(f, items)))

def trading_window(date):
    return [TradingDay(n) + date
            for n in range(-10, 11)]

trading_days = flatmap(trading_window, q2a_data['Expiration'].unique())

q2a_valid_data = q2a_data[q2a_data['Date'].isin(trading_days)]
q2a_valid_data.head()

Unnamed: 0,Ticker,ClosePrice,Volume,Date,Strike,OpenInterest,Expiration,AdjStrike
26,A,34.09,2676600,2007-01-05,15000,1095454,2007-01-20,15.0
27,A,34.09,2676600,2007-01-05,17500,86720,2007-01-20,17.5
28,A,34.09,2676600,2007-01-05,20000,1033137,2007-01-20,20.0
29,A,34.09,2676600,2007-01-05,22500,75416,2007-01-20,22.5
30,A,34.09,2676600,2007-01-05,25000,3206691,2007-01-20,25.0


In [4]:
# Now comes the fun part. Currently we have all strikes,
# and we essentially want to filter this down to only ATM strikes.
# Once we've accomplished that, the rest of this analysis becomes trivial
from datetime import timedelta
def find_atm_strike(df):
    # Expiration is marked as a Saturday in Ivy, so we need to do
    # df['Date'] + 1 to compensate
    prices_at_expiration = df[df['Expiration'] == df['Date'] + timedelta(days=1)]
    prices_at_expiration['Diff'] = (prices_at_expiration['AdjStrike'] -
                                    prices_at_expiration['ClosePrice']).abs()
    # And get the strike with minimum difference from close
    # We first sort ordering smallest difference first, get the first row
    # (i.e. minimum) and then its actual value
    try:
        atm_strike = (prices_at_expiration.sort_values('Diff')
                      .head(1)['AdjStrike'].values[0])
    except IndexError:
        atm_strike = 0
        
    return atm_strike
    
def filter_atm(df):
    return df[df['AdjStrike'] == find_atm_strike(df)]

q2a_groups = q2a_valid_data.groupby(['Expiration', 'Ticker'])
q2a_atm = pd.concat([filter_atm(df) for groupname, df in q2a_groups])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [6]:
q2a_atm.to_csv('q2a_atm.csv')

In [None]:
# Now we need to build out the 'score' we will be binning on.
# The 'score' is defined as Open Interest / Avg. daily volume for
# the five days before expiration.
# The way we go about doing this is to first calculate for each row
# how many business days there are before expiration. We will re-use
# this data later in actually building the chart. Once this is done,
# we can define the score column in a vectorized form, which will make
# it much faster to calculate relative to the filtering we've done so far.
def bus_days_between(start, end):
    return len(pd.date_range(start=start, end=end, freq=TradingDay(1)))

