# In [1]:
from IBPY import IB_PY 
import numpy as np
import pandas as pd
import datetime
import time
# A function that takes a ticker, an end date/time, an array of strikes and the
# IB data type to query, and calculates the expiration-day stats from the Augen book.
def expiration_stats(ticker_, date_, strikes_, ib_whatToShow_):
    """
    This function calculates the stats in the Augen book. 
    We need these stats to be able to pin down stocks that are good candidate for expiration trading.
    The function spits out a tuple of 1 * 3. 
    
    api_ : an object of api_ type to connect to IB. This should be my own api class
    ticker_ : ticker of the contract. A string object.
    date_ : the date the we are interested in. A datetime object.
    strikes_: an arraye of strikes. Sorted numpy array. 1D.
    ib_whatToShow_: what to query from ib. could be 'LAST' or 'MIDPOINT' or 'IMPLIED_VOL'. Has to be a string
    """
    api_ = IB_PY()
    cont = api_.make_contract(symbol = ticker_, secType = 'STK', currency = 'USD', exchange = 'SMART')
    api_.get_historical_data(cont, barSizeSetting_= '1 min', 
                        durationStr_= '1 D', 
                        whatToShow_= ib_whatToShow_, 
                        endDateTime_= date_, 
                        sameContract_ = True)
    df = api_.hist_data.copy(deep = True)
    start_day_ = date_ - datetime.timedelta(hours = 6.5, minutes = 1)
    df = df[pd.to_datetime(df['date']) >= start_day_]
    
    if df.shape[0] == 391:
        ind = []
        minutes_away_from_strike_helper = [] 
        for i in range(df.shape[0]):
            diffed = strikes_ - df.close.iloc[i]
            diffed_high = strikes_ - df.high.iloc[i] # only if high of the minute
            diffed_low = strikes_ - df.low.iloc[i] # and the low of the minute is more than a $1 away
            ind_candidate = np.abs(diffed).argmin()
            if diffed[ind_candidate] <= 0:
                ind_candidate += 1
            ind.append(ind_candidate)
            val = int((np.all(np.abs(diffed_low) > 1)) & (np.all(np.abs(diffed_high) > 1))) # more than a $1 away
            minutes_away_from_strike_helper.append(val)
        
        
        ind = np.array(ind)
        helper_ind = ind[1:] - ind[:-1]
        total_times_crossed_strike = int(sum(helper_ind != 0))
        
        # minutes away from a strike
        minutes_away_from_strike = np.sum(np.array(minutes_away_from_strike_helper))
        minutes_away_first_60 = np.sum(np.array(minutes_away_from_strike_helper)[:60])
        minutes_away_second_60 = np.sum(np.array(minutes_away_from_strike_helper)[60:120])
        minutes_away_third_60 = np.sum(np.array(minutes_away_from_strike_helper)[120:180])
        minutes_away_fourth_60 = np.sum(np.array(minutes_away_from_strike_helper)[180:240])
        minutes_away_fifth_60 = np.sum(np.array(minutes_away_from_strike_helper)[240:300])
        minutes_away_sixth_60 = np.sum(np.array(minutes_away_from_strike_helper)[300:360])
        minutes_away_final_30 = np.sum(np.array(minutes_away_from_strike_helper)[360:])
        
        # minutes away 10.30 to 13.30
        minutes_away_1030_130 = np.sum(np.array(minutes_away_from_strike_helper)[60:240])
        
        # minutes away 11.30 to 13.30
        minutes_away_1130_130 = np.sum(np.array(minutes_away_from_strike_helper)[120:240])
        
        # minutes away 10.30 to 16.00
        minutes_away_1030_1600 = np.sum(np.array(minutes_away_from_strike_helper)[60:])
        
        # minutes away 11.30 to 16.00
        minutes_away_1130_1600 = np.sum(np.array(minutes_away_from_strike_helper)[120:])
        
        # =================================
        # This section is about in the money minutes
        # Of course with time memory, meaning with regard to strikes that have already passes 
        # 
        
        # 1030 to 1600
        stock_high = np.max(df.high[:60]) 
        stock_low = np.min(df.low[:60])
        strikes_crossed = strikes_[np.where((strikes_ >= stock_low) & (strikes_ <= stock_high))]
        strikes_crossed = np.tile(strikes_crossed, (df.shape[0]-60, 1))
        vals_high = df.high[60:].values
        vals_low = df.low[60:].values
        vals_high.shape = (391 - 60, 1)
        vals_low.shape = (391 - 60, 1)
        distance_high_post_1030 = np.abs(strikes_crossed - vals_high)
        distance_low_post_1030 = np.abs(strikes_crossed - vals_low)
        minutes_away_high_strikes_post_1030 = np.prod(distance_high_post_1030 > 1, axis = 1)
        minutes_away_low_strikes_post_1030 = np.prod(distance_low_post_1030 > 1, axis = 1)
        minutes_away_strikes_post_1030 = \
        np.sum(minutes_away_high_strikes_post_1030 * minutes_away_low_strikes_post_1030)
        

        # 1130 to 1600
        stock_high = np.max(df.high[:120]) 
        stock_low = np.min(df.low[:120])
        strikes_crossed = strikes_[np.where((strikes_ >= stock_low) & (strikes_ <= stock_high))]
        strikes_crossed = np.tile(strikes_crossed, (df.shape[0]-120, 1))
        vals_high = df.high[120:].values
        vals_low = df.low[120:].values
        vals_high.shape = (391 - 120, 1)
        vals_low.shape = (391 - 120, 1)
        distance_high_post_1130 = np.abs(strikes_crossed - vals_high)
        distance_low_post_1130 = np.abs(strikes_crossed - vals_low)
        minutes_away_high_strikes_post_1130 = np.prod(distance_high_post_1130 > 1, axis = 1)
        minutes_away_low_strikes_post_1130 = np.prod(distance_low_post_1130 > 1, axis = 1)
        minutes_away_strikes_post_1130 = \
        np.sum(minutes_away_high_strikes_post_1130 * minutes_away_low_strikes_post_1130)

        # 1230 to 1600
        stock_high = np.max(df.high[:180]) 
        stock_low = np.min(df.low[:180])
        strikes_crossed = strikes_[np.where((strikes_ >= stock_low) & (strikes_ <= stock_high))]
        strikes_crossed = np.tile(strikes_crossed, (df.shape[0]-180, 1))
        vals_high = df.high[180:].values
        vals_low = df.low[180:].values
        vals_high.shape = (391 - 180, 1)
        vals_low.shape = (391 - 180, 1)
        distance_high_post_1230 = np.abs(strikes_crossed - vals_high)
        distance_low_post_1230 = np.abs(strikes_crossed - vals_low)
        minutes_away_high_strikes_post_1230 = np.prod(distance_high_post_1230 > 1, axis = 1)
        minutes_away_low_strikes_post_1230 = np.prod(distance_low_post_1230 > 1, axis = 1)
        minutes_away_strikes_post_1230 = \
        np.sum(minutes_away_high_strikes_post_1230 * minutes_away_low_strikes_post_1230)
        
        # 130 to 1600
        stock_high = np.max(df.high[:240]) 
        stock_low = np.min(df.low[:240])
        strikes_crossed = strikes_[np.where((strikes_ >= stock_low) & (strikes_ <= stock_high))]
        strikes_crossed = np.tile(strikes_crossed, (df.shape[0]-240, 1))
        vals_high = df.high[240:].values
        vals_low = df.low[240:].values
        vals_high.shape = (391 - 240, 1)
        vals_low.shape = (391 - 240, 1)
        distance_high_post_130 = np.abs(strikes_crossed - vals_high)
        distance_low_post_130 = np.abs(strikes_crossed - vals_low)
        minutes_away_high_strikes_post_130 = np.prod(distance_high_post_130 > 1, axis = 1)
        minutes_away_low_strikes_post_130 = np.prod(distance_low_post_130 > 1, axis = 1)
        minutes_away_strikes_post_130 = \
        np.sum(minutes_away_high_strikes_post_130 * minutes_away_low_strikes_post_130)
        
        # 230 to 1600
        stock_high = np.max(df.high[:300]) 
        stock_low = np.min(df.low[:300])
        strikes_crossed = strikes_[np.where((strikes_ >= stock_low) & (strikes_ <= stock_high))]
        strikes_crossed = np.tile(strikes_crossed, (df.shape[0]-300, 1))
        vals_high = df.high[300:].values
        vals_low = df.low[300:].values
        vals_high.shape = (391 - 300, 1)
        vals_low.shape = (391 - 300, 1)
        distance_high_post_230 = np.abs(strikes_crossed - vals_high)
        distance_low_post_230 = np.abs(strikes_crossed - vals_low)
        minutes_away_high_strikes_post_230 = np.prod(distance_high_post_230 > 1, axis = 1)
        minutes_away_low_strikes_post_230 = np.prod(distance_low_post_230 > 1, axis = 1)
        minutes_away_strikes_post_230 = \
        np.sum(minutes_away_high_strikes_post_230 * minutes_away_low_strikes_post_230)
        
        # ==============================
        # In this section, we look at the same statistics for the ATM strike
        # Not, all the strikes passed before.
        
        # =====> hourly
        closest_strike_1030 = strikes_[np.argmin(np.abs(strikes_ - df.close.iloc[60]))]
        closest_strike_1130 = strikes_[np.argmin(np.abs(strikes_ - df.close.iloc[120]))]
        closest_strike_1230 = strikes_[np.argmin(np.abs(strikes_ - df.close.iloc[180]))]
        closest_strike_130 = strikes_[np.argmin(np.abs(strikes_ - df.close.iloc[240]))]
        closest_strike_230 = strikes_[np.argmin(np.abs(strikes_ - df.close.iloc[300]))]

        minutes_away_from_atm_1030 = \
        np.sum((np.abs(df.high[60:120] - closest_strike_1030) > 1) & (np.abs(df.low[60:120] - closest_strike_1030) > 1))
        minutes_away_from_atm_1130 = \
        np.sum((np.abs(df.high[120:180] - closest_strike_1130) > 1) & (np.abs(df.low[120:180] - closest_strike_1130) > 1))
        minutes_away_from_atm_1230 = \
        np.sum((np.abs(df.high[180:240] - closest_strike_1230) > 1) & (np.abs(df.low[180:240] - closest_strike_1230) > 1))
        minutes_away_from_atm_130 = \
        np.sum((np.abs(df.high[240:300] - closest_strike_130) > 1) & (np.abs(df.low[240:300] - closest_strike_130) > 1))
        minutes_away_from_atm_230 = \
        np.sum((np.abs(df.high[300:360] - closest_strike_230) > 1) & (np.abs(df.low[300:360] - closest_strike_230) > 1))

        # =======> multiple hrs
        minutes_away_from_atm_1030_130 = \
        np.sum((np.abs(df.high[60:240] - closest_strike_1030) > 1) & (np.abs(df.low[60:240] - closest_strike_1030) > 1))
        minutes_away_from_atm_1030_1600 = \
        np.sum((np.abs(df.high[60:390] - closest_strike_1030) > 1) & (np.abs(df.low[60:390] - closest_strike_1030) > 1))
        minutes_away_from_atm_130_1600 = \
        np.sum((np.abs(df.high[240:390] - closest_strike_130) > 1) & (np.abs(df.low[240:390] - closest_strike_130) > 1))
        
        
        # strike crosses over time
        if total_times_crossed_strike != 0:
            first_60 = float(sum(helper_ind[:60] != 0))
            second_60 = float(sum(helper_ind[60:120] != 0))
            third_60 = float(sum(helper_ind[120:180] != 0))
            fourth_60 = float(sum(helper_ind[180:240] != 0))
            fifth_60 = float(sum(helper_ind[240:300] != 0))
            sixth_60 = float(sum(helper_ind[300:360] != 0))
            final_30 = float(sum(helper_ind[360:] != 0))
        else:
            first_60 = 0.0
            second_60 = 0.0
            third_60 = 0.0
            fourth_60 = 0.0
            fifth_60 = 0.0
            sixth_60 = 0.0
            final_30 = 0.0
            
        
        diffed = strikes_ - df.close[df.shape[0]-1]
        argmin = np.abs(diffed).argmin()
        how_close_to_strike_at_expiration = diffed[argmin] 
        
        last_interval = strikes_[ind_candidate] - strikes_[ind_candidate - 1]

        length_of_dataframe = df.shape[0] 
        
        high_low_over_close_percent = (df.high.max() - df.low.min())/df.close[df.shape[0]-1] * 100
        
        # unique strike crosses
        strike_crosses = len(np.unique(ind))-1 
        strike_crosses_first_60 = len(np.unique(ind[:60]))-1 
        strike_crosses_second_60 = len(np.unique(ind[60:120]))-1 
        strike_crosses_third_60 = len(np.unique(ind[120:180]))-1 
        strike_crosses_fourth_60 = len(np.unique(ind[180:240]))-1 
        strike_crosses_fifth_60 = len(np.unique(ind[240:300]))-1 
        strike_crosses_sixth_60 = len(np.unique(ind[300:360]))-1 
        strike_crosses_final_30 = len(np.unique(ind[360:]))-1 
        
        # unique strike crosses between close and 10.30/11.30/12.30/1.30/2.30
        strike_crosses_1030_close = len(np.unique(ind[60:]))-1
        strike_crosses_1130_close = len(np.unique(ind[120:]))-1
        strike_crosses_1230_close = len(np.unique(ind[180:]))-1
        strike_crosses_130_close = len(np.unique(ind[240:]))-1
        strike_crosses_230_close = len(np.unique(ind[300:]))-1
        
        # unique strike crosses from 1030/1130 to 1.30
        strike_crosses_1030_130 = len(np.unique(ind[60:240]))-1
        strike_crosses_1130_130 = len(np.unique(ind[120:240]))-1
        
        # mid-day prices
        open_price = df.open.iloc[0] 
        ten_thirty_price = df.close.iloc[60]
        eleven_thirty_price = df.close.iloc[120]
        twelve_thirty_price = df.close.iloc[180]
        one_thirty_price = df.close.iloc[240]
        two_thirty_price = df.close.iloc[300]
        close_price = df.close.iloc[df.shape[0]-1]
        
        # return from close to 10.30
        ret_close_ten_thirty_percent = float(df.close.iloc[390] - df.close.iloc[60])/df.close.iloc[60] * 100
        
        # return from close to 1.30
        ret_close_one_thirty_percent = float(df.close.iloc[390] - df.close.iloc[240])/df.close.iloc[240] * 100
        
        # Variance of return by hours
        annual_var_first_60_percent = np.var(np.log(np.array(df.close.iloc[1:60])/np.array(df.close.iloc[:59]))) * 25200 * 6.5 
        annual_var_second_60_percent = np.var(np.log(np.array(df.close.iloc[61:120])/np.array(df.close.iloc[60:119]))) * 25200 * 6.5 
        annual_var_third_60_percent = np.var(np.log(np.array(df.close.iloc[121:180])/np.array(df.close.iloc[120:179]))) * 25200 * 6.5 
        annual_var_fourth_60_percent = np.var(np.log(np.array(df.close.iloc[181:240])/np.array(df.close.iloc[180:239]))) * 25200 * 6.5 
        annual_var_fifth_60_percent = np.var(np.log(np.array(df.close.iloc[241:300])/np.array(df.close.iloc[240:299]))) * 25200 * 6.5 
        annual_var_sixth_60_percent = np.var(np.log(np.array(df.close.iloc[301:360])/np.array(df.close.iloc[300:359]))) * 25200 * 6.5 
        annual_var_final_30_percent = np.var(np.log(np.array(df.close.iloc[361:390])/np.array(df.close.iloc[360:389]))) * 25200 * 6.5 * 2 
        
        del api_
        return tuple([date_,
                      total_times_crossed_strike, 
                      first_60,
                      second_60,
                      third_60,
                      fourth_60,
                      fifth_60,
                      sixth_60,
                      final_30,
                      strike_crosses,
                      strike_crosses_first_60,
                      strike_crosses_second_60,
                      strike_crosses_third_60,
                      strike_crosses_fourth_60,
                      strike_crosses_fifth_60,
                      strike_crosses_sixth_60,
                      strike_crosses_final_30,
                      strike_crosses_1030_close,
                      strike_crosses_1130_close,
                      strike_crosses_1230_close,
                      strike_crosses_130_close,
                      strike_crosses_230_close,
                      strike_crosses_1030_130,
                      strike_crosses_1130_130,
                      minutes_away_from_strike,
                      minutes_away_first_60,
                      minutes_away_second_60,
                      minutes_away_third_60,
                      minutes_away_fourth_60,
                      minutes_away_fifth_60,
                      minutes_away_sixth_60,
                      minutes_away_final_30,
                      minutes_away_1030_130,
                      minutes_away_1130_130,
                      minutes_away_1030_1600,
                      minutes_away_1130_1600,
                      minutes_away_strikes_post_1030,
                      minutes_away_strikes_post_1130,
                      minutes_away_strikes_post_1230,
                      minutes_away_strikes_post_130,
                      minutes_away_strikes_post_230,
                      minutes_away_from_atm_1030,
                      minutes_away_from_atm_1130,
                      minutes_away_from_atm_1230,
                      minutes_away_from_atm_130,
                      minutes_away_from_atm_230,
                      minutes_away_from_atm_1030_130,
                      minutes_away_from_atm_1030_1600,
                      minutes_away_from_atm_130_1600,
                      high_low_over_close_percent,
                      how_close_to_strike_at_expiration,
                      last_interval,
                      open_price,
                      ten_thirty_price,
                      eleven_thirty_price,
                      twelve_thirty_price,
                      one_thirty_price,
                      two_thirty_price,
                      close_price,
                      ret_close_ten_thirty_percent,
                      ret_close_one_thirty_percent,
                      annual_var_first_60_percent,
                      annual_var_second_60_percent,
                      annual_var_third_60_percent,
                      annual_var_fourth_60_percent,
                      annual_var_fifth_60_percent,
                      annual_var_sixth_60_percent,
                      annual_var_final_30_percent,
                      length_of_dataframe])
    else:
        print 'DataFrame had an unusual size'
        return tuple(np.repeat(np.NaN, 69))
    
    
# A function to get the implied vol every minute
def get_ivol_minutebar_from_ib(ticker_, date_, how_many_weeks_):
    """
    The function returns a dictionary of specified dates as keys and a dataframe of minutes and clsing implied vol as 
    values. There is a few things to notice here. 
    1) IB calculated the implied vol. This is tricky. 
    2) We can go far back as many weeks as we want.
    
    -----
    ticker_: A ticker for the stock we are considering. Has to be a string.
    date_: the last friday which we want to consider. A datetime object.
    how_many_weeks_: how many weeks we want to go back. An integer
    
    """
    api_ = IB_PY()
    cont = api_.make_contract(symbol = ticker_, secType = 'STK', currency = 'USD', exchange = 'SMART')
    out = dict()
    for i in range(how_many_weeks_):
        api_.get_historical_data(cont, barSizeSetting_= '1 min', 
                            durationStr_= '1 D', 
                            whatToShow_= 'OPTION_IMPLIED_VOLATILITY', 
                            endDateTime_= date_, 
                            sameContract_ = True)
        df = api_.hist_data.copy(deep = True)
        start_day_ = date_ - datetime.timedelta(hours = 6.5, minutes = 1)
        df = df[df['date'] >= start_day_]
        out_df = pd.DataFrame(columns = ['close'])
        out_df['close'] = df.loc[:,'close'].copy(deep = True)
        out_df.index = df.date.apply(lambda x: str(x)[11:16])
        out_df.index.names = ['time']
        out[date_] = out_df
        date_ = date_ - datetime.timedelta(days = 7)
        
    return out
# A function to get the price every minute
def get_prices_minutebar_from_ib(ticker_, date_, how_many_weeks_, whatToShow_):
    """
    The function returns a dictionary of specified dates as keys and a dataframe of minutes and specified prices as 
    values. There is a few things to notice here.     
    -----
    ticker_: A ticker for the stock we are considering. Has to be a string.
    date_: the last friday which we want to consider. A datetime object.
    how_many_weeks_: how many weeks we want to go back. An integer
    whatToShow_: should be 'MIDPOINT'/'BID'/'ASK'/'TRADES'/'BID_ASK'. A string. 
    
    """
    api_ = IB_PY()
    cont = api_.make_contract(symbol = ticker_, secType = 'STK', currency = 'USD', exchange = 'SMART')
    out = dict()
    for i in range(how_many_weeks_):
        api_.get_historical_data(cont, barSizeSetting_= '1 min', 
                            durationStr_= '1 D', 
                            whatToShow_= whatToShow_, 
                            endDateTime_= date_, 
                            sameContract_ = True)
        df = api_.hist_data.copy(deep = True)
        start_day_ = date_ - datetime.timedelta(hours = 6.5, minutes = 1)
        df = df[df['date'] >= start_day_]
        out_df = pd.DataFrame(columns = ['close'])
        out_df['close'] = df.loc[:,'close'].copy(deep = True)
        out_df.index = df.date.apply(lambda x: str(x)[11:16])
        out_df.index.names = ['time']
        out[date_] = out_df
        date_ = date_ - datetime.timedelta(days = 7)
        
    return out 

#=============================
def iter_expiration_stats(ticker, end_date, strikes, weeks, ib_whatToShow):
    """
    This function facilitates the data aggregation from ib. Loops on all the dates
    
    ticker: ticker of the stock. string. 
    end_date: a datetime object. last day we are interested in.
    strikes: strikes related to the stock. A numpy array.
    weeks: an integer. how many weeks we are going back.
    ib_whatToShow: a string. what do we what from ib. 
    
    """
    stats = np.empty((0, 69))
    for i in range(weeks):
        res = expiration_stats(ticker_ = ticker, 
                               date_ = end_date, 
                               strikes_ = strikes,
                               ib_whatToShow_ = ib_whatToShow)
        stats = np.vstack((stats, np.array(res)))
        end_date = end_date - datetime.timedelta(days = 7)
        time.sleep(1)
    # converting it to a dataframe
    df_stats = pd.DataFrame(data = stats[:,1:], 
                            columns = ['total_strike_crosses', 'first_60', 'second_60', 'third_60', 'fourth_60', \
                                    'fifth_60', 'sixth_60', 'final_30', \
                                    'unique_strike_crosses', 'unique_first_60', 'unique_second_60', 'unique_third_60', \
                                    'unique_fourth_60', 'unique_fifth_60', 'unique_sixth_60', 'unique_final_30', \
                                    'unique_strike_1030_close', 'unique_strike_1130_close', \
                                    'unique_strike_1230_close', 'unique_strike_130_close', 'unique_strike_230_close',\
                                    'unique_strike_1030_130', 'unique_strike_1130_130',\
                                    'minutes_one_dollar_away_from_strike', \
                                    'minutes_away_first_60', 'minutes_away_second_60', 'minutes_away_third_60', \
                                    'minutes_away_fourth_60', 'minutes_away_fifth_60', 'minutes_away_sixth_60', \
                                    'minutes_away_final_30', \
                                    'minutes_away_1030_130', 'minutes_away_1130_130', 'minutes_away_1030_1600', \
                                    'minutes_away_1130_1600', \
                                    'minutes_away_post_1030', 'minutes_away_post_1130', 'minutes_away_post_1230', \
                                    'minutes_away_post_130', 'minutes_away_post_230', \
                                    'minutes_away_atm_1030', 'minutes_away_atm_1130', 'minutes_away_atm_1230', \
                                    'minutes_away_atm_130', 'minutes_away_atm_230', 'minutes_away_atm_1030_130', \
                                    'minutes_away_atm_1030_1600', 'minutes_away_atm_130_1600', \
                                    'high_low_over_close_percent', 'distance_strike_at_expiration', 'last_gap_between_strikes',
                                    'open_price', 'ten_thirty_price', 'eleven_thirty_price', \
                                    'twelve_thirty_price', 'one_thirty_price', 'two_thirty_price', 'close_price', \
                                    'ret_1030_close_percent', 'ret_130_close_percent', \
                                    'annual_var_first_60', 'annual_var_second_60', 'annual_var_third_60', \
                                    'annual_var_fourth_60', 'annual_var_fifth_60', 'annual_var_sixth_60', \
                                    'annual_var_final_30',
                                    'total_number_minutes'],
                           index = stats[:,0],
                            dtype = np.float)
    df_stats.sort_index(axis=0, inplace=True)
    df_stats.index = pd.to_datetime(df_stats.index)
    df_stats = df_stats.loc[pd.notnull(df_stats.index),:]
    return df_stats