## This notebook is being used to create different indicator columns to add to the daily_price dataframes

In [1]:
import requests
import pandas as pd
import numpy as np
import json
import datetime
import quandl
import matplotlib.pyplot as plt
import sqlite3

% matplotlib inline

In [2]:
def create_df_crypto(symbol, curr='USD', limit=2000):
    ''' This function takes in a symbol of a cryptocurrency to be
        used with the Cryptocompare API, and returns a formatted dataframe
        for later processing.

        Args: symbol - cryptocurrency symbol
              curr - currency to report in (default USD)
              limit - max number of data points (default 2500)

        Return: df - dataframe of daily price info for symbol
    '''
    # Set url and params for the call to Cryptocompare API
    url = 'https://min-api.cryptocompare.com/data/histoday'
    params = {'fsym': symbol, 'tsym': curr, 'limit': limit}

    # Call API for symbol and put data into pandas dataframe
    response = requests.get(url, params=params)
    data = response.json()['Data']
    df = pd.DataFrame(data)

    # Add date column and set to index
    df['Date'] =[datetime.date.fromtimestamp(d) for d in df.time]
    df = df[['open', 'high', 'low', 'close', 'volumeto', 'Date']]
    df.set_index('Date', inplace=True)

    # Rename volumeto column
    df.rename(columns={'volumeto': 'volume'}, inplace=True)

    return df

def create_df_quandl(symbol, api_key):
    ''' This function takes in a symbol of a futures contract to be used
        with the Quandl API as well as the API key, and returns a formatted
        dataframe for processing.

        Args: symbol - a symbol for a continuous futures contract
              api_key - Quandl API key

        Return: df - dataframe for daily price info for symbol
    '''
    # Quandl API call which puts price data into dataframe
    df = quandl.get('SCF/CME_{s}1_FW'.format(s=symbol), authtoken=api_key)

    # Drop open interest column and rename Settle column
    df.drop(['Prev. Day Open Interest'], axis=1, inplace=True)
    df.rename(columns={'Open': 'open',
                      'High': 'high',
                      'Low': 'low',
                      'Settle': 'close',
                      'Volume': 'volume'}, inplace=True)
    return df

def clean_df_crypto(df, volume_thresh=1000000):
    ''' This function takes in a dataframe and a volume threshold and returns a filtered
        dataframe from the first data point that achieves the threshold.  This is written
        to be used specifically with the cryptocurrency dataframes.

        Args: df - dataframe to be filtered on volume
              volume_thresh - min volume to reach before using data in the dataframe

        Return df - the filtered dataframe with only points after the volume threshold is hit
    '''
    # Find start_date and filter df
    start_date = df.index[df.volume > volume_thresh].tolist()[0]
    df = df[start_date:]

    return df

def replace_df_zeros(df):
    ''' This function takes in a dataframe of price information, finds all zero values
        for the 'volume' column and replaces them with the mean volume of the dataset.

        Args: df - dataframe of price information

        Return: df - cleaned dataframe with no zero volume entries
    '''
    df['volume'].replace(0.0, df['volume'].mean(), inplace=True)

    return df

def check_outliers(df):
    ''' This function finds all closing price points that are more than 3 stds away from
        the mean and plots them on a line graph of all the data.  This can be used to see
        if these points are truly outliers.

        Args: df - dataframe to be checked for outliers

        Return: None - shows a graph of the price data series with annotations for outliers
    '''
    # Use a copy of the dataframe
    cpy = df.copy()
    # Create range of values that are more than 3 stds away from mean
    cpy['stds_from_mean'] = ((cpy['close'] - cpy['close'].mean()).apply(abs)) / cpy['close'].std()
    locs_gt_3std = [cpy.index.get_loc(x) for x in cpy.index[cpy['stds_from_mean'] > 3.0]]

    # Plot the price data, highlighting the outliers
    plt.figure(figsize=(15,10))
    plt.plot(cpy.index, cpy.close, linestyle='solid', markevery=locs_gt_3std,
                marker='o', markerfacecolor='r', label='Outliers')

    # Apply title, legend and labels
    plt.title('Closing Prices')
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.legend()
    plt.grid()

    plt.show()

    # Print out description
    print('Number of data points: {}'.format(len(cpy.index)))
    print('Number of outliers: {}'.format(len(locs_gt_3std)))

def generate_df_dict(product_dict, api_key=None):
    ''' This function takes in a dict of product symbols mapped to
        information about the product  and a Quandl API key and returns
        a dict object with the symbols as keys and a dataframe of price
        info as values.

        Args: product_dict - a dict of symbols for products with maps to
                             a list of info
              api_key - Quandl API key

        Return: df_dict - a dictionary of symbols mapped to dataframes
                          of price info
    '''
    df_dict = {}

    # Iterate through list
    for product, info in product_dict.items():
        # Determine what dataframe creator to use
        if info[0] == 1:
            df = create_df_crypto(product)
            df = clean_df_crypto(df)
            df = replace_df_zeros(df)
        if info[0] == 2:
            df = create_df_quandl(product, api_key)
            df = replace_df_zeros(df)

        df_dict[product] = df

    return df_dict

def insert_symbols_table(product_dict, sqlite_file, table_name='Symbols'):
    ''' This function takes in a dict of product symbols mapped to
        information about the product.  It also takes in a sqlite file and then
        uses the info to insert all symbols in the dict into the Symbols
        table of the database.

        Args: product_dict - a dict of symbols for products with maps to
                             a list of info
              sqlite_file - file for the database to write to
              table_name - default to 'Symbols' for this function

        Return: None - nothing explicit but inserts info into the database
    '''
    # Create the column name list for database insertion
    cols = ['data_id', 'symbol', 'name', 'sector', 'exchange']

    # Open a connection to the database
    conn = sqlite3.connect(sqlite_file)
    c = conn.cursor()

    # Iterate through all symbols of product_dict
    for symbol, s_info in product_dict.items():
        # Set params and insert row into database
        params = (s_info[0], symbol, s_info[1], s_info[2], s_info[3])
        c.execute("INSERT INTO {tn} ({c0}, {c1}, {c2}, {c3}, {c4}) VALUES (?, ?, ?, ?, ?)"\
            .format(tn=table_name, c0=cols[0], c1=cols[1], c2=cols[2],\
            c3=cols[3], c4=cols[4]), params)

    # Close connection to database
    conn.commit()
    conn.close()

def insert_daily_prices_table(product_dict, df_dict, sqlite_file, table_name='Daily_Prices'):
    ''' This function takes in a 2 dicts, one with product keys mapping
        to info about the product and the other with product keys mapping
        to a dataframe a daily price information.  It also takes in a sqlite
        file and then uses the info to insert all rows into the Daily_Prices
        table of the database.

        Args: product_dict - a dict of symbols for products with maps to
                             a list of info
              df_dict - dict of dataframes with futures symbols and price data
              sqlite_file - file for the database to write to
              table_name - default to 'Daily_Prices' for this function

        Return: None - nothing explicit but inserts info into the database
    '''
    # Create the column name list for database insertion
    cols = ['data_id', 'symbol', 'date', 'open', 'high', 'low', 'close', 'volume']

    # Open a connection to the database
    conn = sqlite3.connect(sqlite_file)
    c = conn.cursor()

    # Iterate through all symbols and then the dataframe to get all price data
    for symbol, df in df_dict.items():
        data_id = product_dict[symbol][0]
        for i, row in df.iterrows():
            date = i.strftime('%Y-%m-%d')
            # Set params and insert row into database
            params = (data_id, symbol, date, row.open, row.high, row.low, row.close, row.volume)
            c.execute("INSERT INTO {tn} ({c0}, {c1}, {c2}, {c3}, {c4}, {c5}, {c6}, {c7}) VALUES (?, ?, ?, ?, ?, ?, ?, ?)"\
                .format(tn=table_name, c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4],\
                c5=cols[5], c6=cols[6], c7=cols[7]), params)

    # Close connection to database
    conn.commit()
    conn.close()

In [3]:
# Setting up the products, database and api key
# Sqlite3 db info and Quandl key
sqlite_file = 'securities_master_db.sqlite'
api_key = 'Hv95pPh1xQWzt5DFhxS7'

# Dict of all products with maps to data_id, name, sector, and exchange
products = {'CL': [2, 'Crude', 'Energy', 'CME'],
                'HO': [2, 'HeatOil', 'Energy', 'CME'],
                'NG': [2, 'NatGas', 'Energy', 'CME'],
                'GC': [2, 'Gold', 'Metals', 'CME'],
                'SI': [2, 'Silver', 'Metals', 'CME'],
                'AD': [2, 'Aussie', 'Forex', 'CME'],
                'CD': [2, 'Canadien', 'Forex', 'CME'],
                'EC': [2, 'Euro', 'Forex', 'CME'],
                'BP': [2, 'Pound', 'Forex', 'CME'],
                'JY': [2, 'Yen', 'Forex', 'CME'],
                'US': [2, '30-yr', 'Treasuries', 'CME'],
                'C': [2, 'Corn', 'Grains', 'CME'],
                'W': [2, 'Wheat', 'Grains', 'CME'],
                'S': [2, 'Soybeans', 'Grains', 'CME'],
                'ES': [2, 'E-mini', 'Indexes', 'CME'],
                'BTC': [1, 'Bitcoin', 'Cryptocurrency', 'CCAgg'],
                'ETH': [1, 'Ethereum', 'Cryptocurrency', 'CCAgg'],
                'XRP': [1, 'Ripple', 'Cryptocurrency', 'CCAgg'],
                'BCH': [1, 'BitcoinCash', 'Cryptocurrency', 'CCAgg'],
                'LTC': [1, 'Litecoin', 'Cryptocurrency', 'CCAgg'],
                'ADA': [1, 'Cardano', 'Cryptocurrency', 'CCAgg'],
                'NEO': [1, 'Neo', 'Cryptocurrency', 'CCAgg'],
                'XLM': [1, 'Stellar', 'Cryptocurrency', 'CCAgg'],
                'EOS': [1, 'EOS', 'Cryptocurrency', 'CCAgg'],
                'XMR': [1, 'Monero', 'Cryptocurrency', 'CCAgg'],}

In [4]:
df_dict = generate_df_dict(products, api_key)

In [None]:
cl_df = df_dict['CL']
check_outliers(cl_df)

In [None]:
cl_df['20day_ave_vol'] = cl_df.volume.rolling(window=20, center=False).mean().shift(1)
cl_df.tail()

In [None]:
def vol_bo_long(row):
    ''' This is a helper function to determine if a volume breakout long signal has occured.
        
        Args: row - row of a dataframe to use apply function with
        
        Return: 1 if signal occured, 0 if not
    '''
    # Check if today's volume is greater than 200% of 20-day average volume
    if row['volume'] > (2 * row['20day_ave_vol']):
        # Check if close is above prev day close
        if row['close'] > row['high'].shift(1):
            return 1
    else:
        return 0

In [None]:
cl_df['close'] - cl_df['high'].shift()

In [None]:
cl_df.tail()

In [None]:
def vol_bo(row):
    if row['volume'] > (2 * row['20day_ave_vol']):
        return 1
    else:
        return 0
    
cl_df['vol_bo'] = cl_df.apply(vol_bo, axis=1)

In [None]:
cl_df.tail()

In [None]:
cl_df['close_gt_prev_h'] = cl_df['close'] - cl_df['high'].shift()

In [None]:
cl_df['close_lt_prev_l'] = cl_df['close'] - cl_df['low'].shift()

In [None]:
cl_df.tail()

In [None]:
def vol_bo_long(row):
    if (row['vol_bo'] == 1) and (row['close_gt_prev_h'] > 0.0):
        return 1
    else:
        return 0
    
cl_df['vol_bo_long'] = cl_df.apply(vol_bo_long, axis=1)

In [None]:
cl_df.tail(20)

In [None]:
def vol_bo_short(row):
    if (row['vol_bo'] == 1) and (row['close_lt_prev_l'] < 0.0):
        return 1
    else:
        return 0

In [None]:
cl_df['vol_bo_short'] = cl_df.apply(vol_bo_short, axis=1)

In [None]:
cl_df['20day_high'] = cl_df.high.rolling(window=20, center=False).max().shift(1)
cl_df.tail(100)

In [None]:
cl_df['20day_low'] = cl_df.low.rolling(window=20, center=False).min().shift(1)
cl_df.tail()

In [None]:
def range_bo_long(row):
    if row['high'] > row['20day_high']:
        return 1
    else:
        return 0
    
def range_bo_short(row):
    if row['low'] < row['20day_low']:
        return 1
    else:
        return 0

In [None]:
cl_df.tail()

In [None]:
cl_df['range_bo_long'] = cl_df.apply(range_bo_long, axis=1)
cl_df['range_bo_short'] = cl_df.apply(range_bo_short, axis=1)

In [None]:
cl_df.tail(100)

In [None]:
longs = cl_df[cl_df['vol_bo_short'] == 1]
longs

In [None]:
cl_df = cl_df[['open', 'high', 'low', 'close', 'volume', 'stds_from_mean', '20day_ave_vol', 'vol_bo', 'close_gt_prev_h', 'close_lt_prev_l', 'vol_bo_long', 'vol_bo_short', '20day_high', '20day_low', 'range_bo_long', 'range_bo_short']]

In [None]:
cl_df.tail()

In [None]:
cl_df['ma20'] = cl_df['close'].rolling(window=20, center=False).mean()
cl_df['ma50'] = cl_df['close'].rolling(window=50, center=False).mean()
cl_df['ma100'] = cl_df['close'].rolling(window=100, center=False).mean()
cl_df.tail()

In [None]:
def ma_signal(row, ma=20, direction='long'):
    if direction == 'long':
        if row['close'] > row['ma{}'.format(ma)]:
            return 1
        else:
            return 0
    if direction == 'short':
        if row['close'] < row['ma{}'.format(ma)]:
            return 1
        else:
            return 0

In [None]:
cl_df['ma20_long'] = cl_df.apply(lambda row: ma_signal(row), axis=1)
cl_df.tail(44)

In [None]:
cl_df['ma20_short'] = cl_df.apply(lambda row: ma_signal(row, direction='short'), axis=1)
cl_df.tail(44)

In [None]:
cl_df['ma50_long'] = cl_df.apply(lambda row: ma_signal(row, ma=50), axis=1)
cl_df['ma50_short'] = cl_df.apply(lambda row: ma_signal(row, ma=50, direction='short'), axis=1)
cl_df['ma100_long'] = cl_df.apply(lambda row: ma_signal(row, ma=100), axis=1)
cl_df['ma100_short'] = cl_df.apply(lambda row: ma_signal(row, ma=100, direction='short'), axis=1)
cl_df.tail(44)

In [None]:
cl_df['bb_high'] = cl_df['ma20'] + (2 * cl_df['close'].rolling(window=20, center=False).std())
cl_df['bb_low'] = cl_df['ma20'] - (2 * cl_df['close'].rolling(window=20, center=False).std())
cl_df.tail()

In [None]:
def bb_trade(row, direction='long'):
    if direction == 'long':
        if row['low'] < row['bb_low']:
            return 1
        else:
            return 0
    if direction == 'short':
        if row['high'] > row['bb_high']:
            return 1
        else:
            return 0
        
cl_df['bb_long'] = cl_df.apply(lambda row: bb_trade(row), axis=1)
cl_df['bb_short'] = cl_df.apply(lambda row: bb_trade(row, direction='short'), axis=1)
cl_df.tail(34)

In [None]:
cl_df['pct_change_1day'] = cl_df['close'].pct_change()
cl_df.head()

In [None]:
cl_df['pct_change_5day'] = cl_df['close'].pct_change(periods=5)
cl_df['pct_change_10day'] = cl_df['close'].pct_change(periods=10)
cl_df['pct_change_20day'] = cl_df['close'].pct_change(periods=20)
cl_df.tail()

In [None]:
ave_bb_long_5day_return = cl_df[cl_df['bb_long'] == 1]['pct_change_5day'].mean()
ave_bb_long_5day_return

In [None]:
ave_bb_short_5day_return = cl_df[cl_df['bb_short'] == 1]['pct_change_5day'].mean()
ave_bb_short_5day_return

In [None]:
# TEST OF ALL THE CODE TOGETHER
import pandas as pd
import numpy as np

def vol_bo(row, direction):
    ''' This is a helper function to use in volume breakout column creation.  It takes
        in a row of a dataframe and a direction of trade, and returns 1 if a breakout
        has occured, 0 if not.

        Args: row - row of a dataframe
              direction - 'long' or 'short'

        Return: 1 if volume breakout occured, 0 if not
    '''
    if row['20day_ave_vol']:
        
        if row['volume'] > (2 * row['20day_ave_vol']):
            if (direction == 'long') and (row['close_gt_prev_h'] > 0.0):
                return 1
            elif (direction == 'short') and (row['close_lt_prev_l'] < 0.0):
                return 1
            else:
                return 0
    else:
        return 0

def range_bo(row, direction):
    ''' This is a helper function to use in range breakout column creation.  It takes
        in a row of a dataframe and a direction of trade, and returns 1 if a breakout
        has occured, 0 if not.

        Args: row - row of a dataframe
              direction - 'long' or 'short'

        Return: 1 if volume breakout occured, 0 if not
    '''
    if direction == 'long':
        if row['high'] > row['20day_high']:
            return 1
        else:
            return 0
    if direction == 'short':
        if row['low'] < row['20day_low']:
            return 1
        else:
            return 0

def ma_signal(row, ma, direction):
    ''' This is a helper function to use in all moving average columns creation.  It takes
        in a row of a dataframe, a length of time and a direction of trade, and returns
        1 if a breakout has occured, 0 if not.

        Args: row - row of a dataframe
              ma - length of time for moving average
              direction - 'long' or 'short'

        Return: 1 if volume breakout occured, 0 if not
    '''
    if direction == 'long':
        if row['close'] > row['ma{}'.format(ma)]:
            return 1
        else:
            return 0
    if direction == 'short':
        if row['close'] < row['ma{}'.format(ma)]:
            return 1
        else:
            return 0

def bb_signal(row, direction):
    ''' This is a helper function to use in all bollinger band column creation.  It takes
        in a row of a dataframe and a direction of trade, and returns 1 if a breakout
        has occured, 0 if not.

        Args: row - row of a dataframe
              direction - 'long' or 'short'

        Return: 1 if volume breakout occured, 0 if not
    '''
    if direction == 'long':
        if row['low'] < row['bb_low']:
            return 1
        else:
            return 0
    if direction == 'short':
        if row['high'] > row['bb_high']:
            return 1
        else:
            return 0

def add_all_indicators(df):
    ''' This function takes in a cleaned dataframe of price information and uses
        the helper functions to add all relevant indicators as columns
        to the dataframe.

        Args: df - cleaned dataframe of price information

        Return: df - cleaned dataframe with added columns for all indicators
    '''
    # All columns for 20day volume breakout indicator
    df['20day_ave_vol'] = df.volume.rolling(window=20, center=False).mean().shift(1)
    df['close_gt_prev_h'] = df['close'] - df['high'].shift(1)
    df['close_lt_prev_l'] = df['close'] - df['low'].shift(1)
    df['vol_bo_long'] = df.apply(lambda row: vol_bo(row, direction='long'), axis=1)
    df['vol_bo_short'] = df.apply(lambda row: vol_bo(row, direction='short'), axis=1)

    # All columns for 20day range breakout indicator
    df['20day_high'] = df.high.rolling(window=20, center=False).max().shift(1)
    df['20day_low'] = df.low.rolling(window=20, center=False).min().shift(1)
    df['range_bo_long'] = df.apply(lambda row: range_bo(row, direction='long'), axis=1)
    df['range_bo_short'] = df.apply(lambda row: range_bo(row, direction='short'), axis=1)

    # All columns for moving average indicators
    df['ma20'] = df['close'].rolling(window=20, center=False).mean()
    df['ma50'] = df['close'].rolling(window=50, center=False).mean()
    df['ma100'] = df['close'].rolling(window=100, center=False).mean()
    df['ma20_long'] = df.apply(lambda row: ma_signal(row, ma=20, direction='long'), axis=1)
    df['ma20_short'] = df.apply(lambda row: ma_signal(row, ma=20, direction='short'), axis=1)
    df['ma50_long'] = df.apply(lambda row: ma_signal(row, ma=50, direction='long'), axis=1)
    df['ma50_short'] = df.apply(lambda row: ma_signal(row, ma=50, direction='short'), axis=1)
    df['ma100_long'] = df.apply(lambda row: ma_signal(row, ma=100, direction='long'), axis=1)
    df['ma100_short'] = df.apply(lambda row: ma_signal(row, ma=100, direction='short'), axis=1)

    # All columns for bollinger band indicators
    df['bb_high'] = df['ma20'] + (2 * df['close'].rolling(window=20, center=False).std())
    df['bb_low'] = df['ma20'] - (2 * df['close'].rolling(window=20, center=False).std())
    df['bb_long'] = df.apply(lambda row: bb_signal(row, direction='long'), axis=1)
    df['bb_short'] = df.apply(lambda row: bb_signal(row, direction='short'), axis=1)

    # All columns for percentage change for timeframe into the future
    df['pct_change_1day'] = df['close'].pct_change()
    df['pct_change_5day'] = df['close'].pct_change(periods=5)
    df['pct_change_10day'] = df['close'].pct_change(periods=10)
    df['pct_change_20day'] = df['close'].pct_change(periods=20)
    
    return df

In [None]:
df_dict

In [None]:
corn_df = df_dict['C']
corn_df.tail(50)

In [None]:
corn_df = add_all_indicators(corn_df)


In [None]:
corn_df.describe()

In [None]:
df_dict['W']

In [None]:
wheat_df['volume'].replace(68.0, int(wheat_df['volume'].mean()), inplace=True)

In [None]:
bean_df = df_dict['S']
bean_df.info()

In [None]:
bean_ind= add_all_indicators(bean_df)

In [None]:
bean_ind.info()

In [None]:
bean_ind['vol_bo_short'] = bean_ind['vol_bo_short'].fillna(0.0)

In [None]:
bean_ind.loc[bean_ind['volume'] == 0.0].index


In [None]:
bean_ind.loc['2007-04-05']

In [None]:
wheat_indicators['range_bo_short'].value_counts()

In [None]:
bean_ind['volume'].replace(30.0, bean_ind['20day_ave_vol'], inplace=True)

In [None]:
bean_ind.describe()

In [None]:
bean_ind[bean_ind['volume'] < 1000.0]

In [None]:
def replace_df_zeros(df):
    ''' This function takes in a dataframe of price information, finds all zero values
        for the 'volume' column and replaces them with the mean volume of the dataset.

        Args: df - dataframe of price information

        Return: df - cleaned dataframe with no zero volume entries
    '''
    df['volume'].replace(0.0, df['volume'].mean(), inplace=True)

    return df

In [None]:
silver = df_dict['SI']

In [None]:
silver[silver['volume'] == 0.0]

In [None]:
silver = replace_df_zeros(silver)
silver[silver['volume'] == 0.0]

In [None]:
silver.loc['2005-12-05']

In [None]:
c = df_dict['C']
c.head()

In [None]:
import sqlite3

sqlite_file = 'securities_master_db.sqlite'

conn = sqlite3.connect(sqlite_file)
c = conn.cursor()

c.execute("DELETE FROM Symbols")

conn.commit()
conn.close()


In [None]:
crude = df_dict['CL']
crude.head()

In [None]:
crude[crude['volume'] == 0.0]

In [None]:
for name, df in df_dict.items():
    print(name)
    print(df.head())

In [9]:
def vol_bo(row, direction):
    ''' This is a helper function to use in volume breakout column creation.  It takes
        in a row of a dataframe and a direction of trade, and returns 1 if a breakout
        has occured, 0 if not.

        Args: row - row of a dataframe
              direction - 'long' or 'short'

        Return: 1 if volume breakout occured, 0 if not
    '''
    if row['volume'] > (2 * row['20day_ave_vol']):
        if (direction == 'long') and (row['close_gt_prev_h'] > 0.0):
            return 1
        elif (direction == 'short') and (row['close_lt_prev_l'] < 0.0):
            return 1
        else:
            return 0

def range_bo(row, direction):
    ''' This is a helper function to use in range breakout column creation.  It takes
        in a row of a dataframe and a direction of trade, and returns 1 if a breakout
        has occured, 0 if not.

        Args: row - row of a dataframe
              direction - 'long' or 'short'

        Return: 1 if volume breakout occured, 0 if not
    '''
    if direction == 'long':
        if row['high'] > row['20day_high']:
            return 1
        else:
            return 0
    if direction == 'short':
        if row['low'] < row['20day_low']:
            return 1
        else:
            return 0

def ma_signal(row, ma, direction):
    ''' This is a helper function to use in all moving average columns creation.  It takes
        in a row of a dataframe, a length of time and a direction of trade, and returns
        1 if a breakout has occured, 0 if not.

        Args: row - row of a dataframe
              ma - length of time for moving average
              direction - 'long' or 'short'

        Return: 1 if volume breakout occured, 0 if not
    '''
    if direction == 'long':
        if row['close'] > row['ma{}'.format(ma)]:
            return 1
        else:
            return 0
    if direction == 'short':
        if row['close'] < row['ma{}'.format(ma)]:
            return 1
        else:
            return 0

def bb_signal(row, direction):
    ''' This is a helper function to use in all bollinger band column creation.  It takes
        in a row of a dataframe and a direction of trade, and returns 1 if a breakout
        has occured, 0 if not.

        Args: row - row of a dataframe
              direction - 'long' or 'short'

        Return: 1 if volume breakout occured, 0 if not
    '''
    if direction == 'long':
        if row['low'] < row['bb_low']:
            return 1
        else:
            return 0
    if direction == 'short':
        if row['high'] > row['bb_high']:
            return 1
        else:
            return 0

def add_all_indicators(df):
    ''' This function takes in a cleaned dataframe of price information and uses
        the helper functions to add all relevant indicators as columns
        to the dataframe.

        Args: df - cleaned dataframe of price information

        Return: df - cleaned dataframe with added columns for all indicators
    '''
    # All columns for 20day volume breakout indicator
    df['20day_ave_vol'] = df.volume.rolling(window=20, center=False).mean().shift(1)
    df['close_gt_prev_h'] = df['close'] - df['high'].shift(1)
    df['close_lt_prev_l'] = df['close'] - df['low'].shift(1)
    df['vol_bo_long'] = df.apply(lambda row: vol_bo(row, direction='long'), axis=1)
    df['vol_bo_short'] = df.apply(lambda row: vol_bo(row, direction='short'), axis=1)
    
    df['vol_bo_long'].fillna(value=0, inplace=True)
    df['vol_bo_short'].fillna(value=0, inplace=True)

    # All columns for 20day range breakout indicator
    df['20day_high'] = df.high.rolling(window=20, center=False).max().shift(1)
    df['20day_low'] = df.low.rolling(window=20, center=False).min().shift(1)
    df['range_bo_long'] = df.apply(lambda row: range_bo(row, direction='long'), axis=1)
    df['range_bo_short'] = df.apply(lambda row: range_bo(row, direction='short'), axis=1)

    # All columns for moving average indicators
    df['ma20'] = df['close'].rolling(window=20, center=False).mean()
    df['ma50'] = df['close'].rolling(window=50, center=False).mean()
    df['ma100'] = df['close'].rolling(window=100, center=False).mean()
    df['ma20_long'] = df.apply(lambda row: ma_signal(row, ma=20, direction='long'), axis=1)
    df['ma20_short'] = df.apply(lambda row: ma_signal(row, ma=20, direction='short'), axis=1)
    df['ma50_long'] = df.apply(lambda row: ma_signal(row, ma=50, direction='long'), axis=1)
    df['ma50_short'] = df.apply(lambda row: ma_signal(row, ma=50, direction='short'), axis=1)
    df['ma100_long'] = df.apply(lambda row: ma_signal(row, ma=100, direction='long'), axis=1)
    df['ma100_short'] = df.apply(lambda row: ma_signal(row, ma=100, direction='short'), axis=1)

    # All columns for bollinger band indicators
    df['bb_high'] = df['ma20'] + (2 * df['close'].rolling(window=20, center=False).std())
    df['bb_low'] = df['ma20'] - (2 * df['close'].rolling(window=20, center=False).std())
    df['bb_long'] = df.apply(lambda row: bb_signal(row, direction='long'), axis=1)
    df['bb_short'] = df.apply(lambda row: bb_signal(row, direction='short'), axis=1)

    # All columns for percentage change for timeframe into the future
    df['pct_change_1day'] = df['close'].pct_change()
    df['pct_change_5day'] = df['close'].pct_change(periods=5)
    df['pct_change_10day'] = df['close'].pct_change(periods=10)
    df['pct_change_20day'] = df['close'].pct_change(periods=20)

    return df

In [6]:
emini = df_dict['ES']
emini.tail()

Unnamed: 0_level_0,open,high,low,close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2014-12-24,2078.75,2083.5,2077.0,2078.75,235054.0
2014-12-26,2078.75,2088.75,2078.5,2084.25,614078.0
2014-12-29,2086.0,2088.75,2076.0,2085.75,636306.0
2014-12-30,2087.5,2088.5,2073.0,2076.75,716610.0
2014-12-31,2076.25,2082.75,2050.75,2052.5,835262.0


In [7]:
emini_ind = add_all_indicators(emini)
emini_ind.tail()

Unnamed: 0_level_0,open,high,low,close,volume,20day_ave_vol,close_gt_prev_h,close_lt_prev_l,vol_bo_long,vol_bo_short,...,ma100_long,ma100_short,bb_high,bb_low,bb_long,bb_short,pct_change_1day,pct_change_5day,pct_change_10day,pct_change_20day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-12-24,2078.75,2083.5,2077.0,2078.75,235054.0,1263141.32,-5.75,7.25,,,...,1,0,2111.738883,1976.291117,0,0,-0.00012,0.035105,0.029339,0.006805
2014-12-26,2078.75,2088.75,2078.5,2084.25,614078.0,1245425.85,0.75,7.25,,,...,1,0,2114.14641,1975.51859,0,0,0.002646,0.011772,0.029768,0.007907
2014-12-29,2086.0,2088.75,2076.0,2085.75,636306.0,1261111.33,-3.0,7.25,,,...,1,0,2117.484882,1974.690118,0,0,0.00072,0.009071,0.047852,0.012181
2014-12-30,2087.5,2088.5,2073.0,2076.75,716610.0,1284785.3,-12.0,0.75,,,...,1,0,2120.430628,1975.069372,0,0,-0.004315,0.002051,0.047145,0.016271
2014-12-31,2076.25,2082.75,2050.75,2052.5,835262.0,1319991.85,-36.0,-20.5,,,...,1,0,2119.972587,1974.902413,0,0,-0.011677,-0.012747,0.044529,-0.003036


In [10]:
emini_ind2 = add_all_indicators(emini)

In [11]:
emini_ind2.tail()

Unnamed: 0_level_0,open,high,low,close,volume,20day_ave_vol,close_gt_prev_h,close_lt_prev_l,vol_bo_long,vol_bo_short,...,ma100_long,ma100_short,bb_high,bb_low,bb_long,bb_short,pct_change_1day,pct_change_5day,pct_change_10day,pct_change_20day
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-12-24,2078.75,2083.5,2077.0,2078.75,235054.0,1263141.32,-5.75,7.25,0.0,0.0,...,1,0,2111.738883,1976.291117,0,0,-0.00012,0.035105,0.029339,0.006805
2014-12-26,2078.75,2088.75,2078.5,2084.25,614078.0,1245425.85,0.75,7.25,0.0,0.0,...,1,0,2114.14641,1975.51859,0,0,0.002646,0.011772,0.029768,0.007907
2014-12-29,2086.0,2088.75,2076.0,2085.75,636306.0,1261111.33,-3.0,7.25,0.0,0.0,...,1,0,2117.484882,1974.690118,0,0,0.00072,0.009071,0.047852,0.012181
2014-12-30,2087.5,2088.5,2073.0,2076.75,716610.0,1284785.3,-12.0,0.75,0.0,0.0,...,1,0,2120.430628,1975.069372,0,0,-0.004315,0.002051,0.047145,0.016271
2014-12-31,2076.25,2082.75,2050.75,2052.5,835262.0,1319991.85,-36.0,-20.5,0.0,0.0,...,1,0,2119.972587,1974.902413,0,0,-0.011677,-0.012747,0.044529,-0.003036
