In [4]:
from googlefinance import getNews
from os.path import isfile, join
from yahoo_finance import Share
from nyse_dates_prds import *
from os import listdir
from sys import stdout
import cPickle as pickle
import pandas as pd
import numpy as np
import datetime
import urllib
import os

# Only reached when every import above succeeded; gives the cell visible output.
print "LOADED"

LOADED


In [2]:
def print_helper_stocks():
    """
    Print a small table of the helper Indexes/Funds/ETFs.

    These six tickers are added to every company's indicator dataframe during
    the prediction process because following them gives extra information
    about market movement. Nothing is returned; the table is only printed.
    """
    # (ticker, display name) pairs, in the order they are shown.
    helpers = [('^GSPC', 'S&P 500 Index'),
               ('^DJI', 'Dow Jones Index'),
               ('^IXIC', 'Nasdaq Index'),
               ('USO', 'U.S. Oil Fund'),
               ('GLD', 'SPDR Gold Shares'),
               ('SPY', 'SPDR S&P 500 ETF')]

    print("Stocks used to help track movement include...")
    table = pd.DataFrame(helpers, columns=['Stock Tickers', 'Ticker Names'])
    table = table.set_index('Stock Tickers')
    # Same output as printing the table followed by a separate '\n' item.
    print(str(table) + ' ' + '\n')
    return

def get_stock_info(hlc):
    """
    Print the stocks available for prediction and fetch a summary of each.

    Every key of `hlc` that appears in the built-in ticker -> company name
    table is listed, then looked up through the yahoo_finance `Share` class
    (price, average volume, exchange, market cap, ...). Per the original
    author, this source is not used during real-time calculation because of a
    15-minute delay, but it is fine for simply learning about a stock.

    Returns a dictionary mapping each ticker to a one-column DataFrame whose
    index holds the attribute labels and whose column is the company name.
    """
    company_names = {'BPOP':'Popular Inc.',
                     'FITB':'Fifth Third Bancorp',
                     'HBAN':'Huntington Bancshares Inc.',
                     'CMCSA':'Comcast Corp.',
                     'EBAY':'Ebay Inc.',
                     'AAPL':'Apple Inc.',
                     'AMAT':'Applied Materials Inc.',
                     'BRCD':'Brocade Communications Systems Inc.',
                     'CSCO':'Cisco Systems Inc.',
                     'GOOG':'Google Inc.',
                     'INTC':'Intel Corp.',
                     'LVLT':'Level 3 Communications Inc.',
                     'MSFT':'Microsoft Corp.',
                     'MU':'Micron Technology Inc.',
                     'NVDA':'NVIDIA Corp.',
                     'ORCL':'Oracle Corp.',
                     'QCOM':'QUALCOMM Inc.',
                     'SIRI':'Sirius XM Holdings Inc.',
                     'WIN':'Windstream Holdings Inc.',
                     'YHOO':'Yahoo! Inc.',
                     'BHP':'BHP Billiton Limited',
                     'BP':'British Petroleum Plc',
                     'RIO':'Rio Tinto Plc',
                     'XOM':'Exxon Mobil Corp.',
                     'GE':'General Electric Company',
                     'F':'Ford Motor Company',
                     'MO':'Altria Group Inc.',
                     'XRX':'Xerox Corp.',
                     'GS':'Goldman Sachs Group Inc.',
                     'JPM':'JPMorgan Chase & Co.',
                     'LYG':'Lloyds Banking Group Plc',
                     'MS':'Morgan Stanley',
                     'RF':'Regions Financial Corp.',
                     'USB':'U.S. Bancorp',
                     'WFC':'Wells Fargo & Co.',
                     'MRK':'Merck & Co. Inc.',
                     'PFE':'Pfizer Inc.',
                     'LMT':'Lockheed Martin Corp.',
                     'MGM':'MGM Resorts International',
                     'AMD':'Advanced Micro Devices Inc.',
                     'GLW':'Corning Inc.',
                     'HPQ':'HP Inc.',
                     'S':'Sprint Corp.',
                     'T':'AT&T Inc.'}

    # Keep only tickers we have a display name for, in hlc's own key order.
    stock_tk = [tick for tick in hlc.keys() if tick in company_names]
    stock_nm = [company_names[tick] for tick in stock_tk]

    print("Stocks available to track/predict...")
    name_df = pd.DataFrame([stock_tk, stock_nm]).T
    name_df.columns = ['Abreviations', 'Stock Names']
    name_df = name_df.set_index('Abreviations')
    print(str(name_df) + ' ' + '\n')
    print("For information on certain stock, type: \n\t print info['stock abreviation'] \n")
    print("Example: print info['AAPL']")

    # Row label paired with the Share accessor that supplies its value;
    # accessors are called in exactly this order.
    fields = [('Stock Price:',                 'get_price'),
              ('Average Daily Volume:',        'get_avg_daily_volume'),
              ('Stock Exchange:',              'get_stock_exchange'),
              ('Market Cap:',                  'get_market_cap'),
              ('Book Value:',                  'get_book_value'),
              ('EBITDA:',                      'get_ebitda'),
              ('Dividend Share:',              'get_dividend_share'),
              ('Dividend Yield:',              'get_dividend_yield'),
              ('Earnings Share:',              'get_earnings_share'),
              ('Year High:',                   'get_year_high'),
              ('Year Low:',                    'get_year_low'),
              ('Moving 50 Day Average:',       'get_50day_moving_avg'),
              ('Moving 200 Day Average:',      'get_200day_moving_avg'),
              ('Price Earnings Ratio:',        'get_price_earnings_ratio'),
              ('Price Earnings Growth Ratio:', 'get_price_earnings_growth_ratio'),
              ('Short Ratio',                  'get_short_ratio')]
    row_labels = [label for label, _ in fields]

    stock_info_dict = {}
    for ticker, company in zip(stock_tk, stock_nm):
        share = Share(ticker)
        share.refresh()
        row_values = [getattr(share, accessor)() for _, accessor in fields]
        stock_info_dict[ticker] = pd.DataFrame(row_values, index=row_labels,
                                               columns=[company])
    return stock_info_dict

def get_parameter_choices():
    """
    Print the indicators you can choose from and their period-length tables.

    There are 12 different indicators that we use for prediction, and each
    indicator has 32 period-length choices (row numbers 0-31 in the printed
    table). The period length is how far back the indicator looks: some use 10
    minutes of previous stock data, while others use up to 125 stock days.

    The real-time calculation computes every indicator with every period
    length, but for prediction you need to choose a subset, because the more
    attributes you use the longer prediction takes, and it soon becomes too
    long to be useful. The original author found about 230 attributes to be a
    good number. Six helper stocks are used plus the stock being predicted, so
    every indicator/period you choose adds 7 attributes.

    So try to keep the number of indicator/period choices around 40 unless you
    have the computing power/memory for more (the author's reference machine
    was an i7-4510U with 12GB of memory).

    Two tables are printed: abbreviation -> indicator name, and the period
    lengths per indicator group. Nothing is returned.
    """
    attributes      = ['rets', 'per', 'bol', 'vol', 'aro', 'mac',
                       'mac2', 'sma', 'kdo', 'mom', 'adx', 'rsi']
    attr_names      = ['Returns', 'Price Earnings Ratio', 'Bollinger Bands',
                       'Volatility', 'Aroon', 'Moving Average Convergence Divergence',
                       'Custom MACD', 'Simple Moving Average', 'Stochastic Oscillators',
                       'Momentum', 'Average Directional Index', 'Relative Strength Index']

    day_lengths     = ['1day',  '2day',  '3day',  '5day',
                       '8day',  '10day', '12day', '14day',
                       '16day', '20day', '25day', '30day',
                       '40day', '50day', '80day', '125day']

    day_lengths3    = ['1&2day','2&3day','3&5day','5&8day',
                       '8&10day','10&12day','12&14day','14&16day',
                       '16&20day','20&25day','25&30day','30&40day',
                       '40&50day','50&80day']

    day_lengths4    = ['1&2&4day','2&3&8day','3&5&10day','5&8&12day',
                       '8&10&14day','10&12&16day','12&14&20day','14&16&25day',
                       '16&20&30day','20&25&40day','25&30&50day','30&40&80day',
                       '40&50&125day']

    day_lengths5    = ['1&4day','2&8day','3&10day','5&12day',
                       '8&14day','10&16day','12&20day','14&25day',
                       '16&30day','20&40day','25&50day','30&80day',
                       '40&125day']

    minute_lengths1 = ['10min',  '14min',  '16min',  '18min',
                       '20min',  '25min',  '30min',  '40min',
                       '50min',  '75min',  '100min', '125min',
                       '150min', '200min', '250min', '300min']

    minute_lengths2 = ['1min','2min','3min','4min','5min'] + minute_lengths1

    minute_lengths3 = ['8&10min','10&14min','14&16min','16&18min',
                       '18&20min','20&25min','25&30min','30&40min',
                       '40&50min','50&75min','75&100min','100&125min',
                       '125&150min','150&200min','200&250min','250&300min',
                       '300&350min','350min&1day']

    minute_lengths4 = ['6&8&14min','8&10&16min','10&14&18min','14&16&20min',
                       '16&18&25min','18&20&30min','20&25&40min','25&30&50min',
                       '30&40&75min','40&50&100min','50&75&125min','75&100&150min',
                       '100&125&200min','125&150&250min','150&200&300min','200&250&350min',
                       '250&300&1day','300&350min&2day','350min&1&3day']

    minute_lengths5 = ['6&14min','8&16min','10&18min','14&20min',
                       '16&25min','18&30min','20&40min','25&50min',
                       '30&75min','40&100min','50&125min','75&150min',
                       '100&200min','125&250min','150&300min','200&350min',
                       '250min&1day','300min&2day','350min&3day']

    # Each combined list has exactly 32 entries (row numbers 0-31).
    indicator_lengths1 = minute_lengths1 + day_lengths
    indicator_lengths2 = minute_lengths2 + day_lengths[:11]
    indicator_lengths3 = minute_lengths3 + day_lengths3
    indicator_lengths4 = minute_lengths4 + day_lengths4
    indicator_lengths5 = minute_lengths5 + day_lengths5

    # Displayed column -> the period table it uses, in display order.
    columns = [('rsi',        indicator_lengths5),
               ('mac',        indicator_lengths4),
               ('mac2',       indicator_lengths4),
               ('vol',        indicator_lengths5),
               ('kdo',        indicator_lengths3),
               ('rets',       indicator_lengths2),
               ('ALL OTHERS', indicator_lengths1)]
    # Build all columns and concatenate once instead of growing a frame in a loop.
    new_df = pd.concat([pd.DataFrame(lengths, columns=[key])
                        for key, lengths in columns], axis=1)

    names = pd.DataFrame([attributes, attr_names]).T
    names.columns = ['Abreviations','Indicators']
    names = names.set_index('Abreviations')
    # The original message stopped mid-sentence ("..., with the ").
    print("Indicators available to use, with the period lengths for each listed below")
    print(str(names) + ' ' + '\n')
    print(new_df)
    return

def _parse_period_choices(spec):
    """
    Expand one selection string into an ordered list of period-number strings.

    `spec` is a comma separated list of single numbers ('8') and inclusive
    ranges ('1-5'), e.g. '1-5,8' -> ['1', '2', '3', '4', '5', '8']. Numbers
    must lie between 0 and 31 and a range must be increasing. Duplicates are
    dropped, keeping the first occurrence. On the first malformed entry a
    message is printed and parsing stops, returning what was collected so far.
    """
    chosen = []
    seen = set()

    # Anything from the first space onward is ignored (and reported below).
    head, has_space, _ = spec.partition(' ')

    for token in head.split(','):
        if not token:
            # Stray or doubled commas are tolerated.
            continue

        if '-' in token:
            low, _, high = token.partition('-')
            if not (low.isdigit() and high.isdigit()
                    and int(low) < int(high) < 32):
                print("x-y range incorrect:")
                print("Make sure y > x and y < 32 and x >= 0")
                return chosen
            numbers = range(int(low), int(high) + 1)
        else:
            if not (token.isdigit() and int(token) < 32):
                print("Number incorrect:")
                print("Make sure number is between 0 and 31")
                return chosen
            numbers = [int(token)]

        for number in numbers:
            if number not in seen:
                seen.add(number)
                chosen.append(str(number))

    if has_space:
        print("No spaces allowed in parameters")
    return chosen


def create_indicator_list(choice, rets=None, per=None, bol=None, vol=None, aro=None, mac=None,
                          mac2=None, sma=None, kdo=None, mom=None, adx=None, rsi=None):
    """
    Build the list of dataframe column names selected for prediction.

    `choice` is the ticker being predicted. Each keyword takes a string of
    row numbers from the table printed by get_parameter_choices(), e.g.
    per='1-5,8' ('-' for an inclusive range, ',' between entries, no spaces,
    values 0-31). Keywords left as None contribute nothing.

    Names have the form '<ticker>_<indicator><number>' and are produced for
    the six helper tickers followed by `choice`. The 'rets' indicator is
    emitted for the helper tickers only, never for `choice`.

    Each indicator's selection is parsed on its own. Previously the parsed
    numbers were accumulated across indicators, so numbers given for one
    indicator also appeared under every indicator listed after it.

    Returns the list of column-name strings.
    """
    helper_stocks = ['^GSPC', '^IXIC', '^DJI', 'GLD', 'USO', 'SPY']
    # NOTE(review): create_columnname_dict() names the Custom MACD columns
    # 'mactwo', while this function emits 'mac2' (so 'X_mac21' could be read
    # as mac period 21). Confirm which spelling the dataframes actually use.
    selections = [('rets', rets), ('per', per), ('bol', bol), ('vol', vol),
                  ('aro', aro), ('mac', mac), ('mac2', mac2), ('sma', sma),
                  ('kdo', kdo), ('mom', mom), ('adx', adx), ('rsi', rsi)]

    # Parse each selection once; the result is the same for every ticker.
    parsed = [(abbr, _parse_period_choices(spec))
              for abbr, spec in selections if spec is not None]

    indicators = []
    for stock in helper_stocks + [choice]:
        for abbr, numbers in parsed:
            if stock == choice and abbr == 'rets':
                continue
            for number in numbers:
                indicators.append(stock + '_' + abbr + number)
    return indicators

def create_columnname_dict(hlc):
    """
    Create the column-name list for each company in the highlowclose
    dictionary and pickle the result for the real-time calculation.

    For every key of `hlc`, 13 indicators x 32 period numbers are combined
    into names of the form '<ticker>_<indicator><number>'. The resulting
    dictionary is keyed by the exchange-qualified symbol from `tickers2` when
    one exists, otherwise by the ticker itself; the names inside always use
    the plain `hlc` ticker.

    The dictionary is written to 'Pickles/columnnames.pickle' (the directory
    must already exist). Nothing is returned.
    """
    tickers2      = {'USO':'NYSEARCA:USO','GLD':'NYSEARCA:GLD',
                     'SPY':'NYSEARCA:SPY','^DJI':'INDEXDJX:.DJI',
                     '^GSPC':'INDEXSP:.INX','^IXIC':'INDEXNASDAQ:.IXIC',
                     'LMT':'NYSE:LMT'}
    indicator_lst = ['rsi', 'vol', 'sma', 'cci', 'per', 'mom', 'bol',
                     'aro', 'mac', 'mactwo', 'adx', 'kdo', 'rets']
    nm_dict = {}
    for name in hlc.keys():
        name2 = tickers2.get(name, name)
        # The original referenced an undefined name `indd` here and raised
        # NameError on the first iteration.
        nm_dict[name2] = [name + '_' + ind + str(d)
                          for ind in indicator_lst
                          for d in range(32)]

    # `with` closes the file even if pickling fails.
    with open('Pickles/columnnames.pickle', 'wb') as opp:
        pickle.dump(nm_dict, opp)
    return

def company_news(newstickers):
    """
    Fetch the news for every stock symbol in `newstickers`.

    Each symbol is passed to `getNews` from the googlefinance module
    (https://github.com/hongtaocai/googlefinance). Returns a dictionary
    mapping each symbol to whatever `getNews` returned for it.
    """
    return {symbol: getNews(symbol) for symbol in newstickers}

def create_news_dict(hlc):
    """
    Collect the current news articles for every ticker in `hlc` and pickle them.

    Tickers are translated to exchange-qualified symbols where `tickers2` has
    one, then passed to company_news(). For each symbol the articles are
    stored as {position: {'Date', 'Title', 'URL'}} together with the most
    recent article date. The original author notes the news is not yet used
    for prediction.

    An article date that does not match the '%b %d, %Y' format is replaced by
    today's date. A symbol with no articles gets a latest date of None
    (previously this raised NameError or silently reused the previous
    symbol's date).

    Writes [hist_news, last_date] to 'Pickles/newsdict.pickle'. Nothing is
    returned.
    """
    tickers2      = {'USO':'NYSEARCA:USO','GLD':'NYSEARCA:GLD',
                     'SPY':'NYSEARCA:SPY','^DJI':'INDEXDJX:.DJI',
                     '^GSPC':'INDEXSP:.INX','^IXIC':'INDEXNASDAQ:.IXIC',
                     'LMT':'NYSE:LMT'}
    tickers = [tickers2.get(each, each) for each in hlc.keys()]

    hist_news = {}
    last_date = {}

    todays_news = company_news(tickers)
    for tick in tickers:
        news   = {}
        latest = None  # reset per symbol so nothing carries over

        for count, value in enumerate(todays_news[tick]):
            d     = value['d']
            title = value['t']
            url   = value['u']

            try:
                date = datetime.datetime.strptime(d, '%b %d, %Y').date()
            except (ValueError, TypeError):
                # Not an absolute date in the expected format: use today.
                date = datetime.date.today()

            news[count] = {'Date':date, 'Title':title, 'URL':url}
            if latest is None or date > latest:
                latest = date

        last_date[tick] = latest
        hist_news[tick] = news

    news_lst = [hist_news, last_date]
    # `with` closes the file even if pickling fails.
    with open('Pickles/newsdict.pickle', 'wb') as opp:
        pickle.dump(news_lst, opp)
    return

def create_hlc_dict():
    """
    Load every pickled file in the HLC/ directory into one dictionary.

    Download at least the base helper stocks plus at least one other and
    store them in a directory called HLC/ in your path. Each file is
    unpickled and stored under the part of its file name before the first
    '.', e.g. 'AAPL.pickle' -> 'AAPL'. Per the original author these are
    precalculated highs/lows/closes/typical values for each company, and the
    dictionary is used heavily throughout the program.

    The combined dictionary is also written to
    'Pickles/pickleadjustedintracomplete.pickle' and returned.
    """
    highlowclose = {}
    mypath       = 'HLC/'
    onlyfiles    = [f for f in listdir(mypath) if isfile(join(mypath, f))]

    for fl in onlyfiles:
        # listdir() returns bare names, so the path must be re-joined with
        # the directory; opening `fl` alone looked in the working directory.
        # SECURITY: unpickling runs arbitrary code; only load trusted files.
        with open(join(mypath, fl), 'rb') as opp:
            # split() keeps the whole name when there is no '.', where
            # fl[:fl.find('.')] dropped the last character.
            highlowclose[fl.split('.', 1)[0]] = pickle.load(opp)

    with open('Pickles/pickleadjustedintracomplete.pickle', 'wb') as opp:
        pickle.dump(highlowclose, opp)
    return highlowclose

def _parse_intraday_closes(page):
    """
    Split one getprices response into a list of per-day price lists.

    A line starting with 'a' opens a new day. From then on, the text after
    the first comma of every line containing a comma is read as a float.
    Lines before the first 'a' line, and lines without a comma, are skipped.
    """
    days = []
    current_day = None
    for line in page.split('\n'):
        if line.startswith('a'):
            current_day = []
            days.append(current_day)
        if current_day is None or ',' not in line:
            continue
        current_day.append(float(line.split(',', 1)[1]))
    return days


def get_prev_10d1min_intraday(hlc):
    """
    Download recent 1-minute prices from Google for every ticker in `hlc`.

    Added to allow you to check that the data from the other source is
    accurate. The request asks for 10 days (p=10d) at 60-second intervals
    (i=60); per the original author Google is a more reliable source but is
    not used for the main data because only 10 days are stored.

    Returns a dictionary mapping each `hlc` key to a list of days, each day
    being a list of float prices.

    Behaviour changes from the previous implementation: the final day in the
    response is now included (it used to be parsed and then discarded), and a
    response without a trailing newline no longer loops forever.
    """
    # Index symbols as this endpoint expects them.
    # NOTE(review): the other two map to a dot-prefixed symbol but 'IXIC'
    # does not; confirm whether '.IXIC' was intended.
    tickers = {'^DJI':'.DJI','^GSPC':'.INX','^IXIC':'IXIC'}

    complete_list = {}
    for each in hlc.keys():
        name = tickers.get(each, each)
        page = 'http://www.google.com/finance/getprices?i=60&p=10d&f=d,c&df=cpct&q='+name
        complete_list[each] = _parse_intraday_closes(urllib.urlopen(page).read())
    return complete_list

def get_base_index(hlc):
    """
    Build a common "base" index from the six helper dataframes.

    Every ordered pair of helper dataframes is compared. A date is "bad" when
    one frame holds more than 190 index entries on that date that the other
    frame lacks (roughly half a trading day of minute data). All bad dates
    are then removed from a deep copy of each helper dataframe.

    Returns a 4-tuple:
        base index - index of the shortest helper frame after bad dates are
                     removed (the first one wins a tie),
        bases      - the list of helper tickers,
        new_dfs    - {ticker: cleaned copy of the helper dataframe},
        hlc        - the input dictionary, unmodified.
    """
    bases = ['^IXIC','^GSPC','^DJI','GLD','USO','SPY']
    max_missing = 190

    def bad_dates_in(missing):
        # A date is flagged once it has been seen more than `max_missing`
        # times; the date is the first 10 characters of str(entry).
        counts, flagged = {}, []
        for stamp in missing:
            day = str(stamp)[:10]
            counts[day] = counts.get(day, 0) + 1
            if counts[day] == max_missing + 1:
                flagged.append(day)
        return flagged

    complete_bad_dates = []
    for key in bases:
        for key2 in bases:
            if key == key2:
                continue
            diffs = hlc[key].index.difference(hlc[key2].index)
            for day in bad_dates_in(diffs):
                if day not in complete_bad_dates:
                    complete_bad_dates.append(day)

    def without_bad_dates(frame):
        trimmed = frame.copy(deep=True)
        for day in complete_bad_dates:
            try:
                rows = trimmed.index.get_loc(day)
            except KeyError:
                # This frame has no rows on that date; nothing to drop.
                continue
            trimmed = trimmed.drop(trimmed.index[rows])
        return trimmed

    new_dfs = {}
    for key in bases:
        new_dfs[key] = without_bad_dates(hlc[key])

    # min() returns the first of equally short frames, as the original
    # strict '<' comparison did.
    length_key = min(bases, key=lambda name: len(hlc[name]))

    return new_dfs[length_key].index, bases, new_dfs, hlc

def get_bad_comp_dates(hlc, bases, base, new_dfs):
    """
    Remove badly mismatched dates from every non-helper dataframe.

    For each key of `hlc` that is not in `bases`, the dataframe's index is
    compared with the `base` index in both directions. A date is "bad" when
    more than 190 entries on that date are present on one side and absent
    from the other (roughly half a trading day of minute data). The bad
    dates are dropped from a deep copy of the dataframe.

    Parameters:
        hlc     - {ticker: dataframe}
        bases   - helper tickers to skip
        base    - the base index to compare against
        new_dfs - dictionary to receive the cleaned copies (mutated in place)

    Returns (date_dict, new_dfs), where date_dict maps each processed ticker
    to its list of bad dates.
    """
    max_missing = 190
    date_dict = {}

    # The original iterated `indexes.itervalues()` while unpacking
    # (key, value) pairs and stored results in an undefined `date_lst`.
    for key, frame in hlc.items():
        if key in bases:
            continue
        val = frame.index

        bad_dates = []
        for missing in (val.difference(base), base.difference(val)):
            counts = {}  # counted separately for each direction
            for stamp in missing:
                day = str(stamp)[:10]
                counts[day] = counts.get(day, 0) + 1
                if counts[day] == max_missing + 1 and day not in bad_dates:
                    bad_dates.append(day)

        cpy_df = frame.copy(deep=True)
        for day in bad_dates:
            try:
                rows = cpy_df.index.get_loc(day)
            except KeyError:
                # The date is missing from this frame; nothing to drop.
                continue
            cpy_df = cpy_df.drop(cpy_df.index[rows])

        date_dict[key] = bad_dates
        new_dfs[key]   = cpy_df

    return date_dict, new_dfs

# Only reached when every definition in this cell was accepted; gives the cell visible output.
print "LOADED"

LOADED


In [None]:
# Load every pickled dataframe under HLC/ into one dictionary (also re-pickled under Pickles/).
hlc     = create_hlc_dict()
# Download Google's recent 1-minute prices for the same tickers, for manual comparison.
compare = get_prev_10d1min_intraday(hlc)
# Per-ticker summary tables fetched through yahoo_finance; also prints the available tickers.
info    = get_stock_info(hlc)

# Print the reference tables, then write the column-name and news pickles.
print_helper_stocks()
get_parameter_choices()
create_columnname_dict(hlc)
create_news_dict(hlc)

In [None]:
"""
If you don't have advanced stock knowledge, you can choose to use the pre-made indicator list
that I found to be useful, but if you think you can create a more useful indicator list, then use
the creation tool function below.

Pre-made indicator list is called 'pre_made_indicatorlist.pickle'

Once you have chosen your chosen stock to predict on, you'll now call your 
create_indicator_list() function. This will create a list filled with dataframe column names to 
be used as our input values for our ML function. To create you will enter your chosen stock 
ticker, followed by string values for each indicator periods chosen. 

For example, if you want to choose Apple as your chosen stock and you'd like to use period 
lengths 1-5,8 for pe ratio, and 0,2,6 for our volatility indicator, we'd type:
    ind = create_indicator_list('AAPL', per='1-5,8', vol='0,2,6')

You can use '-' to select everything between two values (inclusive) and commas to separate
individual numbers. Don't use spaces in the strings. Each indicator offers period choices
numbered 0 to 31, so look at the parameter choices table and choose what you feel is best.

Keep in mind there are 6 helper stocks plus your company choice, so 7 different stocks are
used to predict. When you choose 10 different indicator values, you're really using about 70
columns, and anything over about 250 indicator values starts to take unreasonably long to
calculate, so try to keep it under that amount.
"""
#ind = create_indicator_list('PUT STOCK TICKER HERE', rets=None, per=None, bol=None, 
#                            vol=None, aro=None, mac=None, mac2=None, sma=None, 
#                            kdo=None, mom=None, adx=None, rsi=None)

In [None]:
"""
Many of the dataframes have missing data on certain dates, below is a list of
dates that some of the base helper stocks have/not-have that the predicting
stocks have/dont-have. These dates are dates where one or more dataframes has
missing data for that day. To adjust for these missing dates, you can feed your
highlowclose dictionary to the below two functions and it will return a new 
dataframe where the dates that have mismatching data will be removed from them.

This is an alternative to simply using the ffill and bfill methods when
concatenating dataframes. Personally, I chose to keep the dates and just use
these fill methods instead. The issue with removing the mismatching dates is that
you're taking away usable data from some dataframes just because others are incomplete.

All in all, it's not a great choice either way because you'll have days where
the data is missing, or the data is approximated and technically incorrect.
This is something that happens when you get free data, and considering it only
amounts to about 1% of the data, it's not unmanageable. If you can find more
reliable free intraday data, let me know.

These are the dates that have missing data on most of the dataframes, but I recommend
calling the two functions below to get a dictionary of the dates that are missing
from one dataframe or another among the 6 base dataframes and the 1 chosen
dataframe, as it will also cover any data that has gone missing since this list was made.

dates = ['2013-08-28', '2013-10-28', '2014-02-12', '2014-02-18', '2014-02-25',
         '2014-10-02', '2014-10-06', '2014-10-08', '2014-10-09', '2014-10-13', 
         '2014-10-14', '2014-10-15', '2014-10-20', '2015-01-14', '2015-03-30', 
         '2015-04-21', '2015-05-05', '2015-05-18', '2015-06-08', '2015-07-08', 
         '2015-08-20', '2015-08-31', '2015-09-08', '2016-02-08', '2016-03-15', 
         '2016-03-21', '2016-03-22', '2016-04-13', '2016-06-15']
"""
#base_index, base_nms, new_dfs, hlc = get_base_index(hlc)
#date_dict, new_hlc = get_bad_comp_dates(hlc, base_nms, base_index, new_dfs)