In [2]:
from RealTimeNN import *
from calc_ind import *
from googlefinance import getQuotes, getNews
from dateutil import rrule  
from sys import stdout
import cPickle as pickle
import pandas as pd
import numpy as np
import datetime
import urllib
import time
import math

print "LOADED"

LOADED


In [2]:
def NYSE_holidays(a=None, b=None):
    """
    Build a dateutil rruleset of NYSE holiday observances inside a date window.

    a -- first date of the window. When omitted (None) it is resolved at call
         time to 390 days before today.
    b -- last date of the window. When omitted (None) it is resolved at call
         time to 365 days after today.

    The defaults are resolved on every call; computing them in the signature
    would freeze "today" at the moment the cell was executed, which goes stale
    in a kernel that stays alive across trading days.

    Every fixed-date holiday is listed three times: on the date itself, on the
    preceding Friday and on the following Monday. Any candidate that lands on
    a Saturday or Sunday is then excluded, so only the weekday observance
    survives.

    Returns the populated rrule.rruleset.
    """
    if a is None:
        a = datetime.date.today() - datetime.timedelta(days=390)
    if b is None:
        b = datetime.date.today() + datetime.timedelta(days=365)

    # Keyword arguments for one YEARLY rule per potential observance
    observances = (
        # New Years Day (Fri Dec 31 / Jan 1 / Mon Jan 2)
        dict(bymonth=12, bymonthday=31, byweekday=rrule.FR),
        dict(bymonth=1,  bymonthday=1),
        dict(bymonth=1,  bymonthday=2,  byweekday=rrule.MO),
        # MLK Day (3rd Monday of January)
        dict(bymonth=1,  byweekday=rrule.MO(3)),
        # Washington's Bday (3rd Monday of February)
        dict(bymonth=2,  byweekday=rrule.MO(3)),
        # Good Friday (two days before Easter)
        dict(byeaster=-2),
        # Memorial Day (last Monday of May)
        dict(bymonth=5,  byweekday=rrule.MO(-1)),
        # Independence Day (Fri Jul 3 / Jul 4 / Mon Jul 5)
        dict(bymonth=7,  bymonthday=3,  byweekday=rrule.FR),
        dict(bymonth=7,  bymonthday=4),
        dict(bymonth=7,  bymonthday=5,  byweekday=rrule.MO),
        # Labor Day (1st Monday of September)
        dict(bymonth=9,  byweekday=rrule.MO(1)),
        # Thanksgiving Day (4th Thursday of November)
        dict(bymonth=11, byweekday=rrule.TH(4)),
        # Christmas (Fri Dec 24 / Dec 25 / Mon Dec 26)
        dict(bymonth=12, bymonthday=24, byweekday=rrule.FR),
        dict(bymonth=12, bymonthday=25),
        dict(bymonth=12, bymonthday=26, byweekday=rrule.MO),
    )

    # Generate ruleset for holiday observances on the NYSE
    rs = rrule.rruleset()
    for rule_kwargs in observances:
        rs.rrule(rrule.rrule(rrule.YEARLY, dtstart=a, until=b, **rule_kwargs))

    # Exclude potential holidays that fall on weekends
    rs.exrule(rrule.rrule(rrule.WEEKLY, dtstart=a, until=b,
                          byweekday=(rrule.SA, rrule.SU)))
    return rs

def NYSE_tradingdays(a=None, b=None):
    """
    Build a dateutil rruleset of NYSE trading days inside a date window.

    a -- first date of the window. When omitted (None) it is resolved at call
         time to 390 days before today.
    b -- last date of the window. When omitted (None) it is resolved at call
         time to 365 days after today.

    The defaults are resolved on every call rather than in the signature, so
    the window follows the current date even in a long-running kernel.

    The set contains every day from a to b, minus Saturdays and Sundays and
    minus the dates produced by NYSE_holidays(a, b).

    Returns the populated rrule.rruleset.
    """
    if a is None:
        a = datetime.date.today() - datetime.timedelta(days=390)
    if b is None:
        b = datetime.date.today() + datetime.timedelta(days=365)

    # Generate ruleset for NYSE trading days
    rs = rrule.rruleset()
    rs.rrule(rrule.rrule(rrule.DAILY, dtstart=a, until=b))

    # Exclude weekends and holidays
    rs.exrule(rrule.rrule(rrule.WEEKLY, dtstart=a, byweekday=(rrule.SA, rrule.SU)))
    rs.exrule(NYSE_holidays(a, b))

    return rs

print "LOADED"

LOADED


In [3]:
def create_prd_lst2(highlowclose, prd_lst, prd_lst_nums, 
                    prd_lst_nums2, prd_lst_nums3, prd_lst_nums4):
    """
    Build the per-company period-length lists used by the indicator code.

    For every company in highlowclose, each start offset in prd_lst[company]
    is turned into a length: the number of 'Closes' entries from that offset
    to the end of the series. Those lengths are then appended to each of the
    four base lists (prd_lst_nums .. prd_lst_nums4), giving four lists per
    company.

    Background from the original notes: the offsets are already adjusted per
    company for roughly 20 missing days of data over the past 3 years, so a
    nominal 30 day period may span 31 days of timestamps for one company and
    still be the correct 30 day length. The four base lists exist because
    different indicators need different starting values and extra periods
    (the third is used for the MAC indicators, with extra periods 6 and 8 at
    the start and 350 at the end; the last is used for return values).

    Returns a dict mapping company name to [lst1, lst2, lst3, lst4].
    """
    base_lists = (prd_lst_nums, prd_lst_nums2, prd_lst_nums3, prd_lst_nums4)
    per_company = {}

    for ticker in highlowclose.keys():
        closes = highlowclose[ticker]['Closes']
        # Number of closing prices from each start offset through the latest one
        tail_lengths = [len(closes[offset:]) for offset in prd_lst[ticker]]
        per_company[ticker] = [base + tail_lengths for base in base_lists]

    return per_company

print "LOADED"

LOADED


In [4]:
def company_news(newstickers):
    """
    Fetch the current news items for every stock symbol in newstickers.

    Each symbol is passed to getNews from the googlefinance module
    (https://github.com/hongtaocai/googlefinance), which queries Google's
    server. Returns a dict mapping each symbol to whatever getNews returned
    for it.
    """
    return {symbol: getNews(symbol) for symbol in newstickers}

def new_trading_news(news_dict, tickers2):
    """
    Merge freshly downloaded news stories into the stored news history.

    news_dict -- two-element sequence [hist_news, last_date]:
                   hist_news[tick] is a mapping of running index -> story dict
                   last_date[tick] is the date of the newest stored story
    tickers2  -- symbols to check; passed to company_news().

    For every symbol, each downloaded story whose date is strictly newer than
    the newest one seen so far is stored as {'Date', 'Title', 'URL'} under the
    next free index, and the newest-date marker is advanced to it. Both
    mappings in news_dict are updated in place and the pair is pickled to
    'Pickles/newsdict.pickle'.

    Returns None.
    """
    todays_news = company_news(tickers2)

    hist_news   = news_dict[0]
    last_date   = news_dict[1]

    # Stories whose date string does not match '%b %d, %Y' are stamped with
    # this fixed date, as before.
    # NOTE(review): the date is hard-coded, so such stories are only kept
    # while the stored newest date is older than it -- confirm this is wanted.
    fallback_date = datetime.datetime.strptime('Sep 12, 2016', '%b %d, %Y').date()

    for tick in tickers2:
        # Was assigned to the misspelled name 'lastest', which left 'latest'
        # undefined at the comparison below.
        latest   = last_date[tick]
        old_news = hist_news[tick]

        for value in todays_news[tick]:
            d     = value['d']
            title = value['t']
            url   = value['u']

            try:
                date = datetime.datetime.strptime(d, '%b %d, %Y').date()
            except (ValueError, TypeError):
                date = fallback_date

            if date > latest:
                old_news[len(old_news)] = {'Date':date, 'Title':title, 'URL':url}
                latest = date

        last_date[tick] = latest
        hist_news[tick] = old_news

    new_news_dict = [hist_news, last_date]
    # 'with' closes the file even if pickling fails
    with open('Pickles/newsdict.pickle', 'wb') as tn:
        pickle.dump(new_news_dict, tn)

    return

def splits(stock_split_dates):
    """
    Scrape the stock-split page and list the announced splits found on it.

    stock_split_dates -- URL of the page; it is fetched with urllib.urlopen.

    The page text is scanned with plain string searches, starting after the
    first 'Announced' / 'th_No_BG' markers. Each entry is located by the
    ')</a>' that closes a company link.

    Returns a list with one four-element list of strings per entry:
        [text between the parentheses before ')</a>',
         ex-date cell, ratio cell, payment-date cell]
    so the caller can check whether any watched company is affected.
    """
    splitup        = urllib.urlopen(stock_split_dates).read()
    companies      = splitup.find('Announced')
    stop           = splitup.find('th_No_BG', companies)
    company_splits = []
    
    # Find which companies are listed along with the stock split ratio,
    #  the ex-date, and the payment date.
    while True:
        done      = splitup.find(')</a>',stop)+1
        next_stop = splitup.rfind('(', stop, done)
        ratio_s   = splitup.find('<td>',done)+4
        ratio_e   = splitup.find('<',ratio_s)
        # NOTE(review): dates are located by the literal year '2016', so rows
        # dated in any other year are not parsed correctly.
        payment_e = splitup.find('2016',done)+4
        payment_s = splitup.rfind('<td>',stop,payment_e)+4
        exdate_s  = splitup.find('<td>',payment_e)+4
        exdate_e  = splitup.find('</td>',exdate_s)
        
        company_splits.append([splitup[next_stop+1:done-1],
                               splitup[exdate_s:exdate_e],
                               splitup[ratio_s:ratio_e],
                               splitup[payment_s:payment_e]])
        
        tbody = splitup.find('</table>',done)
        test  = splitup.find('(',done)
        
        # Stop when the table closes before the next '(' -- and also when
        # there is no further '(' at all. In that case find() returns -1,
        # which never compares greater than tbody, so the old test kept
        # looping over text that holds no more entries.
        if test == -1 or tbody < test:
            break
        stop = done
    return company_splits

def dvds(dividend_dates): 
    """
    Scrape the dividend calendar page and list the dividends found on it.

    dividend_dates -- URL of the page; it is fetched with urllib.urlopen.

    Like splits(), the page is scanned with plain string searches. Each entry
    is located by the '&#40;' ... '&#41;' (HTML-escaped parentheses) around
    the company symbol.

    Returns a dict keyed by page number (only page 0 is fetched at present).
    Each value is a list with one five-element list of strings per entry:
        [symbol, ex-date, dividend, record date, payment date]
    """
    div_dic           = {}
    # Only the first page is read; widening the range enables the branch
    # below, which follows the link to the next day's page.
    for x in xrange(0,1):
        company_dividends = []
        if x != 0:
            dividends      = urllib.urlopen(dividend_dates).read()  
            end            = dividends.find('Next')-2
            end2           = dividends.rfind('href=',0,end)
            end3           = dividends.rfind('href=',0,end2)
            end4           = dividends.rfind('href=',0,end3)
            # Was assigned to 'end', leaving 'end5' undefined on the next line
            end5           = dividends.rfind('href=',0,end4)
            start          = dividends.rfind('href=',0,end5)
            end            = dividends.find('>', start)
            next_day       = dividends[start+6:end-1]
            dividend_dates = 'http://www.nasdaq.com/dividend-stocks/'+next_day
            
        dividends = urllib.urlopen(dividend_dates).read()  
        start     = dividends.find('Payment Date')
        
        while True:
            start      = dividends.find('&#40;',start)+5
            end        = dividends.find('&#41;',start)
            # NOTE(review): dates are located by the literal year '2016', so
            # rows dated in any other year are not parsed correctly.
            ex_date_e  = dividends.find('2016',end)+4
            ex_date_s  = dividends.rfind('>',end,ex_date_e)+1
            div_e      = dividends.find('</',ex_date_e+1)
            div_s      = dividends.rfind('>',ex_date_e,div_e)+1
            rec_date_e = dividends.find('2016',div_e)+4
            rec_date_s = dividends.rfind('>',div_e,rec_date_e)+1
            # Skip one more '2016' occurrence before the payment date
            proceed    = dividends.find('2016',rec_date_e)+4
            pay_date_e = dividends.find('2016',proceed)+4
            pay_date_s = dividends.rfind('>',proceed,pay_date_e)+1
            
            company_dividends.append([dividends[start:end],
                                    dividends[ex_date_s:ex_date_e],
                                    dividends[div_s:div_e],
                                    dividends[rec_date_s:rec_date_e],
                                    dividends[pay_date_s:pay_date_e]])
            
            # No further escaped '(' means no further entries
            test = dividends.find('&#40;',start)
            if test == -1:
                break
            start = end 
        div_dic[x] = company_dividends
    return div_dic

def yahoo_backup(ticks, day_hl, temp, time_value):
    """
    Fallback quote source: read each price from the Yahoo Finance quote page.

    Used when the Google quote server refuses our requests. Scraping one page
    per symbol is far slower than the Google call, so it is to be avoided when
    possible; a longer pause between Google calls helps to avoid being blocked
    in the first place.

    ticks      -- symbols in Google notation; known ones are translated to
                  their Yahoo symbol, the rest are used unchanged.
    day_hl     -- dict of symbol -> [day low, day high]; updated in place.
    temp       -- dict of symbol -> price frame; a row
                  [high, low, close, typical] is written at time_value.
    time_value -- row label (timestamp) for the new prices.

    A symbol whose price cannot be parsed or stored is skipped so the other
    symbols are still updated. Errors while downloading a page are not
    handled here and propagate to the caller.

    Returns temp.
    """
    yahoo      = 'http://finance.yahoo.com/q?s='
    ticks2 = {'NYSE:LMT':'LMT', 'NYSEARCA:USO':'USO', 'NYSEARCA:GLD':'GLD',
              'NYSEARCA:SPY':'SPY', 'INDEXDJX:.DJI':'%5EDJI',
              'INDEXSP:.INX':'^GSPC', 'INDEXNASDAQ:.IXIC':'%5EIXIC'}
    
    # Retrieve the stock price for each company
    for name in ticks:
        stock       = yahoo + ticks2.get(name, name)
        page        = urllib.urlopen(stock).read()
        # The price is the text of the <span> carrying this marker
        price       = page.find('$main-0-Quote.0.1.0.$price.0')
        price_stop  = page.find('</span>',price)
        price_start = page.rfind('>',price_stop-10,price_stop)+1
        
        try:
            close_value     = float(page[price_start:price_stop].replace(',',''))
            day_hl[name][0] = min(close_value, day_hl[name][0])
            day_hl[name][1] = max(close_value, day_hl[name][1])
            typ_price       = (day_hl[name][1] + day_hl[name][0] + close_value)/3.
            temp[name].loc[time_value] = [day_hl[name][1], day_hl[name][0], 
                                          close_value, typ_price]
        except Exception:
            # Best effort: skip this symbol. 'Exception' (rather than a bare
            # except) lets KeyboardInterrupt / SystemExit through.
            continue
    
    return temp

print "LOADED"

LOADED


In [9]:
def real_time_quotes(ticks, attr, prds, indics, day_hl, temp):
    """
    As the function name implies, it retrieves stock data in real time. It will be started at
    the beginning of the trading day(9:30AM Eastern) and ends at the end of the trading day
    (4:00PM Eastern). It retrieves the data from Google's server by calling getQuotes which uses
    the same program used to retrieve stock news. Credit goes to 
    https://github.com/hongtaocai/googlefinance for creating the program to retrieve the data.
    
    Once the data is retrieved, it's returned in a list of dictionaries, one for each company. 
    We'll take this info, convert the price to a float from a string. Then that data is taken 
    and used to create our indicators in real time. The function calculate indicators is what
    calculates all the indicators at each interval.
    
    If the Google server blocks our retrieval process, we call our yahoo_backup() function 
    which retrieves the data straight from the yahoo finance's website. This is considerably 
    slower so it's always better to increase the sleep time between each retrieval to slow down 
    how often we're retrieving data as this has proved to be effective in combatting being 
    blocked in the first place. However, once blocked, typically you're unblocked after your 
    call to yahoo_backup(), unsure why this is but typically the next call to googles servers 
    will be successful.
    
    Returns 3 dictionaries:          
        -combined   = Dictionary filled with each companies indicators calculated for that day. 
                      There are 13 indicators ranging from bollinger bands, to PE/Ratios, to 
                      Volatilities, etc.. as well as daily returns from the 6 major 
                      indexs/funds/etfs. Each indicator has 32 different values for each 
                      interval due to calculating for 32 different period lengths at a time.
                      
        -temp       = Dictionary containing the highs, lows, closing, and typical prices for 
                      each company at each interval that trading day to be appended to our old 
                      data after trading day is done.
    """    
    last_close   = {} 
    ytestlst     = []

    count, fail_count = 0, 0
    dt_num       = 60. * 1000000000.
    rng_tic_lst  = range(len(ticks))
    
    while True:
        now     = datetime.datetime.now().time()
        if now >= datetime.time(9,30,1):
            break
            
    # Use python time module to retrieve stock data for the 6.5 hour trading day
    seconds, minutes, hours = 60, 60, 6.5
    t_end = time.time() + (seconds * minutes * hours) + 60
        
    while time.time() < t_end:
        start = time.time()
        try:
            # Try to retrieve the quotes from the Google server
            next_quotes = getQuotes(ticks) 
            
            for x in rng_tic_lst:
                # Set close value with time
                name             = ticks[x]
                close_value      = float(next_quotes[x]['LastTradePrice'].replace(',',''))
                time_value       = pd.to_datetime(time.strftime("%Y-%m-%d %H:%M:%S"))
                # Round time to the minute value
                time_value       = pd.Timestamp(math.ceil(time_value.value/dt_num)*dt_num)
                last_close[name] = [close_value, time_value]
                    
                # If the latest stock price is a day low or day high, set new high/low 
                day_hl[name][0]  = min(close_value, day_hl[name][0])
                day_hl[name][1]  = max(close_value, day_hl[name][1])
                typ_price        = (day_hl[name][1] + day_hl[name][0] + close_value) / 3.
                
                # Our day's high, low, close and typical prices
                temp[name].ix[time_value] = [day_hl[name][1], day_hl[name][0], 
                                             close_value, typ_price]
            
            # Call the indicator function which calculates our list of indicators in 
            #  real time which will be used to feed to the real-time recurrent neural netwk
            indics, attr = calculate_indicators(ticks, attr, prds, count, 
                                                last_close, day_hl, indics)
            if len(attr) > 3:
                attr, ytestlst  = calculate_neural_output(indics, attr, ytestlst)
            
            stdout.write("\r%d" % count)
            stdout.flush()
            count += 1
        except:
            # If Google's server denies our retrieval, try calling yahoo_backup()
            try:
                time_value  = pd.to_datetime(time.strftime("%Y-%m-%d %H:%M:%S"))
                time_value  = pd.Timestamp(math.ceil(time_value.value/dt_num)*dt_num)
                temp        = yahoo_backup(ticks, day_hl, temp, time_value)
                
                print "PRIMARY FAIL", count
                count += 1
                fail_count += 1
            except:
                count += 1
                fail_count += 1
                
                print "SECONDARY FAIL"
                pass
            pass
        
        # Stop the timing, and subtract this from 60 seconds,ie. every loop = 60secs
        cycle_time = time.time() - start
        if cycle_time < 58:
            time.sleep(60 - cycle_time)
    
    if len(attr) > 3:
        temp = [temp, ytestlst]

    return indics, temp, attr

print "LOADED"

LOADED


In [10]:
def calculate_indicators(ticks, attr, prd_dict, count, last_close, day_hl, indicators):
    """
    Compute one new row of technical indicators for every ticker in ticks.

    For each ticker (tick stands for ticker, i.e. company/stock) the latest
    high/low/close/typical values are first appended to its price frame. Then
    13 indicators are calculated for each of 32 period lengths, i.e. 416
    indicator values per company. The values are written to indicators[name]
    at the new timestamp, grouped by indicator: the 32 period values of the
    first indicator, then the 32 of the second, and so on, in the order
    rsi, vol, sma, cci, pe, mom, bol, aro, macd, macd2, adx, d, rets.

    ticks      -- iterable of ticker names.
    attr       -- sequence whose first three entries are read:
                    attr[0]  dict name -> frame with 'Highs', 'Lows',
                             'Closes' and 'Typical' columns
                    attr[1]  dict (name + period index) -> frame of
                             true range / plus DM / minus DM rows
                    attr[2]  dict name -> frame of DX values, one column per
                             period index
                  All three are updated in place with the new row.
    prd_dict   -- dict name -> four lists of period lengths. The loop reads
                  index x+1 of the second list and x+3 of the third, so those
                  need at least 33 and 35 entries.
    count      -- not used inside this function.
    last_close -- dict name -> [latest close, timestamp of that close].
    day_hl     -- dict name -> [day low, day high].
    indicators -- dict name -> frame receiving the 416-value row.

    Returns (indicators, attr), where attr is a new three-element list
    [attr[0], attr[1], attr[2]].
    """
    hlc_dict  = attr[0]
    trpm_dict = attr[1]
    dx_dict   = attr[2]
    
    for name in ticks:
        indicator_list, dx_lst = [], []
        comp_dxs  = dx_dict[name]

        # Todays current low, high, close, time, typical value
        day_lo    = day_hl[name][0]
        day_hi    = day_hl[name][1]
        day_cls   = last_close[name][0]
        day_time  = last_close[name][1]
        day_typ   = (day_lo + day_hi + day_cls) / 3.
        
        # Add new values to our dictionary
        hlc_dict[name].loc[day_time] = [day_hi, day_lo, day_cls, day_typ]
        
        # Comps days highs/lows/typicals at each time interval since 2012 and prev years closes
        highs       = hlc_dict[name]['Highs']
        lows        = hlc_dict[name]['Lows']
        typs        = hlc_dict[name]['Typical']
        clss        = hlc_dict[name]['Closes']
        
        # len_prds are pre-calcd prd lengths, length_clss used to create closing prd dfs
        len_prds    = prd_dict[name]
        length_clss = len(clss)
        
        # Each company has 32 different period length values to find the most useful period values
        for x in range(32):
            # Period lengths for this index; the four lists are offset against
            # each other, hence the x+1 / x+3 look-ups
            len_prd    = len_prds[0][x]
            len_prd1x0 = len_prds[1][x]
            len_prd1x1 = len_prds[1][x+1]
            len_prd2x0 = len_prds[2][x]
            len_prd2x1 = len_prds[2][x+1]
            len_prd2x3 = len_prds[2][x+3]
            len_prd3x0 = len_prds[3][x]
            
            # prd_start is starting value for period(ex. 5days ago)
            prd_start       = length_clss - len_prd
            prd_start2x3    = length_clss - len_prd2x3
            prd_start1x0    = length_clss - len_prd1x0
            
            # New closing period dataframes from prd start to present
            #  Some have larger period lengths due to rolling calculations
            #  that need the extra length to calculate properly
            clss_prd        = clss.iloc[prd_start:]
            clss_prd2       = clss.iloc[prd_start1x0  - len_prd1x1:]
            clss_prd3       = clss.iloc[prd_start2x3  - len_prd2x0:]
            clss_prd4       = clss.iloc[prd_start2x3  - len_prd2x3:]
            typ_prd         = typs.iloc[prd_start:]
            lo_prd          = lows.iloc[prd_start1x0  - len_prd1x1:]
            hi_prd          = highs.iloc[prd_start1x0 - len_prd1x1:]
            
            # Price at the beginning of that period (Ex. the price 5 days ago..)
            clss_prd_0price = clss_prd.iloc[0]
            prev_hi         = highs.iloc[prd_start]
            prev_lo         = lows.iloc[prd_start]
            
            # Retrieve historical tr/posdm/mindm and dx vals
            trpm_name       = name+str(x)
            tr_df           = trpm_dict[trpm_name]
            dx_df           = comp_dxs[x]
            

            #____________________INDICATOR CALCULATIONS:_________________________
            #
            # Average Directional Index:
            # NOTE(review): the usual true-range definition takes the absolute
            # value of the two close-based terms; here they are signed --
            # confirm this is intended.
            new_tr     = max((day_hi-day_lo),(day_hi-clss_prd_0price),(day_lo-clss_prd_0price))
            new_highdm = day_hi  - prev_hi
            new_lowdm  = prev_lo - day_lo
            # Only the larger of the two directional moves counts, floored at 0
            if new_highdm >= new_lowdm:
                new_plusdm  = max(new_highdm, 0.)
                new_minusdm = 0.
            else:
                new_plusdm  = 0.
                new_minusdm = max(new_lowdm, 0.)
            tr_df.loc[day_time] = [new_tr, new_plusdm, new_minusdm]
            # Exponentially weighted means of TR / +DM / -DM; last row = current
            atr_pn    = (tr_df.ewm(ignore_na=False,span=len_prd,min_periods=0,
                                   adjust=True).mean()).iloc[-1]
            atr, pos_dm, neg_dm = atr_pn[0], atr_pn[1], atr_pn[2]
            pos_di    = (pos_dm / atr) * 100.
            neg_di    = (neg_dm / atr) * 100.
            # Guard against 0/0 when there was no directional movement at all
            if (pos_di + neg_di) != 0:
                dx    = abs(pos_di - neg_di) / (pos_di + neg_di)
            else:
                dx    = 0.
            dx_df.loc[day_time] = dx
            dx_lst.append(dx)
            adx       = (dx_df.ewm(ignore_na=False,span=len_prd,min_periods=0,
                                   adjust=True).mean()).iloc[-1] * 100.
            # Moving Average Convergence Divergence:
            small     = clss_prd4.ewm(ignore_na=False,span=len_prd2x1,min_periods=0,
                                      adjust=True).mean()
            large     = clss_prd4.ewm(ignore_na=False,span=len_prd2x3,min_periods=0,
                                      adjust=True).mean()
            # Computed as long-span mean minus short-span mean, skipping the
            # first len_prd2x3 warm-up rows
            macd_df   = large[len_prd2x3:] - small[len_prd2x3:]
            signal    = macd_df.ewm(ignore_na=False,span=len_prd2x0,min_periods=0,
                                    adjust=True).mean()
            macd      = macd_df.iloc[-1]
            # Relative distance of the MACD from its signal line
            macd2     = (macd / signal.iloc[-1]) - 1.
            # Relative Strength Index:
            deltas    = clss_prd3 - clss_prd3.shift(len_prd2x0)
            up, down  = deltas.copy(), deltas.copy()
            up[up < 0], down[down > 0] = 0., 0.
            avg_g     = up[len_prd2x0:].mean()
            # avg_l is zero or negative because 'down' keeps only the losses
            avg_l     = down[len_prd2x0:].mean()
            if avg_l != 0:
                # A ratio of exactly -1 would make the denominator below zero
                if (avg_g / avg_l) != -1:
                    rsi = 100. - (100. / (1. + (avg_g / avg_l)))
                else:
                    rsi = 100.
            else:
                rsi   = 0.
            # Aroon index:
            # Based on the positions of the highest / lowest close in the period
            lenfl     = float(len_prd)
            aro_pos   = ((len_prd - clss_prd.index.get_loc(clss_prd.idxmax())) 
                                                             / lenfl) * 100.
            aro_neg   = ((len_prd - clss_prd.index.get_loc(clss_prd.idxmin()))
                                                             / lenfl) * 100.
            aro       = aro_pos - aro_neg
            # Stochastic Oscillators:
            hi_max    = hi_prd.rolling(window=len_prd1x1, center=False).max()
            lo_min    = lo_prd.rolling(window=len_prd1x1, center=False).min()
            # A zero high-low range is replaced by NaN so the division yields
            # NaN, which is then filled with 0
            kdo_k     = ((clss_prd2 - lo_min) / 
                         (hi_max - lo_min).replace(0, np.NaN) * 100.).fillna(0.)
            d         = kdo_k[len_prd1x1:].mean()
            # Bollinger Bands:
            # Position of the close relative to the upper band (mean + 2 std)
            bol_mean  = clss_prd.mean()
            bol_std   = clss_prd.std()
            bol_uppr  = bol_mean + bol_std * 2.
            bol_upmn  = bol_uppr - bol_mean
            if bol_upmn != 0:
                bol   = (day_cls - bol_mean) / (bol_uppr - bol_mean)
            else:
                bol   = 0.
            # Commodity Channel Index:
            typ_mean  = typ_prd.mean()
            typ_std   = typ_prd.std()
            if typ_std != 0:
                cci   = (day_typ - typ_mean) / (0.015 * typ_std)
            else:
                cci   = 0.
            #Simple Moving Average:
            sma       = clss_prd.mean()
            # Price/Earnings Ratio:
            # NOTE(review): no earnings figure enters this value; it is the
            # close divided by the price change over the period.
            if (day_cls - clss_prd_0price) != 0:
                pe    = day_cls / (day_cls - clss_prd_0price)
            else:
                pe    = 0.
            # Momentum:
            # Percentage change of the close since the start of the period
            mom       = ((day_cls - clss_prd_0price) / clss_prd_0price) * 100.
            # Volatility:
            vol_ret   = (clss_prd3 / clss_prd3.shift(len_prd2x0) - 1.).fillna(0.)
            vol       = vol_ret[len_prd2x0:].std() * np.sqrt(float(len_prd2x3))
            # Returns:
            rets      = ((clss_prd / clss_prd.shift(len_prd3x0)) - 1.).iloc[-1]
             
            ######################
            # Combine indicators #
            ######################
            indicator_list.append([rsi,vol,sma,cci,pe,mom,bol,aro,macd,macd2,adx,d,rets])
            #______________________________________________
        
        # Store the 32 DX values of this timestamp as one row
        comp_dxs.loc[day_time] = dx_lst
        # indicator list is added to prev indicator values for day
        # Flatten [period][indicator] into indicator-major order: all 32
        # periods of indicator 0, then all 32 of indicator 1, ...
        df_attrs = []
        for y in range(13):
            indicator_vals = []
            for z in range(32):
                indicator_vals.append(indicator_list[z][y])
            df_attrs = df_attrs + indicator_vals
        indicators[name].loc[day_time] = df_attrs
    # NOTE(review): attr is rebuilt with three entries only, so anything the
    # caller passed in attr[3:] is not returned. real_time_quotes tests
    # len(attr) > 3 on this return value before calling
    # calculate_neural_output -- confirm whether dropping them is intended.
    attr = [hlc_dict, trpm_dict, dx_dict]
    return indicators, attr

print "LOADED"

LOADED


In [11]:
def _append_keep(seq, item):
    """
    Append item to seq and return the container that now holds it.

    list.append works in place and returns None, while other containers may
    return a new object from append. Assigning the raw return value of a list
    append therefore replaces the list with None; this helper returns the
    list itself in that case.
    """
    appended = seq.append(item)
    return seq if appended is None else appended


def calculate_neural_output(indicators, nn_attr, ytestlst):
    """
    Run the pre-trained network on the latest indicator row and record the call.

    indicators -- dict name -> indicator frame (see calculate_indicators).
    nn_attr    -- list; entries 3..8 are read:
                    [3] net       the network passed to NNOut
                    [4] col_lst   mapping name -> columns used as inputs
                    [5] prev_in   window of previous inputs  (P0 for NNOut)
                    [6] prev_out  window of previous outputs (Y0 for NNOut)
                    [7] company   the company being predicted
                    [8] x         period index of the '<company>_rets<x>'
                                  output column
    ytestlst   -- [] on the first call, afterwards the [predicts,
                  pred_choices] pair returned by the previous call.

    The chosen input columns of the six index/fund/etf tickers and of the
    company are joined side by side and the last row is used as the network
    input. The previous-output window drops its oldest entry and gains the
    latest return before the prediction; the previous-input window is
    advanced the same way after it. The prediction is appended to predicts
    and a matching 'Buy' (> 0), 'Sell' (< 0) or 'Stay' to pred_choices.

    Returns (nn_attr, ytestlst): nn_attr with the updated windows, and
    [predicts, pred_choices] for comparison purposes EOD.
    """
    net        = nn_attr[3]
    col_lst    = nn_attr[4]
    prev_in    = nn_attr[5]
    prev_out   = nn_attr[6]
    company    = nn_attr[7]
    x          = nn_attr[8]
    
    # Was spelled 'ytestlist', which is not defined anywhere
    if ytestlst:
        predicts     = ytestlst[0]
        pred_choices = ytestlst[1]
    else:
        predicts     = []
        pred_choices = []

    comps = ['INDEXNASDAQ:.IXIC', 'INDEXSP:.INX', 'INDEXDJX:.DJI',
             'NYSEARCA:USO', 'NYSEARCA:GLD', 'NYSEARCA:SPY', company]
    
    # One concat for all frames instead of growing a frame inside a loop
    combined = pd.concat([indicators[key][col_lst[key]] for key in comps],
                         axis=1).iloc[-1]
        
    output_name = company+'_rets'+str(x)
    test_inputs = combined.values.T
    
    # Slide the windows: drop the oldest entry, add the newest
    prev_out = _append_keep(prev_out[1:], 
                            [indicators[company][output_name].iloc[-1]])
    ytest    = NNOut(test_inputs, net, P0=prev_in, Y0=prev_out)
    prev_in  = _append_keep(prev_in[1:], test_inputs)
    
    predicts = _append_keep(predicts, ytest)
    
    if ytest > 0:
        choice = 'Buy'
    elif ytest < 0:
        choice = 'Sell'
    else:
        choice = 'Stay'
    pred_choices = _append_keep(pred_choices, choice)
        
    ytestlst   = [predicts, pred_choices]
    nn_attr    = nn_attr[:3] + [net, col_lst, prev_in, prev_out, company, x]
    
    return nn_attr, ytestlst

print "LOADED"

LOADED


In [12]:
def get_data_ready(choices=[]):
    """
    First function called at the beginning of the trading day.
    
    Setup the stock ticker symbols for both the Google server and data dictionary. Then get
    the pickled files for the historical intraday high/low/close/typical price dataframes 
    for each company thats in a dictionary. Also retrieve the news dictionary file for testing
    whether a news story is new or already processed.
    
    Then find the companies with upcoming stock splits and dividends and compare them to 
    our list to see if we have any companies with upcoming dates so we can adjust for them. 
    As of right now (2016-09-03), the function to adjust has not been created but will be soon.
    
    Finally call the company_news() function to retrieve info for each company and 
    index/fund/etf for new articles on them. As of right now the function to automatically 
    read the data, and convert the data to extra information to help in prediction, has not 
    been created but will be created soon. It will utilize a neural network to comprehend 
    the data, possible using Google's new Parsy McParseFace module just released that is 
    built for NLP(Natural Language Processing).
    """
    # Dictionaries that will be used throughout program
    indicatorlist  = {}
    day_hl         = {}
    temp           = {}
    dates          = {}
    
    # Ticker symbols for google servers
    tickers        = {'AAPL':'AAPL','NYSEARCA:USO':'USO','NYSEARCA:GLD':'GLD',
                      'NYSEARCA:SPY':'SPY','INDEXDJX:.DJI':'^DJI','INDEXSP:.INX':'^GSPC',
                      'INDEXNASDAQ:.IXIC':'^IXIC', 'NYSE:LMT':'LMT'}
    tickers2       = ['BPOP','FITB','HBAN','CMCSA','EBAY','AAPL','AMAT','BRCD','CSCO',
                      'GOOG','INTC','LVLT','MSFT','MU','NVDA','ORCL','QCOM','SIRI','WIN',
                      'YHOO','BHP','BP','RIO','XOM','GE','F','MO','XRX','GS','JPM','LYG',
                      'MS','RF','USB','WFC','MRK','PFE','NYSE:LMT','MGM','AMD','GLW',
                      'HPQ','S','T','NYSEARCA:USO','NYSEARCA:GLD','NYSEARCA:SPY',
                      'INDEXDJX:.DJI','INDEXSP:.INX','INDEXNASDAQ:.IXIC']
    hlc_cols       = ['Highs','Lows','Closes','Typical']
    
    plnums   = [10, 14, 16, 18, 20, 25, 30, 40, 50, 75, 100, 125, 150, 200, 250, 300]
    plnums2  = [8, 10, 14, 16, 18, 20, 25, 30, 40, 50, 75,  100, 125, 150, 200, 250, 300, 350]
    plnums3  = [6, 8, 10, 14, 16, 18, 20, 25, 30, 40, 50,  75,  100, 125, 150, 200, 250, 300, 350]
    plnums4  = [1,2,3,4,5, 10, 14, 16, 18, 20, 25, 30, 40, 50, 75, 100, 125, 150, 200, 250, 300]
    rs       = NYSE_tradingdays()
    
    #Historical intraday trading data for every company/index/sector at 1 min intervals
    opp         = open('Pickles/shortpickleintra.pickle', 'rb')
    opp2        = open('Pickles/newsdict.pickle', 'rb')
    opp3        = open('NewBase/ADXD/adx_d3.pickle', 'rb')
    opp4        = open('NewBase/ADXD/adx_d2.pickle', 'rb')
    opp5        = open('Pickles/columnnames.pickle', 'rb')
    hlc_short   = pickle.load(opp)
    news_dict   = pickle.load(opp2)
    trpm_dict   = pickle.load(opp3)
    dx_dict     = pickle.load(opp4)
    nm_dict     = pickle.load(opp5)
    opp.close()
    opp2.close()
    opp3.close()
    opp4.close()
    opp5.close()
    
    if choices != []:
        comb_df, output_df, cols, net = get_neural_inout(choices)
    
    ticks = []
    for key in tickers2:
        if key in hlc_short.keys():
            ticks.append(key)
    
    # Get initial quotes for beginning highs/lows
    initial_quotes = getQuotes(ticks) 
    for key, y in zip(ticks, range(len(ticks))):
        # temp used to store our days highs, lows, closes, and typical prices
        # indicatorlist will contain our indicator values calculated for the day
        indicatorlist[key] = pd.DataFrame(columns=nm_dict[key]) 
        temp[key]          = pd.DataFrame(columns=[hlc_cols])
        
        # day_hl is day highs/lows used to keep track throughout day, start at initial day quote
        day_hl[key] = [float(initial_quotes[y]['LastTradePrice'].replace(',','')), 
                       float(initial_quotes[y]['LastTradePrice'].replace(',',''))]
        
        # Create period dictionaries which gives the period lengths for each companies indicators
        tl = []
        for x in xrange(268, 135, -1):
            try:
                dt   = str(rs[x])[:10]
                test = hlc_short[key][dt]
                tl.append(dt)
                if len(tl) > 125:
                    break
            except:
                pass
        prd_lst    = [tl[1], tl[2], tl[3], tl[5], tl[8], tl[10], tl[12], tl[14], tl[16], 
                      tl[20], tl[25], tl[30], tl[40], tl[50], tl[80], tl[125]]
        dates[key] = prd_lst 
    prd_dict = create_prd_lst2(hlc_short, dates, plnums, plnums2, plnums3, plnums4)
        
    try:
        # Find companies with upcoming dividends and stock splits
        div_dates = 'http://www.nasdaq.com/dividend-stocks/dividend-calendar.aspx'
        spl_dates = 'http://www.nasdaq.com/markets/upcoming-splits.aspx'
        dividend  = pd.DataFrame(dvds(div_dates)[0],columns=['Sym','ExDiv','Div','RecDt','PayDt'])
        split     = pd.DataFrame(splits(spl_dates),columns=['Sym','ExDt','Ratio','Payab'])
        divs      = dividend['Sym'].values
        spls      = split['Sym'].values
        divs_dts  = dividend['ExDiv'].values
        spls_dts  = split['ExDt'].values

        # Print out which companies have stock splits and dividends upcoming, if any
        divspl_lst = [divs, spls]
        divspl_dts = [divs_dts, spls_dts]
        for i in  range(2):
            lst = divspl_lst[i]
            dt  = divspl_dts[i]
            for ii in range(len(lst)):
                comp = lst[ii]
                date = dt[ii]
                if comp in ticks:
                    if i == 0:
                        print comp, "DIV", date
                    else:
                        print comp, "SPLITS", date
                if comp in tickers.keys():
                    if i == 0:
                        print tickers[comp], "DIV", date
                    else:
                        print tickers[comp], "SPLITS", date
        
        # Call our trading news function to get any current news stories for each comp
        #new_trading_news(news_dict, tickers2)
    except:
        print "DIVSPLNEWS FAIL"
        pass
    
    if choices != []:
        attr = [hlc_short, trpm_dict, dx_dict, net, cols, 
                comb_df, output_df, company_choice, output_choice]
    else:
        attr = [hlc_short, trpm_dict, dx_dict]
    
    return tickers2, attr, prd_dict, indicatorlist, day_hl, temp, nm_dict, tickers

# Marker printed once this cell has run, confirming get_data_ready is now defined.
print "LOADED"

LOADED


In [None]:
def get_neural_inout(choices, final=None):
    """
    Load the trained network for a company and assemble its most recent inputs/outputs.

    Parameters
    ----------
    choices : sequence
        [company_choice, output_choice]. company_choice selects the pickled network and
        column list; output_choice is the suffix appended to '<company>_rets' to pick
        the output column.
    final : ignored
        Kept only for backward compatibility. The value is always replaced by the
        list loaded from 'Pickles/final_lst222<company>.pickle', so it is now optional.

    Returns
    -------
    comb_df : pandas.DataFrame
        Last five rows of the selected columns of every base index/fund and of the
        company, joined side by side, forward- then back-filled.
    output_df : pandas.Series
        Last four values of '<company>_rets<output_choice>'.
    cols : dict
        Maps each data-dictionary key (Google-style name for the bases, the ticker for
        the company) to the list of column names selected for it.
    net : object
        Whatever was pickled in 'Pickles/net222<company>.pickle'.
    """
    company_choice = choices[0]
    output_choice  = choices[1]

    # `with` closes the handles even when unpickling raises.
    with open('Pickles/net222'+company_choice+'.pickle', 'rb') as opp:
        net = pickle.load(opp)
    with open('Pickles/final_lst222'+company_choice+'.pickle', 'rb') as opp2:
        final = pickle.load(opp2)

    comb_df = pd.DataFrame()
    lst  = ['^GSPC', '^IXIC', '^DJI', 'GLD', 'USO', 'SPY', company_choice]
    # Folder/file names (left) -> keys used by the real-time data dictionaries (right)
    lst2 = {'^IXIC':'INDEXNASDAQ:.IXIC', '^GSPC':'INDEXSP:.INX',
            '^DJI':'INDEXDJX:.DJI', 'USO':'NYSEARCA:USO', 'GLD':'NYSEARCA:GLD',
            'SPY':'NYSEARCA:SPY'}

    cols = {}
    for nm in lst:
        cols[lst2.get(nm, nm)] = []

    for name in final:
        sep      = name.find('_')
        key      = name[:sep]           # text before the first underscore
        key_base = name[sep:]           # underscore and everything after it
        ind      = name[sep+1:sep+5]    # four characters following the underscore
        ind_base = name[:sep+5]

        if ind != 'rets':
            # NOTE(review): cols is keyed by the Google-style names, so a raw prefix such
            # as '^GSPC' is treated as unknown here and re-attributed to the company.
            # Behaviour kept as-is; confirm this is intended.
            if key not in cols:
                key  = company_choice
                name = key+key_base
        else:
            # Returns columns are re-targeted to the requested output horizon
            name = ind_base+str(output_choice)

        # Base tickers are stored under their Google-style key, everything else as-is
        key = lst2.get(key, key)
        cols[key].append(name)

    for nm in lst:
        nm2 = nm if nm == company_choice else lst2[nm]
        with open('NewBase/'+nm+'/'+nm+'_df14.pickle','rb') as opp:
            dff = pickle.load(opp)
        len_dff = len(dff)
        comb_df = pd.concat([comb_df, dff[cols[nm2]].iloc[len_dff-5:len_dff]],
                            axis=1).ffill().bfill()
        if nm == company_choice:
            output_df = dff[company_choice+'_rets'+str(output_choice)].iloc[len_dff-4:]

    return comb_df, output_df, cols, net

# Marker printed once this cell has run, confirming get_neural_inout is now defined.
print "LOADED"

In [None]:
def update_files_EOD2(inds, hlc, ticks, ticks2, cols, attr, prds):
    """
    Update our indicators and high/low/close dictionary at the end of the trading day,
    appending the day's results to the historical values and writing everything back
    to disk.

    Parameters
    ----------
    inds   : dict of per-ticker indicator frames produced during the day.
    hlc    : dict of per-ticker high/low/close/typical frames produced during the day.
    ticks  : iterable of ticker keys to process.
    ticks2 : dict mapping a ticker key to its folder/file name; keys missing from it
             are used unchanged.
    cols   : dict mapping a ticker key to the indicator columns to keep.
    attr   : list whose first three entries are the short intraday history and the
             two ADX helper dictionaries (in the order get_data_ready returned them).
    prds   : dict of period lengths; prds[name][0][x] is read for x in 0..31.

    Returns
    -------
    None. Side effects: rewrites each 'NewBase/<name>/<name>_df14.pickle' and writes
    'Pickles/pickleadjustedintracomplete2.pickle', 'Pickles/shortpickleintra2.pickle',
    'NewBase/ADXD/adx_d22.pickle' and 'NewBase/ADXD/adx_d32.pickle'.
    """
    old_short   = attr[0]
    adx_d2      = attr[1]
    adx_d3      = attr[2]

    short_lst   = get_short_index(old_short)
    short_d3    = get_short_index(adx_d3)
    with open('Pickles/pickleadjustedintracomplete.pickle','rb') as opp:
        old_hlc = pickle.load(opp)

    for name in ticks:
        name2      = ticks2.get(name, name)
        file_name  = 'NewBase/'+name2+'/'+name2+'_df14.pickle'

        # Trim each of the 32 per-period series to its period length plus 5 rows.
        # The original indexed with an undefined `key`; `name` is the loop variable it
        # meant (the slice below already used prds[name]).
        # NOTE(review): assumes adx_d2 is keyed by '<ticker><x>' - confirm.
        for x in range(32):
            name3  = name+str(x)
            length = len(adx_d2[name3])
            keep   = prds[name][0][x]+5
            if keep <= length:
                adx_d2[name3] = adx_d2[name3].iloc[length-keep:]

        with open(file_name,'rb') as opp:
            df14 = pickle.load(opp)[cols[name]]

        old_short[name] = old_short[name].loc[short_lst[name][0]:]
        adx_d3[name] = adx_d3[name].loc[short_d3[name][0]:]
        old_hlc[name2] = old_hlc[name2].append(hlc[name])
        df14 = df14.append(inds[name][cols[name]])

        with open(file_name,'wb') as opp:
            pickle.dump(df14, opp)

    # File/variable pairing kept exactly as before: the adx_d3 variable goes to
    # adx_d22.pickle and the adx_d2 variable goes to adx_d32.pickle.
    with open('Pickles/pickleadjustedintracomplete2.pickle','wb') as opp:
        pickle.dump(old_hlc, opp)
    with open('Pickles/shortpickleintra2.pickle','wb') as opp2:
        pickle.dump(old_short, opp2)
    with open('NewBase/ADXD/adx_d22.pickle','wb') as opp3:
        pickle.dump(adx_d3, opp3)
    with open('NewBase/ADXD/adx_d32.pickle','wb') as opp4:
        pickle.dump(adx_d2, opp4)
    return

# Marker printed once this cell has run, confirming update_files_EOD2 is now defined.
print "LOADED"

In [None]:
def run_program(choices=[]):
    """
    Wait for the start of the trading day, then prepare the data and start the
    real-time quote loop.

    The clock is polled until the local time falls between 9:24 and 9:30 (inclusive).
    Outside that window the function sleeps 30 seconds and checks again. Once inside,
    get_data_ready() is called (it needs a couple of minutes, so recording starts at
    roughly 9:30) and its results are handed to real_time_quotes().

    The time is checked *before* sleeping so that a call made a few seconds before
    9:30 is not pushed past the window by the first sleep.

    Parameters
    ----------
    choices : list, optional
        Forwarded unchanged to get_data_ready(); [] means no neural network is loaded.

    Returns
    -------
    tuple
        (inds, temp, attr, ticks, ticks2, nm_dict, prds): the first three come from
        real_time_quotes(), the rest from get_data_ready().
    """
    window_open  = datetime.time(9,24)
    window_close = datetime.time(9,30)

    while True:
        now_time = datetime.datetime.now().time()
        if window_open <= now_time <= window_close:
            break
        time.sleep(30)

    # get_data_ready() treats [] exactly like "no argument", so one call covers both cases
    ticks,attr,prds,indiclst,dayhl,temp,nm_dict,ticks2 = get_data_ready(choices)
    inds, temp, attr = real_time_quotes(ticks, attr, prds, indiclst, dayhl, temp)

    return inds, temp, attr, ticks, ticks2, nm_dict, prds

In [13]:
# Entry point for the trading day. run_program() blocks until its start window is reached.
# To also load a trained network, pass choices = [company ticker, '_rets' column suffix]
# by enabling the two commented-out lines below instead of the default call.
#choices = ['AAPL', 0]
#inds, hlc, attr, ticks, ticks2, cols, prds = run_program(choices)
# Default run without a network. Note that `hlc` receives run_program's `temp` and
# `cols` receives its `nm_dict` (see the return statement of run_program).
inds, hlc, attr, ticks, ticks2, cols, prds = run_program()

390

In [None]:
#update_files_EOD(inds, hlc, ticks, ticks2, cols, attr, prds)