In [2]:
from math import exp 
import os
import math
import pickle
import random 
import numpy as np
import pandas as pd

def retrieve_data():
    """Load the pickled market-data dictionaries from the working directory.

    Retrieve the dictionaries filled with each company's dataframes from
    both the Nasdaq and NYSE. Dictionaries include closing prices, bollinger
    bands, momentum, and p/e ratios. The data is taken from pickled
    data files that were produced by an earlier retrieval function that
    fetched the data from the Quandl server. To add more data, use that
    earlier retrieval method, or call Quandl.get("CODE") where CODE is a
    specific Quandl code.

    Returns:
        A 3-tuple ``(closingPrices, technical_indicators, other_stuff)``:
          * ``closingPrices``        -- contents of 'newclosingprices.pickle'
          * ``technical_indicators`` -- ``[bolBandsDict, momentumDict,
            peRatioDict]`` (in that order)
          * ``other_stuff``          -- ``[testingGround, featureDict]``

    Raises:
        IOError/OSError: if one of the pickle files cannot be opened.
    """
    def _load(path):
        # `with` closes the handle even when unpickling raises, so a corrupt
        # file no longer leaks the other open descriptors.
        # SECURITY: pickle.load can execute arbitrary code; only point this
        # at files you created yourself.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    closingPrices = _load('newclosingprices.pickle')
    bolBandsDict = _load('newbandsdictfile.pickle')
    momentumDict = _load('newmomentsdictfile.pickle')
    peRatioDict = _load('newperatsdictfile.pickle')
    testingGround = _load('testingground.pickle')
    featureDict = _load('newfeaturesfile.pickle')

    technical_indicators = [bolBandsDict, momentumDict, peRatioDict]
    other_stuff = [testingGround, featureDict]

    return closingPrices, technical_indicators, other_stuff
# Load every pickled dataset once; the later cells reuse these three names.
closingPrices, technical_indicators, other_stuff = retrieve_data()
print("Done")

Done


In [22]:
def retrieve_daily_returns(df):
    """Apply compute_daily_returns to every entry of a keyed collection.

    Args:
        df: an object supporting ``.keys()`` and item lookup; each value is
            handed to ``compute_daily_returns`` unchanged.

    Returns:
        A new dict with the same keys, mapping each key to the daily-return
        table computed from the corresponding value.
    """
    return {name: compute_daily_returns(df[name]) for name in df.keys()}

def compute_daily_returns(df):
    """Compute and return the daily return values.

    Each row becomes ``row / previous_row - 1``. The first row has no
    predecessor, so it is set to 0 instead of being left as NaN.

    Args:
        df: a pandas DataFrame of prices (rows are consecutive periods).

    Returns:
        A new DataFrame of the same shape; ``df`` itself is not modified.
    """
    daily_returns = (df / df.shift(1)) - 1
    # .iloc is strictly positional. The removed `.ix` accessor was label-based
    # on integer indexes, so on a frame whose index did not contain the label
    # 0 it would append a new row rather than zero the first one.
    if len(daily_returns) > 0:  # an empty frame has no first row to reset
        daily_returns.iloc[0, :] = 0
    return daily_returns

def sort_by_timestep(technical_indicators,newBollDicts):
    """Derive per-company cut points for momentum, P/E and Bollinger values.

    For every company the three indicator series are sorted (after skipping
    their first 21 entries -- presumably the indicator warm-up window, TODO
    confirm) and sampled at 18 evenly spaced positions.

    Args:
        technical_indicators: sequence whose item 1 is the momentum mapping
            and item 2 the P/E mapping, both indexed as
            ``mapping[group][company]``.
        newBollDicts: mapping ``company -> sequence`` of Bollinger values.

    Returns:
        dict ``company -> {i: [momentum_cut, pe_cut, bollinger_cut]}`` for
        ``i`` in ``0..17``, cut points in ascending order.
    """
    momentumDict = technical_indicators[1]
    peRatioDict = technical_indicators[2]
    num_bins = 18
    each_company_thresholds = {}
    for each_dict in momentumDict.keys():
        for each_company in momentumDict[each_dict].keys():
            # np.array copies, so sorting never reorders the caller's data.
            np1 = np.array(momentumDict[each_dict][each_company][21:])
            np2 = np.array(peRatioDict[each_dict][each_company][21:])
            np3 = np.array(newBollDicts[each_company][21:])
            np1.sort()
            np2.sort()
            np3.sort()

            # Floor division keeps the step an integer on every Python version.
            step_size = np1.size // num_bins

            t = {}
            for i in range(0, num_bins):
                position = (i + 1) * step_size
                # When the sample count is an exact multiple of num_bins the
                # last position equals the array length; clamp it to the final
                # (largest) element instead of raising IndexError. The same
                # clamp covers a P/E or Bollinger series that is shorter than
                # the momentum series used to size the step.
                mom = np1[min(position, len(np1) - 1)]
                pe = np2[min(position, len(np2) - 1)]
                bol = np3[min(position, len(np3) - 1)]
                t[i] = [mom,pe,bol]
            each_company_thresholds[each_company] = t
    return each_company_thresholds

def _threshold_bins(t, column):
    """Build the 18 inclusive (low, high) bins for one indicator column."""
    edges = [t[k][column] for k in range(17)]
    # Open-ended on both sides so every finite value lands in some bin.
    return list(zip([-np.inf] + edges, edges + [np.inf]))


def descretize_indicators(technical_indicators, company_t, newBollDict):
    """Map each day's momentum, P/E and Bollinger values to bin indices.

    Args:
        technical_indicators: sequence whose item 1 is the momentum mapping
            and item 2 the P/E mapping, both indexed as
            ``mapping[group][company]``.
        company_t: ``company -> {i: [momentum_cut, pe_cut, bollinger_cut]}``;
            only cut points 0..16 are used as bin edges.
        newBollDict: mapping ``company -> sequence`` of Bollinger values.

    Returns:
        dict ``company -> list`` with one entry per row of the joined table.
        Each entry is ``[momentum_bin, pe_bin, bollinger_bin]`` with bins in
        ``0..17``. Bin edges are inclusive and the first matching bin wins, so
        a value equal to an edge falls in the lower bin. A value that matches
        no bin (e.g. NaN) contributes nothing, so that row's list is shorter
        than three.
    """
    momentumDict = technical_indicators[1]
    peRatioDict = technical_indicators[2]
    each_company_descretized = {}

    for each_dict in momentumDict.keys():
        for each_company in momentumDict[each_dict].keys():
            # The first 21 entries of every series are skipped.
            momentum = pd.DataFrame(momentumDict[each_dict][each_company][21:])
            peratio = pd.DataFrame(peRatioDict[each_dict][each_company][21:])
            bollinger = pd.DataFrame(newBollDict[each_company][21:])

            # The Bollinger values come without an index; borrow the P/E one
            # so the outer joins below line the rows up.
            bollinger = bollinger.set_index(peratio.index)
            bollinger = bollinger.rename(columns = {0:'Bol'})

            grouped = momentum.join(peratio, how='outer', rsuffix='Pe')
            grouped = grouped.join(bollinger, how='outer', rsuffix='Bol')

            t = company_t[each_company]
            # One list of bins per column, in the joined column order:
            # momentum (0), P/E (1), Bollinger (2).
            ranges = [_threshold_bins(t, column) for column in range(3)]

            day_threshs = []
            for num in range(len(grouped)):
                # .iloc is positional regardless of the index type (the
                # removed `.ix` was label-based on integer indexes).
                test = grouped.iloc[num]
                thrshs = []
                for each_variable, each_tuple_variable in zip(test, ranges):
                    for y, each_tuple in enumerate(each_tuple_variable):
                        if each_tuple[0] <= each_variable <= each_tuple[1]:
                            thrshs.append(y)
                            break
                day_threshs.append(thrshs)
            each_company_descretized[each_company] = day_threshs
    return each_company_descretized

In [23]:
def initialize_qtable():
    """Create the Q-table with every (state, action) entry set to 0.

    Keys are 4-tuples
    ``(('bands', b), ('momentum', m), ('peratio', p), action)`` where each of
    ``b``, ``m`` and ``p`` ranges over 0..17 and ``action`` is -1, 0 or 1.

    Returns:
        dict mapping each of the 18 * 18 * 18 * 3 keys to 0.
    """
    levels = range(18)
    moves = (-1, 0, 1)
    return {
        (('bands', band), ('momentum', momentum), ('peratio', pe), move): 0
        for band in levels
        for momentum in levels
        for pe in levels
        for move in moves
    }

def lookup_actions(state, q_values):
    """Sample an action with Boltzmann (softmax) exploration.

    Args:
        state: indexable with ``state[0]`` = momentum bin, ``state[1]`` =
            P/E bin and ``state[2]`` = Bollinger bin.
        q_values: Q-table keyed by
            ``(('bands', b), ('momentum', m), ('peratio', p), action)``.

    Returns:
        ``(choice, max_action)`` where ``choice`` is the sampled action
        (-1, 0 or 1) and ``max_action`` is the ``[q_value, action]`` pair
        with the largest Q-value (ties go to the larger action).

    Raises:
        KeyError: if the state is missing from ``q_values``.
    """
    action = [-1, 0, 1]
    q_cap = 650  # Q-values above this are treated as equal to it

    momen  = state[0]
    pe_r   = state[1]
    bands  = state[2]

    q_vals_list = []
    for each in action:
        q_val_state = (('bands', bands),('momentum', momen),
                       ('peratio', pe_r),each)
        q_vals_list.append([q_values[q_val_state], each])
    max_action = max(q_vals_list)

    # Boltzmann method. Subtracting the largest (capped) value before
    # exponentiating leaves the probabilities unchanged but guarantees the
    # biggest weight is exp(0) == 1, so the normaliser can never underflow
    # to 0 and divide by zero when every Q-value is very negative.
    capped = [min(q_val, q_cap) for q_val, _ in q_vals_list]
    shift = max(capped)
    weights = [exp(value - shift) for value in capped]
    q_sum_exp = sum(weights)

    x = random.uniform(0,1)

    # Walk the cumulative distribution; the last action takes the remainder.
    cumulative = 0.0
    for weight, each in zip(weights[:-1], action[:-1]):
        cumulative += weight / q_sum_exp
        if x <= cumulative:
            return each, max_action
    return action[-1], max_action

def update_policy(state, action, reward, q_values, next_input,
                  discount=0.5, alpha=0.5):
    """Apply one Q-learning update to ``q_values`` in place.

    ``Q(s, a) <- (1 - alpha) * Q(s, a)
                 + alpha * (reward + discount * max_a' Q(s', a'))``

    Args:
        state: current state; ``state[0]`` = momentum bin, ``state[1]`` =
            P/E bin, ``state[2]`` = Bollinger bin.
        action: the action taken in ``state`` (-1, 0 or 1).
        reward: reward received for that action.
        q_values: Q-table keyed by
            ``(('bands', b), ('momentum', m), ('peratio', p), action)``.
        next_input: the following state, same layout as ``state``.
        discount: weight of the best next-state value (default 0.5).
        alpha: learning rate (default 0.5).

    Returns:
        The same ``q_values`` dict, with the entry for (state, action)
        replaced by the updated value.

    Raises:
        KeyError: if either state is missing from ``q_values``.
    """
    momen  = state[0]
    pe_r   = state[1]
    bands  = state[2]

    q_val_state = (('bands', bands),('momentum', momen),
                   ('peratio', pe_r), action)
    q_state = q_values[q_val_state]

    # Only the best next-state value is needed, so read it straight from the
    # table. This avoids sampling (and discarding) an exploratory action,
    # which cost a softmax evaluation and a random draw on every update.
    next_momen  = next_input[0]
    next_pe_r   = next_input[1]
    next_bands  = next_input[2]
    best_next = max(
        q_values[(('bands', next_bands),('momentum', next_momen),
                  ('peratio', next_pe_r), each)]
        for each in (-1, 0, 1)
    )

    new_value = q_state*(1.-alpha) + alpha*(reward + (discount * best_next))

    q_values[q_val_state] = new_value
    return q_values

def compute_bolls(technical_indicators, closingPrices):
    """Return each company's closing price minus its rolling value.

    Args:
        technical_indicators: sequence whose item 0 maps a group name to an
            indexable of three per-company mappings; item ``[2]`` supplies
            the rolling values subtracted here.
        closingPrices: ``group -> company -> sequence`` of closing prices.

    Returns:
        dict ``company -> list`` of ``close - rolling`` differences. The four
        sequences are zipped together, so the result is as long as the
        shortest of them.
    """
    band_groups = technical_indicators[0]
    deviations = {}
    for group_name in band_groups.keys():
        group = band_groups[group_name]
        for company in group[0].keys():
            rows = zip(group[0][company],
                       group[1][company],
                       group[2][company],
                       closingPrices[group_name][company])
            # Items [0] and [1] only take part in the zip; they are not used
            # in the arithmetic.
            deviations[company] = [close - rolling
                                   for _first, _second, rolling, close in rows]
    return deviations

In [24]:
# Preprocessing pipeline: price-minus-rolling series -> per-company cut
# points -> discretised daily states, plus the daily returns used as rewards.
newBollDict = compute_bolls(technical_indicators, closingPrices)
threshold = sort_by_timestep(technical_indicators, newBollDict)
company_inputs = descretize_indicators(technical_indicators, threshold, newBollDict)
dailyReturnsDict = retrieve_daily_returns(closingPrices)
print("Done")

Done


In [43]:
q_values = initialize_qtable()
# Exclude this company from training. pop() with a default is a no-op when
# the key is absent, so no blanket `except` is needed to hide other errors.
company_inputs.pop('China Housing & Land Development', None)

# Two passes over every company's history.
for each_repeat in range(0,2):
    for each_company in company_inputs.keys():
        # A company's returns live under either the Nasdaq or the NYSE key.
        if each_company in dailyReturnsDict['normNasdDict'].keys():
            returns = np.array(dailyReturnsDict['normNasdDict'][each_company][21:])
        else:
            returns = np.array(dailyReturnsDict['normNyseDict'][each_company][21:])

        # Skip the leading run of zero returns; `i` is also the offset that
        # keeps company_inputs aligned with the trimmed returns below.
        i = 0
        for each in returns:
            if each != 0.:
                break
            else:
                i += 1
        returns = returns[i:]

        for rounds in range(0, (len(returns)-2)):
            # Gather state
            updated = company_inputs[each_company][i+rounds]
            rewards = returns[rounds]
            # Select action according to the Boltzmann policy
            choice, max_action = lookup_actions(updated, q_values)
            action = choice
            # Execute action and get reward
            # NOTE(review): action 0 is rewarded with the raw return, the
            # same as action 1 -- confirm this is intended.
            if action != 0:
                reward = action*rewards
            else:
                reward = rewards
            # Learn policy based on state, action, reward
            next_input = company_inputs[each_company][i+rounds+1]
            q_values = update_policy(updated, action, reward, q_values, next_input)
print("Done")

Done


In [51]:
# other_stuff[0] is the testingGround object unpickled by retrieve_data().
testing = other_stuff[0]
# Presumably Bollinger bands, momentum, P/E ratios and daily returns for the
# held-out data, judging by the names -- TODO confirm against the pickle.
bols = testing[0]
moms = testing[1]
pers = testing[2]
dail = testing[3]
# Grouped in the order bols, moms, pers; NOTE(review): looks like it mirrors
# technical_indicators ([bollinger, momentum, pe]) -- confirm.
tech_ind = [bols,moms,pers]
#retrieve adjusted closings