In [218]:
import random 
import os
import math
import Quandl
import pickle
import numpy as np
import pandas as pd
import scipy.optimize as spo
import matplotlib.pyplot as plt
from math import exp 

def retrieve_data():
    """Retrieve the dictionaries filled with each companies dataframes in
       both the s&p 500 and dow jones indexes, and the third dictionary
       contains several measures of features. The data is taken from pickled
       data files that were retrieved with an earlier retrieve data function
       that retrieved the data from the Quandl server. If you need to add more
       data, you need to use the previous retrieval method which will be listed
       soon. You can also use Quandl.get("CODE") where CODE is a specific quandl
       code"""
    # Read from file
    dow_myfile = open('dowfile.pickle', 'rb')
    sp_myfile = open('spfile.pickle', 'rb')
    nasd_myfile = open('nasdtotalfile.pickle', 'rb')
    nyse_myfile = open('nysetotalfile.pickle', 'rb')
    feat_myfile = open('featfile.pickle', 'rb')
    adj_closing = open('adjclosingpricesfile.pickle', 'rb')
    norm_closing = open('closingpricesfile.pickle', 'rb')
    adj_volume = open('adjvolumefile.pickle', 'rb')
    norm_volume = open('volumefile.pickle', 'rb')
    bol_bands = open('bolbandsfile.pickle', 'rb')
    momentum = open('momentumfile.pickle', 'rb')
    pe_ratio = open('peratiofile.pickle', 'rb')
    inputs_file = open('companyinputsfile.pickle', 'rb')
    
    # Store the retrieved dictionaries in variables
    dowJonesDict = pickle.load(dow_myfile)
    sp500Dict = pickle.load(sp_myfile)
    nasdDict = pickle.load(nasd_myfile)
    nyseDict = pickle.load(nyse_myfile)
    featuresDict = pickle.load(feat_myfile)
    adjCloseDict = pickle.load(adj_closing)
    normCloseDict = pickle.load(norm_closing)
    adjVolumeDict = pickle.load(adj_volume)
    normVolumeDict = pickle.load(norm_volume)
    bolBandsDict = pickle.load(bol_bands)
    momentumDict = pickle.load(momentum)
    peRatioDict = pickle.load(pe_ratio)
    inputsDict = pickle.load(inputs_file)
    
    # Close the files
    dow_myfile.close()
    sp_myfile.close()
    nasd_myfile.close()
    nyse_myfile.close()
    feat_myfile.close()
    adj_closing.close()
    norm_closing.close()
    adj_volume.close()
    norm_volume.close()
    bol_bands.close()
    momentum.close()
    pe_ratio.close()
    inputs_file.close()
    
    index_dict_array = [dowJonesDict, sp500Dict, nasdDict, nyseDict]
    adjusted_dict_array = [adjCloseDict, normCloseDict, adjVolumeDict, normVolumeDict]
    technical_indicators = [bolBandsDict, momentumDict, peRatioDict]
    
    return index_dict_array, adjusted_dict_array, featuresDict, technical_indicators, inputsDict
index_dict_array, adjusted_dict_array, featuresDict, technical_indicators, inputsDict = retrieve_data()
print "Done"

Done


In [227]:
def retrieve_daily_returns(df):
    dailyReturnsDict = {}
    for each_dict in df:
        for each_key in each_dict.keys():
            each_index = each_dict[each_key]
            daily_returns = compute_daily_returns(each_index)
            dailyReturnsDict[each_key] = daily_returns
    return dailyReturnsDict

def compute_daily_returns(df):
    """Compute and return the daily return values."""
    daily_returns = (df / df.shift(1)) - 1
    daily_returns.ix[0,:] = 0 #Pandas leaves the 0th row full of NaNs
    return daily_returns

def sort_by_timestep(technical_indicators):
    momentumDict = technical_indicators[1]
    peRatioDict = technical_indicators[2]
    important_dicts = ['normNasdDict', 'normNyseDict']
    each_company_thresholds = {}
    for each_dict in important_dicts:
        for each_company in momentumDict[each_dict].keys():
            np1 = np.array(momentumDict[each_dict][each_company][21:])
            np2 = np.array(peRatioDict[each_dict][each_company][21:])
            t = {}
            step_size = np1.size/10
            np1.sort()
            np2.sort()
            for i in range(0, 10):
                mom = np1[(i+1)*step_size]
                pe = np2[(i+1)*step_size]
                t[i] = [mom,pe]
            each_company_thresholds[each_company] = t
    return each_company_thresholds

def descretize_indicators(technical_indicators, company_t):
    momentumDict = technical_indicators[1]
    peRatioDict = technical_indicators[2]
    important_dicts = ['normNasdDict', 'normNyseDict']
    each_company_descretized = {}
    each_group_length = {}
    
    for each_dict in important_dicts:
        for each_company in momentumDict[each_dict].keys():
            momentum = pd.DataFrame(momentumDict[each_dict][each_company][21:])
            peratio = pd.DataFrame(peRatioDict[each_dict][each_company][21:])
            
            grouped = momentum.join(peratio, how='outer', rsuffix='Pe')
            group_length = len(grouped)
            
            t = company_t[each_company]

            ranges = [((-1000000,t[0][0]),(t[0][0],t[1][0]),(t[1][0],t[2][0]), (t[2][0],t[3][0]),
                (t[3][0], t[4][0]),(t[4][0],t[5][0]),(t[5][0],t[6][0]),(t[6][0],t[7][0]),
                (t[7][0],t[8][0]),(t[8][0],np.inf)),
                ((-1000000,t[0][1]),(t[0][1],t[1][1]),(t[1][1],t[2][1]),(t[2][1],t[3][1]),
                (t[3][1], t[4][1]),(t[4][1],t[5][1]),(t[5][1],t[6][1]),(t[6][1],t[7][1]),
                (t[7][1],t[8][1]),(t[8][1],np.inf))]
 
            thrshs, day_threshs = [],[]
            for num in xrange(0,len(grouped)):
                test = grouped.ix[num]
                for each_variable,each_tuple_variable in zip(test,ranges):
                    y = 0
                    for each_tuple in each_tuple_variable:
                        if each_tuple[0] <= each_variable <= each_tuple[1]:
                            thrshs.append(y)
                            break
                        y += 1
                day_threshs.append(thrshs)
                thrshs = []
            each_company_descretized[each_company] = day_threshs
            each_group_length[each_company] = group_length
    return each_company_descretized, each_group_length

In [271]:
def initialize_qtable(): 
    band   = [1,0,-1]
    momen  = [0,1,2,3,4,5,6,7,8,9] 
    pe_r   = [0,1,2,3,4,5,6,7,8,9] 
    action = [-2,-1,0,1,2]
 
    q_values = {} 
    state_list = []
    for each_band in band:  
        for each_momentum in momen: 
            for each_pe_ratio in pe_r:  
                for each_action in action: 
                    state = (('band', each_band),('momentum', each_momentum),  
                            ('peratio', each_pe_ratio), each_action) 
                    state_list.append(state) 

    for each in state_list: 
        q_values[each] = 0 
 
    return q_values

def compute_bands(technical_indicators):
    bolBandsDict = technical_indicators[0]
    important_dicts = ['normNasdDict', 'normNyseDict']
    company_bands = {}
    for each_dict in important_dicts:
        for each_company in bolBandsDict[each_dict][0].keys():
            upper = bolBandsDict[each_dict][0][each_company][21:]
            lower = bolBandsDict[each_dict][1][each_company][21:]
            rolling = bolBandsDict[each_dict][2][each_company][21:]
            day_band = {}
            i = 0
            for each_upper, each_lower, each_rolling in zip(upper, lower, rolling):
                if each_upper > each_rolling:
                    day_band[i] = 1
                elif each_lower < each_rolling:
                    day_band[i] = -1
                else:
                    day_band[i] = 0
                i += 1
            company_bands[each_company] = day_band
    return company_bands

def lookup_actions(state, band, q_values): 
    action = [-2, -1, 0, 1, 2] 
    q_vals_list, random_list = [],[]
     
    momen = state[0] 
    pe_r  = state[1] 
     
    for each in action:
        q_val_state = (('band', band),('momentum', momen),('peratio', pe_r),each)
        q_vals = q_values[q_val_state] 
        q_vals_list.append([q_vals, each]) 
    max_action = max(q_vals_list) 

    #Boltmann method 
    prob_list = []
    choice_1 = q_vals_list[0][0]
    choice_2 = q_vals_list[1][0]
    choice_3 = q_vals_list[2][0]
    choice_4 = q_vals_list[3][0]
    choice_5 = q_vals_list[4][0]
    if choice_1 > 650:
        choice_1 = 650
    if choice_2 > 650:
        choice_2 = 650
    if choice_3 > 650:
        choice_3 = 650
    if choice_4 > 650:
        choice_4 = 650
    if choice_5 > 650:
        choice_5 = 650
    q_sum_exp = exp(choice_1)+exp(choice_2)+exp(choice_3)+exp(choice_4)+exp(choice_5)
        
    for each in q_vals_list: 
        boltzmann = exp(each[0])/q_sum_exp
        prob_list.append([boltzmann, each[1]]) 

    a = [prob_list[0][0], prob_list[0][1]]  
    b = [prob_list[1][0], prob_list[1][1]]  
    c = [prob_list[2][0], prob_list[2][1]]  
    d = [prob_list[3][0], prob_list[3][1]] 
    e = [prob_list[4][0], prob_list[4][1]]
    q_range = [a,[b[0]+a[0], b[1]], [c[0]+b[0]+a[0], c[1]], [d[0]+c[0]+b[0]+a[0], d[1]], [1,e[1]]] 
 
    x = random.uniform(0,1) 
    
    if x <= q_range[0][0]: 
        choice = q_range[0][1] 
    elif q_range[0][0] < x <= q_range[1][0]: 
        choice = q_range[1][1] 
    elif q_range[1][0] < x <= q_range[2][0]: 
        choice = q_range[2][1] 
    elif q_range[2][0] < x <= q_range[3][0]:
        choice = q_range[3][1]
    else: 
        choice = q_range[4][1]

    return choice, max_action 

def update_policy(state, band, action, reward, q_values, next_input): 
    each_vals_list = [] 
    discount, alpha = 0.5, 0.5 

    momen  = state[0] 
    pe_r   = state[1]

    q_val_state = (('band', band),('momentum', momen),('peratio', pe_r),action)
    q_state = q_values[q_val_state] 

    updated = next_input
    choice, max_action = lookup_actions(updated,band,q_values)  

    new_value = q_state*(1-alpha) + alpha*(reward + (discount * max_action[0])) 

    q_values[q_val_state] = new_value
    return q_values

In [229]:
# TODO: Initialize any additional variables here  
q_values = initialize_qtable()
total = 0
important_dicts = ['normNasdDict', 'normNyseDict']
df = [adjusted_dict_array[0],adjusted_dict_array[1]]
dailyReturnsDict = retrieve_daily_returns(df)
threshold = sort_by_timestep(technical_indicators)
company_inputs, grouped = descretize_indicators(technical_indicators, threshold)
complete_inputs = inputsDict
company_bands = compute_bands(technical_indicators)

0 0.0100647016535


KeyError: 1

In [273]:
totals = 0
for each_company in company_inputs.keys():
    if each_company in dailyReturnsDict['normNasdDict'].keys():
        returns = np.array(dailyReturnsDict['normNasdDict'][each_company][21:])   
    else:
        returns = np.array(dailyReturnsDict['normNyseDict'][each_company][21:])  
    
    group_length = grouped[each_company]
    for rounds in xrange(0, group_length):
        # Gather state 
        updated = company_inputs[each_company][rounds]
        bands = company_bands[each_company]
        band = bands[rounds]
        rewards = returns[rounds]
        # TODO: Update state
        choice, max_action = lookup_actions(updated, band, q_values) 
        # TODO: Select action according to your policy 
        action = choice 
        # Execute action and get reward 
        if action != 0:
            reward = action*rewards
        else:
            reward = rewards

        total = reward + total
        # TODO: Learn policy based on state, action, reward 
        #print "LearningAgent.update():inputs={},action={},reward={}".format(updated,action,reward)#[debug] 
        if rounds == group_length-1:
            break
        next_input = company_inputs[each_company][rounds+1]
        q_values = update_policy(updated, band, action, reward, q_values, next_input)
print total
print "Done"

OverflowError: math range error