In [1]:
import random 
import os
import math
import Quandl
import pickle
import numpy as np
import pandas as pd
import scipy.optimize as spo
import matplotlib.pyplot as plt
from math import exp 

def retrieve_data():
    """Retrieve the dictionaries filled with each companies dataframes in
       both the s&p 500 and dow jones indexes, and the third dictionary
       contains several measures of features. The data is taken from pickled
       data files that were retrieved with an earlier retrieve data function
       that retrieved the data from the Quandl server. If you need to add more
       data, you need to use the previous retrieval method which will be listed
       soon. You can also use Quandl.get("CODE") where CODE is a specific quandl
       code"""
    # Read from file
    adj_closing = open('adjclosingpricesfile.pickle', 'rb')
    norm_closing = open('closingpricesfile.pickle', 'rb')
    bol_bands = open('bolbandsfile.pickle', 'rb')
    momentum = open('momentumfile.pickle', 'rb')
    pe_ratio = open('peratiofile.pickle', 'rb')
    extra_bol_bands = open('extrabollingers.pickle', 'rb')
    extra_momentums = open('extramomentums.pickle', 'rb')
    extra_pe_ratios = open('extraperatios.pickle', 'rb')
    extra_adjusted = open('extrajusteddict.pickle', 'rb')
    extra_normal = open('extrnormaldict.pickle', 'rb')
    
    # Store the retrieved dictionaries in variables
    adjCloseDict = pickle.load(adj_closing)
    normCloseDict = pickle.load(norm_closing)
    bolBandsDict = pickle.load(bol_bands)
    momentumDict = pickle.load(momentum)
    peRatioDict = pickle.load(pe_ratio)
    extrBolBandsDict = pickle.load(extra_bol_bands)
    extrMomentumDict = pickle.load(extra_momentums)
    extrPeRatioDict = pickle.load(extra_pe_ratios)
    extrAdjustedDict = pickle.load(extra_adjusted)
    extrNormalDict = pickle.load(extra_normal)
    
    # Close the files
    adj_closing.close()
    norm_closing.close()
    bol_bands.close()
    momentum.close()
    pe_ratio.close()
    extra_bol_bands.close()
    extra_momentums.close()
    extra_pe_ratios.close()
    extra_adjusted.close()
    extra_normal.close()
    
    adjusted_dict_array = [adjCloseDict, normCloseDict]
    technical_indicators = [bolBandsDict, momentumDict, peRatioDict]
    extra_indicators = [extrBolBandsDict, extrMomentumDict, extrPeRatioDict]
    extra_closings = [extrAdjustedDict, extrNormalDict]
    
    return adjusted_dict_array, technical_indicators, extra_indicators, extra_closings
adjusted_dict_array, technical_indicators, extra_indicators, extra_closings = retrieve_data()
print "Done"

Done


In [16]:
def retrieve_daily_returns(df):
    dailyReturnsDict = {}
    for each_dict in df:
        for each_key in each_dict.keys():
            each_index = each_dict[each_key]
            daily_returns = compute_daily_returns(each_index)
            dailyReturnsDict[each_key] = daily_returns
    return dailyReturnsDict

def compute_daily_returns(df):
    """Compute and return the daily return values."""
    daily_returns = (df / df.shift(1)) - 1
    daily_returns.ix[0,:] = 0 #Pandas leaves the 0th row full of NaNs
    return daily_returns

def sort_by_timestep(technical_indicators):
    bollingerDict = technical_indicators[0]
    momentumDict = technical_indicators[1]
    peRatioDict = technical_indicators[2]
    important_dicts = ['normNasdDict', 'normNyseDict']
    each_company_thresholds = {}
    for each_dict in important_dicts:
        for each_company in momentumDict[each_dict].keys():
            np1 = np.array(bollingerDict[each_dict][0][each_company][21:])
            np2 = np.array(bollingerDict[each_dict][1][each_company][21:])
            np3 = np.array(bollingerDict[each_dict][2][each_company][21:])
            np4 = np.array(momentumDict[each_dict][each_company][21:])
            np5 = np.array(peRatioDict[each_dict][each_company][21:])
            t = {}
            step_size = np1.size/16
            step_size2 = np4.size/16
            np1.sort()
            np2.sort()
            np3.sort()
            np4.sort()
            np5.sort()
            for i in range(0, 16):
                upp = np1[(i+1)*step_size]
                low = np2[(i+1)*step_size]
                rol = np3[(i+1)*step_size]
                mom = np4[(i+1)*step_size]
                pe = np5[(i+1)*step_size]
                t[i] = [upp,low,rol,mom,pe]
            each_company_thresholds[each_company] = t
    return each_company_thresholds

def descretize_indicators(technical_indicators, company_t):
    bollingerDict = technical_indicators[0]
    momentumDict = technical_indicators[1]
    peRatioDict = technical_indicators[2]
    important_dicts = ['normNasdDict', 'normNyseDict']
    each_company_descretized = {}
    each_group_length = {}
    
    for each_dict in important_dicts:
        for each_company in momentumDict[each_dict].keys():
            upper = pd.DataFrame(bollingerDict[each_dict][0][each_company][21:])
            lower = pd.DataFrame(bollingerDict[each_dict][1][each_company][21:])
            rollin = pd.DataFrame(bollingerDict[each_dict][2][each_company][21:])
            momentum = pd.DataFrame(momentumDict[each_dict][each_company][21:])
            peratio = pd.DataFrame(peRatioDict[each_dict][each_company][21:])
            
            grouped = upper.join(lower, how='outer', rsuffix='Low')
            grouped = grouped.join(rollin, how='outer', rsuffix='Rol')
            grouped = grouped.join(momentum, how='outer', rsuffix='Moment')
            grouped = grouped.join(peratio, how='outer', rsuffix='Pe')
            
            t = company_t[each_company]

            ranges = [((-1000000, t[0][0]),( t[0][0], t[1][0]),( t[1][0], t[2][0]),( t[2][0], t[3][0]),
                        (t[3][0], t[4][0]),( t[4][0], t[5][0]),( t[5][0], t[6][0]),( t[6][0], t[7][0]),
                        (t[7][0], t[8][0]),( t[8][0], t[9][0]),( t[9][0],t[10][0]),(t[10][0],t[11][0]),
                       (t[11][0],t[12][0]),(t[12][0],t[13][0]),(t[13][0],t[14][0]),(t[14][0],np.inf)),
                      ((-1000000, t[0][1]),( t[0][1], t[1][1]),( t[1][1], t[2][1]),( t[2][1], t[3][1]),
                        (t[3][1], t[4][1]),( t[4][1], t[5][1]),( t[5][1], t[6][1]),( t[6][1], t[7][1]),
                        (t[7][1], t[8][1]),( t[8][1], t[9][1]),( t[9][1],t[10][1]),(t[10][1],t[11][1]),
                       (t[11][1],t[12][1]),(t[12][1],t[13][1]),(t[13][1],t[14][1]),(t[14][1],np.inf)), 
                      ((-1000000, t[0][2]),( t[0][2], t[1][2]),( t[1][2], t[2][2]),( t[2][2], t[3][2]),
                        (t[3][2], t[4][2]),( t[4][2], t[5][2]),( t[5][2], t[6][2]),( t[6][2], t[7][2]),
                        (t[7][2], t[8][2]),( t[8][2], t[9][2]),( t[9][2],t[10][2]),(t[10][2],t[11][2]),
                       (t[11][2],t[12][2]),(t[12][2],t[13][2]),(t[13][2],t[14][2]),(t[14][2],np.inf)),
                      ((-1000000, t[0][3]),( t[0][3], t[1][3]),( t[1][3], t[2][3]), (t[2][3], t[3][3]),
                        (t[3][3], t[4][3]),( t[4][3], t[5][3]),( t[5][3], t[6][3]),( t[6][3], t[7][3]),
                        (t[7][3], t[8][3]),( t[8][3], t[9][3]),( t[9][3],t[10][3]),(t[10][3],t[11][3]),
                       (t[11][3],t[12][3]),(t[12][3],t[13][3]),(t[13][3],t[14][3]),(t[14][3],np.inf)),
                      ((-1000000, t[0][4]),( t[0][4], t[1][4]),( t[1][4], t[2][4]),( t[2][4], t[3][4]),
                        (t[3][4], t[4][4]),( t[4][4], t[5][4]),( t[5][4], t[6][4]),( t[6][4], t[7][4]),
                        (t[7][4], t[8][4]),( t[8][4], t[9][4]),( t[9][4],t[10][4]),(t[10][4],t[11][4]),
                       (t[11][4],t[12][4]),(t[12][4],t[13][4]),(t[13][4],t[14][4]),(t[14][4],np.inf))]
 
            thrshs, day_threshs = [],[]
            for num in xrange(0,len(grouped)):
                test = grouped.ix[num]
                for each_variable,each_tuple_variable in zip(test,ranges):
                    y = 0
                    for each_tuple in each_tuple_variable:
                        if each_tuple[0] <= each_variable <= each_tuple[1]:
                            thrshs.append(y)
                            break
                        y += 1
                day_threshs.append(thrshs)
                thrshs = []
            each_company_descretized[each_company] = day_threshs
    return each_company_descretized

In [56]:
def initialize_qtable(): 
    upper   = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
    lower   = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
    rollin  = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
    momen   = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 
    peratio = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15] 
    action  = [-10,0,10]
 
    q_values = {} 
    state_list = []
    for each_upper in upper: 
        for each_lower in lower:
            for each_roll in rollin:
                for each_momentum in momen:
                    for each_pe in peratio:
                        for each_action in action: 
                            state = (('upper', each_upper),('lower', each_lower),
                                     ('rolling', each_roll),('momentum', each_momentum),  
                                    ('peratio', each_pe), each_action) 
                            state_list.append(state) 

    for each in state_list: 
        q_values[each] = 0 
 
    return q_values

def lookup_actions(state, q_values): 
    action = [-10, 0, 10] 
    q_vals_list, random_list = [],[]
     
    upper  = state[0]
    lower  = state[1]
    rollin = state[2]
    momen  = state[3] 
    pe_r   = state[4] 
     
    for each in action:
        q_val_state = (('upper', upper),('lower', lower),('rolling',rollin),
                       ('momentum', momen),('peratio', pe_r),each)
        q_vals = q_values[q_val_state] 
        q_vals_list.append([q_vals, each]) 
    max_action = max(q_vals_list) 
    """
    #Boltmann method 
    prob_list = []
    choice_1 = q_vals_list[0][0]
    choice_2 = q_vals_list[1][0]
    choice_3 = q_vals_list[2][0]
    
    choices = [choice_1, choice_2, choice_3]
    
    q_sum_exp = exp(choice_1)+exp(choice_2)+exp(choice_3)
    
    for each, each_choice in zip(q_vals_list, choices): 
        boltzmann = exp(each_choice)/q_sum_exp 
        prob_list.append([boltzmann, each[1]])

    a = [prob_list[0][0], prob_list[0][1]]  
    b = [prob_list[1][0], prob_list[1][1]]  
    c = [prob_list[2][0], prob_list[2][1]]  
    q_range = [a,[b[0]+a[0], b[1]], [1,c[1]]] 
 
    x = random.uniform(0,1) 
    
    if x <= q_range[0][0]: 
        choice = q_range[0][1] 
    elif q_range[0][0] < x <= q_range[1][0]: 
        choice = q_range[1][1] 
    else: 
        choice = q_range[2][1]
    """
    for each in q_vals_list:
        if each[0] == 0:
            random_list.append(each[1])

    if max_action[0] == 0:
        choice = random.choice(action)
    elif (len(random_list) > 0):
        choice = random.choice(random_list)
    else:
        choice = max_action[1]
        
    return choice, max_action 

def update_policy(state, action, reward, q_values, next_input): 
    each_vals_list = [] 
    discount, alpha = 0.5, 0.5 

    upper  = state[0]
    lower  = state[1]
    rollin = state[2]
    momen  = state[3] 
    pe_r   = state[4]

    q_val_state = (('upper', upper),('lower', lower),('rolling',rollin),
                       ('momentum', momen),('peratio', pe_r), action)
    q_state = q_values[q_val_state] 

    updated = next_input
    choice, max_action = lookup_actions(updated,q_values)  

    new_value = q_state*(1.-alpha) + alpha*(reward + (discount * max_action[0])) 

    q_values[q_val_state] = new_value
    return q_values

In [4]:
# TODO: Initialize any additional variables here  

threshold = sort_by_timestep(technical_indicators)
company_inputs = descretize_indicators(technical_indicators, threshold)

important_dicts = ['normNasdDict', 'normNyseDict']
dailyReturnsDict = retrieve_daily_returns(adjusted_dict_array)
print "Done"

Done


In [62]:
q_values = initialize_qtable()
try:
    dailyReturnsDict['normNyseDict'].drop('Stifel Financial Corporation', axis=1, inplace=True)
    dailyReturnsDict['normNasdDict'].drop('ModusLink Global Solutions', axis=1, inplace=True)
except:
    pass
x=0
for each_repeat in xrange(0,10):
    for each_company in company_inputs.keys():
        if each_company in dailyReturnsDict['normNasdDict'].keys():
            returns = np.array(dailyReturnsDict['normNasdDict'][each_company][21:])   
        elif each_company in dailyReturnsDict['normNyseDict'].keys():
            returns = np.array(dailyReturnsDict['normNyseDict'][each_company][21:])
        else:
            x += 1
            continue

        for rounds in xrange(0, 3999):
            # Gather state 
            updated = company_inputs[each_company][rounds]
            rewards = returns[rounds]
            # TODO: Update state
            choice, max_action = lookup_actions(updated, q_values) 
            # TODO: Select action according to your policy 
            action = choice
            # Execute action and get reward 
            if action != 0:
                reward = action*rewards
            else:
                reward = rewards
            # TODO: Learn policy based on state, action, reward 
            #print "LearningAgent.update():inputs={},action={},reward={}".format(updated,action,reward)#[debug]
            next_input = company_inputs[each_company][rounds+1]
            q_values = update_policy(updated, action, reward, q_values, next_input)
print x
print "Done"

0
Done


In [16]:
action = [-2, -1, 0, 1, 2]
upper = 9
lower = 9
rollin = 9
momen = 6
pe_r = 9
q_vals_list2 = []
for each in action:
    q_val_state = (('upper', upper),('lower', lower),('rolling',rollin),
                    ('momentum', momen),('peratio', pe_r),each)
    q_vals = q_values[q_val_state] 
    q_vals_list2.append([q_vals, each]) 
print q_vals_list2

15364158
Done


In [67]:
i,y,z=0,0,0
for each in q_values.values():
    if each > 0:
        i += 1
    if each < 0:
        y += 1
    if each == 0:
        z += 1
print i
print y
print z

106787
143539
2895402
