In [3]:
import random 
import os
import math
import Quandl
import pickle
import numpy as np
import pandas as pd
import scipy.optimize as spo
import matplotlib.pyplot as plt
from math import exp 

def retrieve_data():
    """Load the pickled data dictionaries used by the rest of the notebook.

    Twelve pickle files are read from the current working directory and their
    contents are grouped into five lists. According to the original notes the
    files were produced by an earlier retrieval step that pulled the data from
    the Quandl server; to add more data, use that retrieval method or
    ``Quandl.get("CODE")`` where CODE is a specific Quandl code.

    Returns:
        tuple: ``(adjusted_dict_array, technical_indicators, extra_indicators,
        extra_closings, other_stuff)`` where

        * ``adjusted_dict_array`` is ``[adjusted closes, normal closes]``
        * ``technical_indicators`` is ``[bollinger bands, momentum, P/E ratio]``
        * ``extra_indicators`` is the same triple for the "extra" data set
        * ``extra_closings`` is ``[extra adjusted closes, extra normal closes]``
        * ``other_stuff`` is ``[testing ground, feature dictionary]``

    Raises:
        IOError / OSError: if any of the pickle files cannot be opened.
    """
    def _load(path):
        # ``with`` closes the handle even when unpickling raises, which the
        # previous open()/load()/close() sequence did not guarantee.
        # NOTE: pickle.load can execute arbitrary code -- only point this at
        # files that were produced locally and are trusted.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    # Closing-price dictionaries
    adjCloseDict = _load('adjclosingpricesfile.pickle')
    normCloseDict = _load('closingpricesfile.pickle')

    # Technical indicators
    bolBandsDict = _load('bolbandsfile.pickle')
    momentumDict = _load('momentumfile.pickle')
    peRatioDict = _load('peratiofile.pickle')

    # The same indicators / closings for the "extra" data set
    extrBolBandsDict = _load('extrabollingers.pickle')
    extrMomentumDict = _load('extramomentums.pickle')
    extrPeRatioDict = _load('extraperatios.pickle')
    extraAdjClose = _load('extrajusteddict.pickle')
    extraNormClose = _load('extrnormaldict.pickle')

    # Hold-out data and feature measures
    testingGround = _load('testingground.pickle')
    featureDict = _load('featfile.pickle')

    adjusted_dict_array = [adjCloseDict, normCloseDict]
    technical_indicators = [bolBandsDict, momentumDict, peRatioDict]
    extra_indicators = [extrBolBandsDict, extrMomentumDict, extrPeRatioDict]
    extra_closings = [extraAdjClose, extraNormClose]
    other_stuff = [testingGround, featureDict]

    return adjusted_dict_array, technical_indicators, extra_indicators, extra_closings, other_stuff
# Load every pickled data set once; the preprocessing and testing cells below
# read these notebook-level names.
adjusted_dict_array, technical_indicators, extra_indicators, extra_closings, other_stuff = retrieve_data()
print "Done"

Done


In [71]:
def retrieve_daily_returns(df):
    """Compute daily returns for every entry of every dictionary in ``df``.

    Args:
        df: despite the name, an iterable of dictionaries (the notebook passes
            lists of price dictionaries). Each value is handed to
            ``compute_daily_returns``.

    Returns:
        dict: one flat dictionary mapping each key to its daily-return result.
        When the same key occurs in more than one input dictionary, the value
        from the later dictionary wins.
    """
    returns_by_key = {}
    for price_dict in df:
        for key, prices in price_dict.items():
            returns_by_key[key] = compute_daily_returns(prices)
    return returns_by_key

def compute_daily_returns(df):
    """Compute and return the daily return values.

    Each row is divided by the previous row and 1 is subtracted, i.e.
    ``price[t] / price[t-1] - 1``.

    Args:
        df: pandas DataFrame of prices, one row per day.

    Returns:
        A new DataFrame of the same shape holding the day-over-day returns.
        The first row is set to 0 because it has no previous row. ``df``
        itself is not modified.
    """
    daily_returns = (df / df.shift(1)) - 1
    # Pandas leaves the 0th row full of NaNs (there is no previous row).
    # ``.iloc`` is strictly positional: unlike the removed ``.ix`` indexer it
    # cannot be mistaken for a label lookup when the index holds integers.
    if len(daily_returns) > 0:
        daily_returns.iloc[0, :] = 0
    return daily_returns

def sort_by_timestep(technical_indicators):
    """Derive per-company bin thresholds for momentum and P/E ratio.

    For every company the first 21 samples are skipped, the remaining values
    of each indicator are sorted, and 18 evenly spaced order statistics are
    picked as thresholds.

    Args:
        technical_indicators: sequence whose element 1 is the momentum
            dictionary and element 2 the P/E-ratio dictionary. Both are
            indexed as ``d[group][company]`` and must yield something
            sliceable that ``np.array`` accepts.

    Returns:
        dict: ``{company: {i: [momentum_threshold, pe_threshold]}}`` for
        ``i`` in ``0..17``. Companies appearing in several groups keep the
        thresholds of the group processed last.

    Raises:
        IndexError: if a company has no samples left after the skipped rows.
    """
    n_thresholds = 18   # number of cut points produced per indicator
    skip_rows = 21      # leading samples ignored for every series

    momentumDict = technical_indicators[1]
    peRatioDict = technical_indicators[2]
    each_company_thresholds = {}
    for each_dict in momentumDict.keys():
        for each_company in momentumDict[each_dict].keys():
            np1 = np.array(momentumDict[each_dict][each_company][skip_rows:])
            np2 = np.array(peRatioDict[each_dict][each_company][skip_rows:])
            # Explicit floor division: the spacing must be an integer index
            # step regardless of the interpreter's ``/`` semantics.
            step_size = np1.size // n_thresholds
            np1.sort()
            np2.sort()
            t = {}
            for i in range(n_thresholds):
                position = (i + 1) * step_size
                # When the sample count is an exact multiple of 18 the last
                # position equals the array length; clamp it to the final
                # element instead of raising IndexError.
                mom = np1[min(position, np1.size - 1)]
                pe = np2[min(position, np2.size - 1)]
                t[i] = [mom, pe]
            each_company_thresholds[each_company] = t
    return each_company_thresholds

def _first_bin(value, edges):
    """Return the index of the first interval ``[edges[i], edges[i+1]]``
    (both ends inclusive) containing ``value``, or None when no interval
    matches (e.g. ``value`` is NaN)."""
    for index in range(len(edges) - 1):
        if edges[index] <= value <= edges[index + 1]:
            return index
    return None


def descretize_indicators(technical_indicators, company_t):
    """Convert each company's daily momentum and P/E values into bin indices.

    The first 21 samples of every series are skipped. Momentum and P/E are
    outer-joined on their index and each joined row is mapped to
    ``[momentum_bin, pe_bin]``.

    Bin edges come from ``company_t[company]``: thresholds ``0 .. n-2`` are
    used as interior cut points (for the usual 18 thresholds that is
    ``t[0]..t[16]``, giving bins ``0..17``); the outermost bins are
    open-ended. A value sitting exactly on a cut point goes to the lower bin.

    Args:
        technical_indicators: sequence whose element 1 is the momentum
            dictionary and element 2 the P/E-ratio dictionary, both indexed
            as ``d[group][company]``.
        company_t: ``{company: {i: [momentum_threshold, pe_threshold]}}`` as
            returned by ``sort_by_timestep``.

    Returns:
        dict: ``{company: [[momentum_bin, pe_bin], ...]}`` with one inner
        list per joined row. A value that matches no bin (NaN) contributes
        nothing, so that row's list is shorter than two entries.

    Raises:
        KeyError: if a company has no entry in ``company_t``.
    """
    momentumDict = technical_indicators[1]
    peRatioDict = technical_indicators[2]
    each_company_descretized = {}

    for each_dict in momentumDict.keys():
        for each_company in momentumDict[each_dict].keys():
            momentum = pd.DataFrame(momentumDict[each_dict][each_company][21:])
            peratio = pd.DataFrame(peRatioDict[each_dict][each_company][21:])

            grouped = momentum.join(peratio, how='outer', rsuffix='Pe')

            t = company_t[each_company]
            n_cuts = len(t) - 1  # the last threshold is not used as an edge
            # One edge list per column (0 = momentum, 1 = P/E). The lower
            # bound is -inf so it mirrors the +inf upper bound; a finite
            # floor would leave extremely negative values without a bin.
            edges_per_column = [
                [-np.inf] + [t[k][column] for k in range(n_cuts)] + [np.inf]
                for column in (0, 1)
            ]

            day_threshs = []
            # Iterate rows positionally; a label-based lookup would break on
            # an integer index that starts at 21 after the slice above.
            for row in grouped.values:
                thrshs = []
                for each_variable, edges in zip(row, edges_per_column):
                    bin_index = _first_bin(each_variable, edges)
                    if bin_index is not None:
                        thrshs.append(bin_index)
                day_threshs.append(thrshs)
            each_company_descretized[each_company] = day_threshs
    return each_company_descretized

In [75]:
def initialize_qtable():
    """Build the Q-table with every state/action pair initialised to 0.

    A key has the form
    ``(('bands', b), ('momentum', m), ('peratio', p), action)`` where ``b``
    and ``action`` range over -1, 0, 1 and ``m`` and ``p`` range over the
    bin indices 0..17.

    Returns:
        dict: 3 * 18 * 18 * 3 = 2916 keys, each mapped to the integer 0.
    """
    band_levels = (-1, 0, 1)
    momentum_bins = range(18)
    pe_bins = range(18)
    actions = (-1, 0, 1)

    return {
        (('bands', band), ('momentum', mom_bin), ('peratio', pe_bin), act): 0
        for band in band_levels
        for mom_bin in momentum_bins
        for pe_bin in pe_bins
        for act in actions
    }

def lookup_actions(state, q_values, band_state):
    """Pick an action for a state with Boltzmann (softmax) exploration.

    Args:
        state: sequence ``[momentum_bin, pe_bin]``.
        q_values: Q-table keyed by
            ``(('bands', b), ('momentum', m), ('peratio', p), action)``.
        band_state: Bollinger-band component of the state.

    Returns:
        tuple: ``(choice, max_action)`` where ``choice`` is the sampled
        action (-1, 0 or 1) and ``max_action`` is the greedy
        ``[q_value, action]`` pair. Ties on the Q-value are resolved in
        favour of the larger action because the pairs are compared as lists.

    Raises:
        KeyError: if the state is not present in ``q_values``.
        IndexError: if ``state`` has fewer than two entries.
    """
    actions = (-1, 0, 1)
    q_cap = 650  # Q-values above this are treated as equal to it when sampling

    momen = state[0]
    pe_r = state[1]

    q_vals_list = []
    for each in actions:
        q_val_state = (('bands', band_state), ('momentum', momen),
                       ('peratio', pe_r), each)
        q_vals_list.append([q_values[q_val_state], each])
    max_action = max(q_vals_list)

    # Boltzmann method. Shifting by the largest (capped) value leaves the
    # probabilities mathematically unchanged but guarantees the biggest
    # weight is exp(0) == 1, so the normaliser can never underflow to 0.0
    # (three very negative Q-values used to raise ZeroDivisionError).
    capped = [min(pair[0], q_cap) for pair in q_vals_list]
    shift = max(capped)
    weights = [exp(value - shift) for value in capped]
    total = sum(weights)

    # Cumulative probability thresholds for the first two actions; the last
    # action takes whatever probability mass remains.
    first_cut = weights[0] / total
    second_cut = weights[1] / total + first_cut

    x = random.uniform(0, 1)
    if x <= first_cut:
        choice = actions[0]
    elif x <= second_cut:
        choice = actions[1]
    else:
        choice = actions[2]

    return choice, max_action

def update_policy(state, action, reward, q_values, next_input, band_state,
                  current_band_state=None):
    """Apply one Q-learning update and return the (mutated) Q-table.

    The entry for ``(state, action)`` becomes
    ``(1 - alpha) * old + alpha * (reward + discount * max_a Q(next, a))``
    with ``alpha = discount = 0.5``.

    Args:
        state: ``[momentum_bin, pe_bin]`` of the step the action was taken in.
        action: the action taken (-1, 0 or 1).
        reward: reward received for that action.
        q_values: Q-table; updated in place.
        next_input: ``[momentum_bin, pe_bin]`` of the following step.
        band_state: band component used for the following step's lookup.
        current_band_state: band component of the step the action was taken
            in. When omitted, ``band_state`` is used for the current state as
            well, which is the historical behaviour.
            NOTE(review): the training cell passes the *next* step's band
            here, so without this argument the updated entry is keyed on the
            next band -- confirm whether that is intended.

    Returns:
        dict: the same ``q_values`` object, after the update.

    Raises:
        KeyError: if either state is missing from ``q_values``.
        IndexError: if ``state`` or ``next_input`` has fewer than two entries.
    """
    discount, alpha = 0.5, 0.5

    if current_band_state is None:
        current_band_state = band_state

    q_val_state = (('bands', current_band_state), ('momentum', state[0]),
                   ('peratio', state[1]), action)
    q_state = q_values[q_val_state]

    # Only the greedy value of the next state is needed. Reading it straight
    # from the table avoids running the Boltzmann sampler, which would spend
    # three exp() calls and consume a random number for a result that is
    # thrown away.
    best_next = max(
        q_values[(('bands', band_state), ('momentum', next_input[0]),
                  ('peratio', next_input[1]), candidate)]
        for candidate in (-1, 0, 1)
    )

    q_values[q_val_state] = q_state*(1.-alpha) + alpha*(reward + (discount * best_next))
    return q_values

def compute_bolls(technical_indicators):
    """Turn the Bollinger-band series into a per-day signal of -1, 0 or 1.

    Args:
        technical_indicators: sequence whose element 0 maps each group to an
            indexable of three per-company containers. They are consumed in
            the order (upper, lower, rol) and walked in lockstep.
            NOTE(review): what the third series represents (price vs. rolling
            mean) is not visible here -- confirm against the code that built
            the pickle.

    Returns:
        dict: ``{company: [signal, ...]}`` where a signal is 1 when the third
        value is above the upper value, -1 when it is below the lower value
        and 0 otherwise. Companies present in several groups keep the result
        of the group processed last.
    """
    band_groups = technical_indicators[0]
    signals_by_company = {}
    for group in band_groups.values():
        for company in group[0].keys():
            triples = zip(group[0][company], group[1][company], group[2][company])
            signals_by_company[company] = [
                1 if rol > upp else (-1 if rol < low else 0)
                for upp, low, rol in triples
            ]
    return signals_by_company

In [73]:
# Primary data set: per-company bin thresholds, binned (momentum, P/E) inputs,
# Bollinger-band signals and daily returns.
threshold = sort_by_timestep(technical_indicators)
company_inputs = descretize_indicators(technical_indicators, threshold)
newBollDict = compute_bolls(technical_indicators)
dailyReturnsDict = retrieve_daily_returns(adjusted_dict_array)

# The same four steps for the "extra" data set; results carry a ``2`` suffix.
threshold2 = sort_by_timestep(extra_indicators)
company_inputs2 = descretize_indicators(extra_indicators, threshold2)
newBollDict2 = compute_bolls(extra_indicators)
dailyReturnsDict2 = retrieve_daily_returns(extra_closings)

print "Done"

Done


In [81]:
q_values = initialize_qtable()
try:
    dailyReturnsDict['normNyseDict'].drop('Stifel Financial Corporation', axis=1, inplace=True)
    dailyReturnsDict['normNasdDict'].drop('ModusLink Global Solutions', axis=1, inplace=True)
except:
    pass

for each_repeat in xrange(0,2):
    for each_company in company_inputs.keys():
        if each_company in dailyReturnsDict['normNasdDict'].keys():
            returns = np.array(dailyReturnsDict['normNasdDict'][each_company][21:])   
        else:
            returns = np.array(dailyReturnsDict['normNyseDict'][each_company][21:])
        bands = np.array(newBollDict[each_company][21:])
        for rounds in xrange(0, 3999):
            # Gather state 
            updated = company_inputs[each_company][rounds]
            band_state = bands[rounds]
            rewards = returns[rounds]
            # TODO: Update state
            choice, max_action = lookup_actions(updated, q_values, band_state) 
            # TODO: Select action according to your policy 
            action = choice
            # Execute action and get reward 
            if action != 0:
                reward = action*rewards
            else:
                reward = rewards
            # TODO: Learn policy based on state, action, reward 
            #print "LearningAgent.update():inputs={},action={},reward={}".format(updated,action,reward)#[debug]
            next_input = company_inputs[each_company][rounds+1]
            band_state = bands[rounds+1]
            q_values = update_policy(updated, action, reward, q_values, next_input, band_state)

try:
    dailyReturnsDict2['normNasdDict'].drop('Central Valley Community Bancorp', axis=1, inplace=True)
    dailyReturnsDict2['normNasdDict'].drop('InterCloud Systems', axis=1, inplace=True)
    dailyReturnsDict2['normNasdDict'].drop('One Horizon Group', axis=1, inplace=True)
    dailyReturnsDict2['normNasdDict'].drop('Unity Bancorp', axis=1, inplace=True)
except:
    pass
for each_repeat in xrange(0,2):
    for each_company2 in company_inputs2.keys():
        if each_company2 in dailyReturnsDict2['normNasdDict'].keys():
            returns2 = np.array(dailyReturnsDict2['normNasdDict'][each_company2][21:])   
        elif each_company2 in dailyReturnsDict2['normNyseDict'].keys():
            returns2 = np.array(dailyReturnsDict2['normNyseDict'][each_company2][21:])
        elif each_company2 in dailyReturnsDict2['adjDowDict'].keys():
            returns2 = np.array(dailyReturnsDict2['adjDowDict'][each_company2][21:])
        else:
            returns2 = np.array(dailyReturnsDict2['adjSpDict'][each_company2][21:])
        bands2 = np.array(newBollDict2[each_company2][21:])    
        for rounds2 in xrange(0, len(returns2)):
            try:
                updated2 = company_inputs2[each_company2][rounds2]
                band_state2 = bands2[rounds2]
                rewards2 = returns2[rounds2]
                choice2, max_action2 = lookup_actions(updated2, q_values, band_state2)  
                action2 = choice2
                if action2 != 0:
                    reward2 = action2*rewards2
                else:
                    reward2 = rewards2
                next_input2 = company_inputs2[each_company2][rounds2+1]
                band_state2 = bands2[rounds2+1]
                q_values = update_policy(updated2, action2, reward2, q_values, next_input2, band_state2)
            except:
                pass
print "Done"

Done


In [82]:
# Tally how many Q-table entries ended up positive (i), negative (y) and
# exactly zero (z) after training, then print the three counts in that order.
i,y,z=0,0,0
for each in q_values.values():
    if each > 0:
        i += 1
    if each < 0:
        y += 1
    if each == 0:
        z += 1
print i
print y
print z

673
299
1944


In [53]:
# Unpack the hold-out ("testing ground") data and bin its indicators.
otherDict = other_stuff[0]
featureDict = other_stuff[1]
# presumably Bollinger bands, momentum, P/E ratios and daily data, in that
# order, judging by the names -- TODO confirm against the code that wrote
# testingground.pickle
bols = otherDict[0]
moms = otherDict[1]
pes = otherDict[2]
dail = otherDict[3]
# Same [bands, momentum, pe] layout the indicator helpers index into.
testing = [bols,moms,pes]

thresholds = sort_by_timestep(testing)
# NOTE(review): this rebinds ``company_inputs``, which the training cell also
# reads; re-running the training cell after this one trains on these inputs.
company_inputs = descretize_indicators(testing, thresholds)

In [None]:
# Inspect Chevron's binned inputs and daily data from row 4000 onward, then
# list the Q-value of every action for one hand-picked state.
print company_inputs['Chevron'][4000:]
print dail['adjDowDict']['Chevron'][21:][4000:]

action=[-1,0,1]
new_list = []
for each in action:
    # NOTE(review): ``??`` is an unfilled placeholder and is not valid Python;
    # replace it with the band state to inspect (the Q-table uses -1, 0 or 1)
    # before running this cell.
    q_val_state = (('bands', ??),('momentum', 17),('peratio', 10), each)
    q_vals = q_values[q_val_state]
    new_list.append([q_vals, each]) 
print new_list

In [80]:
# Diagnostic: number of companies with band signals in the extra data set,
# and the number of binned rows for one company.
print len(newBollDict2)
# NOTE(review): the recorded output shows this lookup raising KeyError -- the
# company is not a key of company_inputs2.
print len(company_inputs2['Taiwan Semiconductor Manufacturing Company Ltd.'])

2501


KeyError: 'Taiwan Semiconductor Manufacturing Company Ltd.'