In [9]:
import pandas as pd, numpy as np, random

In [10]:
def build_q_dict(states_df):
    import itertools, random
    
    list_of_values = [list(states_df[col].unique()) for col in states_df.columns]
        
    state_list = list(itertools.product(*list_of_values))
    
    q_dict = {'long':{}, 'short':{}, None:{None:{None:0}}}
    
    for position in ['long','short',None]:
        for state_tuple in state_list:
            q_dict[position][state_tuple] = {'buy':random.random()/10000, 
                                           'sell':random.random()/10000, None:random.random()/10000}
    
    return q_dict
        

In [19]:
class QLearner():
    '''Creates a Q-learning agent to use on stock market data'''
    
    def __init__(self, states_df, price_series, date_col='Date', train_fraction=.8):
        '''states_df: a pandas dataframe where each column is a time series of states to be monitored 
        by the Q-learner. Each state should be an integer value. The df must be indexed by date. 
        
        price_series: A time series of prices. Must be a pandas series indexed by date and correspond to dates
        in the states_df'''
        
        assert len(states_df) == len(price_series), "Length of price_series and states_df are not equal."        
        
        self.states_df = states_df
        self.price_series = price_series
        self.states_train, self.states_test = self.train_test_split()
        # Instantiate with dummy states for parsimony of q_dict
        self.curr_state = (0, 0, 0, 0, 0, 0, 1, 0)
        self.prev_state = (0, 0, 0, 0, 0, 0, 1, 0)
        self.prev_action = None
        self.prev_price = None
        self.position = None
        self.prev_position = None
        self.prev_reward = 0
        self.cum_reward = 0
        self.alpha = 0.7
        self.gamma = 0.7
        self.epsilon = 0.1
        # TODO fix below for train/test
        self.max_train_time = len(self.states_train)
        self.max_test_time = len(self.states_test)
        self.q_dict = build_q_dict(states_df)
    
    def train_test_split(self, train_fraction=.8):
        states_df = self.states_df
        price_series = self.price_series
        
        assert states_df.index.all() == price_series.index.all(), \
            "All dates in the states_df must match all dates in the price_series"
        
        end_train = int(len(states_df)*train_fraction)
        end_test = len(states_df) - 1
        states_temp = states_df.copy()
        states_train = states_temp[:end_train]
        
        states_temp_2 = states_df.copy()
        states_test = states_temp_2[end_train + 1:end_test]
        
        self.states_train = states_train
        self.states_test = states_test
        
        return self.states_train, self.states_test
                    
    def print_attributes(self, print_q_dict=False):
        # For debugging
        print "The current state is {}".format(self.curr_state)
        print "The previous state is {}".format(self.prev_state)
        print "The previous action is {}".format(self.prev_action)
        print "The previous price is {}".format(self.prev_price)
        print "The the current position is {}".format(self.position)
        print "The previous reward is {}".format(self.prev_reward)
        print "Alpha is {}".format(self.alpha)
        print "Gamma is {}".format(self.gamma)
        print "Epsilon is {}".format(self.epsilon)
        
        if print_q_dict:
            print self.q_dict
        
    def update(self, t, train=True):
        if t==0:
            self.cum_reward = 0
        
        if train:
            inputs = self.states_train.iloc[t]
        else:
            inputs = self.states_test.iloc[t]
        
        self.curr_state = tuple(inputs.values)
        
        if random.random() > self.epsilon/(t + 1):
            action = max(self.q_dict[self.position][self.curr_state], 
                         key=self.q_dict[self.position][self.curr_state].get)
        else:               
            action = random.choice(['buy','sell',None])
        
        self.position = self.update_position(action)
                
        reward = self.calc_reward(t)
        
        Qsa = self.q_dict[self.prev_position][self.prev_state][self.prev_action]
        Qs_prime_a_prime = self.q_dict[self.position][self.curr_state][max(self.q_dict[self.position][self.curr_state])]
        
        Qsa += self.alpha * (self.prev_reward + self.gamma * Qs_prime_a_prime -  Qsa)
        self.q_dict[self.position][self.prev_state][self.prev_action] = Qsa        
        
        self.cum_reward += reward
        self.prev_position = self.position
        self.prev_reward = reward
        self.prev_action = action
        self.prev_state = self.curr_state
        self.prev_price = self.price_series.iloc[t]
        
        #print "QLearner.update(): inputs = {}, position = {}, action = {}, \n \
        #        reward (from prev action) = {}, cumulative reward = {}".format(
        #        inputs.values, self.prev_position, action, reward, self.cum_reward)
        
    def calc_reward(self, t): 
        prev_price = self.prev_price
        price_today = self.price_series.iloc[t]
        
        if prev_price != None:
            diff = price_today - prev_price
            if self.position == 'long':
                reward = diff
            elif self.position == 'short':
                reward = -diff
            else:
                reward = 0
        else:
            reward = 0        
        return reward
        
    def update_position(self, action):
        curr_position = self.position
        if curr_position == 'long':
            if action == 'sell':
                self.position = None
                return self.position
            else:
                pass
        elif curr_position == 'short':
            if action == 'buy':
                self.position = None
                return self.position
            else:
                pass
        else:
            if action == 'sell':
                self.position = 'short'
                return self.position
            elif action == 'buy':
                self.position = 'long'
                return self.position
            else:
                pass
        
    def train(self, n_trials=1):
        n = 0
        while n < n_trials:
            t = 0
            print "Training trial number {}...".format(n)
            while t < self.max_train_time:
                self.update(t)        
                t += 1
            tot_hold = self.price_series[self.max_train_time - 1] - self.price_series[0]
            excess_return = self.cum_reward - tot_hold
            print "The excess return is {}".format(excess_return)   
            n += 1
            
    def test(self, n_trials=1, verbose=True):
        n = 0
        while n < n_trials:
            t = 0
            print "Testing . . ."
            while t < self.max_test_time:
                self.update(t)        
                t += 1
            tot_hold = self.price_series[self.max_test_time - 1] - self.price_series[0]
            excess_return = self.cum_reward - tot_hold
            print "The excess return is {}".format(excess_return)   
            n += 1 
        if verbose:
            from prettytable import PrettyTable
            t = PrettyTable(['Metric','Model','Market','Diff'])
            t.add_row(['Sharpe Ratio',2,0.6,1.4])
            print t

In [15]:
df = pd.read_csv('market_direction_int.csv')
df['Date'].apply(pd.Timestamp)
df.head()

Unnamed: 0,Date,BullBearSpread,OneTenDiff,Hi-Lo,Adv-Dec,VIX,PE_Ratio,OthMktsMinusMA,SP,SP_Minus_MA
0,2006-08-02,0,0,0,0,0,0,1,1277.410034,0
1,2006-08-03,0,0,0,1,0,0,1,1280.27002,0
2,2006-08-04,0,0,0,1,0,0,1,1279.359985,0
3,2006-08-07,0,0,0,0,1,0,1,1275.77002,0
4,2006-08-08,0,0,0,0,1,0,1,1271.47998,0


In [17]:
df.set_index('Date',inplace=True)
states_df = df[['BullBearSpread','OneTenDiff','Hi-Lo','Adv-Dec','VIX','PE_Ratio','OthMktsMinusMA','SP_Minus_MA']]
price_series = df['SP']

In [20]:
agent = QLearner(states_df, price_series)
#agent.print_attributes()
agent.train(n_trials = 80)
agent.test()

Training trial number 0...
The excess return is -760.630937
Training trial number 1...
The excess return is -680.519782
Training trial number 2...
The excess return is 364.988947
Training trial number 3...
The excess return is 816.321233
Training trial number 4...
The excess return is -141.820868
Training trial number 5...
The excess return is 734.089963
Training trial number 6...
The excess return is 1934.058467
Training trial number 7...
The excess return is 1781.940072
Training trial number 8...
The excess return is 1821.139527
Training trial number 9...
The excess return is 2079.9292
Training trial number 10...
The excess return is 1858.118774
Training trial number 11...
The excess return is 2054.239495
Training trial number 12...
The excess return is 1338.148562
Training trial number 13...
The excess return is 1965.729367
Training trial number 14...
The excess return is 1408.499513
Training trial number 15...
The excess return is 2175.209352
Training trial number 16...
The excess 

In [276]:
agent.q_dict

{None: {(2, 0, 0, 1, 1, 0, 0, 1): {None: 4.6613256959910695e-06,
   'buy': 1.4528809708818812e-05,
   'sell': 5.1978110286793664e-05},
  (0, 2, 1, 0, 2, 0, 1, 0): {None: 3.175439702851461e-05,
   'buy': 2.0866105586205462e-05,
   'sell': 5.8433442843976314e-05},
  (2, 1, 0, 0, 0, 0, 1, 0): {None: 7.211668056694915e-05,
   'buy': 8.405717206016895e-05,
   'sell': 2.8555454918569178e-05},
  (0, 1, 1, 1, 2, 1, 0, 0): {None: 8.854573479561896e-05,
   'buy': 2.3160892642615717e-05,
   'sell': 6.704258874683046e-05},
  (0, 2, 0, 1, 2, 0, 0, 0): {None: 4.447232476491394e-05,
   'buy': 5.342809819965001e-05,
   'sell': 9.686558967837293e-05},
  (2, 1, 1, 0, 2, 0, 0, 0): {None: 1.2414386374837439e-05,
   'buy': 1.279170643119094e-05,
   'sell': 5.4777928894757514e-05},
  (2, 1, 1, 1, 0, 0, 0, 0): {None: 9.8603597539857e-05,
   'buy': 1.8874146563738182e-05,
   'sell': 3.405750750332104e-05},
  (0, 1, 1, 0, 0, 1, 1, 0): {None: 4.388365308420443e-05,
   'buy': 3.20522389217141e-05,
   'sell': 8.1

In [283]:
int(len(states_df)*.8)

2155

In [284]:
len(states_df) - 2155

539

In [285]:
len(states_df)

2694

In [287]:
states_df.iloc[2693]

BullBearSpread    1
OneTenDiff        0
Hi-Lo             1
Adv-Dec           1
VIX               0
PE_Ratio          2
OthMktsMinusMA    0
SP_Minus_MA       1
Name: 2016-11-18, dtype: int64

In [7]:
from prettytable import PrettyTable

t = PrettyTable(['Name', 'Age'])
t.add_row(['Alice', 24])
t.add_row(['Bob', 19])
print t

+-------+-----+
|  Name | Age |
+-------+-----+
| Alice |  24 |
|  Bob  |  19 |
+-------+-----+
