In [1]:
import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt

from util import create_df_benchmark, get_data
from marketsim import compute_portvals_single_symbol, market_simulator

In [2]:
# QLearner interface (reference notes only; nothing in this cell is executed)
# def query_set_state(s):
#     '''Find the next action to take in state s. Update the latest state and action without updating the Q-table.'''
# def query(s_prime, r):
#     '''Find the next action to take.'''
    

In [24]:
# Deep Q-learning Agent
# num_states=3000, num_actions=3
# 
from collections import deque
from keras import Sequential
from keras.layers import Dense, Conv1D, Input, Embedding
from keras.optimizers import Adam
import numpy as np
import random

class DQNAgent:
    """Deep Q-learning agent over a discrete, integer-indexed state space.

    States are integers in ``[0, state_size)`` and are one-hot encoded before
    being fed to a small fully connected network that outputs one Q-value
    estimate per action. Transitions are stored in ``memory`` and replayed
    one at a time by :meth:`replay`.

    Parameters
    ----------
    state_size : int
        Number of distinct states (width of the one-hot input vector).
    action_size : int
        Number of discrete actions (width of the network output).
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        # Replay buffer of (state, action, reward, next_state, done) tuples.
        # Kept as an unbounded list; a bounded alternative would be deque(maxlen=2000).
        self.memory = []
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()
        # Most recent state and action, used by act() to assemble the next transition.
        self.s = 0
        self.a = 0

    def _build_model(self):
        """Build and compile the Q-network (two hidden ReLU layers of 24 units).

        Returns
        -------
        The compiled Keras ``Sequential`` model.
        """
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        # Q-values are unbounded regression targets, so the head must be linear:
        # a softmax head squashes outputs into [0, 1] summing to 1 and cannot fit
        # the Bellman targets that replay() regresses against with MSE.
        # The width follows action_size instead of a hard-coded 3.
        model.add(Dense(self.action_size, activation='linear'))
        # NOTE(review): `lr` is kept as in the original; recent Keras releases
        # spell this argument `learning_rate` - confirm against the installed version.
        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))
        return model

    def vectorize(self, state):
        """Return ``state`` as a one-hot row vector of shape ``(1, state_size)``."""
        one_hot = np.zeros((1, self.state_size))
        one_hot[:, state] = 1.0
        return one_hot

    def remember(self, state, action, reward, next_state, done):
        """Append one transition tuple to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, s_prime, r, update=False, done=False):
        """Choose the next action for state ``s_prime`` (epsilon-greedy).

        Parameters
        ----------
        s_prime : int
            The state the agent is now in.
        r : float
            Reward for the previous (state, action) pair; only used when
            ``update`` is true.
        update : bool
            When true, store ``(self.s, self.a, r, s_prime, done)`` in the
            replay buffer before choosing the new action.
        done : bool
            Stored with the transition; replay() does not bootstrap from
            ``s_prime`` when it is true.

        Returns
        -------
        int
            The chosen action, also saved in ``self.a``.
        """
        if update:
            self.remember(self.s, self.a, r, s_prime, done)

        self.s = s_prime
        if np.random.rand() <= self.epsilon:
            # Explore: uniformly random action.
            self.a = random.randrange(self.action_size)
        else:
            # Exploit: action with the highest predicted Q-value.
            act_values = self.model.predict(self.vectorize(s_prime), verbose=0)
            # Cast so both branches store a plain Python int.
            self.a = int(np.argmax(act_values[0]))
        return self.a

    def replay(self, batch_size):
        """Fit the network on a random sample of stored transitions.

        For each sampled transition the target for the taken action is
        ``reward`` (if ``done``) or ``reward + gamma * max_a Q(next_state, a)``;
        the targets of the other actions are left at the current predictions.
        Afterwards epsilon is decayed once while it is above ``epsilon_min``.

        Parameters
        ----------
        batch_size : int
            Number of transitions to sample. If the buffer holds fewer, every
            stored transition is used instead of raising ``ValueError``.
        """
        sample_size = min(batch_size, len(self.memory))
        minibatch = random.sample(self.memory, sample_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                next_q = self.model.predict(self.vectorize(next_state), verbose=0)[0]
                target = reward + self.gamma * np.amax(next_q)
            state_vec = self.vectorize(state)
            target_f = self.model.predict(state_vec, verbose=0)
            target_f[0][action] = target

            self.model.fit(state_vec, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [25]:
import datetime as dt
import pandas as pd
import matplotlib.pyplot as plt

from util import create_df_benchmark, get_data
from marketsim import compute_portvals_single_symbol, market_simulator
from strategy import StrategyLearner

In [26]:
# Train a portfolio over a really tough stretch of the market.

# Date windows: one for training, one for the portfolio period.
train_start_date = dt.datetime(year=2007, month=1, day=1)
train_end_date = dt.datetime(year=2007, month=12, day=31)
portfolio_start_date = dt.datetime(year=2008, month=1, day=1)
portfolio_end_date = dt.datetime(year=2008, month=12, day=31)

# Instrument and position sizing.
symbol = "SPY"
num_shares = 1000
start_val = 100000

# Trading frictions (both switched off here).
commission, impact = 0.00, 0.0

# Benchmark trades over the training window for the same symbol and share count.
df_benchmark_trades = create_df_benchmark(
    symbol, train_start_date, train_end_date, num_shares
)
print(df_benchmark_trades)

            Shares
Date              
2007-01-03    1000
2007-12-31   -1000


In [27]:
# Build the DQN agent and hand it to a StrategyLearner, train on the training
# window, then query the learned policy over that same window.
# With verbose=True the cumulative return of each training epoch is printed and plotted.
learner = DQNAgent(state_size=3000, action_size=3)

stl = StrategyLearner(
    num_shares=num_shares,
    impact=impact,
    commission=commission,
    verbose=True,
    learner=learner,
)

stl.add_evidence(
    symbol=symbol,
    start_val=start_val,
    start_date=train_start_date,
    end_date=train_end_date,
)

df_trades = stl.test_policy(
    symbol=symbol,
    start_date=train_start_date,
    end_date=train_end_date,
)

1 -0.08690000000000053
2 -0.07689999999999997
3 -0.12340000000000029
4 -0.18510000000000004
5 -0.049000000000000044
6 0.13860000000000006
7 0.035700000000000065
8 -0.14799999999999947


KeyboardInterrupt: 

In [22]:
# SPY prices over the training window.
spy_df = get_data(['SPY'], pd.date_range(train_start_date, train_end_date), addSPY=False)
# Row-over-row simple return: price_t / price_{t-1} - 1, computed in one vectorized
# step. The first row has no predecessor and stays NaN, exactly as the two-row
# rolling window did, without running a Python lambda per window or relying on
# integer keys against a date-indexed window.
spy_df / spy_df.shift(1) - 1

  


Unnamed: 0,SPY
2007-01-03,
2007-01-04,0.002133
2007-01-05,-0.007963
2007-01-08,0.004609
2007-01-09,-0.000870
2007-01-10,0.003325
2007-01-11,0.004419
2007-01-12,0.007542
2007-01-16,-0.001949
2007-01-17,0.000469


In [28]:
# Preview the replay buffer: each entry is (state, action, reward, next_state, done).
# Only the first entries are shown so the cell output stays short.
learner.memory[:25]

[(1545, 0, -0.010270996286639855, 555, False),
 (555, 1, -0.004966887417218402, 446, False),
 (446, 0, -0.016053405388222197, 446, False),
 (446, 1, -0.019539593386599163, 967, False),
 (967, 1, -0.008471892319873442, 124, False),
 (124, 2, 0.0, 1123, False),
 (1123, 0, 0.0007856693903205425, 122, False),
 (122, 1, 0.003119151590767366, 443, False),
 (443, 0, -0.0055473083834673975, 864, False),
 (864, 0, -0.011089418196017053, 986, False),
 (986, 2, 0.0, 1986, False),
 (1986, 2, 0.014233205599436971, 2777, False),
 (2777, 1, 0.01765121204989417, 2666, False),
 (2666, 0, 0.0, 1775, False),
 (1775, 1, 0.0, 1567, False),
 (1567, 0, -0.011776713511815862, 336, False),
 (336, 0, -0.009273084479371363, 125, False),
 (125, 0, -0.018320490643182863, 446, False),
 (446, 2, 0.0, 1856, False),
 (1856, 1, 0.0, 1855, False),
 (1855, 1, 0.0, 1654, False),
 (1654, 2, 0.008484381025838816, 2755, False),
 (2755, 1, 0.0077878016809314055, 2655, False),
 (2655, 0, 0.0, 1554, False),
 (1554, 2, 0.0006153