# DQN Trading Agent

## Read database

In [1]:
#TO READ THE DATABASE
import h5py

DB_FILE = "../data/dataset_1h_1000.hdf5"
DB = h5py.File(DB_FILE, "r")

for item in DB:
    print(item)

bitcoin_usd
etherium_usd
ripple_usd


In [2]:
# Show the values of the first timestep
print(DB["bitcoin_usd"][0:2])

[[  1.51543080e+12   1.47930000e+04   1.46940000e+04   1.48600000e+04
    1.46690000e+04   7.86847895e+02]
 [  1.51542720e+12   1.44000000e+04   1.47890000e+04   1.49230000e+04
    1.43470000e+04   4.20724394e+03]]


## Define packages

In [3]:
import random
import numpy as np

## Define DQN Agent

In [4]:
from collections import deque
from keras.models import Model 
from keras.layers import Input, Dense, Flatten, Reshape
from keras.optimizers import Adam
from keras import backend as K
import keras

K.set_image_dim_ordering('tf')

class DQNAgent:
    def __init__(self, frame_size, frame_parameter_number, aux_number, action_size):
        self.frame_size = frame_size
        self.frame_parameter_number =  frame_parameter_number
        self.aux_number = aux_number
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        # Main input (the time frame)
        main_input = Input(shape=(frame_parameter_number,self.frame_size))
        main = Flatten()(main_input)
        
        # Additional input (other inputs)
        aux_input = Input(shape=(aux_number,))
        
        x = keras.layers.concatenate([main, aux_input])
        x = Dense(32, activation='relu')(x)
        x = Dense(32, activation='relu')(x)
        x = Dense(16, activation='relu')(x)
        out = Dense(self.action_size, activation='linear')(x)
        
        model = Model(inputs=[main_input, aux_input], outputs=out)
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        print(model.summary())
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            # The agent acts randomly
            return random.randrange(self.action_size)
        
        # Predict the reward value based on the given state
        act_values = self.model.predict(state)
        
        # Pick the action based on the predicted reward
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        # Sample minibatch from the memory
        minibatch = random.sample(self.memory, batch_size)
        
        # Extract informations from each memory
        for state, action, reward, next_state, done in minibatch:
            
            # if done, make our target reward
            target = reward
            if not done:
                # predict the future discounted reward
                target = (reward + self.gamma *
                          np.amax(self.model.predict(next_state)[0]))
                
            # make the agent to approximately map
            # the current state to future discounted reward
            # We'll call that target_f
            target_f = self.model.predict(state)
            target_f[0][action] = target
            
            # Train the Neural Net with the state and target_f
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

Using TensorFlow backend.


## Define Trading Game

In [5]:
class TradingGame:
    def __init__(self, start_capital, frame_size, database):
        self.start_capital = start_capital
        self.frame_size = frame_size
        
        self.database = np.asarray(database["bitcoin_usd"])
        self.capital = start_capital
        self.timestep = 0
        self.time_frame = self.database[0:self.frame_size]
        
        self.position = 0
        self.state = None
    
    ### Reset function
    def reset(self):
        # Set everything to the initial start value
        self.capital = self.start_capital
        self.timestep = 0
        self.time_frame = self.database[0:self.frame_size]
        
        self.position = 0.0
        
        # state => (time_frame, capital, position)
        frame = np.reshape(self.time_frame, [1, 6, frame_size])
        aux = np.reshape([self.capital, self.position], [1, 2])
        self.state = [frame, aux]
        
        return self.state
    
    
    ### Next frame function
    def get_next_frame(self):
        self.timestep += 1
        
        if (self.frame_size+self.timestep) < self.database.shape[0]:
            next_frame = self.database[self.timestep:self.frame_size+self.timestep]
            done = False
        else:
            next_frame = self.time_frame
            done = True
            
        return next_frame, done
    
    ### Reward function
    def calc_reward(self, action, next_frame):
        # Get new price
        new_price = next_frame[self.frame_size-1, 2]
        
        # Check the action and calculate the reward
        #hold
        if action == 0: 
            reward = 0
            
        #buy
        elif action == 1: 
            if self.position is 0:
                reward = 0
                self.position = new_price
            else:
                reward = -100
                
        #sell
        elif action == 2: 
            if self.position is not 0:
                reward = new_price - self.position
                self.position = 0
            else:
                reward = -100
        return reward
    
    ### Next step function
    def step(self, action):
        # Get next time frame
        next_frame, done = self.get_next_frame()
        
        # Get reward
        reward = self.calc_reward(action, next_frame)
        
        # check if done
        self.capital += reward
        if self.capital <= 0:
            done = True
            
        # Next state
        next_frame = np.reshape(next_frame, [1, 6, frame_size])
        next_aux = np.reshape([self.capital, self.position], [1, 2])
        next_state = [next_frame, next_aux]
        
        # update state
        self.state = next_state
        
        return next_state, reward, done
    
    def get_score(self):
        return self.capital

## Train the agent

### Set parameters

In [6]:
# Parameter for Agent
frame_size = 100 # Time frame
frame_parameter_number = 6
aux_number = 2 # Additional parameters
#state_attributes = 8
action_size = 3 # hold, buy, sell

# Parameter for Trading
batch_size = 64
EPISODES = 1000
CAPITAL = 10000
#POSITION = 0

### Training

In [7]:
# Create agent
agent = DQNAgent(frame_size, frame_parameter_number, aux_number, action_size)

# Initialize Environment
env = TradingGame(CAPITAL, frame_size, DB)

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 6, 100)        0                                            
____________________________________________________________________________________________________
flatten_1 (Flatten)              (None, 600)           0                                            
____________________________________________________________________________________________________
input_2 (InputLayer)             (None, 2)             0                                            
____________________________________________________________________________________________________
concatenate_1 (Concatenate)      (None, 602)           0                                            
___________________________________________________________________________________________

In [8]:
#Episodes to train
for e in range(EPISODES):
    
    #Create initial state from time frame
    state = env.reset()
    #state = np.asarray(state)
    #state = np.reshape(state, [1, 6, frame_size,])

    #time frames
    for time_frame in range(frame_size, 1000):
        
        #Agent takes action
        action = agent.act(state)

        #Calc reward
        next_state, reward, done = env.step(action)
        #print(next_state)
        #next_state = np.reshape(next_state, [1, 6, frame_size])
        
        #Remember action
        agent.remember(state, action, reward, next_state, done)
        
        #Override state with next state
        state = next_state
        
        if done:
            print("episode: {}/{}, score: {}, e: {:.2}"
                  .format(e, EPISODES, env.get_score(), agent.epsilon))
            break
       
    #Replay
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)
    # if e % 10 == 0:
    #     agent.save("./save/cartpole-dqn.h5")
        
    

episode: 0/1000, score: -88.36150334999911, e: 1.0
episode: 1/1000, score: -90.15182759999698, e: 0.99
episode: 2/1000, score: -276.6328506600021, e: 0.99
episode: 3/1000, score: -198.0339092000013, e: 0.99
episode: 4/1000, score: -17.93682326000271, e: 0.98
episode: 5/1000, score: -61.94404779999604, e: 0.98
episode: 6/1000, score: -142.58679401000154, e: 0.97
episode: 7/1000, score: -3.0440589400004683, e: 0.97
episode: 8/1000, score: -57.00018748999901, e: 0.96
episode: 9/1000, score: -70.64710756000022, e: 0.96
episode: 10/1000, score: -43.204291679991, e: 0.95
episode: 11/1000, score: -26.18975236999904, e: 0.95
episode: 12/1000, score: -27.561153900001955, e: 0.94
episode: 13/1000, score: -70.00811117000194, e: 0.94
episode: 14/1000, score: -94.12197295999977, e: 0.93
episode: 15/1000, score: -40.0129502999971, e: 0.93
episode: 16/1000, score: -24.35073328999897, e: 0.92
episode: 17/1000, score: -531.3749491899998, e: 0.92
episode: 18/1000, score: -43.40953909999371, e: 0.91
epis

episode: 156/1000, score: 1030.2919639699994, e: 0.46
episode: 157/1000, score: 5651.535939089996, e: 0.46
episode: 158/1000, score: 3365.2138856999973, e: 0.45
episode: 159/1000, score: 10437.13544089, e: 0.45
episode: 160/1000, score: 9559.892046009996, e: 0.45
episode: 161/1000, score: 5536.858533669998, e: 0.45
episode: 162/1000, score: 12549.009331440007, e: 0.44
episode: 163/1000, score: 8452.227692240007, e: 0.44
episode: 164/1000, score: -41.542372949999844, e: 0.44
episode: 165/1000, score: 11907.14159323, e: 0.44
episode: 166/1000, score: 9245.054889710002, e: 0.44
episode: 167/1000, score: 4400.214973079996, e: 0.43
episode: 168/1000, score: 6734.521878520003, e: 0.43
episode: 169/1000, score: 7361.844324629999, e: 0.43
episode: 170/1000, score: 558.2857337300029, e: 0.43
episode: 171/1000, score: 11116.427702539999, e: 0.42
episode: 172/1000, score: 12013.730724890003, e: 0.42
episode: 173/1000, score: 18934.886372740002, e: 0.42
episode: 174/1000, score: 6676.299999999999,

episode: 313/1000, score: 13382.48703139, e: 0.21
episode: 314/1000, score: 16834.513996069996, e: 0.21
episode: 315/1000, score: 16275.0, e: 0.21
episode: 316/1000, score: 15775.252229349999, e: 0.21
episode: 317/1000, score: 15897.183525300003, e: 0.2
episode: 318/1000, score: 8578.271202180003, e: 0.2
episode: 319/1000, score: 12747.97203702, e: 0.2
episode: 320/1000, score: 5569.104767150002, e: 0.2
episode: 321/1000, score: 12331.00608098, e: 0.2
episode: 322/1000, score: 21948.127347020003, e: 0.2
episode: 323/1000, score: 16490.371891519997, e: 0.2
episode: 324/1000, score: 18727.500000000004, e: 0.2
episode: 325/1000, score: 15795.530880389999, e: 0.2
episode: 326/1000, score: 17786.52259588, e: 0.2
episode: 327/1000, score: 17481.2, e: 0.19
episode: 328/1000, score: 14897.9, e: 0.19
episode: 329/1000, score: 12925.783831469998, e: 0.19
episode: 330/1000, score: 17132.805291180004, e: 0.19
episode: 331/1000, score: 18595.47899748999, e: 0.19
episode: 332/1000, score: 20591.2010

episode: 476/1000, score: 20309.798368490003, e: 0.092
episode: 477/1000, score: 10611.758019090004, e: 0.092
episode: 478/1000, score: 14588.7, e: 0.091
episode: 479/1000, score: 17630.48039625, e: 0.091
episode: 480/1000, score: 19404.899028400003, e: 0.09
episode: 481/1000, score: 15906.156932799999, e: 0.09
episode: 482/1000, score: 22878.491963970002, e: 0.089
episode: 483/1000, score: 24487.0, e: 0.089
episode: 484/1000, score: 25979.342741369997, e: 0.088
episode: 485/1000, score: 20117.592708289998, e: 0.088
episode: 486/1000, score: 21690.5, e: 0.088
episode: 487/1000, score: 16545.4, e: 0.087
episode: 488/1000, score: 20299.4, e: 0.087
episode: 489/1000, score: 17679.9, e: 0.086
episode: 490/1000, score: 16127.609741009997, e: 0.086
episode: 491/1000, score: 24071.9011404, e: 0.085
episode: 492/1000, score: 27715.0, e: 0.085
episode: 493/1000, score: 19060.08198568, e: 0.084
episode: 494/1000, score: 18332.731896440004, e: 0.084
episode: 495/1000, score: 18617.4, e: 0.084
epi

episode: 646/1000, score: 26416.23122289, e: 0.039
episode: 647/1000, score: 20071.0, e: 0.039
episode: 648/1000, score: 16453.0, e: 0.039
episode: 649/1000, score: 28629.0, e: 0.039
episode: 650/1000, score: 23811.0, e: 0.038
episode: 651/1000, score: 20213.0, e: 0.038
episode: 652/1000, score: 25092.76654711, e: 0.038
episode: 653/1000, score: 26205.0, e: 0.038
episode: 654/1000, score: 21065.1, e: 0.038
episode: 655/1000, score: 21399.0, e: 0.038
episode: 656/1000, score: 21411.59263752, e: 0.037
episode: 657/1000, score: 28324.0, e: 0.037
episode: 658/1000, score: 25830.030880390004, e: 0.037
episode: 659/1000, score: 19546.0, e: 0.037
episode: 660/1000, score: 17731.0, e: 0.037
episode: 661/1000, score: 19439.0, e: 0.036
episode: 662/1000, score: 22672.22274994, e: 0.036
episode: 663/1000, score: 28054.0, e: 0.036
episode: 664/1000, score: 25485.657258630003, e: 0.036
episode: 665/1000, score: 22413.698349860002, e: 0.036
episode: 666/1000, score: 23603.0, e: 0.035
episode: 667/10

episode: 827/1000, score: 26272.0, e: 0.016
episode: 828/1000, score: 28108.0, e: 0.016
episode: 829/1000, score: 23322.0, e: 0.016
episode: 830/1000, score: 21424.0, e: 0.016
episode: 831/1000, score: 27387.0, e: 0.016
episode: 832/1000, score: 27325.0, e: 0.015
episode: 833/1000, score: 24050.0, e: 0.015
episode: 834/1000, score: 26887.0, e: 0.015
episode: 835/1000, score: 22522.10076046, e: 0.015
episode: 836/1000, score: 26545.21734077, e: 0.015
episode: 837/1000, score: 26036.0, e: 0.015
episode: 838/1000, score: 17829.1943398, e: 0.015
episode: 839/1000, score: 24872.0, e: 0.015
episode: 840/1000, score: 21090.0, e: 0.015
episode: 841/1000, score: 30155.69836849, e: 0.015
episode: 842/1000, score: 23808.0, e: 0.015
episode: 843/1000, score: 16249.0, e: 0.015
episode: 844/1000, score: 27566.0, e: 0.015
episode: 845/1000, score: 18551.0, e: 0.014
episode: 846/1000, score: 13641.0, e: 0.014
episode: 847/1000, score: 16223.0, e: 0.014
episode: 848/1000, score: 16894.0, e: 0.014
episo

def calc_next_state(state):
#Calc Capital (action == 0 then sell, action == 1 then buy) 5 coins * state_close_price
        #Calc position (current position -/+ differ from action)
        #Get newest state item and append capital and position
        #Remove latest entry and add new state item