# DQN Trading Agent

## Read database

In [1]:
# Open the HDF5 price database read-only and list the datasets it contains.
# The handle is left open on purpose: later cells read from DB directly.
import h5py

DB_FILE = "../data/dataset_1h_1000.hdf5"
DB = h5py.File(DB_FILE, mode="r")

for dataset_name in DB.keys():
    print(dataset_name)

bitcoin_usd
etherium_usd
ripple_usd


In [2]:
# Show the first two rows (timesteps) of the bitcoin_usd dataset
print(DB["bitcoin_usd"][:2])

[[  1.51543080e+12   1.47930000e+04   1.46940000e+04   1.48600000e+04
    1.46690000e+04   7.86847895e+02]
 [  1.51542720e+12   1.44000000e+04   1.47890000e+04   1.49230000e+04
    1.43470000e+04   4.20724394e+03]]


## Define packages

In [3]:
import random
import numpy as np

## Define DQN Agent

In [4]:
from collections import deque
from keras.models import Model 
from keras.layers import Input, Dense, Flatten, Reshape
from keras.optimizers import Adam
from keras import backend as K

# Ask the Keras backend for TensorFlow-style image dimension ordering.
# NOTE(review): the DQNAgent model in this file only uses Flatten/Dense layers,
# so this presumably has no effect here; newer Keras releases reportedly replaced
# this call with set_image_data_format - confirm before upgrading Keras.
K.set_image_dim_ordering('tf')

class DQNAgent:
    """Deep Q-learning agent with an epsilon-greedy policy and replay memory.

    The agent stores (state, action, reward, next_state, done) transitions in
    a bounded memory and trains a small dense network to map a state to one
    estimated value per action.

    Args:
        state_size: Number of timesteps in one state window; used as the
            second input dimension of the network.
        action_size: Number of discrete actions (network outputs).
        n_features: Number of values per timestep; used as the first input
            dimension of the network. Defaults to 6, the value that was
            previously hard-coded.
    """

    def __init__(self, state_size, action_size, n_features=6):
        self.state_size = state_size
        self.action_size = action_size
        self.n_features = n_features
        # Bounded replay memory: the oldest transitions drop out automatically
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Q-network.

        Returns:
            A compiled Keras model taking inputs of shape
            (n_features, state_size) and producing action_size linear outputs.
        """
        # Derive the input shape from the constructor arguments instead of a
        # fixed (6, 100), so other window lengths build a matching network.
        inputs = Input(shape=(self.n_features, self.state_size))
        x = Flatten()(inputs)
        x = Dense(32, activation='relu')(x)
        x = Dense(32, activation='relu')(x)
        x = Dense(16, activation='relu')(x)
        out = Dense(self.action_size, activation='linear')(x)

        model = Model(inputs=inputs, outputs=out)
        model.compile(loss='mse',
                      optimizer=Adam(lr=self.learning_rate))
        # summary() prints by itself; wrapping it in print() added a stray "None"
        model.summary()
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for `state` with an epsilon-greedy policy.

        Returns:
            A random action index with probability epsilon, otherwise the
            index of the highest predicted value for `state`.
        """
        if np.random.rand() <= self.epsilon:
            # The agent acts randomly
            return random.randrange(self.action_size)

        # Predict one value per action for the given state
        act_values = self.model.predict(state)

        # Pick the action with the highest predicted value
        return np.argmax(act_values[0])  # returns action

    def replay(self, batch_size):
        """Train the network on a random minibatch drawn from memory.

        Does nothing (and leaves epsilon untouched) while fewer than
        `batch_size` transitions have been remembered, instead of letting
        random.sample raise ValueError.

        Args:
            batch_size: Number of remembered transitions to sample.
        """
        if len(self.memory) < batch_size:
            return

        # Sample minibatch from the memory
        minibatch = random.sample(self.memory, batch_size)

        for state, action, reward, next_state, done in minibatch:

            # Terminal transitions use the plain reward as target
            target = reward
            if not done:
                # Otherwise add the discounted best predicted future value
                target = (reward + self.gamma *
                          np.amax(self.model.predict(next_state)[0]))

            # Keep the current predictions for all other actions and only
            # overwrite the entry of the action that was actually taken
            target_f = self.model.predict(state)
            target_f[0][action] = target

            # Train the Neural Net with the state and target_f
            self.model.fit(state, target_f, epochs=1, verbose=0)

        # Decay exploration once per replay call, down to epsilon_min
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load network weights from the file `name`."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to the file `name`."""
        self.model.save_weights(name)

Using TensorFlow backend.


## Define Trading Game

In [5]:
class TradingGame:
    """Price-direction guessing game over a sliding window of candle rows.

    Each state is a window of `state_size` consecutive rows of the
    "bitcoin_usd" dataset. On every step the window moves forward by one row
    and the agent is rewarded for guessing whether the value in column 2 of
    the newest row went down (action 0) or up (action 1).

    Args:
        start_capital: Score the player starts every episode with.
        state_size: Number of rows in one state window.
        database: Mapping (e.g. an open h5py file) holding a 2-D
            "bitcoin_usd" dataset with one row per timestep.
    """

    def __init__(self, start_capital, state_size, database):
        self.start_capital = start_capital
        self.state_size = state_size

        candles = np.asarray(database["bitcoin_usd"])
        # The rows printed from this dataset are newest-first (column 0, which
        # appears to be a millisecond timestamp, decreases from row to row).
        # Sort them oldest-first so that "next state" really is later in time;
        # data that is already chronological is left unchanged.
        if candles.ndim == 2 and candles.shape[0] > 1:
            candles = candles[np.argsort(candles[:, 0], kind="stable")]
        self.database = candles

        self.reset()

    def reset(self):
        """Restart the episode and return the first state window."""
        self.capital = self.start_capital
        self.timestep = 0
        self.state = self.database[0:self.state_size]

        return self.state

    ### Next state function
    def calc_next_state(self):
        """Advance one timestep.

        Returns:
            (next_state, done): the window shifted by one row and False, or
            the unchanged current window and True once the data is used up.
        """
        self.timestep += 1

        # NOTE: the strict '<' ends the episode one window before the last
        # row is reached; kept as-is because callers that step a fixed number
        # of times rely on the episode ending at exactly this point.
        if (self.state_size + self.timestep) < self.database.shape[0]:
            next_state = self.database[self.timestep:self.state_size + self.timestep]
            done = False
        else:
            next_state = self.state
            done = True

        return next_state, done

    ### Reward function
    def calc_reward(self, action, next_state):
        """Score a guess about the price movement into `next_state`.

        Args:
            action: 0 to bet on a falling price, 1 to bet on a rising price.
            next_state: Window returned by calc_next_state().

        Returns:
            1 for a correct guess, -1 otherwise (including an unchanged price).
        """
        # Compare column 2 of the newest row before and after the step
        price = self.state[self.state_size - 1, 2]
        last_price = next_state[self.state_size - 1, 2]
        diff = last_price - price

        # give -1 when wrong and 1 when correct
        guessed_fall = diff < 0 and action == 0
        guessed_rise = diff > 0 and action == 1
        return 1 if (guessed_fall or guessed_rise) else -1

    ### Next step function
    def step(self, action):
        """Apply `action`, move to the next window and update the score.

        Returns:
            (next_state, reward, done); done is True when the data is used up
            or the capital has been lost.
        """
        # Get next state
        next_state, done = self.calc_next_state()

        # Get reward
        reward = self.calc_reward(action, next_state)

        # The episode also ends when the capital is gone; '<=' (instead of
        # '== 0') still terminates when the capital starts at or below zero.
        self.capital += reward
        if self.capital <= 0:
            done = True

        # update state
        self.state = next_state

        return next_state, reward, done

    def get_score(self):
        """Return the current capital."""
        return self.capital

## Train the agent

### Set parameters

In [6]:
# Parameter for Agent
state_size = 100 # Time frame: number of consecutive rows in one state window
#state_attributes = 8
action_size = 2 # 0 = bet on falling price (sell), 1 = bet on rising price (buy); there is no "hold" action

# Parameter for Trading
batch_size = 32 # Number of remembered transitions sampled per replay call
EPISODES = 5000 # Number of training episodes
CAPITAL = 100 # Starting score; the environment adds the +1/-1 reward to it on every step
#POSITION = 0

### Training

In [7]:
# Create agent
# (constructing it builds the Q-network and prints the model summary)
agent = DQNAgent(state_size, action_size)

# Initialize Environment
# (reads the "bitcoin_usd" dataset from the opened HDF5 file; score starts at CAPITAL)
env = TradingGame(CAPITAL, state_size, DB)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 6, 100)            0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 600)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 32)                19232     
_________________________________________________________________
dense_2 (Dense)              (None, 32)                1056      
_________________________________________________________________
dense_3 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 34        
Total params: 20,850
Trainable params: 20,850
Non-trainable params: 0
_________________________________________________________________
None


In [8]:
# Train the agent: one pass over the price history per episode
for episode in range(EPISODES):

    # Start from the first window, reshaped into a single-sample batch.
    # NOTE(review): the environment window is (state_size, 6); reshape (unlike
    # a transpose) keeps the flat element order, it only relabels the axes.
    state = np.reshape(env.reset(), [1, 6, state_size])

    # At most (1000 - state_size) steps per episode
    for _ in range(1000 - state_size):

        # Let the agent pick an action and let the environment score it
        action = agent.act(state)
        observation, reward, done = env.step(action)
        next_state = np.reshape(observation, [1, 6, state_size])

        # Store the transition for experience replay, then move on
        agent.remember(state, action, reward, next_state, done)
        state = next_state

        if not done:
            continue

        # Episode finished: report the score and the current exploration rate
        print("episode: {}/{}, score: {}, e: {:.2}"
              .format(episode, EPISODES, env.get_score(), agent.epsilon))
        break

    # One replay (training) pass per episode once enough transitions exist
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)
    # Optional checkpointing, currently disabled:
    # if episode % 10 == 0:
    #     agent.save("./save/cartpole-dqn.h5")
        
    

episode: 0/5000, score: 128, e: 1.0
episode: 1/5000, score: 140, e: 0.99
episode: 2/5000, score: 92, e: 0.99
episode: 3/5000, score: 106, e: 0.99
episode: 4/5000, score: 88, e: 0.98
episode: 5/5000, score: 96, e: 0.98
episode: 6/5000, score: 84, e: 0.97
episode: 7/5000, score: 82, e: 0.97
episode: 8/5000, score: 114, e: 0.96
episode: 9/5000, score: 70, e: 0.96
episode: 10/5000, score: 44, e: 0.95
episode: 11/5000, score: 44, e: 0.95
episode: 12/5000, score: 112, e: 0.94
episode: 13/5000, score: 102, e: 0.94
episode: 14/5000, score: 74, e: 0.93
episode: 15/5000, score: 148, e: 0.93
episode: 16/5000, score: 98, e: 0.92
episode: 17/5000, score: 128, e: 0.92
episode: 18/5000, score: 56, e: 0.91
episode: 19/5000, score: 96, e: 0.91
episode: 20/5000, score: 56, e: 0.9
episode: 21/5000, score: 110, e: 0.9
episode: 22/5000, score: 90, e: 0.9
episode: 23/5000, score: 92, e: 0.89
episode: 24/5000, score: 88, e: 0.89
episode: 25/5000, score: 126, e: 0.88
episode: 26/5000, score: 130, e: 0.88
epis

episode: 216/5000, score: 122, e: 0.34
episode: 217/5000, score: 114, e: 0.34
episode: 218/5000, score: 80, e: 0.34
episode: 219/5000, score: 140, e: 0.33
episode: 220/5000, score: 142, e: 0.33
episode: 221/5000, score: 110, e: 0.33
episode: 222/5000, score: 154, e: 0.33
episode: 223/5000, score: 98, e: 0.33
episode: 224/5000, score: 164, e: 0.33
episode: 225/5000, score: 118, e: 0.32
episode: 226/5000, score: 148, e: 0.32
episode: 227/5000, score: 118, e: 0.32
episode: 228/5000, score: 84, e: 0.32
episode: 229/5000, score: 116, e: 0.32
episode: 230/5000, score: 124, e: 0.32
episode: 231/5000, score: 126, e: 0.31
episode: 232/5000, score: 102, e: 0.31
episode: 233/5000, score: 112, e: 0.31
episode: 234/5000, score: 150, e: 0.31
episode: 235/5000, score: 140, e: 0.31
episode: 236/5000, score: 130, e: 0.31
episode: 237/5000, score: 140, e: 0.3
episode: 238/5000, score: 140, e: 0.3
episode: 239/5000, score: 122, e: 0.3
episode: 240/5000, score: 142, e: 0.3
episode: 241/5000, score: 136, e

episode: 427/5000, score: 162, e: 0.12
episode: 428/5000, score: 144, e: 0.12
episode: 429/5000, score: 134, e: 0.12
episode: 430/5000, score: 136, e: 0.12
episode: 431/5000, score: 130, e: 0.12
episode: 432/5000, score: 130, e: 0.11
episode: 433/5000, score: 134, e: 0.11
episode: 434/5000, score: 142, e: 0.11
episode: 435/5000, score: 154, e: 0.11
episode: 436/5000, score: 158, e: 0.11
episode: 437/5000, score: 130, e: 0.11
episode: 438/5000, score: 128, e: 0.11
episode: 439/5000, score: 128, e: 0.11
episode: 440/5000, score: 144, e: 0.11
episode: 441/5000, score: 126, e: 0.11
episode: 442/5000, score: 132, e: 0.11
episode: 443/5000, score: 116, e: 0.11
episode: 444/5000, score: 138, e: 0.11
episode: 445/5000, score: 152, e: 0.11
episode: 446/5000, score: 134, e: 0.11
episode: 447/5000, score: 160, e: 0.11
episode: 448/5000, score: 144, e: 0.11
episode: 449/5000, score: 110, e: 0.11
episode: 450/5000, score: 126, e: 0.1
episode: 451/5000, score: 146, e: 0.1
episode: 452/5000, score: 1

episode: 634/5000, score: 136, e: 0.042
episode: 635/5000, score: 138, e: 0.041
episode: 636/5000, score: 146, e: 0.041
episode: 637/5000, score: 140, e: 0.041
episode: 638/5000, score: 144, e: 0.041
episode: 639/5000, score: 146, e: 0.041
episode: 640/5000, score: 146, e: 0.04
episode: 641/5000, score: 150, e: 0.04
episode: 642/5000, score: 142, e: 0.04
episode: 643/5000, score: 132, e: 0.04
episode: 644/5000, score: 144, e: 0.04
episode: 645/5000, score: 124, e: 0.039
episode: 646/5000, score: 134, e: 0.039
episode: 647/5000, score: 144, e: 0.039
episode: 648/5000, score: 140, e: 0.039
episode: 649/5000, score: 146, e: 0.039
episode: 650/5000, score: 134, e: 0.038
episode: 651/5000, score: 122, e: 0.038
episode: 652/5000, score: 134, e: 0.038
episode: 653/5000, score: 150, e: 0.038
episode: 654/5000, score: 144, e: 0.038
episode: 655/5000, score: 162, e: 0.038
episode: 656/5000, score: 134, e: 0.037
episode: 657/5000, score: 158, e: 0.037
episode: 658/5000, score: 144, e: 0.037
episo

episode: 840/5000, score: 140, e: 0.015
episode: 841/5000, score: 146, e: 0.015
episode: 842/5000, score: 140, e: 0.015
episode: 843/5000, score: 142, e: 0.015
episode: 844/5000, score: 138, e: 0.015
episode: 845/5000, score: 140, e: 0.014
episode: 846/5000, score: 148, e: 0.014
episode: 847/5000, score: 148, e: 0.014
episode: 848/5000, score: 136, e: 0.014
episode: 849/5000, score: 144, e: 0.014
episode: 850/5000, score: 136, e: 0.014
episode: 851/5000, score: 138, e: 0.014
episode: 852/5000, score: 144, e: 0.014
episode: 853/5000, score: 136, e: 0.014
episode: 854/5000, score: 142, e: 0.014
episode: 855/5000, score: 138, e: 0.014
episode: 856/5000, score: 150, e: 0.014
episode: 857/5000, score: 150, e: 0.014
episode: 858/5000, score: 140, e: 0.014
episode: 859/5000, score: 136, e: 0.013
episode: 860/5000, score: 136, e: 0.013
episode: 861/5000, score: 156, e: 0.013
episode: 862/5000, score: 144, e: 0.013
episode: 863/5000, score: 138, e: 0.013
episode: 864/5000, score: 148, e: 0.013


episode: 1048/5000, score: 144, e: 0.01
episode: 1049/5000, score: 142, e: 0.01
episode: 1050/5000, score: 146, e: 0.01
episode: 1051/5000, score: 142, e: 0.01
episode: 1052/5000, score: 134, e: 0.01
episode: 1053/5000, score: 140, e: 0.01
episode: 1054/5000, score: 140, e: 0.01
episode: 1055/5000, score: 142, e: 0.01
episode: 1056/5000, score: 144, e: 0.01
episode: 1057/5000, score: 136, e: 0.01
episode: 1058/5000, score: 140, e: 0.01
episode: 1059/5000, score: 140, e: 0.01
episode: 1060/5000, score: 148, e: 0.01
episode: 1061/5000, score: 150, e: 0.01
episode: 1062/5000, score: 148, e: 0.01
episode: 1063/5000, score: 144, e: 0.01
episode: 1064/5000, score: 152, e: 0.01
episode: 1065/5000, score: 152, e: 0.01
episode: 1066/5000, score: 138, e: 0.01
episode: 1067/5000, score: 144, e: 0.01
episode: 1068/5000, score: 134, e: 0.01
episode: 1069/5000, score: 144, e: 0.01
episode: 1070/5000, score: 140, e: 0.01
episode: 1071/5000, score: 146, e: 0.01
episode: 1072/5000, score: 148, e: 0.01


KeyboardInterrupt: 

def calc_next_state(state):
#Calc Capital (action == 0 then sell, action == 1 then buy) 5 coins * state_close_price
        #Calc position (current position -/+ differ from action)
        #Get newest state item and append capital and position
        #Remove latest entry and add new state item