In [1]:
#numpy
import numpy as np

#記錄用的小工具
import logging

#keras
import keras.backend as K
from keras.models import Model
from keras.layers import Dense, Activation, Flatten, Permute
from keras.optimizers import Adam
from keras.layers import Input, Dense, concatenate, Lambda, Conv2D, Reshape

#OpenAI gym
import gym
from gym import error, spaces, utils
from gym.utils import seeding
import math

#keras-rl
from rl.agents.dqn import DQNAgent
from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.agents.dqn import DQNAgent
from rl.core import Env
from rl.callbacks import FileLogger, ModelIntervalCheckpoint

#讀資料用
import pandas
import os, sys, csv
import pandas as pd
import numpy as np
import pickle



Using TensorFlow backend.


In [2]:
# Location of the ETF price history. Adjust this to wherever your copy of the
# CSV lives; os.path.join keeps the path valid on every operating system.
ACIM_data_frame = pandas.read_csv(
    filepath_or_buffer = os.path.join(os.getcwd(), "etf_data", "ACIM.csv")
)

# Preview the raw data. Only the first rows are shown so the cell output
# stays small; the full frame is still available in ACIM_data_frame.
ACIM_data_frame.head(10)




Unnamed: 0,Date,Open,High,Low,Close,Volume,Adj Close
0,2017-02-15,68.290001,68.519997,68.150002,68.459999,5900,68.459999
1,2017-02-14,67.930000,68.239998,67.860001,68.129997,6300,68.129997
2,2017-02-13,68.040001,68.169998,67.820000,68.120003,6400,68.120003
3,2017-02-10,67.669998,67.809998,67.470001,67.750000,2800,67.750000
4,2017-02-09,67.059998,67.559998,67.059998,67.500000,6700,67.500000
5,2017-02-08,67.019997,67.050003,66.959999,66.959999,1300,66.959999
6,2017-02-07,66.949997,67.080002,66.730003,66.930000,8800,66.930000
7,2017-02-06,67.180000,67.180000,66.889999,67.080002,8900,67.080002
8,2017-02-03,67.339996,67.339996,66.930000,67.110001,23400,67.110001
9,2017-02-02,66.779999,66.790001,66.570000,66.790001,11000,66.790001


In [3]:
#整理data的函式
def drop(data, split_date='2016'):
    '''
    Clean an ETF price table and split it into a train part and a test part.

    Rows in which any of Open / High / Low / Close / Volume / Adj Close equals
    0 are discarded. The remaining rows are then put in the reverse of their
    input order (no sorting is performed; the sample data shown in this
    notebook is newest-first, so reversing yields oldest-first) and split on
    the Date column.

    input:
        data: etf dataframe with a string "Date" column and the six columns
              named above. The dataframe passed in is not modified.
        split_date: rows whose Date compares lower than this string go to the
              train set, every other row goes to the test set. This is a plain
              string comparison, so it relies on ISO-style "YYYY-MM-DD" dates.
              Defaults to '2016'.
    output: train dataframe, test dataframe, train date, test date
        Both dataframes have the Date column removed and a fresh 0..n-1 index.
        The two date values are plain lists, aligned row by row with their
        dataframe.
    '''
    checked_columns = ["Open", "High", "Low", "Close", "Volume", "Adj Close"]

    # A row is kept only when none of the checked columns is exactly zero.
    has_no_zero = (data[checked_columns] != 0).all(axis=1)

    # Reverse the row order and renumber the index from 0.
    reversed_rows = data.loc[has_no_zero].iloc[::-1].reset_index(drop=True)

    # ">=" (rather than ">") guarantees every row lands in exactly one set,
    # even when a Date is equal to split_date.
    train_df = reversed_rows[reversed_rows["Date"] < split_date].reset_index(drop=True)
    test_df = reversed_rows[reversed_rows["Date"] >= split_date].reset_index(drop=True)

    # pop() removes the Date column from the frame and hands it back, so the
    # dates are kept separately from the numeric features.
    train_date = train_df.pop('Date').tolist()
    test_date = test_df.pop('Date').tolist()

    return train_df, test_df, train_date, test_date


# In[5]:

# Clean the raw data and split it into train / test parts.
# The fourth value is the list of test dates; it gets its own name so it no
# longer overwrites the train dates.
ACIM_train_df, ACIM_test_df, ACIM_train_date, ACIM_test_date = drop(ACIM_data_frame)

# Plain numeric array that the trading environment uses as its board.
ACIM_train_array = ACIM_train_df.values

# Shape of the prepared training data: (rows, columns).
ACIM_train_array.shape




(758, 6)

In [4]:
#重頭戲來了，我們需要定義一個完整的RL模型，讓keras-rl跟OpenAI gym可以幫我們跑

#裡面有些東西是一定要填的，是OpenAI環境模板的規定

class ETF_Game(Env):
    """Single-ETF trading game exposed through the keras-rl ``Env`` interface.

    The board is a 2-D array with one row per trading day. Every observation
    is the block of the most recent ``WINDOW`` rows. The current price is read
    from column ``PRICE_COLUMN`` of the newest row of that block. Each action
    buys or sells up to 10 units at the current price, and the reward is the
    change in total assets since the previous step.

    Parameters
    ----------
    board : 2-D array, optional
        Price history to play on. When omitted, the notebook-level
        ``ACIM_train_array`` is looked up on every ``reset()``, which is what
        the environment always did before this argument existed.
    """

    # Number of rows in one observation; also the first playable day, because
    # that many earlier rows are needed to build the first observation.
    WINDOW = 20

    # Board column used as the trading price.
    # NOTE(review): presumably "Close" when the board comes from drop()
    # (Open, High, Low, Close, ...) -- confirm against the CSV column order.
    PRICE_COLUMN = 3

    # Cash available at the start of every episode.
    INITIAL_CASH = 100000

    # Environment set-up (required by the Env template).
    def __init__(self, board=None):

        # Remember the optional custom board; None means "use the global
        # ACIM_train_array at reset time".
        self._custom_board = board

        # Action space, observation space and reward range follow the gym
        # data structures.
        self.action_space = spaces.Discrete(21)
        self.observation_space = spaces.Box(low = 0, high = math.inf, shape = (self.WINDOW, 6))
        self.reward_range = (-(math.inf), math.inf)

        # Name used when saving weights and logs.
        self.name = "ETF"

        # Initialise the random number generator.
        self.seed()

        # Start from a fresh board.
        self.reset()

    # Reset the game to its initial state (required).
    def reset(self):

        # Board to play on: the custom one if given, else the global array.
        if self._custom_board is None:
            self.board = ACIM_train_array
        else:
            self.board = self._custom_board

        # Start at day WINDOW so that a full window of history exists.
        self.day = self.WINDOW

        # The episode ends when the day counter reaches the number of rows.
        self.max_day = self.board.shape[0]

        # Starting cash.
        self.cash = self.INITIAL_CASH

        # Starting position: no units held, so no financial assets.
        self.num_of_stock = 0
        self.financial_assets = 0

        # Starting total assets, also stored as the baseline for the first reward.
        self.assets = self.cash+self.financial_assets
        self.assets_last_time = self.assets

        # Current price: price column of the newest row in the observation.
        self.price = self.board[self.day-1][self.PRICE_COLUMN]

        # Flag read by render() to know whether the episode has finished.
        self.DONE = False

        # Return the initial observation.
        return self.get_observation()

    # Play one step: apply the action, update the portfolio, compute the
    # reward and advance one day (required).
    def step(self, action):

        # Raw actions are 0..20: 0 = hold, 1..10 = buy 1..10 units,
        # 11..20 = sell 1..10 units. Selling is represented as a negative amount.
        if action > 10:
            action = (action-10)*(-1)

        # Not enough cash for the requested purchase: buy as many whole units
        # as the cash allows instead.
        if self.cash < self.price*action :
            action = int(self.cash/self.price)

        # Cannot sell more units than are held: sell everything instead.
        if self.num_of_stock < action*(-1):
            action = self.num_of_stock*-1

        # Log the effective (clamped) action and what kind of move it is.
        logging.debug("Action {}".format(action))
        if action == 0:
            logging.debug("Hold")
        elif action > 0:
            logging.debug("Buy")
        else:
            logging.debug("Sell")

        # Signed number of units traded in this step (negative when selling).
        purchance_quantity = action

        # Update the number of units held.
        self.num_of_stock += purchance_quantity

        # Value of the units held at the current price.
        self.financial_assets = self.num_of_stock*self.price

        # Pay for (or receive the proceeds of) the trade.
        self.cash -= purchance_quantity*self.price

        # Total assets after the trade.
        self.assets = self.cash+self.financial_assets

        # Reward: change in total assets since the previous step.
        reward = self.assets-self.assets_last_time

        # Remember today's assets as the baseline for the next reward.
        self.assets_last_time = self.assets

        # Advance one day.
        self.day +=1

        # Price for the new day.
        self.price = self.board[self.day-1][self.PRICE_COLUMN]

        # Observation for the new day.
        observation = self.get_observation()

        # The episode ends when total assets are zero or the last row of the
        # board has been reached.
        done = bool(self.assets == 0 or self.day >= self.max_day)
        self.DONE = done

        # gym expects an info dictionary; nothing needs to be reported here.
        info = dict()

        # Return value and order are dictated by the gym interface.
        return observation, reward, done, info

    # Seed the environment's random number generator (required).
    def seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    # Current state of the game.
    def get_observation(self):
        # The WINDOW most recent rows of the board, ending just before self.day.
        return self.board[self.day-self.WINDOW:self.day]

    # Text summary shown while testing (required). Nothing is written until
    # the episode has finished.
    def render(self, mode='human', close=False):
        # Imported here because the notebook's import cell does not provide
        # StringIO; without it mode='ansi' raised NameError.
        from io import StringIO

        if close:
            return
        outfile = None
        if self.DONE == True:
            # 'ansi' collects the text in a buffer; any other mode prints it.
            outfile = StringIO() if mode == 'ansi' else sys.stdout
            s = "total assets:" + str(self.assets) + "\n"
            s += "cash left" + str(self.cash) + "\n"
            s += "num of stock left" + str(self.num_of_stock) + "\n"
            s += "final price of ETF" + str(self.price) + "\n"
            s += "financial_assets left" + str(self.financial_assets) + "\n"
            outfile.write(s)
        return outfile
        
        
        




# 這裡回傳的狀態只有過去二十天的價量資料，也許該再傳一些別的？

像是目前的資產配置（現金）和持有的股票數量之類的......

In [5]:
# Instantiate the trading environment built above and read the number of
# discrete actions from it.
ETF_env = ETF_Game()
nb_actions = ETF_env.action_space.n

# keras-rl always stacks WINDOW_LENGTH consecutive observations in front of
# each observation (it treats them like CNN channels). One observation is
# enough here, but the extra axis still has to be present.
BOARD_INPUT_SHAPE = (20, 6)
WINDOW_LENGTH = 1

# keras-rl puts the window axis first, so the network input has to follow
# that layout.
input_shape = (WINDOW_LENGTH,) + BOARD_INPUT_SHAPE

# Input layer.
model_input = Input(shape = input_shape)

# Move the window axis to wherever the backend expects the channel axis.
# image_data_format() is the Keras 2 replacement for image_dim_ordering().
data_format = K.image_data_format()
if data_format == 'channels_last':
    # (width, height, channels)
    permute = Permute((2, 3, 1), input_shape=input_shape)
elif data_format == 'channels_first':
    # (channels, width, height)
    permute = Permute((1, 2, 3), input_shape=input_shape)
else:
    # Fail here with a clear message instead of a NameError on `permute` below.
    raise RuntimeError('Unknown image data format: {}'.format(data_format))

# Apply the permutation to the raw input.
preprocessed_input = permute(model_input)

# "Horizontal" convolution: a (1, 6) kernel.
conv_horizontal_1 =  Conv2D(filters = 32, kernel_size = (1, 6), padding='valid', activation = "relu")

# "Vertical" convolution: a (5, 1) kernel.
conv_vertical_1 = Conv2D(filters = 32, kernel_size = (5, 1), padding='valid', activation = "relu")

# Feed the preprocessed input through both first-stage convolutions.
layer_horizontal_1 = conv_horizontal_1(preprocessed_input)
layer_vertical_1 = conv_vertical_1(preprocessed_input)

# Second-stage horizontal convolution.
conv_horizontal_2 =  Conv2D(filters = 64, kernel_size = (1, 6), padding='valid', activation = "relu")

# Second-stage vertical convolution.
conv_vertical_2 = Conv2D(filters = 64, kernel_size = (5, 1), padding='valid', activation = "relu")

# Cross the branches: horizontal-then-vertical and vertical-then-horizontal.
layer_h_then_v_2 = conv_vertical_2(layer_horizontal_1)
layer_v_then_h_2 = conv_horizontal_2(layer_vertical_1)

# Flatten both branches.
flat_h_then_v = Flatten()(layer_h_then_v_2)
flat_v_then_h = Flatten()(layer_v_then_h_2)

# Join the two branches into one feature vector.
conv_merge = concatenate([flat_h_then_v, flat_v_then_h], name = "Merge_Layer")

# One output value per action. The size comes from the environment so the
# network cannot drift out of sync with the action space.
action = Dense(nb_actions)
output_action = action(conv_merge)

# Wrap everything into a model.
model = Model(model_input, output_action)

# Show the resulting architecture.
model.summary()






____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
input_1 (InputLayer)             (None, 1, 20, 6)      0                                            
____________________________________________________________________________________________________
permute_1 (Permute)              (None, 20, 6, 1)      0           input_1[0][0]                    
____________________________________________________________________________________________________
conv2d_1 (Conv2D)                (None, 20, 1, 32)     224         permute_1[0][0]                  
____________________________________________________________________________________________________
conv2d_2 (Conv2D)                (None, 16, 6, 32)     192         permute_1[0][0]                  
___________________________________________________________________________________________

In [6]:
# Smoke test: check that the model runs on input of the expected shape.

# Random batch of 50 observations (50 days' worth), so 50 rows of action
# values are expected back. The samples are scaled up by 60 to make the
# numbers larger.
test_board = np.random.random_sample(size = (50, 1, 20, 6)) * 60

# Run the batch through the network.
model.predict(test_board)


array([[  3.49328279, -10.50983334,   2.77022576, ...,  -5.18065023,
         -3.05029416,   2.98283577],
       [  0.62337494,  -8.16610432,   3.1692512 , ...,  -0.74925971,
         -4.79799891,   1.02587056],
       [  4.15790319,  -6.2525816 ,   2.42277956, ...,  -2.84172678,
         -3.91748452,   3.69269323],
       ..., 
       [  5.57332706,  -6.02099848,   3.75633383, ...,  -2.68594265,
         -2.38195658,   3.94506836],
       [  6.00819826, -10.56619167,   0.34325862, ...,  -2.65285468,
         -4.11935949,   3.91282988],
       [  2.33501816,  -6.80638409,   0.99258757, ...,  -2.23634434,
         -3.11558533,  -0.62830734]], dtype=float32)

In [7]:
# Confirm the prediction shape: one row per observation in the batch and one
# column per action.
np.shape(model.predict(test_board))

(50, 21)

In [None]:
# Ready for a real run.

# Pick the mode ('train' or 'test') and the number of steps interactively.
mode = input("Mode?")
step = int(input("Step?"))

# Replay memory. window_length must match the window axis the model was
# built with, so the shared constant is used instead of a literal.
memory = SequentialMemory(limit=10000, window_length=WINDOW_LENGTH)

# Exploration policy: epsilon-greedy, with epsilon annealed linearly from
# value_max to value_min over `step` steps; value_test is used when testing.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05, nb_steps=step)

# DQN agent.
# NOTE(review): keras-rl presumably performs no gradient updates until
# nb_steps_warmup steps have elapsed, so a run shorter than 1,000,000 steps
# would never train -- confirm this value is intended.
# NOTE(review): target_model_update=738 is presumably the length of one
# episode (758 rows minus the 20-day window) -- confirm.
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory = memory, policy = policy,
               nb_steps_warmup=1000000, gamma=.90, target_model_update=738)

dqn.compile(Adam(lr=.00001), metrics=['mae'])


# Run in the selected mode.
if mode == 'train':

    # File names for the final weights, the periodic checkpoints and the log.
    weights_filename = 'dqn_{}_weights.h5f'.format(ETF_env.name)
    checkpoint_weights_filename = 'dqn_' + ETF_env.name + '_weights2_{step}.h5f'
    log_filename = 'dqn_{}_log.json'.format(ETF_env.name)
    callbacks = [ModelIntervalCheckpoint(checkpoint_weights_filename, interval=100000)]
    callbacks += [FileLogger(log_filename, interval=100)]

    # Train.
    dqn.fit(ETF_env, callbacks=callbacks, nb_steps=step, log_interval=10000)

    # Save the final weights.
    dqn.save_weights(weights_filename, overwrite=True)

elif mode == 'test':

    # Load the checkpoint written at step number `step` during training.
    # Checkpoints are only written every 100000 steps, so `step` has to be a
    # multiple of that interval for the file to exist.
    weights_filename = "dqn_"+ETF_env.name+"_weights2_" + str(step) + ".h5f"
    dqn.load_weights(weights_filename)
    dqn.test(ETF_env, nb_episodes=10, visualize=True)

else:
    # Previously an unknown mode was ignored without any feedback.
    print("Unknown mode {!r}: expected 'train' or 'test'; nothing was run.".format(mode))


