[[Python] Keras-RLで簡単に強化学習(DQN)を試す](http://qiita.com/inoory/items/e63ade6f21766c7c2393)を参考に、エージェントを作成する。FXの自動取引を行い、利益を出すのが目標。

In [None]:
import matplotlib as mpl
mpl.use('tkagg')
import numpy as np
import pandas as pd
import talib
from logging import getLogger, DEBUG, INFO, WARN, ERROR, CRITICAL
import os
import logging
from logging import StreamHandler, LogRecord

from hist_data import HistData, BitcoinHistData
from fx_trade import FXTrade
from bitcoin_trade import BitcoinTrade
from deep_fx import DeepFX
from debug_tools import DebugTools

Using TensorFlow backend.


In [None]:
import crcmod
class LogRecordWithHexThereadID(logging.LogRecord):
    """LogRecord that also exposes a hexadecimal id as ``hex_threadid``.

    The extra attribute lets a format string reference ``%(hex_threadid)s``.

    NOTE(review): despite the "thread id" naming, the value is derived from
    ``self.process`` (the process id recorded by ``logging``), not from
    ``self.thread`` -- confirm which one is intended.
    """

    def __init__(self, *args, **kwargs):
        """Build the standard record, then attach ``hex_threadid``."""
        super().__init__(*args, **kwargs)
        # The base class has populated ``self.process`` by this point.
        numeric_id = self.process
        self.hex_threadid = self._calc_hex(numeric_id)

    def _calc_hex(self, digit_value):
        """Return ``digit_value`` rendered as a ``0x``-prefixed hex string."""
        hex_text = hex(digit_value)
        return hex_text

def init_logger(sd_loglevel=logging.WARN, stream_loglevel=logging.CRITICAL):
    """Configure and return the ``'deepfx'`` logger.

    Installs ``LogRecordWithHexThereadID`` as the process-wide LogRecord
    factory (so ``%(hex_threadid)s`` is available to the formatter) and
    attaches up to two handlers to the ``'deepfx'`` logger.

    Args:
        sd_loglevel: Threshold for the Google Cloud Logging (Stackdriver)
            handler. A falsy value (e.g. ``None``) disables that handler and
            skips the google-cloud imports entirely.
        stream_loglevel: Threshold for the ``StreamHandler``. A falsy value
            disables that handler.

    Returns:
        The configured ``logging.Logger`` named ``'deepfx'``.

    Notes:
        * The logger's own level is set to the most verbose level among the
          handlers that were attached, so that a handler is never starved of
          records by the logger threshold. If no handler is attached, the
          logger level is left untouched.
        * Handlers already attached to the ``'deepfx'`` logger are removed
          first, so calling this function again (e.g. re-running a notebook
          cell) does not emit every record multiple times.
        * The Cloud Logging client reads its credentials from the file named
          by the ``GOOGLE_SERVICE_ACCOUNT_JSON_PATH`` environment variable.
    """
    logging.setLogRecordFactory(LogRecordWithHexThereadID)
    logger = logging.getLogger('deepfx')
    formatter = logging.Formatter('[%(hex_threadid)s] %(message)s')

    # getLogger() returns the same object on every call, so handlers from an
    # earlier invocation would otherwise pile up and duplicate each record.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    attached = []

    if sd_loglevel:
        # Imported lazily so the google-cloud dependency is only required
        # when the Cloud Logging handler is actually requested.
        from google.cloud.logging import Client
        from google.cloud.logging.handlers import CloudLoggingHandler
        client = Client.from_service_account_json(
            os.environ.get('GOOGLE_SERVICE_ACCOUNT_JSON_PATH'))
        cloud_handler = CloudLoggingHandler(client, name='deepfx')
        cloud_handler.setLevel(sd_loglevel)
        attached.append(cloud_handler)

    if stream_loglevel:
        stream_handler = StreamHandler()
        stream_handler.setLevel(stream_loglevel)
        attached.append(stream_handler)

    for handler in attached:
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    if attached:
        # Handler.level is always numeric after setLevel(), even when the
        # caller passed a level name, so min() is safe here.
        logger.setLevel(min(handler.level for handler in attached))

    return logger

In [None]:
# Create the shared logger with the stream handler disabled, announce the
# start time, then emit one probe message per severity so the effective
# threshold can be checked at the logging destination.
deepfx_logger = init_logger(stream_loglevel=None)
deepfx_logger.critical('DeepFX Started: %s' % DebugTools.now_str())
for _emit, _probe_message in (
    (deepfx_logger.debug, 'loglevel debug    test'),
    (deepfx_logger.info, 'loglevel info     test'),
    (deepfx_logger.warning, 'loglevel warn     test'),
    (deepfx_logger.error, 'loglevel error    test'),
    (deepfx_logger.critical, 'loglevel critical test'),
):
    _emit(_probe_message)

In [None]:
# Market selector flags read by the following cells; the FX branch is checked
# first there, so it wins if both flags are set.
is_for_fx, is_for_bitcoin = False, True

In [None]:
# Select the CSV file and date window for the chosen market, then build the
# history object once. `hd` is left unassigned when neither flag is set.
# NOTE(review): the bitcoin branch constructs a plain HistData although
# BitcoinHistData is imported at the top of the notebook -- confirm which
# class is intended for the coincheck file.
_hist_data_kwargs = None
if is_for_fx:
    _hist_data_kwargs = {
        'csv_path': 'historical_data/DAT_ASCII_USDJPY_M1_201710_h1.csv',
        'begin_date': '2017-10-02T00:00:00',
        'end_date': '2017-10-02T01:59:59',
    }
elif is_for_bitcoin:
    _hist_data_kwargs = {
        'csv_path': 'historical_data/coincheckJPY_1-min_data_2014-10-31_to_2017-10-20_h1.csv',
        'begin_date': '2017-09-01T00:00:00',
        'end_date': '2017-09-30T23:59:59',
    }

if _hist_data_kwargs is not None:
    hd = HistData(**_hist_data_kwargs)

header is included


In [None]:
# Sanity-check the loaded history. In a notebook only the value of the last
# expression is echoed, so this cell displays the row count; the first call's
# result is discarded.
hd.data()
len(hd.data())

2

In [None]:
# Build the trading environment for the selected market and wrap it in a
# DeepFX agent. Nothing is created when neither market flag is set.
# NOTE(review): the meaning of the positional environment arguments
# (1000000 / 0.08 for FX, 10000000 / None for bitcoin) is not visible here --
# presumably an initial balance and a spread; confirm against FXTrade and
# BitcoinTrade.
if is_for_fx or is_for_bitcoin:
    if is_for_fx:
        env = FXTrade(1000000, 0.08, hd, logger=deepfx_logger)
        _training_steps = 100000
    else:
        env = BitcoinTrade(10000000, None, hd, logger=deepfx_logger, amount_unit=0.001)
        _training_steps = 10000000

    # No saved model is passed in. A previously used checkpoint name, kept for
    # reference: 'Keras-RL_DQN_FX_model_meanq1.440944e+06_episode00003.h5'
    prepared_model_filename = None
    dfx = DeepFX(
        env,
        prepared_model_filename=prepared_model_filename,
        steps=_training_steps,
        logger=deepfx_logger,
    )

In [None]:
# Mode switch for this run: True trains the agent, False calls dfx.test().
is_to_train = True
if not is_to_train:
    # NOTE(review): EpisodeLogger is not imported in any visible cell, so this
    # branch would presumably raise NameError -- confirm where it is defined.
    dfx.test(1, [EpisodeLogger()])
else:
    dfx.train(is_for_time_measurement=True)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 2)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 3)                 9         
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 12        
Total params: 21
Trainable params: 21
Non-trainable params: 0
_________________________________________________________________
Training for 500000 steps ...
Training for 500000 steps ...
      1/500000: episode: 1, duration: 0.635s, episode steps: 1, steps per second: 2, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
      1/500000: episode: 1, duration: 0.636s, episode steps: 1, steps per second: 2

     22/500000: episode: 22, duration: 0.015s, episode steps: 1, steps per second: 65, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     22/500000: episode: 22, duration: 0.019s, episode steps: 1, steps per second: 52, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     23/500000: episode: 23, duration: 0.013s, episode steps: 1, steps per second: 76, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     23/500000: episode: 23, duration: 0.015s, episode steps: 1, steps per second: 69, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [

     39/500000: episode: 39, duration: 0.018s, episode steps: 1, steps per second: 56, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     39/500000: episode: 39, duration: 0.021s, episode steps: 1, steps per second: 48, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     40/500000: episode: 40, duration: 0.012s, episode steps: 1, steps per second: 82, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     40/500000: episode: 40, duration: 0.014s, episode steps: 1, steps per second: 70, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [

     58/500000: episode: 58, duration: 0.012s, episode steps: 1, steps per second: 85, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     58/500000: episode: 58, duration: 0.016s, episode steps: 1, steps per second: 64, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     59/500000: episode: 59, duration: 0.014s, episode steps: 1, steps per second: 69, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     59/500000: episode: 59, duration: 0.017s, episode steps: 1, steps per second: 59, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [

     74/500000: episode: 74, duration: 0.011s, episode steps: 1, steps per second: 95, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     74/500000: episode: 74, duration: 0.012s, episode steps: 1, steps per second: 85, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     75/500000: episode: 75, duration: 0.008s, episode steps: 1, steps per second: 118, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
     75/500000: episode: 75, duration: 0.010s, episode steps: 1, steps per second: 101, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000

    101/500000: episode: 101, duration: 0.271s, episode steps: 1, steps per second: 4, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
    101/500000: episode: 101, duration: 0.271s, episode steps: 1, steps per second: 4, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: --, mean_q: --
    102/500000: episode: 102, duration: 0.269s, episode steps: 1, steps per second: 4, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 908800016524299468800.000000, mean_q: 193248.000000
    102/500000: episode: 102, duration: 0.270s, episode steps: 1, steps per second: 4, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000

    115/500000: episode: 115, duration: 0.206s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 48315731279872.000000, mean_q: 209729.390625
    115/500000: episode: 115, duration: 0.207s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 48315731279872.000000, mean_q: 209729.390625
    116/500000: episode: 116, duration: 0.188s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 48043554504704.000000, mean_q: 210898.890625
    116/500000: episode: 116, duration: 0.189s, episode steps: 1, steps per second: 5, episode reward: 

    130/500000: episode: 130, duration: 0.191s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 48133878841344.000000, mean_q: 228509.015625
    130/500000: episode: 130, duration: 0.192s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 48133878841344.000000, mean_q: 228509.015625
    131/500000: episode: 131, duration: 0.172s, episode steps: 1, steps per second: 6, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 47989976465408.000000, mean_q: 229845.515625
    131/500000: episode: 131, duration: 0.173s, episode steps: 1, steps per second: 6, episode reward: 

    146/500000: episode: 146, duration: 0.186s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 908799523943090225152.000000, mean_q: 249187.875000
    146/500000: episode: 146, duration: 0.187s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 908799523943090225152.000000, mean_q: 249187.875000
    147/500000: episode: 147, duration: 0.185s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 47790101102592.000000, mean_q: 250557.156250
    147/500000: episode: 147, duration: 0.185s, episode steps: 1, steps per second: 5, ep

    160/500000: episode: 160, duration: 0.175s, episode steps: 1, steps per second: 6, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 47488664862720.000000, mean_q: 268094.656250
    160/500000: episode: 160, duration: 0.176s, episode steps: 1, steps per second: 6, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 47488664862720.000000, mean_q: 268094.656250
    161/500000: episode: 161, duration: 0.193s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 908799242468113514496.000000, mean_q: 269372.156250
    161/500000: episode: 161, duration: 0.194s, episode steps: 1, steps per second: 5, episode r

    174/500000: episode: 174, duration: 0.172s, episode steps: 1, steps per second: 6, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 47452602236928.000000, mean_q: 285648.093750
    174/500000: episode: 174, duration: 0.174s, episode steps: 1, steps per second: 6, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 47452602236928.000000, mean_q: 285648.093750
    175/500000: episode: 175, duration: 0.172s, episode steps: 1, steps per second: 6, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 47306204250112.000000, mean_q: 286914.093750
    175/500000: episode: 175, duration: 0.173s, episode steps: 1, steps per second: 6, episode reward: 

    189/500000: episode: 189, duration: 0.184s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 0.000 [0.000, 0.000], mean observation: 0.500 [0.000, 1.000], loss: 1817597781248785252352.000000, mean_q: 305647.531250
    189/500000: episode: 189, duration: 0.185s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 0.000 [0.000, 0.000], mean observation: 0.500 [0.000, 1.000], loss: 1817597781248785252352.000000, mean_q: 305647.531250
    190/500000: episode: 190, duration: 0.186s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 908798890624392626176.000000, mean_q: 307004.718750
    190/500000: episode: 190, duration: 0.187s, episode steps: 1, steps per seco

    204/500000: episode: 204, duration: 0.204s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 46790535544832.000000, mean_q: 326268.531250
    204/500000: episode: 204, duration: 0.205s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 46790535544832.000000, mean_q: 326268.531250
    205/500000: episode: 205, duration: 0.191s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 908798679518160093184.000000, mean_q: 327551.406250
    205/500000: episode: 205, duration: 0.193s, episode steps: 1, steps per second: 5, episode r

    218/500000: episode: 218, duration: 0.195s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 908798538780671737856.000000, mean_q: 345497.031250
    218/500000: episode: 218, duration: 0.197s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 908798538780671737856.000000, mean_q: 345497.031250
    219/500000: episode: 219, duration: 0.174s, episode steps: 1, steps per second: 6, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 2726395616342015213568.000000, mean_q: 347001.093750
    219/500000: episode: 219, duration: 0.175s, episode steps: 1, steps per secon

    232/500000: episode: 232, duration: 0.186s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 46686139318272.000000, mean_q: 366198.000000
    232/500000: episode: 232, duration: 0.188s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 46686139318272.000000, mean_q: 366198.000000
    233/500000: episode: 233, duration: 0.192s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 908798257305695027200.000000, mean_q: 367492.218750
    233/500000: episode: 233, duration: 0.193s, episode steps: 1, steps per second: 5, episode r

    246/500000: episode: 246, duration: 0.182s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 46650970079232.000000, mean_q: 384996.531250
    246/500000: episode: 246, duration: 0.184s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 46650970079232.000000, mean_q: 384996.531250
    247/500000: episode: 247, duration: 0.182s, episode steps: 1, steps per second: 5, episode reward: 10000000.000, mean reward: 10000000.000 [10000000.000, 10000000.000], mean action: 1.000 [1.000, 1.000], mean observation: 0.500 [0.000, 1.000], loss: 46211562209280.000000, mean_q: 386306.468750
    247/500000: episode: 247, duration: 0.183s, episode steps: 1, steps per second: 5, episode reward: 

In [None]:
# Record the finish timestamp, mirroring the 'DeepFX Started' message emitted
# at CRITICAL right after the logger was created.
deepfx_logger.critical('DeepFX Finished: %s' % DebugTools.now_str())

In [None]:
import json
import os
import subprocess


def build_slack_payload(title_link, notify_user=None):
    """Return the JSON body for the "DeepFX Finished" Slack webhook message.

    The body is serialised with ``json.dumps`` so that quotes, backslashes or
    other special characters in the inputs are escaped instead of producing
    invalid JSON.

    Args:
        title_link: URL the attachment title links to.
        notify_user: Slack user id to mention. When falsy, the message is
            sent without a mention rather than with a literal ``<@None>``.

    Returns:
        A compact JSON string.
    """
    text = 'DeepFX Finished'
    if notify_user:
        text = '<@%s> %s' % (notify_user, text)
    message = {
        'username': 'deepfx',
        'icon_emoji': ':+1:',
        'channel': 'deepfx',
        'attachments': [{
            'color': '#36a64f',
            'title': 'DeepFX Finished',
            'title_link': title_link,
            'text': text,
        }],
    }
    return json.dumps(message, separators=(',', ':'))


# Notify Slack only when both the webhook and the log-viewer URL are configured.
if os.environ.get('SLACK_WEBHOOK_URL') and os.environ.get('GOOGLE_STACKDRIVER_URL'):
    google_stackdriver_url = os.environ.get('GOOGLE_STACKDRIVER_URL')
    payload = build_slack_payload(google_stackdriver_url,
                                  os.environ.get('SLACK_NOTIFY_RECIEVE_USER'))
    command = ['curl']
    command.append('-XPOST')
    command.append('-HContent-Type: application/json')
    command.append("-d%s" % payload)
    # The webhook URL is a credential: show a placeholder in the cell output
    # instead of the real value, which would be saved with the notebook.
    print(command + ['<SLACK_WEBHOOK_URL>'])
    command.append(os.environ.get('SLACK_WEBHOOK_URL'))
    subprocess.run(command)
else:
    print('Skipped Slack Notification.')

In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
# Plot the Close series together with its Bollinger Bands
# (20-period simple moving average, +/- `sd` standard deviations).
data = hd.data()['Close']
x = data.index
y = data.values
sd = 1
upper, middle, lower = talib.BBANDS(y, timeperiod=20, matype=talib.MA_Type.SMA, nbdevup=sd, nbdevdn=sd)
# A plain loop is used because plotting is a side effect; a list comprehension
# would build a throwaway list that the notebook echoes as cell output.
for _series in (y, upper, middle, lower):
    plt.plot(x, _series)

In [None]:
# Echo the raw Close values as the cell output for inspection
# (presumably a NumPy array backing a pandas Series -- confirm against HistData).
data.values

## References

- [Deep Q-LearningでFXしてみた](http://recruit.gmo.jp/engineer/jisedai/blog/deep-q-learning/)
- [slide](https://www.slideshare.net/JunichiroKatsuta/deep-qlearningfx)

## TODO

足の配列について、indexの外を読み出そうとしている節があるので直す。

```json
{
 insertId:  "1l630l2g1k8tnms"  
 jsonPayload: {…}  
 logName:  "projects/deep-fx/logs/deepfx"  
 receiveTimestamp:  "2017-11-18T17:12:18.459939016Z"  
 resource: {…}  
 severity:  "WARNING"  
 timestamp:  "2017-11-18T17:12:18.459939016Z"  
}
```