In [2]:
import pandas as pd

# データフレームを読み込む
df = pd.read_csv('USDJPY_TrailingStop2.csv', index_col='Date')
df.index = pd.to_datetime(df.index)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,ATR,Buy,Sell,Buy_cat,Sell_cat,Market
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-02 07:21:00,105.351,105.368,105.351,105.354,0.0174,-0.0162,0.0342,-1,1,-1
2014-01-02 07:22:00,105.355,105.355,105.355,105.355,0.01658,-0.01474,0.03074,-1,1,-1
2014-01-02 07:23:00,105.355,105.355,105.351,105.351,0.015951,-0.033853,0.032853,-1,1,-1
2014-01-02 07:24:00,105.354,105.355,105.349,105.355,0.015453,-0.03636,0.02736,-1,1,-1
2014-01-02 07:25:00,105.355,105.381,105.354,105.365,0.016031,-0.023092,0.019092,-1,1,-1


In [12]:
import time
import numpy as np
from numba import jit
from keras.utils.np_utils import to_categorical

# 特徴量を[Close[20], ATR(20)[20]]、目的変数をTrailingStop損益結果の4パターンとする
@jit
def gen_xy(close, atr, y, window_len=20):
    X, Y = [], []
    for i in range(len(close) - window_len):
        # 特徴量を正規化しながら追加
        C = close[i : i + window_len].copy()
        C -= np.min(C)
        C /= np.max(C)
        A = atr[i : i + window_len].copy()
        A -= np.min(A)
        A /= np.max(A)
        X.append([C, A])
        # 目的変数を追加
        Y.append(y[i + window_len - 1])
    X, Y = np.array(X), np.array(Y)
    # 目的変数を正規化
    Y = np.where(Y > 0, Y + 1, Y + 2)
    Y = to_categorical(Y.astype('int32'))
    return X, Y

# 学習データとテストデートに分ける
train, test = df[df.index < '2018'], df[df.index >= '2018']
# 学習データ準備
start = time.time()
Xtrain, Ytrain = gen_xy(train.loc[:, 'Close'].values, train.loc[:, 'ATR'].values, train.loc[:, 'Buy_cat'].values)
print('Elapsed Time: {0} sec'.format(time.time() - start))
Xtrain.shape, Xtrain, Ytrain.shape, Ytrain

Elapsed Time: 34.18354845046997 sec


((1491698, 2, 20),
 array([[[0.59259259, 0.62962963, 0.48148148, ..., 0.18518519,
          0.11111111, 0.25925926],
         [1.        , 0.84474123, 0.7256464 , ..., 0.18599441,
          0.08090292, 0.        ]],
 
        [[0.62962963, 0.48148148, 0.62962963, ..., 0.11111111,
          0.25925926, 0.11111111],
         [0.90353672, 0.78677601, 0.69441625, ..., 0.15466876,
          0.07535145, 0.        ]],
 
        [[0.48148148, 0.62962963, 1.        , ..., 0.25925926,
          0.11111111, 0.51851852],
         [0.78677601, 0.69441625, 0.80158504, ..., 0.07535145,
          0.        , 0.06763796]],
 
        ...,
 
        [[0.18181818, 0.40909091, 0.63636364, ..., 0.59090909,
          0.68181818, 0.5       ],
         [0.55534469, 0.57311044, 0.51306721, ..., 0.90346236,
          0.90382222, 0.88493392]],
 
        [[0.40909091, 0.63636364, 0.63636364, ..., 0.68181818,
          0.5       , 0.40909091],
         [0.57311044, 0.51306721, 0.35987529, ..., 0.90382222,
         

In [32]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import Flatten
from keras.layers import Dropout
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

# CNNで訓練
model = Sequential()
model.add(Conv1D(32, 3, activation='relu', padding='valid', input_shape=(20, 2))) # 入力は [Close[20], ATR(20)[20]]
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(64, 3, activation='relu', padding='valid'))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax')) # 出力は 損益の4パターン
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc'])

early_stopping = EarlyStopping(monitor='val_acc', mode='auto', patience=8)
model_checkpoint = ModelCheckpoint(filepath="18.Buyer.h5")

# 訓練実行
model.fit(Xtrain.reshape(Xtrain.shape[0], 20, 2), Ytrain,
          batch_size=2048, # 訓練データが多い場合は、ミニバッチサイズを大きくしないとオーバーフローが起きる
          epochs=256,
          shuffle=True,
          validation_split=0.1, # 訓練データのうち10％を検証データとして仕様
          callbacks=[early_stopping, model_checkpoint]
         )

Train on 1342528 samples, validate on 149170 samples
Epoch 1/256
Epoch 2/256
Epoch 3/256
Epoch 4/256
Epoch 5/256
Epoch 6/256
Epoch 7/256
Epoch 8/256
Epoch 9/256


<keras.callbacks.History at 0x28f85fce518>

In [33]:
Xtest, Ytest = gen_xy(test.loc[:, 'Close'].values, test.loc[:, 'ATR'].values, test.loc[:, 'Buy_cat'].values)
model.evaluate(Xtest.reshape(Xtest.shape[0], 20, 2), Ytest)



[1.214872032103936, 0.48251799618040253]

In [35]:
''' テクニカル指標を追加 '''
import talib

rsi2 = talib.RSI(df['Close'].values, timeperiod=2)
len(df.index), len(rsi2), rsi2

(1777632,
 1777632,
 array([        nan,         nan, 20.        , ..., 50.10694406,
        55.51786675, 29.72679789]))

In [36]:
ema3  = talib.EMA(df['Close'].values, timeperiod=3)
ema9  = talib.EMA(df['Close'].values, timeperiod=9)
ema27 = talib.EMA(df['Close'].values, timeperiod=27)
ema3, ema9, ema27

(array([         nan,          nan, 105.35333333, ..., 113.71796483,
        113.71898242, 113.71549121]),
 array([         nan,          nan,          nan, ..., 113.70911665,
        113.71129332, 113.71143465]),
 array([         nan,          nan,          nan, ..., 113.69566548,
        113.69740366, 113.69844625]))

In [37]:
df['RSI'], df['EMA3'], df['EMA9'], df['EMA27'] = rsi2, ema3, ema9, ema27
csv = df.dropna(how='any')
del csv['Market']
csv.to_csv('USDJPY_Technical.csv')
csv.head()

Unnamed: 0_level_0,Open,High,Low,Close,ATR,Buy,Sell,Buy_cat,Sell_cat,RSI,EMA3,EMA9,EMA27
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2014-01-02 07:50:00,105.347,105.369,105.347,105.357,0.011706,-0.027117,0.014117,-1,1,74.059924,105.353628,105.349644,105.347815
2014-01-02 07:51:00,105.356,105.367,105.356,105.357,0.01167,-0.027011,0.014011,-1,1,74.059924,105.355314,105.351115,105.348471
2014-01-02 07:52:00,105.358,105.364,105.357,105.358,0.011437,-0.02731,0.01231,-1,1,79.819138,105.356657,105.352492,105.349152
2014-01-02 07:53:00,105.36,105.36,105.358,105.358,0.010965,-0.025895,0.010895,-1,1,79.819138,105.357328,105.353594,105.349784
2014-01-02 07:54:00,105.353,105.358,105.353,105.358,0.010667,-0.025,0.01,-1,1,79.819138,105.357664,105.354475,105.35037


In [40]:
'''
特徴量: (10, 5)
    [ATR(20)[0:10], RSI(2)[0:10], EMA(3)[0:10], EMA(9)[0:30:3], EMA(27)[0:90:9]]
目的変数: TrailingStop損益結果の4パターン
'''
@jit
def gen_xy(atr, rsi, ema3, ema9, ema27, y, window_len=10):
    X, Y = [], []
    for i in range(len(atr) - window_len * 9):
        # 特徴量を正規化しながら追加
        A, R, E3, E9, E27 = atr[i : i + window_len].copy(), rsi[i : i + window_len].copy(), ema3[i : i + window_len].copy(), ema9[i : i + window_len * 3 : 3].copy(), ema27[i : i + window_len * 9 : 9].copy()
        A, R, E3, E9, E27 = A - np.min(A), R - np.min(R), E3 - np.min(E3), E9 - np.min(E9), E27 - np.min(E27)
        A, R, E3, E9, E27 = A / np.max(A), R / np.max(R), E3 / np.max(E3), E9 / np.max(E9), E27 / np.max(E27)
        X.append([A, R, E3, E9, E27])
        # 目的変数を追加
        Y.append(y[i + window_len - 1])
    X, Y = np.array(X), np.array(Y)
    # 目的変数を正規化
    Y = np.where(Y > 0, Y + 1, Y + 2)
    Y = to_categorical(Y.astype('int32'))
    return X.reshape(X.shape[0], window_len, 5), Y

# 学習データとテストデートに分ける
train, test = csv[csv.index < '2018'], csv[csv.index >= '2018']
# 学習データ準備
start = time.time()
Xtrain, Ytrain = gen_xy(train.loc[:, 'ATR'].values, train.loc[:, 'RSI'].values, train.loc[:, 'EMA3'].values, train.loc[:, 'EMA9'].values, train.loc[:, 'EMA27'].values, train.loc[:, 'Buy_cat'].values)
print('Elapsed Time: {0} sec'.format(time.time() - start))
Xtrain.shape, Xtrain, Ytrain.shape, Ytrain



Elapsed Time: 79.20339560508728 sec


((1491602, 10, 5),
 array([[[1.        , 0.97123335, 0.78083393, 0.39611563, 0.15293655],
         [0.28882639, 0.37715397, 0.0941552 , 0.19221633, 0.        ],
         [0.64764955, 0.64764955, 0.73068753, 0.73068753, 0.73068753],
         ...,
         [0.55176272, 0.52352679, 0.61730889, 1.        , 0.80584486],
         [0.76866714, 0.8720357 , 0.91163094, 0.99257532, 1.        ],
         [0.77688939, 0.47359473, 0.36936674, 0.40242755, 0.        ]],
 
        [[1.        , 0.8078264 , 0.41952319, 0.17407811, 0.3112342 ],
         [0.40038484, 0.11474902, 0.21372391, 0.01971645, 0.        ],
         [0.68597663, 0.75998212, 0.75998212, 0.75998212, 1.        ],
         ...,
         [0.55481917, 0.59173521, 0.65422986, 1.        , 0.79280194],
         [0.79106804, 0.88429934, 0.93127807, 0.99922089, 1.        ],
         [0.75655367, 0.44946135, 0.39747069, 0.38324605, 0.        ]],
 
        [[0.68777439, 0.35717737, 0.14820816, 0.26498133, 0.34088319],
         [0.09769603, 0.

In [41]:
# CNNで訓練
model = Sequential()
model.add(Conv1D(64, 3, activation='relu', padding='valid', input_shape=(10, 5)))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(128, 3, activation='relu', padding='valid'))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4, activation='softmax')) # 出力は 損益の4パターン
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc'])

early_stopping = EarlyStopping(monitor='val_acc', mode='auto', patience=8)
model_checkpoint = ModelCheckpoint(filepath="18.Buyer.h5")

# 訓練実行
model.fit(Xtrain, Ytrain,
          batch_size=2048, # 訓練データが多い場合は、ミニバッチサイズを大きくしないとオーバーフローが起きる
          epochs=256,
          shuffle=True,
          validation_split=0.1, # 訓練データのうち10％を検証データとして仕様
          callbacks=[early_stopping, model_checkpoint]
         )

Train on 1342441 samples, validate on 149161 samples
Epoch 1/256
Epoch 2/256
Epoch 3/256
Epoch 4/256
Epoch 5/256
Epoch 6/256
Epoch 7/256
Epoch 8/256
Epoch 9/256


<keras.callbacks.History at 0x28f9deb7ac8>

In [42]:
Xtest, Ytest = gen_xy(test.loc[:, 'ATR'].values, test.loc[:, 'RSI'].values, test.loc[:, 'EMA3'], test.loc[:, 'EMA9'], test.loc[:, 'EMA27'], test.loc[:, 'Buy_cat'].values)
model.evaluate(Xtest, Ytest)



[1.2152984628242811, 0.4825556986117331]

In [46]:
''' 目的変数を「勝てる・負ける」の2パターンにして学習 '''

Y = np.argmax(Ytrain, axis=1) # 目的変数を0～3のカテゴリに戻す
Y = np.where(Y > 1, 1, 0) # 売買結果を「勝てる・負ける」の2パターンに変換
Y = to_categorical(Y.astype('int32'))

model = Sequential()
model.add(Conv1D(64, 3, activation='relu', padding='valid', input_shape=(10, 5)))
model.add(MaxPooling1D(pool_size=2))
model.add(Conv1D(128, 3, activation='relu', padding='valid'))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax')) # 出力は 損益の2パターン
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['acc'])

early_stopping = EarlyStopping(monitor='val_acc', mode='auto', patience=8)
model_checkpoint = ModelCheckpoint(filepath="18.Buyer2.h5")

# 訓練実行
model.fit(Xtrain, Y,
          batch_size=2048, # 訓練データが多い場合は、ミニバッチサイズを大きくしないとオーバーフローが起きる
          epochs=256,
          shuffle=True,
          validation_split=0.1, # 訓練データのうち10％を検証データとして仕様
          callbacks=[early_stopping, model_checkpoint]
         )

Train on 1342441 samples, validate on 149161 samples
Epoch 1/256
Epoch 2/256
Epoch 3/256
Epoch 4/256
Epoch 5/256
Epoch 6/256
Epoch 7/256
Epoch 8/256
Epoch 9/256


<keras.callbacks.History at 0x28f8b30fe10>

特徴量にテクニカル指標を追加してみてもあまり変わらなかった。

基本的に20期間以上の終値データがあれば大体予測できるが、それ以上の特徴量があっても精度は上がらないと思われる。

したがって、プライスアクショントレードの精度は、48％（4パターン予測）が限界といえる。また、「勝てる・負ける」の2パターンの予測精度は62％が限界といえる。