In [16]:
import numpy as np
import pandas as pd
import talib
import quandl
from copy import deepcopy
import optuna
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss
import matplotlib.pyplot as plt
%matplotlib inline

In [341]:
# BTC/USDの日足を取得
df = quandl.get('BCHAIN/MKPRU')
date = df.index
v = np.array(df['Value'])
v = v[v > 0]
train, test = v[:-300], v[-300:]

def getTrain(data):
    return data[:-300]

def getTest(data):
    return data[-300:]

In [121]:
# 分足を取得
df = {}
for freq in ['15m', '1h', '2h', '4h', '8h', '12h', '1d']:
    df[freq] = pd.read_csv('bitcoincharts-ohlcv/scripts/bitflyerJPY_{0}.csv'.format(freq), names=('DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME'), 
                dtype={'OPEN': np.float, 'HIGH': np.float, 'LOW': np.float, 'CLOSE': np.float, 'VOLUME': np.float}, 
                parse_dates=[0])

freq = '15m'
v = np.array(df[freq]['CLOSE'])
v_all = np.matrix([df[freq]['OPEN'], df[freq]['HIGH'], df[freq]['LOW'], df[freq]['CLOSE'], df[freq]['VOLUME']]).T

def split_train_test(data, test_split=0.1):
    index = int(len(data) * 0.1)
    return data[:-index], data[-index:]

In [113]:
# heyhey logic

# EMA
_length = 7
_, ema = split_train_test(talib.EMA(v, timeperiod=_length))
_, test = split_train_test(v)

# simulation
profit = 0.0  # 利益
position = 0  # -1 : 売りポジ, 0 : ポジなし, 1 : 買いポジ
p_price = 0.0  # ポジションの額
for i in range(2, len(test)):
    if ema[i-2] >= ema[i-1] and ema[i-1] < ema[i]:  # buy
        if position < 0:  # 売りポジ持ち
            profit += (p_price - test[i]) / test[i]
        print ('{0} : BUY, profit={1}'.format(i, profit))
        p_price = test[i]
        position = +1
    elif ema[i-2] <= ema[i-1] and ema[i-1] > ema[i]:  # sell
        if position > 0:  # 買いポジ持ち
            profit += (test[i] - p_price) / p_price
        print ('{0} : SELL, profit={1}'.format(i, profit))
        p_price = test[i]
        position = -1

2 : SELL, profit=0.0
15 : BUY, profit=-0.0026005629258887904
20 : SELL, profit=-0.005480102862414615
21 : BUY, profit=-0.007291248737080556
23 : SELL, profit=-0.010193229798854456
35 : BUY, profit=-0.00679849442125971
36 : SELL, profit=-0.012168854485815332
42 : BUY, profit=-0.015518903209994654
43 : SELL, profit=-0.017764816025081166
59 : BUY, profit=0.12902495392539015
61 : SELL, profit=0.12921563369074188
62 : BUY, profit=0.12542201788172178
67 : SELL, profit=0.12652802366870156
68 : BUY, profit=0.12232115361177387
69 : SELL, profit=0.11635057312720594
79 : BUY, profit=0.12379505725840391
80 : SELL, profit=0.11730466413429486
81 : BUY, profit=0.11190723383298262
84 : SELL, profit=0.11327793308288316
85 : BUY, profit=0.11078457561821374
92 : SELL, profit=0.1119799110822927
93 : BUY, profit=0.10589854049618429
97 : SELL, profit=0.10443155717774379
98 : BUY, profit=0.10230972619519119
100 : SELL, profit=0.09413916472974297
118 : BUY, profit=0.2824048366903209
120 : SELL, profit=0.26245

In [122]:
# DNN (train)
from tensorflow.keras.layers import Input, Dense, LSTM, BatchNormalization, Conv1D
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras import backend as K

# optunaでハイパーパラメタチューニングしてもよい
input_length = 7
batch_size = 32

def create_model():
    inputs = Input(shape=(input_length, 1))
    x = LSTM(100, activation='relu')(inputs)
#     x = BatchNormalization()(x)
#     x = LSTM(100, activation='relu')(x)
    outputs = Dense(4, activation='softmax')(x)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

def create_data(data):
    x = []
    y = []
    label = np.eye(4)
    for i in range(input_length, len(data)):
        base = data[i-1]
        x.append((data[i-input_length:i] - base) / base)
        if (data[i] - base) / base >= 0.005:
            y.append(label[0])
        elif (data[i] - base) / base >= 0:
            y.append(label[1])
        elif (data[i] - base) / base >= -0.005:
            y.append(label[2])
        else:
            y.append(label[3])
    x = np.asarray(x).reshape((-1, input_length, 1))  # for LSTM
    y = np.asarray(y).reshape((-1, 4))
    return x, y

def sign_accuracy(y_true, y_pred):
    return K.equal(K.sign(K.get_value(K.argmax(y_true) - 1.5)), K.sign(K.get_value(K.argmax(y_pred) - 1.5)))
    

# optimize
model = create_model()
train, _ = split_train_test(v)
x, y = create_data(train)
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.25, shuffle=True)
es_cb = EarlyStopping(monitor='val_loss', patience=0, verbose=0, mode='auto')
tb_cb = TensorBoard(log_dir="tflog/", histogram_freq=1)
model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=100, batch_size=batch_size, callbacks=[es_cb])
# model.save('model.h5')

Train on 35584 samples, validate on 11862 samples
Epoch 1/100




Epoch 2/100




Epoch 3/100


<tensorflow.python.keras.callbacks.History at 0x1577abf28>

In [123]:
# DNN (test)
_, test = split_train_test(v)
x_test, y_test = create_data(test)
pred = model.predict(x_test)
actual = test[input_length-1:-1]

# simulation
profit = 0.0  # 利益
position = 0  # -1 : 売りポジ, 0 : ポジなし, 1 : 買いポジ
p_price = 0.0  # ポジションの額
for i in range(len(pred)):
    if position < 0 and np.argmax(pred[i]) <= 1:  # 売りポジclose
        profit += (p_price - actual[i]) / actual[i]
        position = 0
        print ('{0} : CLOSE, profit={1}'.format(i, profit))
    if position > 0 and np.argmax(pred[i]) >= 2:  # 買いポジclose
        profit += (actual[i] - p_price) / p_price
        position = 0
        print ('{0} : CLOSE, profit={1}'.format(i, profit))
    if position == 0 and np.argmax(pred[i]) == 3:  # 売り
        p_price = actual[i]
        position = -1        
        print ('{0} : SELL'.format(i))
    if position == 0 and np.argmax(pred[i]) == 0:  # 買い
        p_price = actual[i]
        position = +1
        print ('{0} : BUY'.format(i))

382 : BUY
390 : CLOSE, profit=-0.02506865671641791
394 : BUY
395 : CLOSE, profit=-0.027426367823277092
395 : SELL
398 : CLOSE, profit=0.002871713826239769
398 : BUY
401 : CLOSE, profit=0.031706577604901934
454 : BUY
455 : CLOSE, profit=0.034176873311447004
455 : SELL
456 : CLOSE, profit=0.03554253374742277
456 : BUY
459 : CLOSE, profit=0.040950160734426345
472 : BUY
473 : CLOSE, profit=0.08676360611257761
830 : BUY
837 : CLOSE, profit=0.09880911323805319
851 : SELL
852 : CLOSE, profit=0.0984636906912873
873 : SELL
874 : CLOSE, profit=0.10055602563182905
874 : BUY
876 : CLOSE, profit=0.11440390761905728
883 : BUY
889 : CLOSE, profit=0.1231335372486869
912 : SELL
913 : CLOSE, profit=0.14097254010308186
913 : BUY
925 : CLOSE, profit=0.10982718998585467
925 : SELL
926 : CLOSE, profit=0.12948369405089533
926 : BUY
929 : CLOSE, profit=0.14492068592081403
929 : SELL
936 : CLOSE, profit=0.15001865176404053
952 : SELL
959 : CLOSE, profit=0.14624936838384447
962 : BUY
965 : CLOSE, profit=0.14244

In [119]:
# DNN (train all features)
from tensorflow.keras.layers import Input, Dense, LSTM, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras import backend as K
from sklearn.preprocessing import MinMaxScaler

# optunaでハイパーパラメタチューニングしてもよい
input_length = 7
batch_size = 64

def create_model():
    inputs = Input(shape=(input_length, v_all.shape[1]))
    x = BatchNormalization()(inputs)
    x = LSTM(500, activation='relu')(x)
    outputs = Dense(1, activation='linear')(x)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='mse', optimizer='adam', metrics=[sign_accuracy])
    return model

def create_data(data):
    x = []
    y = []
    for i in range(input_length, len(data)):
        x_tmp = deepcopy(data[i-input_length:i])
        x_tmp[:,:-1] = (x_tmp[:,:-1] - x_tmp[-1, 3]) / x_tmp[-1, 3]  # ohlc 騰落率
        x_tmp[:,-1] = (x_tmp[:,-1] - np.min(x_tmp[:,-1])) / (np.max(x_tmp[:,-1]) - np.min(x_tmp[:,-1]) + 0.001)  # volume 0-1scaling
        x.append(x_tmp)
        y.append((data[i,3] - data[i-1,3]) / data[i-1,3])
    x = np.asarray(x).reshape((-1, input_length, data.shape[1]))  # for LSTM
    y = np.asarray(y).reshape((-1, 1))
    return x, y

def sign_accuracy(y_true, y_pred):
    return K.equal(K.sign(y_true), K.sign(y_pred))
    

# optimize
model = create_model()
train, _ = split_train_test(v_all)
x, y = create_data(train)
x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.25, shuffle=True)
es_cb = EarlyStopping(monitor='val_loss', patience=0, verbose=0, mode='auto')
tb_cb = TensorBoard(log_dir="tflog/", histogram_freq=1)
model.fit(x_train, y_train, validation_data=(x_val, y_val), epochs=100, batch_size=batch_size, callbacks=[es_cb])
# model.save('model.h5')

Train on 4444 samples, validate on 1482 samples
Epoch 1/100


KeyboardInterrupt: 