In [2]:
import numpy as np
import pandas as pd
import pybithumb
import os
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import MinMaxScaler
warnings.filterwarnings("ignore")
pd.set_option('display.max_rows', 500)

# Module-level scaler instance. min_max_scaler() builds its own local
# MinMaxScaler, and nothing else in the visible code reads this one.
Scaler = MinMaxScaler()

# Folder holding the saved OHLCV spreadsheets; made_x() prepends it to each
# file name. NOTE(review): this name shadows the builtin dir().
dir = 'ohlcv/'
# Every entry of that folder, listed once when the module is loaded; the
# __main__ block iterates over it.
ohlcv_list = os.listdir(dir)


def min_max_scaler(price):
    """Return ``price`` rescaled by a freshly fitted ``MinMaxScaler``.

    A new scaler with default settings is created on every call, fitted on
    ``price`` and immediately applied to the same data, so no scaling state
    is shared between calls.

    Args:
        price: 2-D array-like accepted by ``MinMaxScaler``.

    Returns:
        The transformed array produced by the scaler.
    """
    return MinMaxScaler().fit_transform(price)


def low_high(Coin, input_data_length, ip_limit=None, trade_limit=None):
    """Build model input windows from live 1-minute candles of ``Coin``.

    The KRW 1-minute OHLCV frame is fetched through ``pybithumb.get_ohlcv``,
    an on-balance-volume (OBV) column is appended, the price, volume and OBV
    columns are min-max scaled separately, and the scaled rows are cut into
    overlapping windows of ``input_data_length`` consecutive rows.

    Args:
        Coin: Ticker symbol handed to ``pybithumb.get_ohlcv``.
        input_data_length: Number of rows in each window.
        ip_limit: When not ``None``, the extra ``'proxyon'`` argument is
            passed to ``pybithumb.get_ohlcv``.
            NOTE(review): presumably a locally patched pybithumb - confirm.
        trade_limit: When not ``None``, three filters are enabled and the
            coin is rejected if the close max/min ratio is below 1.07, if the
            latest row's second scaled column exceeds 0.3, or if fewer than
            100 windows are available.

    Returns:
        ``(X_test, closeprice)`` where ``X_test`` is a float32 array shaped
        ``(windows, input_data_length, features, 1)`` and ``closeprice`` is
        the last close in the fetched frame. ``(None, None)`` is returned
        when the fetch yields nothing, a filter rejects the coin, or there
        are not enough rows to form a single window.
    """
    if ip_limit is None:
        ohlcv_excel = pybithumb.get_ohlcv(Coin, 'KRW', 'minute1')
    else:
        ohlcv_excel = pybithumb.get_ohlcv(Coin, 'KRW', 'minute1', 'proxyon')

    # A failed or empty fetch cannot be processed; report it the same way as
    # every other rejection instead of crashing on attribute access.
    if ohlcv_excel is None or len(ohlcv_excel) == 0:
        return None, None

    price_gap = ohlcv_excel.close.max() / ohlcv_excel.close.min()
    if (price_gap < 1.07) and (trade_limit is not None):
        return None, None

    # On-balance volume: add the row's volume when close rose, subtract it
    # when close fell, carry the previous value when close is unchanged.
    # Plain arrays avoid a pandas .iloc lookup on every iteration.
    close = ohlcv_excel['close'].values
    volume = ohlcv_excel['volume'].values
    obv = [0] * len(ohlcv_excel)
    for m in range(1, len(obv)):
        if close[m] > close[m - 1]:
            obv[m] = obv[m - 1] + volume[m]
        elif close[m] == close[m - 1]:
            obv[m] = obv[m - 1]
        else:
            obv[m] = obv[m - 1] - volume[m]
    ohlcv_excel['OBV'] = obv

    closeprice = ohlcv_excel['close'].iloc[-1]

    # The first row is dropped before scaling. The builtin float replaces
    # np.float, an alias that was removed from NumPy.
    ohlcv_data = ohlcv_excel.values[1:].astype(float)

    # Previously this case fell through and returned a bare None, unlike the
    # other early exits.
    if len(ohlcv_data) == 0:
        return None, None

    # Price columns are scaled together; volume and OBV each on their own.
    price = ohlcv_data[:, :4]
    volume_col = ohlcv_data[:, [4]]
    obv_col = ohlcv_data[:, [-1]]

    x = np.concatenate(
        (min_max_scaler(price), min_max_scaler(volume_col), min_max_scaler(obv_col)),
        axis=1,
    )

    # Reject when the most recent row's second scaled column is above 0.3.
    if (x[-1][1] > 0.3) and (trade_limit is not None):
        return None, None

    # One window per end position, up to and including the newest row.
    dataX = [
        x[end - input_data_length:end]
        for end in range(input_data_length, len(ohlcv_data) + 1)
    ]

    if (len(dataX) < 100) and (trade_limit is not None):
        return None, None

    # Without any window the reshape below has no row/column size to use.
    if not dataX:
        return None, None

    X_test = np.array(dataX)
    row = X_test.shape[1]
    col = X_test.shape[2]

    # Trailing axis of size 1 added for the model input.
    X_test = X_test.astype('float32').reshape(-1, row, col, 1)

    return X_test, closeprice


def _count_extreme(series, use_min):
    """Count how many entries of ``series`` equal its minimum (or maximum)."""
    target = series.min() if use_min else series.max()
    return (series == target).sum()


def _label_spans(labels, target):
    """Return ``(start, end)`` x-ranges for every position labelled ``target``."""
    last = len(labels) - 1
    spans = []
    for m, value in enumerate(labels):
        if target - 0.5 < value < target + 0.5:
            # The final position has no right neighbour, so shade to its left.
            spans.append((m, m + 1) if m < last else (m - 1, m))
    return spans


def made_x(file, input_data_length, model_num, check_span, Range_fluc, get_fig):
    """Build labelled training windows from one saved OHLCV spreadsheet.

    The spreadsheet ``dir + file`` is loaded, an on-balance-volume (OBV)
    column and a ``trade_state`` label column are appended, the price, volume
    and OBV columns are min-max scaled separately, and the rows are cut into
    windows of ``input_data_length`` rows, each paired with the label of the
    row that follows the window.

    Labels compare a row's close with the closes of the next ``check_span``
    rows: 1 = entry (future max/min ratio above ``Range_fluc``), 2 = low
    point, 3 = high point, 0 = no trade. A low/high is only accepted when the
    extreme value occurs at most 3 times in the inspected span.

    Args:
        file: Spreadsheet file name inside the module-level ``dir`` folder.
            The figure name is built from its first two whitespace-separated
            parts.
        input_data_length: Number of rows per window.
        model_num: Only used in the figure output path.
        check_span: Number of future rows inspected when labelling.
        Range_fluc: Threshold on future max/min close ratio for label 1.
        get_fig: When equal to 1, a three-panel figure is saved under
            ``./Figure_data/``.

    Returns:
        ``(dataX, dataY, sliced_ohlcv)``: the list of windows, the list of
        labels, and the unscaled first six columns from row
        ``input_data_length`` onward. ``None`` when no rows remain after
        trimming or fewer than 100 windows can be built.
    """
    ohlcv_excel = pd.read_excel(dir + file, index_col=0)

    # On-balance volume: add the row's volume when close rose, subtract it
    # when close fell, carry the previous value when close is unchanged.
    close_values = ohlcv_excel['close'].values
    volume_values = ohlcv_excel['volume'].values
    obv = [0] * len(ohlcv_excel)
    for m in range(1, len(obv)):
        if close_values[m] > close_values[m - 1]:
            obv[m] = obv[m - 1] + volume_values[m]
        elif close_values[m] == close_values[m - 1]:
            obv[m] = obv[m - 1]
        else:
            obv[m] = obv[m - 1] - volume_values[m]
    ohlcv_excel['OBV'] = obv

    # Label each row from the check_span rows after it. Rows without a full
    # look-ahead keep NaN and are trimmed off below. Positional .iloc access
    # is used so the result does not depend on the index type.
    close = ohlcv_excel['close']
    trade_state = [np.nan] * len(ohlcv_excel)
    for i in range(len(ohlcv_excel) - check_span):
        current = close.iloc[i]
        ahead = close.iloc[i + 1:i + 1 + check_span]
        span = close.iloc[i:i + 1 + check_span]  # current row plus look-ahead
        ahead_min = ahead.min()
        ahead_max = ahead.max()

        if ahead_max / ahead_min > Range_fluc:
            # Entry; upgraded to "low" when the row is also a valid low point.
            state = 1
            if ahead_min >= current and _count_extreme(span, True) <= 3:
                state = 2
        elif ahead_min >= current:
            state = 2 if _count_extreme(span, True) <= 3 else 0
        elif ahead_max <= current:
            state = 3 if _count_extreme(span, False) <= 3 else 0
        else:
            state = 0
        trade_state[i] = state

    ohlcv_excel['trade_state'] = trade_state

    # Drop the first row and the unlabelled tail. The builtin float replaces
    # np.float, an alias that was removed from NumPy.
    ohlcv_data = ohlcv_excel.values[1: -check_span].astype(float)

    if len(ohlcv_data) == 0:
        return None

    # Fixed inputs: price columns scaled together, volume and OBV separately.
    price = ohlcv_data[:, :4]
    volume = ohlcv_data[:, [4]]
    OBV = ohlcv_data[:, [-2]]
    # Target column.
    labels = ohlcv_data[:, [-1]]

    scaled_price = min_max_scaler(price)
    scaled_volume = min_max_scaler(volume)
    scaled_OBV = min_max_scaler(OBV)

    x = np.concatenate((scaled_price, scaled_volume, scaled_OBV), axis=1)
    y = labels

    # Each window ends one row before its label: at entry time the current
    # row is not complete yet, so only finished rows are fed to the model.
    ends = range(input_data_length, len(ohlcv_data))
    dataX = [x[i - input_data_length: i] for i in ends]
    dataY = [y[i] for i in ends]

    if len(dataX) < 100:
        return None

    # Unscaled columns aligned with the labels.
    sliced_ohlcv = ohlcv_data[input_data_length:, :6]

    if get_fig == 1:
        flat_labels = labels.ravel()
        # NOTE(review): column index 1 is plotted under the label 'close';
        # confirm that matches the column order of the spreadsheets.
        scaled_line = min_max_scaler(ohlcv_data[:, 1:2])
        panels = (
            (311, _label_spans(flat_labels, 1), 'g'),  # entry
            (312, _label_spans(flat_labels, 2), 'm'),  # low point
            (313, _label_spans(flat_labels, 3), 'c'),  # high point
        )
        for position, spans, color in panels:
            plt.subplot(position)
            plt.plot(scaled_line, 'r', label='close')
            plt.plot(scaled_OBV, 'b', label='OBV')
            plt.legend(loc='upper right')
            for start, end in spans:
                plt.axvspan(start, end, facecolor=color, alpha=0.5)

        Date = file.split()[0]
        Coin = file.split()[1].split('.')[0]
        plt.savefig('./Figure_data/%s_%s/%s %s.png' % (input_data_length, model_num, Date, Coin), dpi=500)
        plt.close()

    return dataX, dataY, sliced_ohlcv


if __name__ == '__main__':
    # Script entry point: label every selected spreadsheet with made_x(),
    # accumulate the windows and labels, and save both as .npy files.

    # ----------- Params -----------#
    input_data_length = 54
    model_num = input('Press model number : ')
    check_span = 30
    Range_fluc = 1.035
    get_fig = 0

    # Figure folder for this (window length, model) pair. Creation stays
    # best-effort, but a failure is now reported instead of silently hidden,
    # and a missing parent folder is created as well.
    figure_dir = './Figure_data/%s_%s/' % (input_data_length, model_num)
    try:
        os.makedirs(figure_dir, exist_ok=True)
    except OSError as e:
        print('Could not create %s: %s' % (figure_dir, e))

    Made_X = []
    Made_Y = []

    for file in ohlcv_list:

        # Names look like '2019-10-27 LAMB ohlcv.xlsx'; only files whose month
        # field equals 1 are used. Entries that do not follow the pattern are
        # skipped rather than aborting the run.
        try:
            month = int(file.split()[0].split('-')[1])
        except (IndexError, ValueError):
            continue
        if month != 1:
            continue

        result = made_x(file, input_data_length, model_num, check_span, Range_fluc, get_fig)

        # Merge this file's windows and labels when it produced any.
        if result is not None:

            Made_X += result[0]
            Made_Y += result[1]

            # Show the cumulative number of windows.
            print(file, len(Made_X))

    # SAVING X, Y
    X = np.array(Made_X)
    Y = np.array(Made_Y)

    # Make sure the output folder exists so the run does not fail at the very
    # end after all files were processed.
    os.makedirs('./Made_X', exist_ok=True)
    np.save('./Made_X/Made_X %s_%s' % (input_data_length, model_num), X)
    np.save('./Made_X/Made_Y %s_%s' % (input_data_length, model_num), Y)



Press model number : 20
2020-01-16 XLM ohlcv.xlsx 1005
2020-01-23 TRX ohlcv.xlsx 1730
2020-01-16 SNT ohlcv.xlsx 1947
2020-01-11 FX ohlcv.xlsx 2265
2020-01-22 LUNA ohlcv.xlsx 3589
2020-01-16 LINK ohlcv.xlsx 4585
2020-01-13 FCT ohlcv.xlsx 4792
2020-01-20 IPX ohlcv.xlsx 6146
2020-01-20 SNT ohlcv.xlsx 6398
2020-01-21 EOS ohlcv.xlsx 7560
2020-01-24 FAB ohlcv.xlsx 7727
2020-01-22 TRV ohlcv.xlsx 7918
2020-01-22 MTL ohlcv.xlsx 8295
2020-01-22 BTG ohlcv.xlsx 9304
2020-01-13 DAC ohlcv.xlsx 10576
2020-01-25 REP ohlcv.xlsx 10890
2020-01-11 APIS ohlcv.xlsx 11120
2020-01-26 XRP ohlcv.xlsx 12188
2020-01-13 APIS ohlcv.xlsx 12349
2020-01-16 ETH ohlcv.xlsx 13676
2020-01-22 STRAT ohlcv.xlsx 13821
2020-01-12 BCH ohlcv.xlsx 14959
2020-01-22 ZRX ohlcv.xlsx 15080
2020-01-11 ETH ohlcv.xlsx 16148
2020-01-26 TMTG ohlcv.xlsx 17328
2020-01-21 MXC ohlcv.xlsx 17541
2020-01-16 POWR ohlcv.xlsx 17710
2020-01-16 BCD ohlcv.xlsx 18890
2020-01-12 BCD ohlcv.xlsx 19596
2020-01-23 APIS ohlcv.xlsx 19735
2020-01-20 BSV ohlcv.x

2020-01-13 FX ohlcv.xlsx 152812
2020-01-25 FCT ohlcv.xlsx 153518
2020-01-15 ADA ohlcv.xlsx 153953
2020-01-15 XMR ohlcv.xlsx 154134
2020-01-11 SOC ohlcv.xlsx 155170
2020-01-19 XVG ohlcv.xlsx 155304
2020-01-12 STEEM ohlcv.xlsx 155499
2020-01-22 FAB ohlcv.xlsx 155972
2020-01-20 OMG ohlcv.xlsx 156148
2020-01-12 REP ohlcv.xlsx 156457
2020-01-15 OGO ohlcv.xlsx 156791
2020-01-14 LAMB ohlcv.xlsx 156904
2020-01-13 MIX ohlcv.xlsx 157067
2020-01-15 WAVES ohlcv.xlsx 157202
2020-01-25 DAC ohlcv.xlsx 158553
2020-01-12 HC ohlcv.xlsx 158660
2020-01-13 ADA ohlcv.xlsx 158927
2020-01-16 LAMB ohlcv.xlsx 159381
2020-01-12 FCT ohlcv.xlsx 159801
2020-01-15 ARN ohlcv.xlsx 160332
2020-01-24 BCH ohlcv.xlsx 161252
2020-01-13 ETC ohlcv.xlsx 161772
2020-01-25 LUNA ohlcv.xlsx 163052
2020-01-15 LINK ohlcv.xlsx 163715
2020-01-12 BTC ohlcv.xlsx 165062
2020-01-16 DAD ohlcv.xlsx 165605
2020-01-24 INS ohlcv.xlsx 165753
2020-01-16 RNT ohlcv.xlsx 165887
2020-01-25 LTC ohlcv.xlsx 166468
2020-01-25 APIS ohlcv.xlsx 166644
202

2020-01-22 XRP ohlcv.xlsx 294227
2020-01-20 IOST ohlcv.xlsx 294366
2020-01-25 STRAT ohlcv.xlsx 294530
2020-01-25 ADA ohlcv.xlsx 294926
2020-01-26 XSR ohlcv.xlsx 295615
2020-01-22 ADA ohlcv.xlsx 296299
2020-01-19 BTT ohlcv.xlsx 296744
2020-01-16 OCN ohlcv.xlsx 297020
2020-01-20 BTG ohlcv.xlsx 298098
2020-01-13 PLY ohlcv.xlsx 299000
2020-01-24 BSV ohlcv.xlsx 300253
2020-01-20 INS ohlcv.xlsx 300791
2020-01-20 XMR ohlcv.xlsx 300896
2020-01-19 WAVES ohlcv.xlsx 301115
2020-01-26 XLM ohlcv.xlsx 301371
2020-01-11 PLY ohlcv.xlsx 301788
2020-01-14 CON ohlcv.xlsx 302661
2020-01-23 QTUM ohlcv.xlsx 303290
2020-01-16 DAC ohlcv.xlsx 304530
2020-01-23 XSR ohlcv.xlsx 305475
2020-01-15 EOS ohlcv.xlsx 306578
2020-01-24 DAD ohlcv.xlsx 307063
2020-01-20 KNC ohlcv.xlsx 307178
2020-01-20 APIS ohlcv.xlsx 307340
2020-01-16 XEM ohlcv.xlsx 307546
2020-01-12 OGO ohlcv.xlsx 307742
2020-01-26 TRX ohlcv.xlsx 308130
2020-01-19 APIS ohlcv.xlsx 308287
2020-01-12 FX ohlcv.xlsx 308397
2020-01-21 SOC ohlcv.xlsx 309698
202

2020-01-12 WAVES ohlcv.xlsx 423780
2020-01-22 HC ohlcv.xlsx 423890
2020-01-16 IPX ohlcv.xlsx 424902
2020-01-22 MBL ohlcv.xlsx 426244
2020-01-20 ETZ ohlcv.xlsx 426352
2020-01-15 WAXP ohlcv.xlsx 426584
2020-01-21 BCH ohlcv.xlsx 427690
2020-01-16 AE ohlcv.xlsx 428214
2020-01-25 AMO ohlcv.xlsx 428832
2020-01-26 WPX ohlcv.xlsx 429170
2020-01-11 QTUM ohlcv.xlsx 429611
2020-01-19 PIVX ohlcv.xlsx 429715
2020-01-16 THETA ohlcv.xlsx 429848
2020-01-23 BTG ohlcv.xlsx 430764
2020-01-16 ETC ohlcv.xlsx 431951
2020-01-12 PAY ohlcv.xlsx 432085
2020-01-16 ZIL ohlcv.xlsx 432467
2020-01-21 TRX ohlcv.xlsx 433286
2020-01-14 QTUM ohlcv.xlsx 434002
2020-01-24 MTL ohlcv.xlsx 434377
2020-01-12 RDN ohlcv.xlsx 434583
2020-01-14 BHP ohlcv.xlsx 435519
2020-01-20 FAB ohlcv.xlsx 436078
2020-01-13 CON ohlcv.xlsx 437050
2020-01-11 FNB ohlcv.xlsx 437615
2020-01-19 ELF ohlcv.xlsx 437814
2020-01-22 BTT ohlcv.xlsx 438000
2020-01-13 RDN ohlcv.xlsx 438173
2020-01-23 DASH ohlcv.xlsx 438353
2020-01-14 FCT ohlcv.xlsx 438543
202