In [1]:
import numpy as np
import pandas as pd
import pybithumb
import os
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import MinMaxScaler
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")
# Let pandas print up to 500 rows before it truncates DataFrame output.
pd.set_option('display.max_rows', 500)

# Module-level scaler instance.
# NOTE(review): min_max_scaler() below builds its own local MinMaxScaler, so
# this object looks unused by the code visible here - confirm before removing.
Scaler = MinMaxScaler()

# Folder with the saved OHLCV spreadsheets, located under the user's home.
home_dir = os.path.expanduser('~')
# NOTE(review): `dir` shadows the builtin of the same name. made_x() reads it
# as a global and appends the file name directly, so the trailing slash matters.
dir = home_dir + '/OneDrive/CoinBot/ohlcv/'
# Every entry of that folder, listed once when this code runs.
ohlcv_list = os.listdir(dir)


def min_max_scaler(price):
    """Fit a brand-new ``MinMaxScaler`` on ``price`` and return the scaled data.

    A separate scaler is created on every call, so the result depends only on
    the array passed in; nothing is remembered between calls.

    :param price: 2-D array-like accepted by ``MinMaxScaler.fit``.
    :return: whatever ``MinMaxScaler.transform`` returns for the same input.
    """
    fitted = MinMaxScaler().fit(price)
    return fitted.transform(price)


def low_high(Coin, input_data_length, trade_limit=0):
    """Fetch 1-minute KRW candles for ``Coin`` and build model input windows.

    The fetched frame gets an On-Balance-Volume (OBV) column; price, volume and
    OBV are min-max scaled over the whole fetched history, and the scaled rows
    are cut into overlapping windows of ``input_data_length`` rows.

    When ``trade_limit == 1`` three extra filters reject the coin:
      * max(close) / min(close) is below 1.07,
      * the second scaled price column of the latest row is above 0.3,
      * fewer than 100 windows could be built.

    :param Coin: ticker passed to ``pybithumb.get_ohlcv``.
    :param input_data_length: number of rows per window.
    :param trade_limit: ``1`` enables the filters above; any other value
        disables them.
    :return: ``(X_test, closeprice)`` where ``X_test`` is a float32 array of
        shape ``(n_windows, input_data_length, 6, 1)`` and ``closeprice`` is
        the last raw close. ``(None, None)`` whenever no usable data is
        available or a filter rejects the coin.
    """
    ohlcv_excel = pybithumb.get_ohlcv(Coin, 'KRW', 'minute1')

    # Nothing to work with: report it the same way as every other rejection
    # instead of crashing on attribute access / iloc[-1] below.
    if ohlcv_excel is None or len(ohlcv_excel) == 0:
        return None, None

    price_gap = ohlcv_excel.close.max() / ohlcv_excel.close.min()
    if (price_gap < 1.07) and (trade_limit == 1):
        return None, None

    # On-Balance Volume: add the candle's volume when the close rose, subtract
    # it when the close fell, carry the previous value when it is unchanged.
    # Plain arrays are used so the loop does not pay for .iloc on every step.
    close = ohlcv_excel['close'].values
    volume = ohlcv_excel['volume'].values
    obv = [0] * len(ohlcv_excel)
    for m in range(1, len(obv)):
        if close[m] > close[m - 1]:
            obv[m] = obv[m - 1] + volume[m]
        elif close[m] == close[m - 1]:
            obv[m] = obv[m - 1]
        else:
            obv[m] = obv[m - 1] - volume[m]
    ohlcv_excel['OBV'] = obv

    closeprice = ohlcv_excel['close'].iloc[-1]

    # ----------- build dataX -----------#
    # The first row is dropped (its OBV is only the zero seed value).
    # `float` replaces `np.float`, an alias that was removed in NumPy 1.24.
    ohlcv_data = ohlcv_excel.values[1:].astype(float)

    # No rows left: keep the return shape consistent with the other exits so
    # callers can always unpack two values.
    if len(ohlcv_data) == 0:
        return None, None

    #          Pre-processing         #
    # Assumes the fetched frame is laid out as four price columns followed by
    # volume, with OBV appended last - TODO confirm against pybithumb.
    price = ohlcv_data[:, :4]
    volume_col = ohlcv_data[:, [4]]
    obv_col = ohlcv_data[:, [-1]]

    scaled_price = min_max_scaler(price)
    scaled_volume = min_max_scaler(volume_col)
    scaled_OBV = min_max_scaler(obv_col)

    # Join the feature groups column-wise -> (rows, 6).
    x = np.concatenate((scaled_price, scaled_volume, scaled_OBV), axis=1)

    if (x[-1][1] > 0.3) and (trade_limit == 1):
        return None, None

    # One window per end position, up to and including the latest row.
    dataX = [
        x[i - input_data_length:i]
        for i in range(input_data_length, len(ohlcv_data) + 1)
    ]

    # Without a single window there is nothing to reshape (this used to raise
    # IndexError on X_test.shape[1] when trade_limit was off).
    if not dataX:
        return None, None

    if (len(dataX) < 100) and (trade_limit == 1):
        return None, None

    X_test = np.array(dataX)
    row = X_test.shape[1]
    col = X_test.shape[2]

    # Trailing axis of size 1 is added by the reshape.
    X_test = X_test.astype('float32').reshape(-1, row, col, 1)

    return X_test, closeprice


def _flag_spans(flags):
    """Return a one-unit-wide (start, end) x-span for every flag above 0.5."""
    values = np.ravel(flags)
    last = len(values) - 1
    # The final row has no right neighbour, so its span is drawn to the left.
    return [
        (m, m + 1) if m < last else (m - 1, m)
        for m, value in enumerate(values)
        if value > 0.5
    ]


def made_x(file, input_data_length, model_num, check_span, get_fig):
    """Turn one saved OHLCV spreadsheet into training windows and labels.

    Steps:
      1. Read ``dir + file`` and add an On-Balance-Volume (OBV) column.
      2. Label every row that has ``check_span`` rows after it: ``low_check``
         is 1 when no later close inside the span is below the current close
         and the lowest close of the window (current row included) occurs at
         most 3 times; ``high_check`` is the mirror image for the highest
         close.
      3. Min-max scale price, volume and OBV and cut them into windows of
         ``input_data_length`` rows. The window for target row ``i`` covers
         rows ``i - input_data_length .. i - 1`` (it excludes row ``i``).
      4. Optionally save a figure marking the labelled rows.

    :param file: spreadsheet name inside the module-level ``dir`` folder; for
        the figure it is expected to look like ``'<date> <coin> ...'``.
    :param input_data_length: number of rows per window.
    :param model_num: only used in the figure output path.
    :param check_span: look-ahead length in rows. A value of 0 leaves no rows
        after slicing, so the function returns ``None``.
    :param get_fig: ``1`` saves the figure under ``./Figure_data/``.
    :return: ``(dataX, dataY_low, dataY_high, sliced_ohlcv)`` - three lists of
        equal length plus the raw rows aligned with the targets - or ``None``
        when there is no data or fewer than 100 windows.
    """
    ohlcv_excel = pd.read_excel(dir + file, index_col=0)
    n_rows = len(ohlcv_excel)

    # On-Balance Volume: add the row's volume when the close rose, subtract it
    # when the close fell, carry the previous value when it is unchanged.
    close_values = ohlcv_excel['close'].values
    volume_values = ohlcv_excel['volume'].values
    obv = [0] * n_rows
    for m in range(1, n_rows):
        if close_values[m] > close_values[m - 1]:
            obv[m] = obv[m - 1] + volume_values[m]
        elif close_values[m] == close_values[m - 1]:
            obv[m] = obv[m - 1]
        else:
            obv[m] = obv[m - 1] - volume_values[m]
    ohlcv_excel['OBV'] = obv

    # Compare each point with the following check_span rows to decide whether
    # it is a local low or a local high. Up to 3 occurrences of the extreme
    # value are tolerated inside the window.
    # `np.nan` replaces `np.NaN` (removed in NumPy 2.0); `.iloc` replaces plain
    # `[]`, which only worked positionally through a deprecated fallback.
    close = ohlcv_excel['close']
    list_low_check = [np.nan] * n_rows
    list_high_check = [np.nan] * n_rows
    for i in range(n_rows - check_span):
        current = close.iloc[i]
        future = close.iloc[i + 1:i + 1 + check_span]
        window = close.iloc[i:i + 1 + check_span]

        if future.min() >= current:
            # Sorted by price, so the first count belongs to the window minimum.
            lowest_count = window.value_counts().sort_index().iloc[0]
            list_low_check[i] = 1 if lowest_count <= 3 else 0
        else:
            list_low_check[i] = 0

        if future.max() <= current:
            # Sorted by price, so the last count belongs to the window maximum.
            highest_count = window.value_counts().sort_index().iloc[-1]
            list_high_check[i] = 1 if highest_count <= 3 else 0
        else:
            list_high_check[i] = 0

    ohlcv_excel['low_check'] = list_low_check
    ohlcv_excel['high_check'] = list_high_check

    # ----------- build dataX / dataY -----------#
    # Drop the first row (OBV seed) and the last check_span rows, whose labels
    # are still NaN. `float` replaces `np.float` (removed in NumPy 1.24).
    ohlcv_data = ohlcv_excel.values[1: -check_span].astype(float)

    # No usable rows.
    if len(ohlcv_data) == 0:
        return None

    #          Pre-processing         #
    # Assumes the sheet holds four price columns followed by volume; the three
    # columns appended above (OBV, low_check, high_check) are addressed from
    # the end - TODO confirm the sheet layout.
    price = ohlcv_data[:, :4]
    volume = ohlcv_data[:, [4]]
    OBV = ohlcv_data[:, [-3]]

    low_check = ohlcv_data[:, [-2]]
    high_check = ohlcv_data[:, [-1]]

    scaled_price = min_max_scaler(price)
    scaled_volume = min_max_scaler(volume)
    scaled_OBV = min_max_scaler(OBV)

    # Join the feature groups column-wise -> (rows, 6).
    x = np.concatenate((scaled_price, scaled_volume, scaled_OBV), axis=1)

    # Each sample uses only rows strictly before its target row, because the
    # target candle is not complete yet at decision time.
    targets = range(input_data_length, len(ohlcv_data))
    dataX = [x[i - input_data_length:i] for i in targets]
    dataY_low = [low_check[i] for i in targets]
    dataY_high = [high_check[i] for i in targets]

    if len(dataX) < 100:
        return None

    # Raw (unscaled) rows aligned with the targets, first six columns only.
    sliced_ohlcv = ohlcv_data[input_data_length:, :6]

    #                      Get Figure                     #
    if get_fig == 1:
        spanlist_low = _flag_spans(low_check)
        spanlist_high = _flag_spans(high_check)

        # Same curve in both panels, so scale it once.
        # NOTE(review): column 1 is plotted as 'close' - confirm the sheet's
        # column order.
        scaled_close = min_max_scaler(ohlcv_data[:, 1:2])

        plt.subplot(211)
        plt.plot(scaled_close, 'r', label='close')
        # The plotted series is the scaled OBV; it used to be labelled 'MA60'.
        plt.plot(scaled_OBV, 'b', label='OBV')
        plt.legend(loc='upper right')
        for start, end in spanlist_low:
            plt.axvspan(start, end, facecolor='m', alpha=0.5)

        plt.subplot(212)
        plt.plot(scaled_close, 'r', label='close')
        plt.plot(scaled_OBV, 'b', label='OBV')
        plt.legend(loc='upper right')
        for start, end in spanlist_high:
            plt.axvspan(start, end, facecolor='c', alpha=0.5)

        name_parts = file.split()
        Date = name_parts[0]
        Coin = name_parts[1].split('.')[0]
        plt.savefig('./Figure_data/%s_%s/%s %s.png' % (input_data_length, model_num, Date, Coin), dpi=500)
        plt.close()

    return dataX, dataY_low, dataY_high, sliced_ohlcv


if __name__ == '__main__':
    # Builds the training set: runs made_x() on every saved spreadsheet and
    # concatenates the resulting windows and labels.

    # ----------- Params -----------#
    input_data_length = 54
    model_num = input('Press model number : ')

    #       Make folder      #
    # makedirs(exist_ok=True) also creates a missing './Figure_data' parent and
    # does not fail when the folder already exists. Any other failure is
    # reported instead of being swallowed, but the run continues because the
    # folder is only needed when figures are saved.
    figure_dir = './Figure_data/%s_%s/' % (input_data_length, model_num)
    try:
        os.makedirs(figure_dir, exist_ok=True)
    except OSError as e:
        print('Could not create %s: %s' % (figure_dir, e))

    check_span = 30
    get_fig = 0

    Made_X = []
    Made_Y = []  # NOTE(review): never filled in the code visible here.
    Made_Y_low = []
    Made_Y_high = []

    for file in ohlcv_list:

        # File names start with a 'YYYY-MM-DD' date; files whose month field
        # is 1 are skipped.
        if int(file.split()[0].split('-')[1]) == 1:
            continue

        result = made_x(file, input_data_length, model_num, check_span, get_fig)

        # ------------ merge dataX / dataY when the file produced data ------------#
        if result is not None:

            Made_X += result[0]
            Made_Y_low += result[1]
            Made_Y_high += result[2]

            # Show the running total of collected samples.
            print(file, len(Made_X))

    # SAVING X, Y
    X = np.array(Made_X)
    Y_low = np.array(Made_Y_low)
    Y_high = np.array(Made_Y_high)

    # Saving is currently disabled:
    # np.save('./Made_X/Made_X %s_%s' % (input_data_length, model_num), X)
    # np.save('./Made_X_low/Made_Y %s_%s' % (input_data_length, model_num), Y_low)
    # np.save('./Made_X_high/Made_Y %s_%s' % (input_data_length, model_num), Y_high)



Press model number : 8
2019-10-25 FAB ohlcv.xlsx 244
2019-11-02 PAY ohlcv.xlsx 395
2019-11-13 DVP ohlcv.xlsx 538
2019-11-02 IOST ohlcv.xlsx 712
2019-10-28 FNB ohlcv.xlsx 1692
2019-11-14 QTUM ohlcv.xlsx 2260
2019-11-20 LBA ohlcv.xlsx 2821
2019-10-30 LTC ohlcv.xlsx 3545
2019-10-31 FX ohlcv.xlsx 3913
2019-10-16 STRAT ohlcv.xlsx 4168
2019-10-26 WTC ohlcv.xlsx 4460
2019-10-31 ICX ohlcv.xlsx 4584
2019-10-10 CHR ohlcv.xlsx 4781
2019-11-15 CTXC ohlcv.xlsx 5842
2019-11-01 FX ohlcv.xlsx 5944
2019-11-02 CMT ohlcv.xlsx 6333
2019-10-26 PLY ohlcv.xlsx 7276
2019-10-26 BCH ohlcv.xlsx 8524
2019-10-22 BSV ohlcv.xlsx 9519
2019-10-25 GXC ohlcv.xlsx 9707
2019-11-02 LOOM ohlcv.xlsx 9862
2019-11-20 CTXC ohlcv.xlsx 10542
2019-10-31 BHP ohlcv.xlsx 11224
2019-10-30 RNT ohlcv.xlsx 11327
2019-10-26 LTC ohlcv.xlsx 12178
2019-11-24 VET ohlcv.xlsx 12490
2019-10-20 ZRX ohlcv.xlsx 12972
2019-11-15 ITC ohlcv.xlsx 13704
2019-10-29 LBA ohlcv.xlsx 14192
2019-10-21 BTC ohlcv.xlsx 15495
2019-10-31 BTC ohlcv.xlsx 16801
2019-

2019-11-14 LINK ohlcv.xlsx 114469
2019-10-19 PLY ohlcv.xlsx 115128
2019-11-01 CON ohlcv.xlsx 116147
2019-11-13 WET ohlcv.xlsx 116338
2019-11-18 IOST ohlcv.xlsx 117066
2019-10-12 VALOR ohlcv.xlsx 117171
2019-10-27 WET ohlcv.xlsx 117309
2019-10-21 OGO ohlcv.xlsx 117684
2019-10-18 HYC ohlcv.xlsx 117816
2019-10-12 FAB ohlcv.xlsx 118096
2019-10-16 FX ohlcv.xlsx 118224
2019-10-24 FNB ohlcv.xlsx 119352
2019-11-21 CON ohlcv.xlsx 120408
2019-10-26 LINK ohlcv.xlsx 121150
2019-11-20 ZIL ohlcv.xlsx 121786
2019-11-14 OGO ohlcv.xlsx 121988
2019-10-30 OCN ohlcv.xlsx 122386
2019-10-21 BSV ohlcv.xlsx 123155
2019-10-16 DAC ohlcv.xlsx 123906
2019-10-26 WET ohlcv.xlsx 124130
2019-10-25 LINK ohlcv.xlsx 124788
2019-11-01 POWR ohlcv.xlsx 124907
2019-10-29 HDAC ohlcv.xlsx 125188
2019-10-25 MTL ohlcv.xlsx 125684
2019-11-01 BCD ohlcv.xlsx 125794
2019-10-31 TRUE ohlcv.xlsx 125909
2019-11-02 DAC ohlcv.xlsx 126733
2019-11-01 DAC ohlcv.xlsx 127692
2019-10-26 ADA ohlcv.xlsx 128547
2019-10-24 BSV ohlcv.xlsx 129278
20

2019-10-30 EOS ohlcv.xlsx 228940
2019-10-19 LAMB ohlcv.xlsx 229242
2019-10-11 STEEM ohlcv.xlsx 229515
2019-10-31 MTL ohlcv.xlsx 230199
2019-10-28 DVP ohlcv.xlsx 230898
2019-10-26 LBA ohlcv.xlsx 231115
2019-10-26 PIVX ohlcv.xlsx 231598
2019-10-29 SNT ohlcv.xlsx 231698
2019-10-29 CMT ohlcv.xlsx 231949
2019-10-26 BAT ohlcv.xlsx 232184
2019-10-26 GXC ohlcv.xlsx 232375
2019-10-30 MCO ohlcv.xlsx 232476
2019-10-31 PIVX ohlcv.xlsx 232592
2019-10-30 OMG ohlcv.xlsx 232983
2019-10-28 POWR ohlcv.xlsx 233275
2019-10-28 ETC ohlcv.xlsx 233893
2019-10-29 LRC ohlcv.xlsx 234027
2019-10-27 PLY ohlcv.xlsx 234644
2019-10-26 THETA ohlcv.xlsx 234799
2019-10-26 STRAT ohlcv.xlsx 234946
2019-10-16 BTG ohlcv.xlsx 235120
2019-11-02 PIVX ohlcv.xlsx 235269
2019-10-31 DAD ohlcv.xlsx 235836
2019-10-26 POLY ohlcv.xlsx 235962
2019-10-26 EOS ohlcv.xlsx 237217
2019-10-10 ZRX ohlcv.xlsx 237678
2019-11-01 BAT ohlcv.xlsx 237780
2019-10-18 FAB ohlcv.xlsx 238011
2019-11-21 CTXC ohlcv.xlsx 238919
2019-10-22 ENJ ohlcv.xlsx 2390