In [3]:
import numpy as np
import pandas as pd
import pybithumb
import os
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import MinMaxScaler
warnings.filterwarnings("ignore")
pd.set_option('display.max_rows', 500)

# Module-level scaler instance. NOTE(review): min_max_scaler() builds its own
# local MinMaxScaler, so this object is not referenced by the visible code.
Scaler = MinMaxScaler()

# Folder holding the per-day OHLCV spreadsheets; made_x() prepends it to each
# file name. NOTE(review): the name shadows the builtin dir().
dir = 'ohlcv/'
# Listed once at import time; os.listdir raises if the folder does not exist.
ohlcv_list = os.listdir(dir)


def min_max_scaler(price):
    """Return *price* rescaled by a freshly fitted ``MinMaxScaler``.

    A new scaler with default settings is created on every call, so the
    scaling range is derived solely from the array passed in and no state
    is shared between calls.

    Args:
        price: 2-D array-like accepted by ``MinMaxScaler``; each column is
            scaled independently.

    Returns:
        The transformed array, same shape as *price*.
    """
    return MinMaxScaler().fit_transform(price)


def _on_balance_volume(close, volume):
    """Return the cumulative on-balance volume for aligned close/volume arrays.

    The first element is 0. For every later row the row's volume is added
    when the close is above the previous close, left out when the close is
    unchanged, and subtracted otherwise.
    """
    close = np.asarray(close, dtype=float)
    volume = np.asarray(volume, dtype=float)
    if len(close) == 0:
        return np.zeros(0)

    change = np.diff(close)
    later_volume = volume[1:]
    # Nested where keeps the three-way branch explicit: up / flat / anything else.
    signed = np.where(change > 0, later_volume,
                      np.where(change == 0, 0.0, -later_volume))
    return np.concatenate(([0.0], np.cumsum(signed)))


def low_high(Coin, input_data_length, ip_limit=None, trade_limit=None):
    """Fetch 1-minute KRW candles for *Coin* and build model input windows.

    The frame returned by ``pybithumb.get_ohlcv`` gets an extra ``OBV``
    column, its first row is dropped, and the first four columns (price),
    column 4 (volume) and the last column (OBV) are each min-max scaled and
    joined into a 6-column matrix. Sliding windows of ``input_data_length``
    rows are then stacked.

    Args:
        Coin: Ticker symbol handed to ``pybithumb.get_ohlcv``.
        input_data_length: Number of consecutive rows in each window.
        ip_limit: When not ``None`` an extra ``'proxyon'`` argument is passed
            to ``get_ohlcv``. NOTE(review): presumably a locally patched
            pybithumb; confirm that the installed version accepts it.
        trade_limit: When not ``None`` three rejection filters are enabled:
            max/min close ratio below 1.07, the latest scaled value in price
            column 1 above 0.3, and fewer than 100 windows.

    Returns:
        ``(X_test, closeprice)`` where ``X_test`` is a float32 array of shape
        ``(windows, input_data_length, 6, 1)`` and ``closeprice`` is the last
        value of the ``close`` column, or ``(None, None)`` when the data is
        missing, too short, or rejected by a filter.
    """
    #   User-Agent Configuration
    #   IP - Change
    if ip_limit is None:
        ohlcv_excel = pybithumb.get_ohlcv(Coin, 'KRW', 'minute1')
    else:
        ohlcv_excel = pybithumb.get_ohlcv(Coin, 'KRW', 'minute1', 'proxyon')

    # Nothing usable came back: report it with the same sentinel the filters use.
    if ohlcv_excel is None or len(ohlcv_excel) == 0:
        return None, None

    filtering = trade_limit is not None

    # Reject coins whose close barely moved over the fetched period.
    price_gap = ohlcv_excel.close.max() / ohlcv_excel.close.min()
    if filtering and price_gap < 1.07:
        return None, None

    ohlcv_excel['OBV'] = _on_balance_volume(ohlcv_excel['close'].values,
                                            ohlcv_excel['volume'].values)

    closeprice = ohlcv_excel['close'].iloc[-1]

    # ----------- extract dataX -----------#
    # The first row is skipped. NOTE(review): presumably because its OBV is
    # only the zero seed value; confirm.
    # Builtin float replaces the np.float alias, which NumPy 1.24 removed.
    ohlcv_data = ohlcv_excel.values[1:].astype(float)

    # No rows left after dropping the first one.
    if len(ohlcv_data) == 0:
        return None, None

    #          data preprocessing         #
    #   Fixed X_data    #
    scaled_price = min_max_scaler(ohlcv_data[:, :4])
    scaled_volume = min_max_scaler(ohlcv_data[:, [4]])
    scaled_OBV = min_max_scaler(ohlcv_data[:, [-1]])

    # axis=1: the three scaled groups are joined column-wise.
    x = np.concatenate((scaled_price, scaled_volume, scaled_OBV), axis=1)

    # Reject when the latest row's scaled value in price column 1 is above 0.3.
    # NOTE(review): which price field sits in column 1 depends on the column
    # order returned by get_ohlcv; confirm.
    if filtering and x[-1][1] > 0.3:
        return None, None

    # One window per end position, up to and including the last row.
    dataX = [x[end - input_data_length:end]
             for end in range(input_data_length, len(ohlcv_data) + 1)]

    if filtering and len(dataX) < 100:
        return None, None

    # Without any window there is nothing to reshape into a 4-D batch.
    if not dataX:
        return None, None

    X_test = np.array(dataX)
    row = X_test.shape[1]
    col = X_test.shape[2]

    X_test = X_test.astype('float32').reshape(-1, row, col, 1)

    return X_test, closeprice


def _state_spans(trade_state, lower, upper):
    """Return ``(start, stop)`` index pairs for rows whose state lies strictly between the bounds.

    Each matching row ``m`` yields ``(m, m + 1)``; a match on the very last
    row yields ``(m - 1, m)`` so the span stays inside the series. NaN
    states never match.
    """
    states = np.ravel(trade_state)
    last = len(states) - 1
    spans = []
    for m, state in enumerate(states):
        if lower < state < upper:
            spans.append((m, m + 1) if m < last else (m - 1, m))
    return spans


def made_x(file, input_data_length, model_num, ascend_gap, check_span, get_fig):
    """Build labelled training windows from one OHLCV spreadsheet.

    The first four columns of the sheet are min-max scaled and column 1 of
    the result is used as the reference price. NOTE(review): the original
    code names that column ``closeprice``; confirm against the spreadsheet's
    column order.

    Every row ``i`` that has ``check_span`` rows on both sides gets a label:

    * ``1`` - no reference price in the next ``check_span`` rows is below
      row ``i`` and the highest of them exceeds it by at least ``ascend_gap``;
    * ``2`` - no reference price in the next ``check_span`` rows is above
      row ``i`` and row ``i`` exceeds the lowest of the previous
      ``check_span`` rows by at least ``ascend_gap``;
    * ``0`` - everything else.

    Args:
        file: Spreadsheet file name inside the module-level ``dir`` folder,
            expected to look like ``'<date> <coin> ...'``.
        input_data_length: Number of rows in each input window.
        model_num: Only used to build the figure output path.
        ascend_gap: Minimum move, in min-max-scaled price units, for a row to
            be labelled 1 or 2.
        check_span: Number of rows inspected before and after each row.
        get_fig: When equal to ``1`` a two-panel chart of the labels is saved
            under ``./Figure_data/``.

    Returns:
        ``(dataX, dataY, sliced_ohlcv)`` - a list of
        ``(input_data_length, 4)`` windows, a list of length-1 label arrays,
        and the unscaled rows from ``input_data_length`` onward limited to
        the first six columns - or ``None`` when the sheet is empty or yields
        fewer than 100 windows.
    """
    ohlcv_excel = pd.read_excel(dir + file, index_col=0)

    # Builtin float replaces the np.float alias, which NumPy 1.24 removed.
    ohlcv_data = ohlcv_excel.values[:].astype(float)

    # Skip sheets without any rows.
    if len(ohlcv_data) == 0:
        return None

    #          data preprocessing         #
    #   Fixed X_data    #
    scaled_price = min_max_scaler(ohlcv_data[:, :4])
    closeprice = scaled_price[:, 1]

    # Rows without a full look-back/look-ahead range stay NaN
    # (np.nan rather than np.NaN, which NumPy 2.0 removed).
    trade_state = np.full(len(ohlcv_data), np.nan)
    for i in range(check_span, len(ohlcv_data) - check_span):
        current = closeprice[i]
        ahead = closeprice[i + 1:i + 1 + check_span]

        if ahead.min() >= current:
            # A low point that is followed by a sharp rise.
            label = 1 if ahead.max() - current >= ascend_gap else 0
        elif ahead.max() <= current:
            # Near the top of a chart that has already risen sharply.
            behind = closeprice[i - check_span:i]
            label = 2 if current - behind.min() >= ascend_gap else 0
        else:
            # No trade.
            label = 0
        trade_state[i] = label

    #   Flexible Y_data    #
    trade_state = trade_state.reshape(-1, 1)

    # Keep only the rows that received a label.
    x = scaled_price[check_span:len(ohlcv_data) - check_span]
    y = trade_state[check_span:len(ohlcv_data) - check_span]

    # Each window ends one row before its target, so only data completed
    # before the labelled row is used as input.
    targets = range(input_data_length, len(y))
    dataX = [x[i - input_data_length:i] for i in targets]
    dataY = [y[i] for i in targets]

    if len(dataX) < 100:
        return None

    #       Extracting fixed X_data       #
    sliced_ohlcv = ohlcv_data[input_data_length:, :6]

    #                      Get Figure                     #
    if get_fig == 1:
        spanlist_low = _state_spans(trade_state, 0.5, 1.5)
        spanlist_high = _state_spans(trade_state, 1.5, 2.5)

        # Same curve in both panels: label-1 rows shaded on top, label-2 rows below.
        scaled_close = min_max_scaler(ohlcv_data[:, 1:2])
        panels = ((211, spanlist_low, 'c'), (212, spanlist_high, 'm'))
        for position, spans, colour in panels:
            plt.subplot(position)
            plt.plot(scaled_close, 'gold', label='close')
            plt.legend(loc='upper right')
            for start, stop in spans:
                plt.axvspan(start, stop, facecolor=colour, alpha=0.7)

        name_parts = file.split()
        Date = name_parts[0]
        Coin = name_parts[1].split('.')[0]
        plt.savefig('./Figure_data/%s_%s/%s %s.png' % (input_data_length, model_num, Date, Coin), dpi=500)
        plt.close()

    return dataX, dataY, sliced_ohlcv


if __name__ == '__main__':
    # Script entry point: build training windows from every January
    # spreadsheet in ohlcv_list and save the merged X / Y arrays.

    # ----------- Params -----------#
    input_data_length = 54
    model_num = 35
    ascend_gap = 0.5
    check_span = 50
    get_fig = 0

    #       Make folders      #
    # exist_ok tolerates an existing folder without hiding real failures such
    # as a permission error; missing parent folders are created as well.
    os.makedirs('./Figure_data/%s_%s/' % (input_data_length, model_num), exist_ok=True)
    # np.save below needs this folder, so create it before the long loop runs.
    os.makedirs('./Made_X/', exist_ok=True)

    Made_X = []
    Made_Y = []

    for file in ohlcv_list:

        try:
            # File names start with 'YYYY-MM-DD'; only month 1 is processed.
            if int(file.split()[0].split('-')[1]) != 1:
                continue

            result = made_x(file, input_data_length, model_num, ascend_gap, check_span, get_fig)

            # ------------ merge dataX, dataY when the file produced data ------------#
            if result is not None:

                Made_X += result[0]
                Made_Y += result[1]

                # Show the accumulated number of samples.
                print(file, len(Made_X))

        except Exception as e:
            # Best effort per file: name the file that failed and carry on.
            print('Error in :', file, e)

    # SAVING X, Y
    X = np.array(Made_X)
    Y = np.array(Made_Y)

    np.save('./Made_X/Made_X %s_%s' % (input_data_length, model_num), X)
    np.save('./Made_X/Made_Y %s_%s' % (input_data_length, model_num), Y)



2020-01-16 XLM ohlcv.xlsx 936
2020-01-23 TRX ohlcv.xlsx 1592
2020-01-31 AMO ohlcv.xlsx 2175
2020-01-16 SNT ohlcv.xlsx 2323
2020-01-11 FX ohlcv.xlsx 2572
2020-01-22 LUNA ohlcv.xlsx 3827
2020-01-16 LINK ohlcv.xlsx 4754
2020-01-13 FCT ohlcv.xlsx 4892
2020-01-20 IPX ohlcv.xlsx 6177
2020-01-20 SNT ohlcv.xlsx 6360
2020-01-29 TMTG ohlcv.xlsx 7205
2020-01-21 EOS ohlcv.xlsx 8298
2020-01-22 TRV ohlcv.xlsx 8420
2020-01-22 MTL ohlcv.xlsx 8728
2020-01-28 TRX ohlcv.xlsx 9335
2020-01-22 BTG ohlcv.xlsx 10275
2020-01-13 DAC ohlcv.xlsx 11478
2020-01-25 REP ohlcv.xlsx 11723
2020-01-11 APIS ohlcv.xlsx 11884
2020-01-26 XRP ohlcv.xlsx 12883
2020-01-28 LINK ohlcv.xlsx 13210
2020-01-16 ETH ohlcv.xlsx 14468
2020-01-12 BCH ohlcv.xlsx 15537
2020-01-11 ETH ohlcv.xlsx 16536
2020-01-26 TMTG ohlcv.xlsx 17647
2020-01-21 MXC ohlcv.xlsx 17791
2020-01-16 POWR ohlcv.xlsx 17891
2020-01-16 BCD ohlcv.xlsx 19002
2020-01-12 BCD ohlcv.xlsx 19639
2020-01-20 BSV ohlcv.xlsx 20880
2020-01-16 XMR ohlcv.xlsx 21168
2020-01-22 AE ohlc

2020-01-22 IPX ohlcv.xlsx 162904
2020-01-23 TMTG ohlcv.xlsx 163869
2020-01-20 STEEM ohlcv.xlsx 164900
2020-01-20 XLM ohlcv.xlsx 165589
2020-01-21 XSR ohlcv.xlsx 166249
2020-01-27 BCD ohlcv.xlsx 166473
2020-01-15 DAC ohlcv.xlsx 167556
2020-01-30 STEEM ohlcv.xlsx 167802
2020-01-11 XLM ohlcv.xlsx 168107
2020-01-23 SOC ohlcv.xlsx 169329
2020-01-13 HYC ohlcv.xlsx 169524
2020-01-28 LUNA ohlcv.xlsx 170691
2020-01-20 RDN ohlcv.xlsx 170899
2020-01-25 WPX ohlcv.xlsx 171650
2020-01-30 MBL ohlcv.xlsx 172861
2020-01-31 TRX ohlcv.xlsx 173620
2020-01-27 DAC ohlcv.xlsx 174857
2020-01-24 FCT ohlcv.xlsx 175427
2020-01-24 WTC ohlcv.xlsx 175532
2020-01-21 DAD ohlcv.xlsx 175669
2020-01-21 WAVES ohlcv.xlsx 175866
2020-01-12 XLM ohlcv.xlsx 176232
2020-01-25 FCT ohlcv.xlsx 176869
2020-01-15 ADA ohlcv.xlsx 177235
2020-01-15 XMR ohlcv.xlsx 177347
2020-01-11 SOC ohlcv.xlsx 178314
2020-01-12 STEEM ohlcv.xlsx 178440
2020-01-31 RDN ohlcv.xlsx 178548
2020-01-22 FAB ohlcv.xlsx 178952
2020-01-20 OMG ohlcv.xlsx 179059


2020-01-29 ETC ohlcv.xlsx 316566
2020-01-30 XRP ohlcv.xlsx 317768
2020-01-15 PLY ohlcv.xlsx 317969
2020-01-13 STEEM ohlcv.xlsx 318089
2020-01-22 MXC ohlcv.xlsx 318220
2020-01-14 MTL ohlcv.xlsx 318479
2020-01-27 BTC ohlcv.xlsx 319666
2020-01-27 LTC ohlcv.xlsx 319924
2020-01-30 ZEC ohlcv.xlsx 320224
2020-01-12 EOS ohlcv.xlsx 321359
2020-01-12 BSV ohlcv.xlsx 322514
2020-01-11 LINK ohlcv.xlsx 322931
2020-01-25 QTUM ohlcv.xlsx 323373
2020-01-24 WPX ohlcv.xlsx 324294
2020-01-11 BHP ohlcv.xlsx 324883
2020-01-19 BTC ohlcv.xlsx 326167
2020-01-24 ELF ohlcv.xlsx 326417
2020-01-19 ETH ohlcv.xlsx 327624
2020-01-15 AOA ohlcv.xlsx 327854
2020-01-14 BSV ohlcv.xlsx 328762
2020-01-25 STEEM ohlcv.xlsx 328872
2020-01-31 WPX ohlcv.xlsx 329489
2020-01-21 CON ohlcv.xlsx 330669
2020-01-29 DASH ohlcv.xlsx 330934
2020-01-22 ETH ohlcv.xlsx 331940
2020-01-16 MXC ohlcv.xlsx 332043
2020-01-16 BCH ohlcv.xlsx 333315
2020-01-20 STRAT ohlcv.xlsx 333749
2020-01-11 BSV ohlcv.xlsx 334441
2020-01-14 AOA ohlcv.xlsx 334630
2

2020-01-14 BCD ohlcv.xlsx 461960
2020-01-26 MTL ohlcv.xlsx 462115
2020-01-13 ABT ohlcv.xlsx 462387
2020-01-26 QTUM ohlcv.xlsx 462742
2020-01-14 ETC ohlcv.xlsx 462998
2020-01-25 INS ohlcv.xlsx 463265
2020-01-26 ADA ohlcv.xlsx 463477
2020-01-19 OMG ohlcv.xlsx 463843
2020-01-21 AOA ohlcv.xlsx 464120
2020-01-22 AOA ohlcv.xlsx 464358
2020-01-14 PLY ohlcv.xlsx 464598
2020-01-16 HYC ohlcv.xlsx 464853
2020-01-22 FNB ohlcv.xlsx 465106
2020-01-19 BCD ohlcv.xlsx 465804
2020-01-25 FNB ohlcv.xlsx 466230
2020-01-23 REP ohlcv.xlsx 466399
2020-01-11 IPX ohlcv.xlsx 467509
2020-01-20 BCD ohlcv.xlsx 468286
2020-01-21 REP ohlcv.xlsx 468657
2020-01-13 BCH ohlcv.xlsx 469542
2020-01-19 OCN ohlcv.xlsx 469668
2020-01-16 HC ohlcv.xlsx 470194
2020-01-14 SOC ohlcv.xlsx 470780
2020-01-16 NPXS ohlcv.xlsx 470962
2020-01-25 XSR ohlcv.xlsx 471650
2020-01-16 OMG ohlcv.xlsx 471975
2020-01-22 BCH ohlcv.xlsx 473030
2020-01-21 QTUM ohlcv.xlsx 473636
2020-01-15 XSR ohlcv.xlsx 474563
2020-01-22 LTC ohlcv.xlsx 475042
2020-01-