In [1]:
import numpy as np
import pandas as pd
import pybithumb
import os
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import MinMaxScaler
# Silence every warning category for the whole process.
warnings.filterwarnings("ignore")
# Let pandas print up to 500 rows before truncating its output.
pd.set_option('display.max_rows', 500)

# Module-level scaler instance. NOTE(review): min_max_scaler() creates its own
# local MinMaxScaler, so none of the functions visible here use this object.
Scaler = MinMaxScaler()

# Folder holding the OHLCV spreadsheets; made_x() prepends it to each file
# name. NOTE(review): the name shadows the builtin dir(), but other code in
# this file reads it, so renaming has to be done file-wide.
dir = 'ohlcv/'
# Listed once at import time; the __main__ section iterates over this list.
ohlcv_list = os.listdir(dir)


def min_max_scaler(price):
    """Rescale ``price`` with a ``MinMaxScaler`` fitted on ``price`` itself.

    A brand-new scaler is built on every call, so the result depends only on
    the array passed in and nothing is shared between calls.

    :param price: array handed to the scaler unchanged (callers in this file
        pass 2-D column slices).
    :return: the transformed array returned by the scaler.
    """
    fitted = MinMaxScaler().fit(price)
    return fitted.transform(price)


def low_high(Coin, input_data_length, ip_limit=None, trade_limit=None):
    """Build model input windows from the 1-minute KRW candles of ``Coin``.

    Pipeline:
      1. Fetch the candles through ``pybithumb.get_ohlcv``.
      2. Append an on-balance-volume (``OBV``) column.
      3. Drop the first row (its OBV is only the seed value 0) and min-max
         scale the first four columns together, then column 4 (volume) and
         the OBV column separately, giving six features per row.
      4. Cut the rows into overlapping windows of ``input_data_length`` rows;
         the last window ends on the most recent candle.

    When ``trade_limit`` is not ``None`` three extra filters apply and the
    coin is skipped if any of them triggers:
      * max(close) / min(close) is below 1.07,
      * scaled column 1 of the most recent row is above 0.3,
      * fewer than 100 windows could be built.

    :param Coin: ticker passed to ``pybithumb.get_ohlcv``.
    :param input_data_length: number of rows per window.
    :param ip_limit: when not ``None`` the extra ``'proxyon'`` argument is
        forwarded to ``get_ohlcv``.
    :param trade_limit: when not ``None`` the filters above are enabled.
    :return: ``(X_test, closeprice)`` where ``X_test`` is a float32 array of
        shape ``(windows, input_data_length, 6, 1)`` and ``closeprice`` is the
        last close value; ``(None, None)`` whenever the coin is skipped or no
        usable data is available.
    """
    if ip_limit is None:
        ohlcv_excel = pybithumb.get_ohlcv(Coin, 'KRW', 'minute1')
    else:
        ohlcv_excel = pybithumb.get_ohlcv(Coin, 'KRW', 'minute1', 'proxyon')

    # NOTE(review): pybithumb is assumed to hand back None when the request
    # fails - confirm. Either way, nothing can be built without rows.
    if ohlcv_excel is None or len(ohlcv_excel) == 0:
        return None, None

    filtering = trade_limit is not None

    close = ohlcv_excel['close']
    if filtering and close.max() / close.min() < 1.07:
        return None, None

    # OBV: add the volume when close rose, keep the running total when it is
    # unchanged, subtract the volume otherwise. The first row is the 0 seed.
    close_values = close.to_numpy(dtype=float)
    volume_values = ohlcv_excel['volume'].to_numpy(dtype=float)
    previous, current = close_values[:-1], close_values[1:]
    signed_volume = np.where(
        current > previous,
        volume_values[1:],
        np.where(current == previous, 0.0, -volume_values[1:]),
    )
    obv = np.zeros(len(ohlcv_excel))
    obv[1:] = np.cumsum(signed_volume)
    ohlcv_excel['OBV'] = obv

    closeprice = close.iloc[-1]

    # ``float`` instead of ``np.float``: the alias was removed from NumPy.
    ohlcv_data = ohlcv_excel.values[1:].astype(float)
    if len(ohlcv_data) == 0:
        # Same "skip" signal as every other exit, so callers can always unpack.
        return None, None

    price = ohlcv_data[:, :4]
    volume = ohlcv_data[:, [4]]
    OBV = ohlcv_data[:, [-1]]

    # Column-wise concatenation: 4 price columns + volume + OBV.
    x = np.concatenate(
        (min_max_scaler(price), min_max_scaler(volume), min_max_scaler(OBV)),
        axis=1,
    )

    if filtering and x[-1, 1] > 0.3:
        return None, None

    # One window per end position, up to and including the last row.
    dataX = [
        x[end - input_data_length:end]
        for end in range(input_data_length, len(ohlcv_data) + 1)
    ]

    # Fewer rows than one window: nothing to predict on.
    if not dataX:
        return None, None
    if filtering and len(dataX) < 100:
        return None, None

    X_test = np.array(dataX).astype('float32')
    rows, cols = X_test.shape[1], X_test.shape[2]
    # Trailing axis of size 1 is added by the reshape below.
    X_test = X_test.reshape(-1, rows, cols, 1)

    return X_test, closeprice


def made_x(file, input_data_length, model_num, check_span, get_fig, crop_size):
    """Turn one OHLCV spreadsheet into training windows and labels.

    The spreadsheet ``dir + file`` is read, an on-balance-volume (``OBV``)
    column and a ``trade_state`` label column are appended, and the rows are
    cut into windows.

    Labels (``trade_state``), computed for every row that has ``check_span``
    later rows to look at:
      * 1 - no close in the next ``check_span`` rows is lower than the current
        close, and that lowest price occurs at most 3 times in the window
        (current row included);
      * 2 - the mirror case for the highest price;
      * 0 - everything else.

    Each sample pairs the ``input_data_length`` rows *before* row ``i`` with
    the label of row ``i``, so the labelled row itself is never part of its
    own input window.

    :param file: spreadsheet file name inside the module-level ``dir``. When
        ``get_fig == 1`` it must look like ``'<date> <coin>...'`` because the
        figure name is built from the first two space-separated parts.
    :param input_data_length: number of rows per input window.
    :param model_num: only used in the figure output path.
    :param check_span: how many following rows are inspected for the label.
    :param get_fig: ``1`` saves a chart of the labelled spans.
    :param crop_size: not used by this function; kept for interface
        compatibility.
    :return: ``(dataX, dataY, sliced_ohlcv)`` - a list of
        ``(input_data_length, 6)`` arrays, a list of length-1 label arrays and
        the first five columns of the rows from ``input_data_length`` onwards -
        or ``None`` when the file cannot be read or no row survives.
    """
    try:
        ohlcv_excel = pd.read_excel(dir + file, index_col=0)
    except Exception as e:
        print('Error in read_excel :', e)
        return None

    n_rows = len(ohlcv_excel)
    close = ohlcv_excel['close']

    # OBV: add the volume when close rose, keep the running total when it is
    # unchanged, subtract the volume otherwise. The first row is the 0 seed.
    close_values = close.to_numpy(dtype=float)
    volume_values = ohlcv_excel['volume'].to_numpy(dtype=float)
    previous, current = close_values[:-1], close_values[1:]
    signed_volume = np.where(
        current > previous,
        volume_values[1:],
        np.where(current == previous, 0.0, -volume_values[1:]),
    )
    obv = np.zeros(n_rows)
    obv[1:] = np.cumsum(signed_volume)
    ohlcv_excel['OBV'] = obv

    # Rows from this position on have no full look-ahead window and stay NaN.
    label_end = max(n_rows - check_span, 0)

    # ``.iloc`` everywhere: the lookups are positional, and plain ``series[i]``
    # with an integer only works positionally through a deprecated fallback.
    trade_state = [np.nan] * n_rows
    for i in range(label_end):
        here = close.iloc[i]
        ahead = close.iloc[i + 1:i + 1 + check_span]
        window = close.iloc[i:i + 1 + check_span]
        if ahead.min() >= here:
            # Low point: accept at most 3 occurrences of the lowest price.
            lowest_count = window.value_counts().sort_index().iloc[0]
            trade_state[i] = 1 if lowest_count <= 3 else 0
        elif ahead.max() <= here:
            # High point: accept at most 3 occurrences of the highest price.
            highest_count = window.value_counts().sort_index().iloc[-1]
            trade_state[i] = 2 if highest_count <= 3 else 0
        else:
            trade_state[i] = 0

    ohlcv_excel['trade_state'] = trade_state

    # Drop the first row (OBV seed) and the unlabelled tail. An explicit stop
    # index is used because ``[1:-check_span]`` is empty for check_span == 0.
    # ``float`` replaces ``np.float``, which was removed from NumPy.
    ohlcv_data = ohlcv_excel.values[1:label_end].astype(float)
    if len(ohlcv_data) == 0:
        return None

    # Fixed X columns: four price columns, volume, OBV (second to last).
    price = ohlcv_data[:, :4]
    volume = ohlcv_data[:, [4]]
    OBV = ohlcv_data[:, [-2]]
    # Y column: the label appended last.
    trade_state = ohlcv_data[:, [-1]]

    scaled_price = min_max_scaler(price)
    scaled_volume = min_max_scaler(volume)
    scaled_OBV = min_max_scaler(OBV)

    # Column-wise concatenation: 4 price columns + volume + OBV.
    x = np.concatenate((scaled_price, scaled_volume, scaled_OBV), axis=1)
    y = trade_state

    # The window ends just before the labelled row: at entry time the current
    # candle is not complete yet, so only finished rows are used as input.
    targets = range(input_data_length, len(ohlcv_data))
    dataX = [x[i - input_data_length:i] for i in targets]
    dataY = [y[i] for i in targets]

    sliced_ohlcv = ohlcv_data[input_data_length:, :5]

    if get_fig == 1:
        labels = trade_state[:, 0]
        last = len(labels) - 1

        def spans_for(lower, upper):
            # One-row-wide span per matching label; the final row is drawn
            # backwards so the span stays inside the plotted range.
            hits = np.flatnonzero((labels > lower) & (labels < upper))
            return [
                (int(m), int(m) + 1) if m < last else (int(m) - 1, int(m))
                for m in hits
            ]

        # NOTE(review): column 1 is plotted under the label 'close' - confirm
        # that this matches the column order of the spreadsheets.
        scaled_close = min_max_scaler(ohlcv_data[:, 1:2])

        # Top panel marks label 1 (low points), bottom panel label 2 (highs).
        panels = (
            (211, spans_for(0.5, 1.5), 'm'),
            (212, spans_for(1.5, 2.5), 'c'),
        )
        for position, spans, colour in panels:
            plt.subplot(position)
            plt.plot(scaled_close, 'r', label='close')
            plt.plot(scaled_OBV, 'b', label='OBV')
            plt.legend(loc='upper right')
            for start, end in spans:
                plt.axvspan(start, end, facecolor=colour, alpha=0.5)

        Date = file.split()[0]
        Coin = file.split()[1].split('.')[0]
        plt.savefig('./Figure_data/%s_%s/%s %s.png' % (input_data_length, model_num, Date, Coin), dpi=500)
        plt.close()

    return dataX, dataY, sliced_ohlcv


if __name__ == '__main__':
    # Script entry point: build the training set from every January
    # spreadsheet in ``ohlcv_list`` and save it as two .npy files.

    # ----------- Params -----------#
    input_data_length = 54
    model_num = input('Press model number : ')
    check_span = 30
    get_fig = 0

    #       Make folders      #
    # makedirs also creates a missing parent and tolerates an existing folder.
    # The figure folder is only needed when get_fig == 1, so a failure here is
    # reported but does not stop the run.
    try:
        os.makedirs('./Figure_data/%s_%s/' % (input_data_length, model_num), exist_ok=True)
    except OSError as e:
        print('Error in makedirs :', e)
    # np.save below cannot write without its target folder.
    os.makedirs('./Made_X', exist_ok=True)

    Made_X = []
    Made_Y = []

    for file in ohlcv_list:
        try:
            # File names start with 'YYYY-MM-DD'; keep month 1 only.
            month = int(file.split()[0].split('-')[1])
            if month not in [1]:
                continue

            result = made_x(file, input_data_length, model_num, check_span, get_fig, crop_size=500)

            # ------------ merge dataX, dataY when the file produced data ------------#
            if result is not None:
                Made_X += result[0]
                Made_Y += result[1]

                # Running total of collected samples.
                print(file, len(Made_X))
                if len(Made_X) > 200000:
                    break

        except Exception as e:
            # Best effort per file: report which file failed and move on.
            print('Error in file :', file, e)

    # SAVING X, Y
    X = np.array(Made_X)
    Y = np.array(Made_Y)

    np.save('./Made_X/Made_X %s_%s' % (input_data_length, model_num), X)
    np.save('./Made_X/Made_Y %s_%s' % (input_data_length, model_num), Y)



Press model number : 21
2020-01-16 XLM ohlcv.xlsx 1005
2020-01-23 TRX ohlcv.xlsx 1730
2020-01-31 AMO ohlcv.xlsx 2382
2020-01-16 SNT ohlcv.xlsx 2599
2020-01-19 LBA ohlcv.xlsx 2677
2020-01-11 FX ohlcv.xlsx 2995
2020-01-22 LUNA ohlcv.xlsx 4319
2020-01-16 LINK ohlcv.xlsx 5315
2020-01-28 PLY ohlcv.xlsx 5389
2020-01-13 FCT ohlcv.xlsx 5596
2020-01-20 IPX ohlcv.xlsx 6950
2020-01-27 ITC ohlcv.xlsx 6987
2020-01-14 FZZ ohlcv.xlsx 6987
2020-01-24 BZNT ohlcv.xlsx 6997
2020-01-20 SNT ohlcv.xlsx 7249
2020-01-29 TMTG ohlcv.xlsx 8163
2020-01-25 VET ohlcv.xlsx 8203
2020-01-21 EOS ohlcv.xlsx 9365
2020-01-24 FAB ohlcv.xlsx 9532
2020-01-22 TRV ohlcv.xlsx 9723
2020-01-13 MBL ohlcv.xlsx 9723
2020-01-26 DVP ohlcv.xlsx 9723
2020-01-22 MTL ohlcv.xlsx 10100
2020-01-12 POWR ohlcv.xlsx 10100
2020-01-28 TRX ohlcv.xlsx 10776
2020-01-31 LAMB ohlcv.xlsx 10887
2020-01-12 MCO ohlcv.xlsx 10887
2020-01-20 CMT ohlcv.xlsx 10929
2020-01-22 BTG ohlcv.xlsx 11938
2020-01-13 DAC ohlcv.xlsx 13210
2020-01-13 INS ohlcv.xlsx 13210
2

2020-01-12 DVP ohlcv.xlsx 87489
2020-01-29 AOA ohlcv.xlsx 87821
2020-01-30 ELF ohlcv.xlsx 87836
2020-01-20 REP ohlcv.xlsx 88427
2020-01-31 MBL ohlcv.xlsx 89703
2020-01-23 BCH ohlcv.xlsx 90756
2020-01-27 KNC ohlcv.xlsx 90756
2020-01-28 NPXS ohlcv.xlsx 90833
2020-01-19 ZEC ohlcv.xlsx 91575
2020-01-20 GNT ohlcv.xlsx 91616
2020-01-26 BTC ohlcv.xlsx 92894
2020-01-29 WOM ohlcv.xlsx 92908
2020-01-21 CMT ohlcv.xlsx 92913
2020-01-23 IPX ohlcv.xlsx 94267
2020-01-21 CHR ohlcv.xlsx 94327
2020-01-26 WOM ohlcv.xlsx 94327
2020-01-23 XMR ohlcv.xlsx 94359
2020-01-25 VALOR ohlcv.xlsx 94498
2020-01-14 BCH ohlcv.xlsx 95277
2020-01-19 QTUM ohlcv.xlsx 96480
2020-01-26 XVG ohlcv.xlsx 96480
2020-01-12 BAT ohlcv.xlsx 96532
2020-01-19 BSV ohlcv.xlsx 97864
2020-01-30 NPXS ohlcv.xlsx 98041
2020-01-12 ENJ ohlcv.xlsx 98230
2020-01-24 BTG ohlcv.xlsx 99019
2020-01-30 LRC ohlcv.xlsx 99019
2020-01-20 ZRX ohlcv.xlsx 99054
2020-01-25 PIVX ohlcv.xlsx 99213
2020-01-23 BSV ohlcv.xlsx 100465
2020-01-25 NPXS ohlcv.xlsx 100477

2020-01-15 WTC ohlcv.xlsx 175409
2020-01-31 LOOM ohlcv.xlsx 175409
2020-01-29 BTG ohlcv.xlsx 176599
2020-01-20 HC ohlcv.xlsx 176749
2020-01-29 BSV ohlcv.xlsx 178066
2020-01-15 IOST ohlcv.xlsx 178236
2020-01-16 QTUM ohlcv.xlsx 179396
2020-01-29 CMT ohlcv.xlsx 179396
2020-01-25 FZZ ohlcv.xlsx 179396
2020-01-21 STEEM ohlcv.xlsx 179770
2020-01-15 ENJ ohlcv.xlsx 180076
2020-01-24 IPX ohlcv.xlsx 181430
2020-01-14 HC ohlcv.xlsx 181487
2020-01-22 PIVX ohlcv.xlsx 182227
2020-01-14 ICX ohlcv.xlsx 182227
2020-01-26 DAC ohlcv.xlsx 183550
2020-01-11 ITC ohlcv.xlsx 183550
2020-01-14 TMTG ohlcv.xlsx 184651
2020-01-16 EOS ohlcv.xlsx 185980
2020-01-11 ENJ ohlcv.xlsx 186143
2020-01-19 MCO ohlcv.xlsx 186208
2020-01-30 AOA ohlcv.xlsx 186531
2020-01-11 XEM ohlcv.xlsx 186531
2020-01-29 PAY ohlcv.xlsx 186580
2020-01-28 ADA ohlcv.xlsx 186987
2020-01-11 ADA ohlcv.xlsx 187274
2020-01-25 BTC ohlcv.xlsx 188617
2020-01-14 XRP ohlcv.xlsx 189768
2020-01-22 INS ohlcv.xlsx 189932
2020-01-19 IPX ohlcv.xlsx 191286
2020-