In [9]:
import numpy as np
import pandas as pd
import pybithumb
import os
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import MinMaxScaler
warnings.filterwarnings("ignore")
pd.set_option('display.max_rows', 500)

# Folder holding the saved OHLCV Excel files; made_x() prepends it to each file name.
# NOTE(review): the name `dir` shadows the Python builtin of the same name.
dir = 'ohlcv/'
# Names of the entries in that folder, listed once when this code is executed.
ohlcv_list = os.listdir(dir)


def min_max_scaler(price):
    """Rescale ``price`` with a freshly created, default ``MinMaxScaler``.

    The scaler is fitted on ``price`` itself and immediately applied to it,
    so the scaling parameters depend only on the array passed in.

    :param price: 2-D array-like accepted by ``MinMaxScaler``
    :return: the transformed array
    """
    return MinMaxScaler().fit_transform(price)


def low_high(Coin, input_data_length):
    """Build scaled input windows from the latest 1-minute candles of ``Coin``.

    Candles are fetched with ``pybithumb.get_ohlcv(Coin, 'KRW', 'minute1')``
    and a 60-row rolling mean of ``close`` is appended as ``MA60``.  As many
    leading rows as ``MA60`` has NaNs are dropped.  The first four columns
    (used as prices), column 4 (used as volume) and the ``MA60`` column are
    min-max scaled separately and joined into a six-column feature matrix,
    which is cut into overlapping windows of ``input_data_length``
    consecutive rows.  The last window ends on the most recent row.

    :param Coin: ticker passed to ``pybithumb.get_ohlcv``
    :param input_data_length: number of rows in each window
    :return: ``(X_test, closeprice)`` where ``X_test`` is a float32 array
        reshaped to ``(windows, rows, columns, 1)`` and ``closeprice`` is the
        last fetched ``close`` value; ``(None, None)`` when there is no
        usable data or fewer than 100 windows can be built.
    """
    #   Configure a proxy here if one is needed
    ohlcv_excel = pybithumb.get_ohlcv(Coin, 'KRW', 'minute1')
    if ohlcv_excel is None:
        # NOTE(review): pybithumb appears to return None when the request
        # fails - confirm; treated here like "no data".
        return None, None

    closeprice = ohlcv_excel['close'].iloc[-1]
    ohlcv_excel['MA60'] = ohlcv_excel['close'].rolling(60).mean()

    # ----------- Extract dataX -----------#
    # `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
    first_row = ohlcv_excel['MA60'].isnull().sum()
    ohlcv_data = ohlcv_excel.values[first_row:].astype(float)

    # Nothing left after trimming: use the same "no result" pair as below so
    # callers can always unpack two values.
    if len(ohlcv_data) == 0:
        return None, None

    #          Pre-processing         #
    price = ohlcv_data[:, :4]
    volume = ohlcv_data[:, [4]]
    MA60 = ohlcv_data[:, [-1]]

    # Each group is scaled on its own so that its range does not affect the others.
    x = np.concatenate(
        (min_max_scaler(price), min_max_scaler(volume), min_max_scaler(MA60)),
        axis=1,  # join column-wise
    )

    # One window per possible end position, up to and including the last row.
    dataX = [
        x[end - input_data_length:end]
        for end in range(input_data_length, len(ohlcv_data) + 1)
    ]

    if len(dataX) < 100:
        return None, None

    X_test = np.array(dataX)
    row = X_test.shape[1]
    col = X_test.shape[2]

    # Add a trailing axis of size 1 to every window.
    X_test = X_test.astype('float32').reshape(-1, row, col, 1)

    return X_test, closeprice


def _on_balance_volume(close, volume):
    """Return the running on-balance volume of two equal-length arrays as a list.

    Starts at 0; each step adds the row's volume when close rose, subtracts it
    when close did not rise and is not equal, and carries the value when equal.
    """
    obv = [0] * len(close)
    for m in range(1, len(close)):
        if close[m] > close[m - 1]:
            obv[m] = obv[m - 1] + volume[m]
        elif close[m] == close[m - 1]:
            obv[m] = obv[m - 1]
        else:
            obv[m] = obv[m - 1] - volume[m]
    return obv


def _label_extrema(close, check_span):
    """Flag rows whose close is a low / high relative to the next ``check_span`` rows.

    A row is flagged 1 when no later close inside the span is lower (higher)
    and the extreme value occurs at most 3 times in the row-plus-span window;
    otherwise 0.  The last ``check_span`` rows have no full look-ahead and
    stay NaN.

    :param close: pandas Series of close prices
    :return: ``(low_flags, high_flags)`` lists as long as ``close``
    """
    size = len(close)
    # `np.NaN` was removed in NumPy 2.0; `np.nan` is the supported spelling.
    low_flags = [np.nan] * size
    high_flags = [np.nan] * size

    for i in range(size - check_span):
        # Positional access via .iloc: plain `series[i]` on a non-integer index
        # relies on a deprecated positional fallback.
        window = close.iloc[i:i + 1 + check_span]
        current = window.iloc[0]
        ahead = window.iloc[1:]

        is_low = ahead.min() >= current
        if is_low:
            # sort_index() orders by price, so position 0 is the window minimum
            is_low = window.value_counts().sort_index().iloc[0] <= 3
        low_flags[i] = 1 if is_low else 0

        is_high = ahead.max() <= current
        if is_high:
            # position -1 is the window maximum
            is_high = window.value_counts().sort_index().iloc[-1] <= 3
        high_flags[i] = 1 if is_high else 0

    return low_flags, high_flags


def _flag_spans(flags):
    """Return an ``(x_start, x_end)`` pair for every row of the (n, 1) array above 0.5."""
    last = len(flags) - 1
    return [
        (m, m + 1) if m < last else (m - 1, m)  # keep the final span inside the data
        for m, flag in enumerate(flags[:, 0])
        if flag > 0.5
    ]


def made_x(file, input_data_length, model_num, check_span, get_fig):
    """Turn one saved OHLCV Excel file into training windows and low/high labels.

    The file ``dir + file`` is read, then ``OBV``, ``MA60`` (60-row rolling
    mean of ``close``), ``low_check`` and ``high_check`` columns are appended.
    Leading rows are trimmed (as many as ``MA60`` has NaNs when
    ``check_span < 60``, otherwise ``check_span`` rows) together with the
    last ``check_span`` rows, which carry no labels.  The first four columns,
    column 4, ``OBV`` and ``MA60`` are min-max scaled separately and joined
    into the feature matrix.

    Each sample is the ``input_data_length`` rows *before* row ``i`` paired
    with the labels *of* row ``i``, so the labelled row itself is never part
    of its own input.

    :param file: file name inside the module-level ``dir`` folder; when
        ``get_fig == 1`` it is split on whitespace and its first two tokens
        name the figure
    :param input_data_length: number of rows in each window
    :param model_num: only used in the figure output path
    :param check_span: look-ahead length, in rows, for the low/high labels
    :param get_fig: ``1`` to save a chart of the labels under ``Figure_data/``
    :return: ``None`` when no rows remain or fewer than 100 samples can be
        built; otherwise ``(dataX, dataY_low, dataY_high, sliced_ohlcv)``
        where the first three are lists and ``sliced_ohlcv`` holds the
        unscaled rows, without the two label columns, aligned with the labels
    """
    ohlcv_excel = pd.read_excel(dir + file, index_col=0)

    # Plain arrays avoid per-element DataFrame lookups inside the OBV loop.
    ohlcv_excel['OBV'] = _on_balance_volume(
        ohlcv_excel['close'].values, ohlcv_excel['volume'].values
    )
    ohlcv_excel['MA60'] = ohlcv_excel['close'].rolling(60).mean()

    low_flags, high_flags = _label_extrema(ohlcv_excel['close'], check_span)
    ohlcv_excel['low_check'] = low_flags
    ohlcv_excel['high_check'] = high_flags

    # ----------- Extract dataX, dataY -----------#
    # Keep only rows where every derived column has a value.
    if check_span < 60:
        first_row = ohlcv_excel['MA60'].isnull().sum()
    else:
        first_row = check_span
    # `np.float` was removed in NumPy 1.24; the builtin `float` is the same dtype.
    ohlcv_data = ohlcv_excel.values[first_row:-check_span].astype(float)

    if len(ohlcv_data) == 0:
        return None

    #          Pre-processing         #
    #   Fixed X_data    #
    price = ohlcv_data[:, :4]
    volume = ohlcv_data[:, [4]]
    OBV = ohlcv_data[:, [-4]]
    MA60 = ohlcv_data[:, [-3]]

    #   Flexible Y_data    #
    low_check = ohlcv_data[:, [-2]]
    high_check = ohlcv_data[:, [-1]]

    scaled_MA60 = min_max_scaler(MA60)
    x = np.concatenate(
        (min_max_scaler(price), min_max_scaler(volume), min_max_scaler(OBV), scaled_MA60),
        axis=1,
    )

    # The window stops one row before the labelled row: at entry time the
    # current candle is not complete yet, so only finished rows are used.
    label_rows = range(input_data_length, len(ohlcv_data))
    dataX = [x[i - input_data_length:i] for i in label_rows]
    dataY_low = [low_check[i] for i in label_rows]
    dataY_high = [high_check[i] for i in label_rows]

    if len(dataX) < 100:
        return None

    #       Unscaled feature rows matching the labels       #
    sliced_ohlcv = ohlcv_data[input_data_length:, :-2]

    if get_fig == 1:
        # Column 1 is what the chart labels as 'close'; scale it once for both panels.
        scaled_close = min_max_scaler(ohlcv_data[:, 1:2])
        panels = (
            (211, _flag_spans(low_check), 'm'),
            (212, _flag_spans(high_check), 'c'),
        )
        for position, spans, colour in panels:
            plt.subplot(position)
            plt.plot(scaled_close, 'r', label='close')
            plt.plot(scaled_MA60, 'b', label='MA60')
            plt.legend(loc='upper right')
            for start, end in spans:
                plt.axvspan(start, end, facecolor=colour, alpha=0.5)

        Date = file.split()[0]
        Coin = file.split()[1].split('.')[0]
        plt.savefig('Figure_data/%s_%s/%s %s.png' % (input_data_length, model_num, Date, Coin), dpi=500)
        plt.close()

    return dataX, dataY_low, dataY_high, sliced_ohlcv


if __name__ == '__main__':
    # Build the training set: run made_x() on every file in `ohlcv_list`,
    # pool the windows and labels, and save them as three .npy files.

    # ----------- Params -----------#
    input_data_length = 54
    check_span = 30
    get_fig = 0

    model_num = input('Press model number : ')

    Made_X = []
    Made_Y = []  # not filled by this script; kept so the name stays defined
    Made_Y_low = []
    Made_Y_high = []

    #    Make folder for figure
    # Best effort: the folder is only needed when made_x() saves figures, so a
    # failure is reported instead of stopping the run.
    try:
        os.makedirs('Figure_data/%s_%s' % (input_data_length, model_num), exist_ok=True)
    except OSError as e:
        print('Could not create figure folder:', e)

    # Create the output folders now so a missing folder is found before the
    # files are processed rather than at the final np.save calls.
    for out_dir in ('./Made_X', './Made_X_low', './Made_X_high'):
        os.makedirs(out_dir, exist_ok=True)

    for file in ohlcv_list:

        # Names look like '2019-10-25 FAB ohlcv.xlsx'; files whose month field is 1 are skipped.
        # NOTE(review): the reason for excluding that month is not visible here.
        if int(file.split()[0].split('-')[1]) == 1:
            continue

        result = made_x(file, input_data_length, model_num, check_span, get_fig)

        # ------------ Merge dataX / dataY when the file produced data ------------#
        if result is not None:

            Made_X += result[0]
            Made_Y_low += result[1]
            Made_Y_high += result[2]

            # Show the cumulative number of samples
            print(file, len(Made_X))

    # SAVING X, Y
    X = np.array(Made_X)
    Y_low = np.array(Made_Y_low)
    Y_high = np.array(Made_Y_high)

    np.save('./Made_X/Made_X %s_%s' % (input_data_length, model_num), X)
    np.save('./Made_X_low/Made_Y %s_%s' % (input_data_length, model_num), Y_low)
    np.save('./Made_X_high/Made_Y %s_%s' % (input_data_length, model_num), Y_high)



Press model number : 9
2019-10-25 FAB ohlcv.xlsx 234
2019-11-02 PAY ohlcv.xlsx 375
2019-11-13 DVP ohlcv.xlsx 508
2019-11-02 IOST ohlcv.xlsx 672
2019-10-28 FNB ohlcv.xlsx 1642
2019-11-14 QTUM ohlcv.xlsx 2200
2019-11-20 LBA ohlcv.xlsx 2751
2019-10-30 LTC ohlcv.xlsx 3465
2019-10-31 FX ohlcv.xlsx 3823
2019-10-16 STRAT ohlcv.xlsx 4068
2019-10-26 WTC ohlcv.xlsx 4350
2019-10-31 ICX ohlcv.xlsx 4464
2019-10-10 CHR ohlcv.xlsx 4651
2019-11-15 CTXC ohlcv.xlsx 5702
2019-11-02 CMT ohlcv.xlsx 6081
2019-10-26 PLY ohlcv.xlsx 7014
2019-10-26 BCH ohlcv.xlsx 8252
2019-10-22 BSV ohlcv.xlsx 9237
2019-10-25 GXC ohlcv.xlsx 9415
2019-11-02 LOOM ohlcv.xlsx 9560
2019-11-20 CTXC ohlcv.xlsx 10230
2019-10-31 BHP ohlcv.xlsx 10902
2019-10-26 LTC ohlcv.xlsx 11743
2019-11-24 VET ohlcv.xlsx 12045
2019-10-20 ZRX ohlcv.xlsx 12517
2019-11-15 ITC ohlcv.xlsx 13239
2019-10-29 LBA ohlcv.xlsx 13717
2019-10-21 BTC ohlcv.xlsx 15010
2019-10-31 BTC ohlcv.xlsx 16306
2019-11-13 PST ohlcv.xlsx 16487
2019-10-26 QTUM ohlcv.xlsx 17245
20

2019-10-24 FNB ohlcv.xlsx 115677
2019-11-21 CON ohlcv.xlsx 116723
2019-10-26 LINK ohlcv.xlsx 117455
2019-11-20 ZIL ohlcv.xlsx 118081
2019-11-14 OGO ohlcv.xlsx 118273
2019-10-30 OCN ohlcv.xlsx 118661
2019-10-21 BSV ohlcv.xlsx 119420
2019-10-16 DAC ohlcv.xlsx 120161
2019-10-26 WET ohlcv.xlsx 120375
2019-10-25 LINK ohlcv.xlsx 121023
2019-11-01 POWR ohlcv.xlsx 121132
2019-10-29 HDAC ohlcv.xlsx 121403
2019-10-25 MTL ohlcv.xlsx 121889
2019-11-01 BCD ohlcv.xlsx 121989
2019-10-31 TRUE ohlcv.xlsx 122094
2019-11-02 DAC ohlcv.xlsx 122908
2019-11-01 DAC ohlcv.xlsx 123857
2019-10-26 ADA ohlcv.xlsx 124702
2019-10-24 BSV ohlcv.xlsx 125423
2019-10-13 PPT ohlcv.xlsx 125986
2019-11-13 LBA ohlcv.xlsx 126099
2019-10-26 XMR ohlcv.xlsx 126238
2019-10-22 LAMB ohlcv.xlsx 127131
2019-10-27 FZZ ohlcv.xlsx 127793
2019-10-11 WTC ohlcv.xlsx 127998
2019-10-26 KNC ohlcv.xlsx 128242
2019-10-27 TRX ohlcv.xlsx 128862
2019-11-18 ADA ohlcv.xlsx 129185
2019-10-29 ORBS ohlcv.xlsx 129437
2019-11-19 FCT ohlcv.xlsx 129820
201

2019-10-16 BTG ohlcv.xlsx 228057
2019-11-02 PIVX ohlcv.xlsx 228196
2019-10-31 DAD ohlcv.xlsx 228753
2019-10-26 POLY ohlcv.xlsx 228869
2019-10-26 EOS ohlcv.xlsx 230114
2019-10-10 ZRX ohlcv.xlsx 230565
2019-10-18 FAB ohlcv.xlsx 230786
2019-11-21 CTXC ohlcv.xlsx 231684
2019-11-15 LAMB ohlcv.xlsx 232151
2019-10-21 LAMB ohlcv.xlsx 232687
2019-11-01 SNT ohlcv.xlsx 232820
2019-10-21 ZRX ohlcv.xlsx 233367
2019-10-30 XVG ohlcv.xlsx 233543
2019-10-28 XRP ohlcv.xlsx 234839
2019-10-29 WAVES ohlcv.xlsx 235291
2019-11-02 FNB ohlcv.xlsx 236088
2019-10-19 BCH ohlcv.xlsx 236738
2019-10-30 ORBS ohlcv.xlsx 236869
2019-10-25 BSV ohlcv.xlsx 237847
2019-11-01 IOST ohlcv.xlsx 238167
2019-10-10 TRUE ohlcv.xlsx 238595
2019-10-11 TMTG ohlcv.xlsx 239046
2019-11-13 FZZ ohlcv.xlsx 240181
2019-10-31 BCH ohlcv.xlsx 241361
2019-10-31 ZRX ohlcv.xlsx 241559
2019-10-14 TMTG ohlcv.xlsx 241927
2019-10-18 QTUM ohlcv.xlsx 242312
2019-11-02 SNT ohlcv.xlsx 242431
2019-10-26 FNB ohlcv.xlsx 243220
2019-10-30 CTXC ohlcv.xlsx 243