In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import pyupbit
from sklearn.preprocessing import MinMaxScaler

import os
from glob import glob
from tqdm import tqdm

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split



class Data_preprocess():
    """Download OHLCV candles via pyupbit and derive normalised features and labels.

    The constructor fetches ``count`` candles for ``ticker`` at ``interval``
    ending at ``to`` and runs :meth:`preprocess` on them, which populates:

    * ``self.data``         - min-max normalised feature frame (no ``after10``).
    * ``self.label``        - normalised ``after10`` column (the next row's close).
    * ``self.dataset``      - un-normalised frame with ``avg_price`` and ``after10``.
    * ``self.norm_dataset`` - normalised frame including ``after10``.
    """

    def __init__(self, ticker, interval, to, count):
        self.data, self.label, self.dataset = self.preprocess(
            pyupbit.get_ohlcv(ticker=ticker, interval=interval, to=to, count=count))

    def MinMax(self, dataset_df):
        """Return ``dataset_df`` with every column rescaled by a freshly fitted MinMaxScaler.

        The result has the same column names and a new default integer index.
        The scaler is fitted on all rows passed in and is not kept afterwards.
        """
        norm = MinMaxScaler()
        norm_dataset = norm.fit_transform(dataset_df)
        return pd.DataFrame(norm_dataset, columns=list(dataset_df.columns))

    def add_after10(self, dataset_df):
        """Return the ``close`` column shifted one row earlier, as a float array.

        Element ``i`` is ``close`` of row ``i + 1``; the final element has no
        successor and is left at ``0.0``.  The shift is positional and depends
        only on ``dataset_df`` (no instance state is read), so it works for
        empty and single-row frames and for any index.
        """
        close = dataset_df['close'].to_numpy(dtype=float)
        after10 = np.zeros(len(close), dtype=float)
        after10[:-1] = close[1:]
        return after10

    def drop_feature(self, dataset_df):
        """Return a copy without the original (time) index and without the ``value`` column."""
        # Discard the index (timestamps) in favour of a default integer index.
        dataset_df = dataset_df.reset_index(drop=True)
        # Remove the 'value' column.
        dataset_df = dataset_df.drop(columns=['value'])
        return dataset_df

    def add_avgPrice(self, dataset_df):
        """Return the floored mean of ``high``, ``low``, ``open`` and ``close`` per row.

        NOTE(review): ``// 4`` floors the average to a whole number, which
        discards the fractional part for low-priced tickers - confirm this is
        intended rather than ``/ 4``.
        """
        return (dataset_df['high'] + dataset_df['low'] +
                dataset_df['open'] + dataset_df['close']) // 4

    def preprocess(self, dataset, latest=False):
        """Build normalised features/labels from a raw OHLCV frame.

        Args:
            dataset: raw candle DataFrame containing at least ``open``,
                ``high``, ``low``, ``close`` and ``value`` columns.
            latest: when true, treat ``dataset`` as new rows to append to the
                already stored ``self.dataset`` (whose oldest row is dropped
                first); otherwise process ``dataset`` from scratch.

        Returns:
            ``(features, label, dataset_df)`` where ``features`` is the
            normalised frame without ``after10``, ``label`` is the normalised
            ``after10`` column, and ``dataset_df`` is the processed form of the
            rows passed in.

        Raises:
            ValueError: if ``dataset`` is ``None`` (e.g. the download failed).
        """
        if dataset is None:
            raise ValueError(
                "preprocess() received None instead of an OHLCV DataFrame; "
                "the data download probably failed")

        # Drop unused columns and the time index.
        dataset_df = self.drop_feature(dataset)

        # Add the avg_price feature.
        dataset_df['avg_price'] = self.add_avgPrice(dataset_df)

        if latest:
            # Remove the oldest stored row and the stale after10 column; the
            # normalised frame is rebuilt from scratch below, so it needs no
            # separate trimming.
            self.dataset = self.dataset.drop([self.dataset.index[0]]).drop(columns=['after10'])

            # Append the new rows to the un-normalised history.
            self.dataset = pd.concat([self.dataset, dataset_df]).reset_index(drop=True)

            # Re-fit min-max normalisation on the updated history.
            self.norm_dataset = self.MinMax(self.dataset)

            # Recompute after10 for the un-normalised history.
            self.dataset['after10'] = self.add_after10(self.dataset)
        else:
            # Fit min-max normalisation on the freshly processed rows.
            self.norm_dataset = self.MinMax(dataset_df)

            # Add after10 to the un-normalised frame.
            dataset_df['after10'] = self.add_after10(dataset_df)

        # Label to predict: the next row's (normalised) close.
        # NOTE(review): the last row has no successor, so its label is the 0.0
        # padding written by add_after10 - consider excluding it downstream.
        self.norm_dataset['after10'] = self.add_after10(self.norm_dataset)

        # NOTE(review): with latest=True the third element is only the newly
        # processed rows, not the updated self.dataset - confirm callers expect that.
        return self.norm_dataset.drop(columns=['after10']), self.norm_dataset['after10'], dataset_df

    # Apply a sliding window to the dataset.
    def windowed_dataset(self, data, label, window_size, batch_size):
        """Pair sliding windows of ``data`` with labels and batch the result.

        Windows of ``window_size`` consecutive rows are taken with a shift of
        one row (incomplete windows are dropped).  Window ``k`` (rows ``k`` to
        ``k + window_size - 1``) is paired with ``label[k + window_size]``.

        NOTE(review): because ``label`` is already the next row's close, this
        pairing targets the close two rows after the window's last row -
        confirm that offset is intended.

        Returns:
            A ``tf.data.Dataset`` of ``(window, label)`` batches of size
            ``batch_size`` with a prefetch buffer of one batch.
        """
        windows = tf.data.Dataset.from_tensor_slices(data)
        windows = windows.window(window_size, shift=1, stride=1, drop_remainder=True)
        # Each window is itself a dataset; collapse it into a single tensor.
        windows = windows.flat_map(lambda w: w.batch(window_size))

        targets = tf.data.Dataset.from_tensor_slices(label[window_size:])

        paired = tf.data.Dataset.zip((windows, targets))
        return paired.batch(batch_size).prefetch(1)





In [2]:
import os

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, Conv1D, Lambda, Input, GlobalAveragePooling1D
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


class Custom_Model():
    """Stacked-LSTM regression model plus its compile/callback/load helpers.

    ``args`` is expected to expose ``lr``, ``save_path`` and ``early_stop``
    attributes, which are read by :meth:`compile_model` and :meth:`callback`.
    """

    def __init__(self, input_shape, args):
        self.args = args
        self.model = self.build_model(input_shape)


    def build_model(self, input_shape: tuple):
        """Build the (uncompiled) network for inputs of shape ``input_shape``.

        Architecture: three LSTM layers (128, 64, 32 units; tanh; dropout 0.2),
        all returning full sequences, followed by global average pooling and a
        single-unit Dense output.
        """
        inputs = Input(shape=input_shape)
        x = LSTM(128, return_sequences=True, activation='tanh', dropout=0.2)(inputs)
        x = LSTM(64, return_sequences=True, activation='tanh', dropout=0.2)(x)
        x = LSTM(32, return_sequences=True, activation='tanh', dropout=0.2)(x)
        x = GlobalAveragePooling1D()(x)
        output = Dense(1)(x)
        return Model(inputs, output)


    def compile_model(self, model):
        """Compile ``model`` with Huber loss, Adam and an MSE metric; return it.

        The learning rate comes from ``self.args.lr``.  It is passed as
        ``learning_rate`` because the ``lr`` keyword is a deprecated alias that
        newer Keras releases no longer accept.
        """
        loss = Huber()
        optimizer = Adam(learning_rate=self.args.lr)

        model.compile(loss=loss, optimizer=optimizer, metrics=['mse'])
        return model


    def callback(self):
        """Return the training callbacks: best-only checkpointing and early stopping.

        Both callbacks monitor ``val_loss``.  The checkpoint stores the whole
        model (not just weights) at ``<args.save_path>/checkpoint.ckpt``; early
        stopping uses ``args.early_stop`` as its patience.

        NOTE(review): some newer Keras versions restrict the allowed checkpoint
        file extensions - confirm ``.ckpt`` is accepted by the installed version.
        """
        saveCheckpoint = ModelCheckpoint(os.path.join(self.args.save_path, 'checkpoint.ckpt'),
                                    save_weights_only=False,
                                    save_best_only=True,
                                    monitor='val_loss',
                                    verbose=1)
        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=self.args.early_stop)
        return [saveCheckpoint, earlyStopping]


    def load_model(self, weight):
        """Load weights from the path ``weight`` into ``self.model`` and return the model."""
        self.model.load_weights(weight)
        return self.model


In [None]:

# --- Configuration ---------------------------------------------------------
WINDOW_SIZE = 6   # number of consecutive candles fed to the model per sample
FEATURES = 6      # columns per candle after preprocessing
                  # (presumably open/high/low/close/volume + avg_price - verify)
BATCH_SIZE = 6    # previously FEATURES was passed as the batch size; value kept

# NOTE: `args` (needs .lr, .save_path, .early_stop) and `train` are not defined
# in the cells above; they must be provided by an earlier cell or import.

# --- Model -----------------------------------------------------------------
# Use the classes defined in the cells above directly; no `models` / `dataset`
# module is imported in this notebook.
init_model = Custom_Model((WINDOW_SIZE, FEATURES), args)
model = init_model.compile_model(init_model.model)

callbacks = init_model.callback()

# --- Data ------------------------------------------------------------------
ticker = 'KRW-BTC'
interval = 'minute10'
to = '2021-11-10 00:10'
count = 1000

processed_data = Data_preprocess(ticker, interval, to, count)

# train_test_split returns (X_train, X_test, y_train, y_test) - features first,
# then labels.  shuffle=False keeps chronological order, so the validation set
# is the most recent 10% of rows.
train_data, val_data, train_label, val_label = train_test_split(
    processed_data.data,
    processed_data.label,
    test_size=0.1,
    random_state=0,
    shuffle=False)

train_dataset = processed_data.windowed_dataset(train_data, train_label, WINDOW_SIZE, BATCH_SIZE)
validation_dataset = processed_data.windowed_dataset(val_data, val_label, WINDOW_SIZE, BATCH_SIZE)

# --- Training --------------------------------------------------------------
train(model, train_dataset, validation_dataset, callbacks)