# Model Definition

In [41]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, LSTM, Conv1D, Lambda, Input, GlobalAveragePooling1D
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [42]:
class Custom_Model() :
    """Stacked-LSTM regressor together with the checkpoint path for its weights."""

    def __init__(self, weight, input_shape) :
        """Store the checkpoint path and build the network.

        weight      -- path later handed to ``load_weights`` by ``load_model``.
        input_shape -- shape of a single sample, forwarded to ``build_model``.
        """
        self.checkpoint = weight
        self.model = self.build_model(input_shape)

    def build_model(self, input_shape : tuple):
        """Assemble the Keras graph: three LSTM layers, average pooling, one output unit."""
        inputs = Input(shape=input_shape)

        # The recurrent layers differ only in width, so build them in a loop.
        # Every layer returns the full sequence so the pooling layer can average over it.
        features = inputs
        for units in (128, 64, 32):
            features = LSTM(units, return_sequences=True, activation='tanh', dropout=0.2)(features)

        pooled = GlobalAveragePooling1D()(features)
        prediction = Dense(1)(pooled)
        return Model(inputs, prediction)

    def load_model(self) :
        """Load the weights stored at ``self.checkpoint`` into the model and return it."""
        restored = self.model
        restored.load_weights(self.checkpoint)
        return restored


In [43]:
# One sample is `window_size` consecutive rows of `n_feature` columns each.
n_feature = 6
window_size = 6
input_shape = (window_size, n_feature)

# Checkpoint the weights are read from.
weight = './checkpoints/ckeckpointer.ckpt'

# Build the network, load the stored weights into it, and print its layout.
model = Custom_Model(weight, input_shape)
lstm_model = model.load_model()
lstm_model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 6, 6)]            0         
_________________________________________________________________
lstm (LSTM)                  (None, 6, 128)            69120     
_________________________________________________________________
lstm_1 (LSTM)                (None, 6, 64)             49408     
_________________________________________________________________
lstm_2 (LSTM)                (None, 6, 32)             12416     
_________________________________________________________________
global_average_pooling1d (Gl (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 130,977
Trainable params: 130,977
Non-trainable params: 0
_______________________________________________________

# Inference Data Preprocessing

In [44]:
import pandas as pd
import os
from glob import glob
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import pyupbit

In [45]:
class Data_preprocess() :
    """Download OHLCV candles with pyupbit and turn them into model inputs.

    Attributes (set by ``__init__`` / ``preprocess`` and refreshed by
    ``add_latest_data``):
        dataset      -- un-normalized features plus the 'after10' column.
        norm_dataset -- min-max normalized features plus the 'after10' column.
        data         -- ``norm_dataset`` without 'after10' (model input).
        label        -- the normalized 'after10' column (model target).
        scaler       -- the MinMaxScaler fitted by the most recent ``MinMax`` call.
    """

    def __init__(self, ticker, interval, to, count) :
        """Fetch candles via ``pyupbit.get_ohlcv`` and preprocess them."""
        self.data, self.label, self.dataset = self.preprocess(pyupbit.get_ohlcv(ticker=ticker, interval=interval, to=to, count=count))

    def MinMax(self, dataset_df) :
        """Fit a new MinMaxScaler on ``dataset_df`` and return the scaled frame.

        The fitted scaler is kept on ``self.scaler`` so that rows arriving
        later can be scaled with the same parameters instead of being re-fit
        on their own.
        """
        self.scaler = MinMaxScaler()
        norm_dataset = self.scaler.fit_transform(dataset_df)
        return pd.DataFrame(norm_dataset, columns=list(dataset_df.columns))


    def add_after10(self, dataset_df) :
        """Return, for every row of ``dataset_df``, the 'close' of the next row.

        The last row has no successor and gets 0. The result is sized from
        ``dataset_df`` itself, so it works for frames of any length
        (previously it was sized from ``self.norm_dataset``).
        """
        close = dataset_df['close'].to_numpy()
        after10 = np.zeros(len(close), dtype=float)
        # Shift by one position; the slices are empty for 0- or 1-row frames.
        after10[:-1] = close[1:]
        return after10


    def drop_feature(self, dataset_df) :
        """Return a copy without the time index and without the 'value' column."""
        # Discard the (time) index.
        dataset_df = dataset_df.reset_index(drop=True)
        # Discard the 'value' column.
        dataset_df = dataset_df.drop(columns=['value'])
        return dataset_df


    def add_avgPrice(self, dataset_df) :
        """Return the floor-divided mean of high, low, open and close per row."""
        # Floor division (`//`) is kept on purpose: the features must be built
        # the same way as before.
        return (dataset_df['high'] + dataset_df['low'] +
                dataset_df['open'] + dataset_df['close']) // 4


    def preprocess(self, dataset) :
        """Build the feature/label frames from a raw OHLCV frame.

        Returns a tuple ``(normalized features, normalized label, raw frame)``.
        Also stores the normalized frame (including 'after10') on
        ``self.norm_dataset``.
        """
        # Drop the columns that are not used as features.
        dataset_df = self.drop_feature(dataset)

        # Add the avg_price feature.
        dataset_df['avg_price'] = self.add_avgPrice(dataset_df)

        # Apply min-max normalization (MinMaxScaler).
        self.norm_dataset = self.MinMax(dataset_df)

        # Label to predict: the next row's close (10 minutes later for
        # 10-minute candles).
        self.norm_dataset['after10'] = self.add_after10(self.norm_dataset)
        dataset_df['after10'] = self.add_after10(dataset_df)

        return self.norm_dataset.drop(columns=['after10']), self.norm_dataset['after10'], dataset_df


    def add_latest_data(self, latest_data) :
        """Slide the stored frames forward: drop the oldest row, append ``latest_data``.

        ``latest_data`` is a raw OHLCV frame in the same layout that
        ``preprocess`` receives. It is scaled with the scaler fitted during
        preprocessing (not re-fit), appended to both the raw and the
        normalized frame, and 'after10' is recomputed so the previously last
        row now points at the new close. ``self.data`` and ``self.label`` are
        refreshed to match. Exactly one old row is dropped per call.
        """
        latest_raw = self.drop_feature(latest_data)
        latest_raw['avg_price'] = self.add_avgPrice(latest_raw)
        if latest_raw.empty:
            # Nothing to append; leave the stored frames untouched.
            return

        # Feature columns in the order the scaler was fitted on.
        feature_cols = [col for col in self.dataset.columns if col != 'after10']
        latest_raw = latest_raw[feature_cols]
        latest_norm = pd.DataFrame(self.scaler.transform(latest_raw), columns=feature_cols)

        # Drop the oldest row and append the new one(s), for both frames.
        raw = pd.concat([self.dataset[feature_cols].iloc[1:], latest_raw], ignore_index=True)
        norm = pd.concat([self.norm_dataset[feature_cols].iloc[1:], latest_norm], ignore_index=True)

        # Recompute the label column over the whole frame.
        raw['after10'] = self.add_after10(raw)
        norm['after10'] = self.add_after10(norm)

        self.dataset = raw
        self.norm_dataset = norm
        self.data = norm.drop(columns=['after10'])
        self.label = norm['after10']


    # Apply a sliding window to the dataset.
    def windowed_dataset(self, window_size, batch_size) :
        """Return a batched ``tf.data.Dataset`` of (window, label) pairs.

        Window ``i`` holds rows ``i .. i + window_size - 1`` of ``self.data``
        and is paired with ``self.label[window_size + i]``.
        """
        sliced_data = tf.data.Dataset.from_tensor_slices(self.data)
        sliced_data = sliced_data.window(window_size, shift=1, stride=1, drop_remainder=True)
        sliced_data = sliced_data.flat_map(lambda x : x.batch(window_size))

        # NOTE(review): label[k] is close[k + 1], so this pairs each window
        # with the close two rows after the window's last row. Confirm this
        # matches how the model was trained before changing it.
        sliced_label = tf.data.Dataset.from_tensor_slices(self.label[window_size:])

        sliced_dataset = tf.data.Dataset.zip((sliced_data, sliced_label))

        return sliced_dataset.batch(batch_size).prefetch(1)
    

        
        

In [47]:
# Candle request: market, candle size, end time and number of candles.
ticker = 'KRW-BTC'
interval = 'minute10'
to = '2021-11-10 00:10'
count = 1000

# Parameters of the tf.data windowing pipeline.
window_size = 6
batch_size = 1

processed_data = Data_preprocess(ticker, interval, to, count)
dataset = processed_data.windowed_dataset(window_size, batch_size)

# Show the first (window, label) batch as a sanity check.
for data in dataset.take(1):
    print("Data ==> ", data[0], sep="\n")
    print("\nLabel ==> ", data[1], sep="\n")

# Raw frame first, then the normalized frame.
print(processed_data.dataset, processed_data.norm_dataset, sep="\n")

Data ==> 
tf.Tensor(
[[[0.22419028 0.21817058 0.23657237 0.203964   0.12238874 0.21419267]
  [0.2041498  0.20220437 0.23677737 0.20993023 0.07456903 0.20658573]
  [0.21012146 0.2092089  0.24446494 0.21266053 0.07799429 0.21255145]
  [0.21295547 0.20879687 0.24395244 0.21963798 0.0829506  0.21484395]
  [0.21973684 0.20488257 0.22539975 0.19445849 0.06683506 0.20439744]
  [0.19463563 0.1894314  0.20110701 0.17382951 0.11533618 0.18254051]]], shape=(1, 6, 6), dtype=float64)

Label ==> 
tf.Tensor([0.16988573], shape=(1,), dtype=float64)
           open        high         low       close      volume   avg_price  \
0    75025000.0  75110000.0  74808000.0  74827000.0   42.323616  74942500.0   
1    74827000.0  74955000.0  74810000.0  74886000.0   26.774901  74869500.0   
2    74886000.0  75023000.0  74885000.0  74913000.0   27.888634  74926750.0   
3    74914000.0  75019000.0  74880000.0  74982000.0   29.500190  74948750.0   
4    74981000.0  74981000.0  74699000.0  74733000.0   24.260177  748

In [48]:
# Run the model over every window of the dataset.
pred = lstm_model.predict(dataset)
# windowed_dataset() slices the labels as label[window_size:], so the ground
# truth must start at the same offset. Use the variable, not a hard-coded 6,
# so the two stay aligned if the window length changes.
actual = np.asarray(processed_data.label)[window_size:]

# Keep column 0 of the 2-D prediction array so it is 1-D like `actual`.
pred = pred[:, 0]

print(pred.shape)
print(actual.shape)

(994,)
(994,)


# Data parsing every 10 minutes
# Delete the first row and append the latest data as the last row

### LIKE A QUEUE

In [62]:
# Get the current time as a string ('YYYY-MM-DD HH:MM')
import datetime
now = datetime.datetime.now()
str_now = f'{now:%Y-%m-%d %H:%M}'
print(type(str_now))
print(str_now)

<class 'str'>
2021-11-11 23:26


In [32]:
# Find the next 10-minute mark
import datetime
current_t = datetime.datetime.now()
# Minutes until the minute value reaches the next multiple of 10 (1..10).
remainder_min = 10 - (current_t.minute % 10)

print(remainder_min)
print("현재 시간 : ", current_t)

# NOTE(review): only whole minutes are added, so the seconds and microseconds
# of `current_t` carry over into `coming_10m`. Confirm whether it should be
# truncated to the exact boundary before it is used as a request time.
coming_10m = current_t + datetime.timedelta(minutes=remainder_min)
print("다음 10분 : ",coming_10m)

7
현재 시간 :  2021-11-12 10:03:28.042872
다음 10분 :  2021-11-12 10:10:28.042872


In [51]:
import pyupbit

# Request a single 10-minute candle ending at the upcoming 10-minute mark.
ticker = 'KRW-BTC'
interval = 'minute10'
to = coming_10m
count = 1

# The same request is issued twice so there are two frames to stack below.
latest_data = pyupbit.get_ohlcv(
    ticker=ticker,
    interval=interval,
    to=to,
    count=count,
)
_latest_data = pyupbit.get_ohlcv(
    ticker=ticker,
    interval=interval,
    to=to,
    count=count,
)

# Stack the two frames row-wise; the cell displays the result.
pd.concat([latest_data, _latest_data])

Unnamed: 0,open,high,low,close,volume,value
2021-11-12 10:10:00,78640000.0,78640000.0,78239000.0,78315000.0,153.887487,12077920000.0
2021-11-12 10:10:00,78640000.0,78640000.0,78239000.0,78315000.0,153.887487,12077920000.0
