In [84]:
import pandas as pd
import numpy as np
import os

import tensorflow as tf
from tensorflow.keras import layers, losses, models, optimizers

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from numpy.lib.stride_tricks import sliding_window_view
from sklearn.preprocessing import MinMaxScaler


def data_scaling(data):
    """Scale ``data`` with a freshly created ``MinMaxScaler`` (default settings).

    Args:
        data: Array-like or DataFrame handed straight to
            ``MinMaxScaler.fit_transform``.

    Returns:
        Whatever ``fit_transform`` returns for ``data``.

    Note:
        The scaler is fitted and discarded inside this call, so it cannot be
        reused afterwards (for example for ``inverse_transform``).
    """
    return MinMaxScaler().fit_transform(data)


def data_sliding(data, n_features, Time_window):
    """Cut ``data`` into overlapping windows of ``Time_window`` consecutive rows.

    Args:
        data: 2-D array of shape ``(n_rows, n_features)``.
        n_features: Width of each window; the final reshape only succeeds when
            this equals ``data.shape[1]``, i.e. every window spans all columns.
        Time_window: Number of consecutive rows in each window.

    Returns:
        Array of shape ``(n_rows - Time_window + 1, Time_window, n_features, 1)``
        where window ``i`` holds rows ``i .. i + Time_window - 1`` and the
        trailing axis has length 1.
    """
    windows = sliding_window_view(data, window_shape=(Time_window, n_features))
    n_windows = data.shape[0] - Time_window + 1
    target_shape = (n_windows, Time_window, n_features, 1)
    return windows.reshape(target_shape)


def data_load_processing(FILENAME, Time_window=None):
    """Load a CSV, drop its ``Time`` column, scale it and cut it into windows.

    The first ten rows of the raw file are shown with ``display`` (available
    inside a notebook kernel) and the remaining column names are printed.

    Args:
        FILENAME: Path passed to ``pd.read_csv``. The file must contain a
            ``Time`` column, which is dropped before scaling.
        Time_window: Number of consecutive rows per window. Required in
            practice; the ``None`` default is kept only for signature
            compatibility.

    Returns:
        The array produced by ``data_sliding`` for the scaled features, i.e.
        shape ``(n_rows - Time_window + 1, Time_window, n_features, 1)``.

    Raises:
        TypeError: If ``Time_window`` is ``None``. Previously this surfaced
            later as an arithmetic ``TypeError`` inside ``data_sliding``,
            after the file had already been read and printed.
    """
    if Time_window is None:
        raise TypeError("Time_window must be an integer window length, not None")

    raw_df = pd.read_csv(FILENAME)
    display(raw_df.head(10))

    feature_df = raw_df.drop(columns=['Time'])
    n_features = feature_df.shape[1]
    print(feature_df.columns)

    # Scaling is fitted on the whole file; no train/validation split is made here.
    scaled = data_scaling(feature_df)

    return data_sliding(scaled, n_features, Time_window)


class Sampling(layers.Layer):
    """Reparameterisation layer: draws ``z`` from ``(z_mean, z_log_var)``.

    Computes ``z = z_mean + exp(0.5 * z_log_var) * epsilon`` where ``epsilon``
    comes from ``tf.keras.backend.random_normal``.
    """

    def call(self, inputs):
        """Return one random draw of ``z`` for every row of ``z_mean``.

        Args:
            inputs: Pair ``(z_mean, z_log_var)``. Axes 0 and 1 of ``z_mean``
                define the noise shape, so the tensors are presumably
                ``(batch, latent_dim)``.
        """
        z_mean, z_log_var = inputs
        mean_shape = tf.shape(z_mean)
        noise = tf.keras.backend.random_normal(shape=(mean_shape[0], mean_shape[1]))
        std = tf.exp(0.5 * z_log_var)
        return z_mean + std * noise

class VAE(tf.keras.Model):
    """Variational autoencoder that trains an encoder/decoder pair jointly.

    ``train_step`` minimises the sum of a binary cross-entropy reconstruction
    term and a KL term computed from ``z_mean`` / ``z_log_var``. Running means
    of the total, reconstruction and KL losses are kept in ``Mean`` metrics.
    """

    def __init__(self, encoder, decoder, **kwargs):
        """Store the two sub-models and create the three loss trackers.

        Args:
            encoder: Model called as ``encoder(data)``; must return the triple
                ``(z_mean, z_log_var, z)``.
            decoder: Model called as ``decoder(z)`` to produce the
                reconstruction.
            **kwargs: Forwarded unchanged to ``tf.keras.Model.__init__``.
        """
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder
        mean_metric = tf.keras.metrics.Mean
        self.total_loss_tracker = mean_metric(name="total_loss")
        self.reconstruction_loss_tracker = mean_metric(name="reconstruction_loss")
        self.kl_loss_tracker = mean_metric(name="kl_loss")

    @property
    def metrics(self):
        """Loss trackers in the order: total, reconstruction, KL."""
        trackers = [self.total_loss_tracker]
        trackers.append(self.reconstruction_loss_tracker)
        trackers.append(self.kl_loss_tracker)
        return trackers

    def train_step(self, data):
        """Run one optimisation step on ``data`` and return the running losses.

        Args:
            data: Batch passed directly to the encoder and also used as the
                reconstruction target.

        Returns:
            Dict with keys ``"loss"``, ``"reconstruction_loss"`` and
            ``"kl_loss"`` holding the current tracker results.
        """
        with tf.GradientTape() as tape:
            z_mean, z_log_var, z = self.encoder(data)
            reconstruction = self.decoder(z)

            # Cross-entropy is summed over axes 1 and 2 of each sample, then
            # averaged over the batch.
            bce = tf.keras.losses.binary_crossentropy(data, reconstruction)
            reconstruction_loss = tf.reduce_mean(tf.reduce_sum(bce, axis=(1, 2)))

            # KL term per latent dimension, summed per sample, batch-averaged.
            kl_terms = -0.5 * (1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var))
            kl_loss = tf.reduce_mean(tf.reduce_sum(kl_terms, axis=1))

            total_loss = reconstruction_loss + kl_loss

        weights = self.trainable_weights
        gradients = tape.gradient(total_loss, weights)
        self.optimizer.apply_gradients(zip(gradients, weights))

        tracked = (
            (self.total_loss_tracker, total_loss),
            (self.reconstruction_loss_tracker, reconstruction_loss),
            (self.kl_loss_tracker, kl_loss),
        )
        for tracker, value in tracked:
            tracker.update_state(value)

        return {
            "loss": self.total_loss_tracker.result(),
            "reconstruction_loss": self.reconstruction_loss_tracker.result(),
            "kl_loss": self.kl_loss_tracker.result(),
        }


def vae_model(input_shape, latent_dim, kernel_size, strides, padding):
    """Build the convolutional VAE encoder and decoder (channels-first variant).

    NOTE: a later cell in this notebook defines ``vae_model`` again with
    hard-coded layer settings; whichever definition ran last is in effect.

    Fixes relative to the previous version of this cell:
      * the latent sample is produced by the ``Sampling`` layer (the code
        called an undefined name ``sampling`` and raised ``NameError``);
      * the decoder's last layer uses a sigmoid, because ``VAE.train_step``
        scores reconstructions with binary cross-entropy against targets
        scaled to [0, 1].

    Args:
        input_shape: Shape of one sample (without the batch axis). Every
            convolution uses ``data_format="channels_first"``.
            NOTE(review): the notebook's windows are shaped (8, 54, 1), which
            looks channels-last; confirm this variant is intended.
        latent_dim: Size of the latent vector ``z``.
        kernel_size: Kernel size shared by all (transpose) convolutions.
        strides: Strides shared by all (transpose) convolutions.
        padding: Padding mode shared by all (transpose) convolutions.

    Returns:
        ``(encoder, decoder)``. ``encoder`` maps an input to
        ``[z_mean, z_log_var, z]``; ``decoder`` maps ``z`` to a reconstruction.
    """
    conv_kwargs = dict(
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        activation='relu',
        data_format="channels_first",
    )

    # encoder
    inputs = tf.keras.Input(shape=input_shape)
    h = inputs
    for filters, layer_name in ((32, "conv1"), (64, "conv2"), (128, "conv3")):
        h = layers.Conv2D(filters, name=layer_name, **conv_kwargs)(h)
    # Remember the feature-map shape so the decoder can start from it.
    shape_save = h.get_shape().as_list()
    h = layers.Flatten()(h)
    h = layers.Dense(16, activation="relu")(h)

    # vae
    z_mean = layers.Dense(latent_dim, name='z_mean')(h)
    z_log_var = layers.Dense(latent_dim, name='z_log_var')(h)
    z = Sampling()([z_mean, z_log_var])
    encoder = tf.keras.Model(inputs, [z_mean, z_log_var, z], name='encoder')

    # decoder
    latent_inputs = tf.keras.Input(shape=(latent_dim,))
    h = layers.Dense(shape_save[1] * shape_save[2] * shape_save[3], activation="relu")(latent_inputs)
    h = layers.Reshape((shape_save[1], shape_save[2], shape_save[3]))(h)
    for filters in (64, 32):
        h = layers.Conv2DTranspose(filters, **conv_kwargs)(h)
    # Sigmoid keeps reconstructions in (0, 1), as binary cross-entropy expects.
    output_kwargs = dict(conv_kwargs, activation="sigmoid")
    decoder_outputs = layers.Conv2DTranspose(1, **output_kwargs)(h)
    decoder = tf.keras.Model(latent_inputs, decoder_outputs, name="decoder")

    return encoder, decoder


if __name__ == '__main__':
    # NOTE(review): TensorFlow is already imported at this point; the log-level
    # variable is usually read when TensorFlow loads, so setting it here may
    # have no effect. Confirm, or set it before `import tensorflow`.
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    # TF1-compat session with allow_growth=True, which asks TensorFlow to grow
    # GPU memory as needed instead of reserving it up front. `session` is not
    # referenced again in the visible cells.
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.compat.v1.Session(config=config)

    # --------------------------------- User-configurable variables ---------------------------------------
    FILENAME = "BTCUSDT_220705.csv"
    Time_window = 8
    #-------------------------------------------------------------------------------------

    # Load the data: scaled features cut into windows of Time_window rows.
    x_train = data_load_processing(FILENAME, Time_window=Time_window)


Unnamed: 0,Time,Open,High,Low,Close,Volume,close_ma5,volume_ma5,close_ema5,volume_ema5,...,volume_ema120_ratio,open_lastclose_ratio,high_close_ratio,low_close_ratio,close_lastclose_ratio,volume_lastvolume_ratio,MACD,RSI,Fast_k,MFI
0,2022-06-01 01:59:00,31910.0,31919.5,31919.0,31910.0,81.209,31898.2,79.6376,31896.188831,74.781966,...,0.754098,0.000219,0.000298,0.000282,0.000219,-0.02232,20.000756,94.711538,90.776699,90.8177
1,2022-06-01 02:00:00,31919.0,31919.5,31905.5,31900.0,95.428,31901.6,77.4962,31897.459221,81.663977,...,1.020347,0.000282,0.000611,0.000172,-0.000313,0.175091,17.810787,86.403509,81.067961,82.007609
2,2022-06-01 02:01:00,31905.5,31912.0,31912.0,31905.0,37.872,31904.1,64.9128,31899.972814,67.066652,...,-0.195162,0.000172,0.000219,0.000219,0.000157,-0.603135,16.081151,86.462882,85.92233,83.652088
3,2022-06-01 02:02:00,31912.0,31912.0,31912.0,31911.5,17.175,31905.9,62.9494,31903.815209,50.436101,...,-0.630554,0.000219,1.6e-05,1.6e-05,0.000204,-0.546499,14.885505,91.341991,92.23301,86.144146
4,2022-06-01 02:03:00,31912.0,31912.0,31911.5,31911.5,21.722,31907.6,50.6812,31906.376806,40.864734,...,-0.527987,1.6e-05,1.6e-05,0.0,0.0,0.264745,13.223735,91.341991,90.588235,89.326028
5,2022-06-01 02:04:00,31911.5,31912.0,31912.0,31911.5,2.976,31907.9,35.0346,31908.084537,28.235156,...,-0.93417,0.0,1.6e-05,1.6e-05,0.0,-0.862996,11.326779,91.341991,87.5,89.477159
6,2022-06-01 02:05:00,31912.0,31912.0,31912.0,31911.5,5.184,31910.2,16.9858,31909.223025,20.551437,...,-0.883383,1.6e-05,1.6e-05,1.6e-05,0.0,0.741935,9.356041,89.690722,86.086957,89.331419
7,2022-06-01 02:06:00,31912.0,31912.0,31906.0,31906.0,9.439,31910.4,11.2992,31908.148683,16.847292,...,-0.784475,1.6e-05,0.000188,0.0,-0.000172,0.820795,6.719086,81.871345,72.44898,86.788193
8,2022-06-01 02:07:00,31906.0,31906.5,31900.0,31895.0,40.308,31907.1,15.9258,31903.765789,24.667528,...,-0.078253,0.0,0.000361,0.000157,-0.000345,3.270368,3.085516,69.186047,31.944444,83.113385
9,2022-06-01 02:08:00,31900.0,31900.5,31879.5,31862.5,100.942,31897.3,31.7698,31890.010526,50.092352,...,1.253154,0.000157,0.001193,0.000534,-0.001019,1.504267,-3.733307,46.363636,-42.5,77.922744


Index(['Open', 'High', 'Low', 'Close', 'Volume', 'close_ma5', 'volume_ma5',
       'close_ema5', 'volume_ema5', 'close_ma5_ratio', 'volume_ma5_ratio',
       'close_ema5_ratio', 'volume_ema5_ratio', 'close_ma10', 'volume_ma10',
       'close_ema10', 'volume_ema10', 'close_ma10_ratio', 'volume_ma10_ratio',
       'close_ema10_ratio', 'volume_ema10_ratio', 'close_ma20', 'volume_ma20',
       'close_ema20', 'volume_ema20', 'close_ma20_ratio', 'volume_ma20_ratio',
       'close_ema20_ratio', 'volume_ema20_ratio', 'close_ma60', 'volume_ma60',
       'close_ema60', 'volume_ema60', 'close_ma60_ratio', 'volume_ma60_ratio',
       'close_ema60_ratio', 'volume_ema60_ratio', 'close_ma120',
       'volume_ma120', 'close_ema120', 'volume_ema120', 'close_ma120_ratio',
       'volume_ma120_ratio', 'close_ema120_ratio', 'volume_ema120_ratio',
       'open_lastclose_ratio', 'high_close_ratio', 'low_close_ratio',
       'close_lastclose_ratio', 'volume_lastvolume_ratio', 'MACD', 'RSI',
       'Fast_k', 

In [83]:
# Echo the windowed training array returned by data_load_processing.
x_train

array([[[[0.99937045],
         [1.        ],
         [1.        ],
         ...,
         [0.94711538],
         [0.96202313],
         [0.91305032]],

        [[1.        ],
         [1.        ],
         [0.99905568],
         ...,
         [0.86403509],
         [0.91960095],
         [0.82447666]],

        [[0.99905568],
         [0.99947368],
         [0.99951035],
         ...,
         [0.86462882],
         [0.94081204],
         [0.84100969]],

        ...,

        [[0.99947538],
         [0.99947368],
         [0.99951035],
         ...,
         [0.91341991],
         [0.94770564],
         [0.89957297]],

        [[0.99951035],
         [0.99947368],
         [0.99951035],
         ...,
         [0.89690722],
         [0.94153137],
         [0.89810776]],

        [[0.99951035],
         [0.99947368],
         [0.99909065],
         ...,
         [0.81871345],
         [0.88194045],
         [0.87253902]]],


       [[[1.        ],
         [1.        ],
         [0.99

In [68]:
def vae_model(input_shape, latent_dim, kernel_size, strides, padding):
    """Build the convolutional VAE encoder and decoder used in this notebook.

    Fix relative to the previous version of this cell: the decoder's last
    layer uses a sigmoid instead of ReLU, because ``VAE.train_step`` scores
    reconstructions with binary cross-entropy against targets scaled to
    [0, 1], which expects outputs in that range.

    Args:
        input_shape: Shape of one sample (without the batch axis), channels
            last. The decoder's kernels and strides are hand-tuned: for the
            ``(8, 54, 1)`` input used in this notebook the encoder ends at
            ``(1, 7, 128)`` and the decoder expands back to ``(8, 54, 1)``.
            Other input shapes are not guaranteed to be reproduced.
        latent_dim: Size of the latent vector ``z``.
        kernel_size: Accepted for signature compatibility but NOT used; every
            layer below hard-codes its own kernel size.
        strides: Accepted but NOT used (hard-coded per layer).
        padding: Accepted but NOT used (hard-coded per layer).

    Returns:
        ``(encoder, decoder)``. ``encoder`` maps an input to
        ``[z_mean, z_log_var, z]``; ``decoder`` maps ``z`` to a reconstruction.
    """
    # encoder: three stride-2 convolutions with 'same' padding
    inputs = tf.keras.Input(shape=input_shape)
    h = layers.Conv2D(32, kernel_size=(2,4), strides=(2,2), padding="same", activation='relu',
                      name="conv1")(inputs)
    h = layers.Conv2D(64, kernel_size=(2,4), strides=(2,2), padding="same", activation='relu',
                      name="conv2")(h)
    h = layers.Conv2D(128, kernel_size=(2,4), strides=(2,2), padding="same", activation='relu',
                      name="conv3")(h)
    # Remember the feature-map shape so the decoder can start from it.
    shape_save = h.get_shape().as_list()
    h = layers.Flatten()(h)
    h = layers.Dense(32, activation="relu")(h)

    # vae
    z_mean = layers.Dense(latent_dim, name='z_mean')(h)
    z_log_var = layers.Dense(latent_dim, name='z_log_var')(h)
    z = Sampling()([z_mean, z_log_var])
    encoder = tf.keras.Model(inputs, [z_mean, z_log_var, z], name='encoder')

    # decoder: 'valid' transpose convolutions sized to undo the encoder
    latent_inputs = tf.keras.Input(shape=(latent_dim,))
    h = layers.Dense(shape_save[1] * shape_save[2] * shape_save[3], activation="relu")(latent_inputs)
    h = layers.Reshape((shape_save[1], shape_save[2], shape_save[3]))(h)
    h = layers.Conv2DTranspose(64, kernel_size=(2,6), strides=(2,1), padding='valid', activation='relu')(h)
    h = layers.Conv2DTranspose(32, kernel_size=(2,4), strides=(2,2), padding='valid', activation='relu')(h)
    # Sigmoid keeps reconstructions in (0, 1), as binary cross-entropy expects.
    decoder_outputs = layers.Conv2DTranspose(1, kernel_size=(2,4), strides=(2,2), padding='valid',
                                             activation="sigmoid")(h)
    decoder = tf.keras.Model(latent_inputs, decoder_outputs, name="decoder")

    return encoder, decoder

In [81]:
# Shape check: (number of windows, Time_window, n_features, 1).
x_train.shape

(43074, 8, 54, 1)

In [69]:
# Build the VAE encoder/decoder.
# NOTE: the vae_model defined in the preceding cell hard-codes its kernel
# sizes, strides and padding, so kernel_size / strides / padding below are
# passed but do not influence the architecture.
latent_dim = 3
input_shape = (8, 54, 1)
kernel_size = (2,8)
strides = (2,2)
padding = "same"

encoder, decoder = vae_model(input_shape, latent_dim, kernel_size, strides, padding)
encoder.summary()
decoder.summary()

# Train the VAE on the windows (the custom train_step uses the input as its own target).
vae = VAE(encoder, decoder)
vae.compile(optimizer=tf.keras.optimizers.Adam())
vae.fit(x_train, epochs=10, batch_size=128)
vae.encoder.save("vae_encoder")
# The encoder returns (z_mean, z_log_var, z); latent_z is the sampled z.
# NOTE(review): latent_z is a random draw and no seed is set, so it changes
# between runs; consider z_mean_ if a deterministic embedding is wanted.
z_mean_, z_log_var_, latent_z = vae.encoder.predict(x_train)

# Reconstructions decoded from the sampled latent vectors.
decoded_latent_z = vae.decoder.predict(latent_z)

Model: "encoder"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_22 (InputLayer)          [(None, 8, 54, 1)]   0           []                               
                                                                                                  
 conv1 (Conv2D)                 (None, 4, 27, 32)    288         ['input_22[0][0]']               
                                                                                                  
 conv2 (Conv2D)                 (None, 2, 14, 64)    16448       ['conv1[0][0]']                  
                                                                                                  
 conv3 (Conv2D)                 (None, 1, 7, 128)    65664       ['conv2[0][0]']                  
                                                                                            



INFO:tensorflow:Assets written to: vae_encoder\assets


INFO:tensorflow:Assets written to: vae_encoder\assets




In [70]:
def lstm_preprocess(latent_z, seq_length):
    """Build (window, next-step) training pairs from a latent sequence.

    Args:
        latent_z: 2-D array of shape ``(n_steps, latent_dim)``.
        seq_length: Number of consecutive latent vectors per input window.

    Returns:
        ``(slide_z, dataY)`` where

        * ``slide_z`` has shape ``(n_steps - seq_length, seq_length, latent_dim)``
          and ``slide_z[i]`` is ``latent_z[i : i + seq_length]``;
        * ``dataY`` has shape ``(n_steps - seq_length, latent_dim)`` and
          ``dataY[i]`` is ``latent_z[i + seq_length]``, the vector that
          follows window ``i``.

        When ``n_steps == seq_length`` both outputs are empty but keep their
        trailing dimensions (``dataY`` used to collapse to shape ``(0,)``).
    """
    n_steps, n_dims = latent_z.shape
    n_windows = n_steps - seq_length + 1

    windows = sliding_window_view(latent_z, (seq_length, n_dims))
    windows = windows.reshape(n_windows, seq_length, n_dims)
    # The last window has no following vector to predict, so it is dropped.
    slide_z = windows[:-1]

    # Targets are simply the sequence shifted by seq_length (copied, not a view).
    dataY = np.array(latent_z[seq_length:])

    return slide_z, dataY

In [74]:
def lstm_model(seq_length, latent_dim, lstm_z, dataY):
    """Build, train, save and apply a single-layer LSTM next-step predictor.

    Args:
        seq_length: Number of time steps in each input window.
        latent_dim: Size of each latent vector (input feature size and output size).
        lstm_z: Training inputs passed to ``fit`` and ``predict``; the model's
            input layer is declared as ``(seq_length, latent_dim)`` per sample.
        dataY: Training targets passed to ``fit``.

    Returns:
        The model's predictions for ``lstm_z`` (the same data it was trained on).

    Side effects:
        Prints the model summary, trains for 15 epochs with batch size 128 and
        saves the trained model under the path ``'lstm'``.
    """
    sequence_in = tf.keras.Input(shape=(seq_length, latent_dim))
    encoded = layers.LSTM(128, activation="tanh", name='lstm1')(sequence_in)
    next_z = layers.Dense(latent_dim, activation='linear')(encoded)

    model = tf.keras.Model(sequence_in, next_z, name='lstm')
    model.summary()

    model.compile(optimizer=tf.keras.optimizers.Adam(), loss='mae')
    model.fit(lstm_z, dataY, epochs=15, batch_size=128)
    model.save('lstm')

    return model.predict(lstm_z)

In [75]:
# Number of consecutive latent vectors fed to the LSTM per sample.
seq_length=30
# slide_z: input windows; dataY: the latent vector that follows each window.
slide_z, dataY = lstm_preprocess(latent_z, seq_length)

In [76]:
# Train the LSTM on the windows and keep its predictions for those same windows.
lstm_results=lstm_model(seq_length, latent_dim, slide_z, dataY)

Model: "lstm"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_25 (InputLayer)       [(None, 30, 3)]           0         
                                                                 
 lstm1 (LSTM)                (None, 128)               67584     
                                                                 
 dense_24 (Dense)            (None, 3)                 387       
                                                                 
Total params: 67,971
Trainable params: 67,971
Non-trainable params: 0
_________________________________________________________________
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15




INFO:tensorflow:Assets written to: lstm\assets


INFO:tensorflow:Assets written to: lstm\assets




In [77]:
# Re-read the raw CSV to get the unscaled prices used in the export cells below.
df2 = pd.read_csv(FILENAME)

In [78]:
# Drop the first Time_window - 1 rows so that positional row i of df2 is the
# last row of the i-th sliding window and therefore lines up with latent_z[i].
# (Was a hard-coded 7, which is only right for Time_window == 8.)
# NOTE: this cell reassigns df2, so re-running it without re-running the
# read_csv cell first drops further rows.
df2 = df2.iloc[Time_window - 1:]

In [79]:
# lstm_results[k] is the prediction for the latent vector that follows window k,
# i.e. positional row k + seq_length of df2, so skip the first seq_length prices.
# (Was a hard-coded 30, which is only right for seq_length == 30.)
price = df2["Open"].iloc[seq_length:].reset_index(drop=True)
latent_pred = pd.DataFrame(lstm_results)

# concat would silently pad with NaN if the two pieces were misaligned.
assert len(price) == len(latent_pred), "price rows and LSTM predictions are misaligned"

result1 = pd.concat([price, latent_pred], axis=1)
# One z<i> column per latent dimension (z0, z1, z2 for latent_dim == 3).
result1.columns = ["Price"] + [f"z{i}" for i in range(latent_pred.shape[1])]

In [80]:
# Save the price / predicted-latent table as "out_<FILENAME>" without the index column.
result1.to_csv("out_"+FILENAME,index=False)