In [None]:
# Fetch the TSF-SPI project; later cells import from its `utils` package and read `./data/kospi.csv`.
!git clone https://github.com/cye2020/TSF-SPI.git

In [None]:
cd TSF-SPI/

In [None]:
# Bring the local checkout up to date with its remote.
!git pull

In [None]:
# User-defined Data preprocessor
from utils.data_loader import DataLoader
from utils.spliter import WindowGenerator

# User-defined Visualization and Print Results
from utils.plot_util import plot
from utils.evaluation import printResult

# Model Callbacks
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

# Model Optimizer
from tensorflow.keras.optimizers import Adam, RMSprop, SGD

# Dataframe
import pandas as pd
import numpy as np

# Model Structure
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv1D, MaxPool1D, Input, Bidirectional, LSTM
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, InputLayer
from tensorflow.keras import backend as K

# Seed
from tensorflow.random import set_seed
from numpy.random import randint

In [None]:
# Series to forecast; it is also the first entry of the feature list.
Target = 'Kospi'
features = [Target, 'USD/KRW', 'GDP', 'IAIP', 'LIR', 'M1', 'disease']

# Columns requested from the CSV: 'Date' followed by every feature.
selected_columns = np.concatenate((['Date'], features))

# NOTE(review): split_date is presumably the train/test boundary inside load_csv — confirm.
data_loader = DataLoader()
train, test, date_lists = data_loader.load_csv(
    path='./data/kospi.csv',
    features=selected_columns,
    split_date='2021-01-01',
)


In [None]:
# Re-read the raw CSV and show, per column, whether it contains any missing value.
data = pd.read_csv('./data/kospi.csv')
data.isna().any()

In [None]:
# Count the NaN entries in the test split.
# Only the last expression of a cell is displayed, so a single count is computed here.
boolarr = np.isnan(test)
np.count_nonzero(boolarr)

In [None]:
# Seed NumPy's and TensorFlow's global random generators for the CNN section.
seedNum = 3394
np.random.seed(seedNum)
tf.random.set_seed(seedNum)

In [None]:
# Window configuration for the CNN: (input_width, label_width, shift).
CNN_input = (30, 1, 1)
cnn_window_kwargs = dict(zip(('input_width', 'label_width', 'shift'), CNN_input))

# One window generator per split, configured identically.
wg_1 = WindowGenerator(train, **cnn_window_kwargs)
twg_1 = WindowGenerator(test, **cnn_window_kwargs)

# Split each generator's data into (inputs, labels).
train_cnn_x, train_cnn_y = wg_1.split_window()
test_cnn_x, test_cnn_y = twg_1.split_window()

In [None]:
# CNN regressor: one Conv1D + max-pool stage over the (30, 7) input,
# followed by a dense head that outputs a single value.
model = Sequential([
    Conv1D(64, 2, input_shape=(30, 7)),
    Activation('relu'),
    MaxPool1D(pool_size=2),
    Flatten(),
    Dense(100),
    Activation('relu'),
    Dropout(0.25),
    Dense(1),
])

In [None]:
# Print the layer-by-layer summary of the current model (the CNN at this point in the notebook).
model.summary()

In [None]:
# Learning rate of the model.
Lr = 0.001

# Optimizer.
Optimizer = Adam(learning_rate=Lr)

# Maximum number of training epochs (iterations).
Epochs = 100

# Fraction of the training data held out as validation data.
Validation_split = 0.2

# Batch size.
Batch_size = 64

# Loss function.
Loss = 'mean_squared_error'

# Callbacks (both monitor val_loss).
# Early stopping halts training after 10 epochs without improvement.
es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
# The LR scheduler must react sooner than early stopping: with an equal patience the halved
# learning rate would usually be applied on the very epoch training stops, and never be used.
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1)

In [None]:
# Compile with the configured optimizer and loss, then train the CNN.
model.compile(loss=Loss, optimizer=Optimizer)

cnn_fit_options = dict(
    epochs=Epochs,
    batch_size=Batch_size,
    validation_split=Validation_split,
    callbacks=[es, rlr],
    shuffle=True,
    verbose=1,
)
history = model.fit(train_cnn_x, train_cnn_y, **cnn_fit_options)

In [None]:
# Run the trained CNN on the training windows, then on the test windows.
train_cnn_predict, test_cnn_predict = (
    model.predict(windows) for windows in (train_cnn_x, test_cnn_x)
)

In [None]:
# Ground-truth frame: first column of the loader's original data, indexed by date_lists[0].
true_values = data_loader.original[:, 0:1]
true_dates = pd.Series(date_lists[0])
TRUE = pd.DataFrame(true_values, columns=["True"]).set_index(true_dates)

# Drop the first prediction and keep the last output column before converting to dated frames.
prediction_train = wg_1.to_pandas(train_cnn_predict[1:, -1], date_lists[1], name="Train")
prediction_test = twg_1.to_pandas(test_cnn_predict[1:, -1], date_lists[2], name="Test")

plot(TRUE, prediction_train, prediction_test, show=True)

In [None]:
# Pass the CNN test labels and predictions to the project's printResult helper
# (presumably prints evaluation metrics for the test period — see utils.evaluation).
printResult(date_lists[2], test_cnn_y, test_cnn_predict, Target, features)

## Model - RNN

In [None]:
# Seed NumPy's and TensorFlow's global random generators for the RNN section.
seedNum = 2020
np.random.seed(seedNum)
tf.random.set_seed(seedNum)

In [None]:
# Window configuration for the RNN: (input_width, label_width, shift).
# NOTE(review): label_width is 5 while the RNN's output layer has a single unit — confirm the
# label shape produced by split_window matches the model output.
RNN_input = (30, 5, 1)
rnn_window_kwargs = dict(zip(('input_width', 'label_width', 'shift'), RNN_input))

# Normalization
# NOTE(review): train and test go through separate standardScale calls; whether the second call
# refits the scaler that inverseScale later relies on is not visible here — confirm.
RNN_train = data_loader.standardScale(train)
RNN_test = data_loader.standardScale(test)

# One window generator per scaled split, configured identically.
wg_2 = WindowGenerator(RNN_train, **rnn_window_kwargs)
twg_2 = WindowGenerator(RNN_test, **rnn_window_kwargs)

train_rnn_x, train_rnn_y = wg_2.split_window()
test_rnn_x, test_rnn_y = twg_2.split_window()

In [None]:
# Hidden units of each stacked Bidirectional LSTM layer, in stacking order.
units = [128, 64]
# Derived from `units` so the layer count cannot drift from the list length.
num_of_stack = len(units)

def is_last_layer(stack_id, num_of_stack):
    """Return True when ``stack_id`` is the index of the final layer of a ``num_of_stack``-layer stack."""
    final_index = num_of_stack - 1
    return bool(stack_id == final_index)

# Stacked Bidirectional-LSTM regressor over the (30, 7) input declared by the InputLayer.
model = Sequential()
model.add(InputLayer(input_shape=(30,7)))
for i in range(num_of_stack):
    # Compare the layer *index* (not its unit count) with the stack size; passing units[i]
    # meant the last-layer case was never detected and every LSTM returned full sequences.
    last = is_last_layer(i, num_of_stack)
    # Every layer except the last returns the whole sequence so the next LSTM can consume it;
    # the last one returns only its final output. Each layer takes its own size, units[i],
    # rather than the entire list. The input shape comes from the InputLayer above.
    model.add(Bidirectional(LSTM(units=units[i], return_sequences=not last)))

# A single Dropout follows the recurrent stack; adding one inside the last-layer case as well
# would place two Dropout layers back to back.
model.add(Dropout(0.25))
model.add(Dense(units=60, activation='tanh'))
model.add(Dropout(0.25))
# With the last LSTM emitting a 2-D (batch, features) tensor, Flatten leaves the shape unchanged.
model.add(Flatten())
model.add(Dense(units=1, activation='linear'))

In [None]:
# Print the layer-by-layer summary of the current model (the RNN at this point in the notebook).
model.summary()

In [None]:
# Learning rate of the model.
Lr = 0.001

# Optimizer (a fresh instance for the newly built model).
Optimizer = Adam(learning_rate=Lr)

# Maximum number of training epochs (iterations).
Epochs = 100

# Fraction of the training data held out as validation data.
Validation_split = 0.2

# Batch size.
Batch_size = 64

# Loss function.
Loss = 'mean_squared_error'

# Callbacks (both monitor val_loss).
# Early stopping halts training after 10 epochs without improvement.
es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
# The LR scheduler must react sooner than early stopping: with an equal patience the halved
# learning rate would usually be applied on the very epoch training stops, and never be used.
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, verbose=1)

In [None]:
# Compile with the configured optimizer and loss, then train the RNN.
model.compile(loss=Loss, optimizer=Optimizer)

rnn_fit_options = dict(
    epochs=Epochs,
    batch_size=Batch_size,
    validation_split=Validation_split,
    callbacks=[es, rlr],
    shuffle=True,
    verbose=1,
)
history = model.fit(train_rnn_x, train_rnn_y, **rnn_fit_options)

In [None]:
# Run the trained RNN on the training windows, then on the test windows.
train_predict, test_predict = (
    model.predict(windows) for windows in (train_rnn_x, test_rnn_x)
)

# Denormalization: pass both prediction sets and the test labels through the loader's inverseScale.
inv_train_predict, inv_test_predict, original = map(
    data_loader.inverseScale, (train_predict, test_predict, test_rnn_y)
)

In [None]:
# Ground-truth frame: first column of the loader's original data, indexed by date_lists[0].
true_values = data_loader.original[:, 0:1]
true_dates = pd.Series(date_lists[0])
TRUE = pd.DataFrame(true_values, columns=["True"]).set_index(true_dates)

# Drop the first denormalized prediction of each split before converting to dated frames.
prediction_train = wg_2.to_pandas(inv_train_predict[1:], date_lists[1], name="Train")
prediction_test = twg_2.to_pandas(inv_test_predict[1:], date_lists[2], name="Test")

plot(TRUE, prediction_train, prediction_test, show=True)

In [None]:
# Pass the denormalized RNN test labels and predictions to the project's printResult helper
# (presumably prints evaluation metrics for the test period — see utils.evaluation).
printResult(date_lists[2], original, inv_test_predict, Target, features)