### 1. Library

In [1]:
# Library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Warnings
import warnings
warnings.filterwarnings(action='ignore') 

### 2. Data Loading

In [2]:
# Read Data
data = pd.read_csv('./data/DAILY_DATA.csv')
data = data[['date', 'USD']].set_index('date')

# Data Head
data

Unnamed: 0_level_0,USD
date,Unnamed: 1_level_1
2010-01-04,1161.0
2010-01-05,1147.3
2010-01-06,1141.4
2010-01-07,1132.0
2010-01-08,1134.1
...,...
2020-01-10,1160.0
2020-01-13,1161.1
2020-01-14,1155.7
2020-01-15,1153.1


### 3. Data Shfit 생성

In [3]:
# Input에 포함시킬 기간
PAST_DAYS = 60

# Data Shfit 생성    
cnt = ['USD']
for step1 in cnt : 
    for step2 in range(1,PAST_DAYS+1):
        data['{}_shift-{}'.format(step1,step2)] = data[step1].shift(step2)
        
# Nan Omit
data = data.dropna()

# Data Head
data

Unnamed: 0_level_0,USD,USD_shift-1,USD_shift-2,USD_shift-3,USD_shift-4,USD_shift-5,USD_shift-6,USD_shift-7,USD_shift-8,USD_shift-9,...,USD_shift-51,USD_shift-52,USD_shift-53,USD_shift-54,USD_shift-55,USD_shift-56,USD_shift-57,USD_shift-58,USD_shift-59,USD_shift-60
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2010-03-31,1132.6,1130.7,1140.2,1143.1,1140.7,1135.4,1134.6,1136.3,1133.4,1130.7,...,1122.2,1123.1,1127.0,1123.3,1121.3,1134.1,1132.0,1141.4,1147.3,1161.0
2010-04-01,1128.9,1132.6,1130.7,1140.2,1143.1,1140.7,1135.4,1134.6,1136.3,1133.4,...,1128.4,1122.2,1123.1,1127.0,1123.3,1121.3,1134.1,1132.0,1141.4,1147.3
2010-04-02,1124.8,1128.9,1132.6,1130.7,1140.2,1143.1,1140.7,1135.4,1134.6,1136.3,...,1123.4,1128.4,1122.2,1123.1,1127.0,1123.3,1121.3,1134.1,1132.0,1141.4
2010-04-05,1124.6,1124.8,1128.9,1132.6,1130.7,1140.2,1143.1,1140.7,1135.4,1134.6,...,1128.0,1123.4,1128.4,1122.2,1123.1,1127.0,1123.3,1121.3,1134.1,1132.0
2010-04-06,1122.9,1124.6,1124.8,1128.9,1132.6,1130.7,1140.2,1143.1,1140.7,1135.4,...,1137.9,1128.0,1123.4,1128.4,1122.2,1123.1,1127.0,1123.3,1121.3,1134.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-01-10,1160.0,1174.1,1165.3,1169.6,1162.7,1156.9,1157.8,1157.8,1160.9,1161.2,...,1169.6,1174.2,1171.0,1173.7,1171.4,1175.4,1180.5,1186.6,1187.0,1184.1
2020-01-13,1161.1,1160.0,1174.1,1165.3,1169.6,1162.7,1156.9,1157.8,1157.8,1160.9,...,1166.5,1169.6,1174.2,1171.0,1173.7,1171.4,1175.4,1180.5,1186.6,1187.0
2020-01-14,1155.7,1161.1,1160.0,1174.1,1165.3,1169.6,1162.7,1156.9,1157.8,1157.8,...,1168.4,1166.5,1169.6,1174.2,1171.0,1173.7,1171.4,1175.4,1180.5,1186.6
2020-01-15,1153.1,1155.7,1161.1,1160.0,1174.1,1165.3,1169.6,1162.7,1156.9,1157.8,...,1161.7,1168.4,1166.5,1169.6,1174.2,1171.0,1173.7,1171.4,1175.4,1180.5


### 4. Target, Input 지정

In [4]:
# X, y
X = data[data.columns[1:]]
y = data[data.columns[0]]

# X 순서 뒤집기
X = X[X.columns[::-1]]

### 5. MinMaxScaler

In [5]:
from sklearn.preprocessing import MinMaxScaler
sc_X = MinMaxScaler(feature_range=(0, 100))
X = sc_X.fit_transform(X)
sc_y = MinMaxScaler()
y = sc_y.fit_transform(y.values.reshape(-1,1))

### 6. Train Validation Test Split

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1234)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.2, random_state=1234)

### 7. Dataset Size

In [7]:
print('Train Set Size : {:,} obs' .format(len(X_train)))
print('Valid Set Size : {:,} obs' .format(len(X_valid)))
print('Test Set Size : {:,} obs' .format(len(X_test)))

Train Set Size : 1,745 obs
Valid Set Size : 437 obs
Test Set Size : 243 obs


### 8. Keras Seed

In [8]:
from tensorflow import set_random_seed
set_random_seed(1234)

### 9. DataSet For LSTM

In [9]:
X_train_t = X_train.reshape(X_train.shape[0], 60, 1)
X_valid_t = X_valid.reshape(X_valid.shape[0], 60, 1)
X_test_t = X_test.reshape(X_test.shape[0], 60, 1)

In [10]:
X_train_t.shape

(1745, 60, 1)

### 10. LSTM Structure

In [11]:
# Library
from keras.layers import LSTM 
from keras.models import Sequential 
from keras.layers import Dense 
import keras.backend as K
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout, Activation

# Model Structure
    
# Sequnetial Model 
K.clear_session() 
model = Sequential()

# LSTM Layer
model.add(LSTM(10, input_shape=(60,1))) # (timestep, feature) 
model.add(Dense(1))

# Multi GPU Model
from keras.utils.training_utils import multi_gpu_model
model = multi_gpu_model(model, gpus=3)

# 결과
model.compile(loss='mean_squared_error', optimizer='adam') 

# Model Structure
model.summary()

Using TensorFlow backend.


ModuleNotFoundError: No module named 'keras.utils.training_utils'

### 11. Model Score

In [None]:
# Early Stopping
early_stop = EarlyStopping(monitor='loss', patience=30, verbose=1)

# Model Fitting
model.fit(X_train_t, y_train, epochs=300, batch_size=30, verbose=1, callbacks=[early_stop])

### 11. Model RMSE

In [None]:
from sklearn.metrics import mean_squared_error

# Train
pred_train = model.predict(X_train)
print("Train rmse : {}" .format(mean_squared_error(y_train, pred_train)**0.5))

# Valid
pred_valid = model.predict(X_valid)
print("Valid rmse : {}" .format(mean_squared_error(y_valid, pred_valid)**0.5))

# Test
pred_test = model.predict(X_test)
print("Test rmse : {}" .format(mean_squared_error(y_test, pred_test)**0.5))

### 12. Data Scale 원래 데이터로 변환

In [None]:
# Data Reverse
y_test_origin = sc_y.inverse_transform(y_test)
y_pred_origin = sc_y.inverse_transform(pred_test.reshape(-1,1))

# Test Data
test = np.array([y_test_origin.reshape(-1), y_pred_origin.reshape(-1)]).transpose()
test = pd.DataFrame(test, columns = ['y_test', 'y_pred'])

### 13. 1일 뒤 예측 시각화

In [None]:
# Pred vs True Visualization
plt.figure(figsize = (16,6))
sns.lineplot(data = test, linewidth = 2.5)
plt.show()

### 14. 2019년 예측

In [None]:
# Date for 2019
date_list = pd.date_range('2019-01-02','2020-01-15', freq='B')
date_list = list(date_list.strftime('%Y-%m-%d'))

In [None]:
# Virtual Data Setting
data = pd.read_csv('./data/DAILY_DATA.csv')
data = data[['date', 'USD']].set_index('date')
data = data[2167:2227]

In [None]:
# Virtual Prediction
for step in date_list :
    
    # Prediction
    virtual_X = sc_X.transform(np.array(data[-60:].T))
    pred = sc_y.inverse_transform(model.predict(virtual_X).reshape(-1,1)).reshape(-1)[0]
    pred_df = pd.DataFrame({'USD' : pred}, index=[step])

    # Data Append
    data = data.append(pred_df)

### 15. 결과 DataFrame으로 변환

In [None]:
# Virtual 2019 ~ 2020 Data
data = data[60:]
usd_pred = data
usd_pred.columns = ['USD_PRED']

In [None]:
# Real 2019 ~ 2020 Data
data = pd.read_csv('./data/DAILY_DATA.csv')
data = data[['date', 'USD']].set_index('date')
data = data[2227:-1]
usd_true = data
usd_true.columns = ['USD_TRUE']

In [None]:
# Concat
virtual_data = pd.concat([usd_true, usd_pred], axis=1)

In [None]:
# NaN Omit
virtual_data = virtual_data.dropna()

In [None]:
# Result
virtual_data

### 16. 2019년 1년의 RMSE

In [None]:
# RMSE
from sklearn.metrics import mean_squared_error
print("rmse : {}" .format(mean_squared_error(virtual_data['USD_TRUE'], virtual_data['USD_PRED'])**0.5))

### 17. Pred vs True Visualization

In [None]:
plt.figure(figsize = (16,6))
ax = sns.lineplot(data = virtual_data, linewidth = 2.5)
ax.set(xticks=virtual_data.index[0::30])
plt.show()

### 18. 만약 3개월만 예측을 한다면...?

In [None]:
# Result
virtual_data = virtual_data[0:60]

# RMSE
from sklearn.metrics import mean_squared_error
rmse = mean_squared_error(virtual_data['USD_TRUE'], virtual_data['USD_PRED'])**0.5
print("rmse : {}" .format(rmse))
print("percent rmse : {:.2f}%" .format(100 * rmse / (1140-1115)))

# Pred vs True Visualization
plt.figure(figsize = (16,6))
ax = sns.lineplot(data = virtual_data, linewidth = 2.5)
ax.set(xticks=virtual_data.index[0::5])
plt.show()