#Import Module

In [None]:
!pip install finance-datareader
!pip install yfinance

In [109]:
import FinanceDataReader as fdr
import matplotlib.pyplot as plt
import yfinance as yf
import numpy as np
import pandas as pd
import seaborn as sns
from fbprophet import Prophet
from pandas_datareader import data
import tensorflow as tf
from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense, Activation
from keras.callbacks import EarlyStopping
from bokeh.plotting import figure
from bokeh.io import show, output_notebook
from sklearn.preprocessing import MinMaxScaler

# Load Dataset


In [245]:
sp500 = fdr.DataReader('SPX', '2000-01-01', '2022-06-07') # S&P500 데이터 로드

# Check Dataset

In [207]:
sp500.head()

Unnamed: 0_level_0,Close,Open,High,Low,Volume,Change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-04,444.4,439.3,441.4,439.1,15600.0,0.0
2000-01-05,437.3,441.4,444.4,431.3,660660.0,-0.0158
2000-01-06,437.3,441.4,441.4,432.1,802720.0,0.0
2000-01-07,461.4,439.3,462.4,439.3,290110.0,0.0551
2000-01-10,472.9,469.4,474.5,468.4,6690.0,0.025


In [93]:
# 결측값 확인
for col in sp500.columns :
  missing = sp500.loc[sp500[col] == 0].shape[0]
  print(col + ': ' + str(missing))

Close: 0
Open: 0
High: 0
Low: 0
Volume: 0
Change: 499


In [119]:
sp500.index

DatetimeIndex(['2000-01-04', '2000-01-05', '2000-01-06', '2000-01-07',
               '2000-01-10', '2000-01-11', '2000-01-12', '2000-01-13',
               '2000-01-14', '2000-01-17',
               ...
               '2022-05-23', '2022-05-24', '2022-05-25', '2022-05-26',
               '2022-05-27', '2022-05-30', '2022-05-31', '2022-06-01',
               '2022-06-06', '2022-06-07'],
              dtype='datetime64[ns]', name='Date', length=5659, freq=None)

In [275]:
# 주가 그래프 출력 (종가)
p = figure(width=1000, height= 500, x_axis_type="datetime",
          title = "S&P500 Stock Price",
          y_axis_label='Price', x_axis_label='Period')

p.line(pd.to_datetime(sp500.index), sp500['Close'], line_width=2)
output_notebook()
show(p)

# Normalize Data

In [246]:
# 데이터 정규화 (MinMaxScaler 사용)
scaler = MinMaxScaler()
scale_cols = ['Close',	'Open',	'High',	'Low',	'Volume',	'Change']
scaled_df = scaler.fit_transform(sp500[scale_cols])
scaled_df = pd.DataFrame(scaled_df, columns=scale_cols)
print(scaled_df)

         Close      Open      High       Low    Volume    Change
0     0.008730  0.008266  0.008126  0.008746  0.003417  0.001085
1     0.008309  0.008391  0.008303  0.008277  0.144878  0.000925
2     0.008309  0.008391  0.008126  0.008325  0.176031  0.001085
3     0.009740  0.008266  0.009367  0.008758  0.063617  0.001645
4     0.010423  0.010057  0.010082  0.010511  0.001463  0.001339
...        ...       ...       ...       ...       ...       ...
5654  0.621977  0.617664  0.624433  0.622013  0.106117  0.001282
5655  0.609802  0.622722  0.620438  0.617194  0.062016  0.000893
5656  0.604754  0.614986  0.611432  0.609664  0.019583  0.001004
5657  0.617523  0.614986  0.616739  0.620206  0.025842  0.001294
5658  0.611881  0.615284  0.614682  0.612074  0.012888  0.000995

[5659 rows x 6 columns]


In [247]:
# 딥러닝 입력 데이터 
future_cols = ['Close',	'High',	'Low'] # 3가지 항목을 분석
future_df = pd.DataFrame(scaled_df, columns=future_cols)

# 정답 데이터 
label_cols = ['Close']
label_df = pd.DataFrame(scaled_df, columns=label_cols)

# DataFrame -> Numpy
label = label_df.to_numpy()
future = future_df.to_numpy()

# Using LSTM

# Create Windows

In [97]:
window_size = 50
future_list = []
label_list = []

for i in range(len(future)-window_size) :
  future_list.append(future[i:i+window_size]) # LSTM에 입력될 tensor 생성
  label_list.append(label[i+window_size]) # 정답 matrix 생성성

x = np.array(future_list) 
y = np.array(label_list)
print(x.shape, y.shape)

(5609, 50, 3) (5609, 1)


# Split Data

In [99]:
split = 200 * -1
x_train = x[0:split]
y_train = y[0:split]
print(x_train.shape, y_train.shape)

x_test = x[split:]
y_test = y[split:]
print(x_test.shape, y_test.shape)

(5409, 50, 3) (5409, 1)
(200, 50, 3) (200, 1)


# Build a Module

In [103]:
#LSTM 사용하여 모듈 생성
model = Sequential()

model.add(LSTM(128, activation='tanh', input_shape=x_train[0].shape)) # 입력 층
model.add(Dense(1, activation='linear')) # 출력 층
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_3 (LSTM)               (None, 128)               67584     
                                                                 
 dense_3 (Dense)             (None, 1)                 129       
                                                                 
Total params: 67,713
Trainable params: 67,713
Non-trainable params: 0
_________________________________________________________________


# Training

In [111]:
# MeanSquaredError / ADAM(RMSprop+Momentum) / Early Stopping 사용
model.compile(loss='mse', optimizer='adam', metrics=['mae']) 
early_stop = EarlyStopping(monitor='val_loss', patience=5)
model.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=15, epochs=100, callbacks=[early_stop])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


<keras.callbacks.History at 0x7fe631041850>

#Prediction

In [191]:
pred = model.predict(x_test)

# MAPE

In [271]:
mape = np.sum(abs(y_test - pred) / y_test / len(x_test))
print("LSTM 평균절대비오차(MAPE) : " + str(round(mape * 100, 3)) + "%")

LSTM 평균절대비오차(MAPE) : 1.636%


# Inverse Transformation

In [251]:
# 정규화 과정을 역산하여 원래 수치로 복구
scaled_real = scaler.fit_transform(sp500[label_cols])
scaled_real = pd.DataFrame(scaled_df, columns=label_cols)

rescaled_real = scaler.inverse_transform(label_df)
rescaled_pred = scaler.inverse_transform(pred)

# Chart

In [250]:
time = [i for i in range(len(y_test))]
p = figure(width=1000, height= 500,
          title = "Predict : Blue / Real : Red",
          x_axis_label='time', y_axis_label='Price')

p.multi_line([time, time], [rescaled_real[split:], rescaled_pred], color=['tomato', 'dodgerblue'], line_width=2)
output_notebook()
show(p)

# Using Prophet

# Data Preprocessing

In [263]:
sp500_turnc = pd.DataFrame({"ds":sp500.index, "y":sp500["Close"]})
sp500_turnc.reset_index(inplace=True)
del sp500_turnc["Date"]

# Build a Module and Prediction


In [264]:
m = Prophet(yearly_seasonality=True, daily_seasonality=True)
m.fit(sp500_turnc)
future = m.make_future_dataframe(periods=50)

# Prediction

In [265]:
forecast = m.predict(future)

# Chart

In [274]:
p = figure(width=1000, height= 500,
          title = "Predict : Blue / Real : Red", x_axis_type="datetime",
          x_axis_label='Period', y_axis_label='Price')

p.multi_line([pd.to_datetime(sp500.index), forecast["ds"]], [sp500["Close"], forecast["yhat"]], color=['tomato', 'dodgerblue'], line_width=2)
output_notebook()
show(p)