In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# 4.4 네이버 주식 데이터로 종가 예측

## 4.4.2 데이터 구성 및 준비 (pandas-datareader)

In [1]:
import pandas_datareader.data as web

# Download the price history for ticker 035420 from the 'naver' data source
# over the fixed 2011-01-01 .. 2022-06-30 range.
data = web.DataReader(
    name='035420',
    data_source='naver',
    start='2011-01-01',
    end='2022-06-30',
)

# Print the frame dimensions, then display the most recent rows.
print(data.shape)
data.tail()

(2832, 5)


Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2022-06-24,238500,250000,236000,247500,990710
2022-06-27,247500,253500,244500,249000,634239
2022-06-28,248500,253000,246000,252500,460119
2022-06-29,246500,249000,244500,246500,464699
2022-06-30,241500,245000,236500,240000,764891


In [2]:
# Inspect the index range, column dtypes and non-null counts of the downloaded frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2832 entries, 2011-01-03 to 2022-06-30
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Open    2832 non-null   object
 1   High    2832 non-null   object
 2   Low     2832 non-null   object
 3   Close   2832 non-null   object
 4   Volume  2832 non-null   object
dtypes: object(5)
memory usage: 132.8+ KB


## 4.4.3 데이터 전처리

In [3]:
# The downloaded columns are object dtype; cast every column to integers
# so that arithmetic (normalization) works on them.
data = data.astype(int)

# Confirm the dtypes after the cast.
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2832 entries, 2011-01-03 to 2022-06-30
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Open    2832 non-null   int64
 1   High    2832 non-null   int64
 2   Low     2832 non-null   int64
 3   Close   2832 non-null   int64
 4   Volume  2832 non-null   int64
dtypes: int64(5)
memory usage: 132.8 KB


In [4]:
# Chronological split: 2011-2020 for training, 2021 for validation,
# and 2022 onward for testing. Copies keep the splits independent of `data`.
train_df = data.loc['2011-1':'2020-12'].copy()
val_df = data.loc['2021-1':'2021-12'].copy()
test_df = data.loc['2022-1':].copy()

# Report the size of each split.
for label, frame in (('train_df.shape', train_df),
                     ('val_df.shape', val_df),
                     ('test_df.shape', test_df)):
    print(label, frame.shape)

train_df.shape (2463, 5)
val_df.shape (248, 5)
test_df.shape (121, 5)


In [5]:
# Min-max normalization. The per-column minimum and maximum come from the
# training split only, and the same constants scale all three splits.
train_min = train_df.min()
train_max = train_df.max()
train_range = train_max - train_min

# The tuple of original frames is built before any name is rebound, so every
# split is scaled from its un-normalized values.
train_df, val_df, test_df = (
    (frame - train_min) / train_range
    for frame in (train_df, val_df, test_df)
)

train_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2011-01-03,0.213297,0.207602,0.205221,0.051382,0.028787
2011-01-04,0.208175,0.203986,0.202425,0.051382,0.028788
2011-01-05,0.203988,0.205798,0.204291,0.052488,0.035352
2011-01-06,0.209107,0.203986,0.204755,0.051382,0.036038
2011-01-07,0.206318,0.205798,0.206624,0.055251,0.038215


In [6]:
from tensorflow.keras.preprocessing import timeseries_dataset_from_array

window_length = 7

# Inputs are windows of `window_length` consecutive rows over all columns.
# Targets are the 'Close' column shifted by `window_length`, so the window
# starting at row i is paired with the 'Close' value of row i + window_length.
x_train = train_df.values
y_train = train_df['Close'].iloc[window_length:]

train_ds = timeseries_dataset_from_array(
    data=x_train,
    targets=y_train,
    sequence_length=window_length,
    batch_size=32,
)

# Pull a single batch to check the tensor shapes.
for inputs, targets in train_ds.take(1):
    pass

print("Input shape:", inputs.numpy().shape)
print("Target shape:", targets.numpy().shape)

Input shape: (32, 7, 5)
Target shape: (32,)


In [7]:
from tensorflow.keras.preprocessing import timeseries_dataset_from_array

window_length = 7

# Same windowing as the training set, applied to the validation split:
# the window starting at row i is paired with 'Close' at row i + window_length.
x_val = val_df.values
y_val = val_df['Close'].iloc[window_length:]

val_ds = timeseries_dataset_from_array(
    data=x_val,
    targets=y_val,
    sequence_length=window_length,
    batch_size=32,
)

# Pull a single batch to check the tensor shapes.
for inputs, targets in val_ds.take(1):
    pass

print("Input shape:", inputs.numpy().shape)
print("Target shape:", targets.numpy().shape)

Input shape: (32, 7, 5)
Target shape: (32,)


## 4.4.4 모델 생성

In [8]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Input, LSTM, Dense

# Derive the input shape from the data pipeline instead of hard-coding (7, 5),
# so the model stays in sync if the window length or the column set changes.
n_features = train_df.shape[1]

inputs = Input(shape=(window_length, n_features))
x = LSTM(32)(inputs)    # one LSTM layer with 32 units over each window
outputs = Dense(1)(x)   # single regression output per window

model = Model(inputs=inputs, outputs=outputs)
# Mean squared error as the loss; mean absolute error tracked as a metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 7, 5)]            0         
                                                                 
 lstm (LSTM)                 (None, 32)                4864      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 4,897
Trainable params: 4,897
Non-trainable params: 0
_________________________________________________________________


## 4.4.5 모델 학습

In [9]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Stop training once val_loss has gone 10 epochs without improving.
# (The spelling of this variable name is kept because the training cell uses it.)
earlyetopping_callback = EarlyStopping(patience=10, monitor="val_loss")

# Save weights only, and only when val_loss reaches a new best value.
checkpoint_options = dict(
    filepath="best_checkpoint_model.h5",
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=True,
    verbose=1,
)
modelcheckpoint_callback = ModelCheckpoint(**checkpoint_options)

In [10]:
%%time

history = model.fit(train_ds,
    epochs=100,
    validation_data=val_ds,
    batch_size = 32,
    callbacks=[earlyetopping_callback, modelcheckpoint_callback]
)

Epoch 1/100
Epoch 1: val_loss improved from inf to 0.06743, saving model to best_checkpoint_model.h5
Epoch 2/100
Epoch 2: val_loss improved from 0.06743 to 0.01217, saving model to best_checkpoint_model.h5
Epoch 3/100
Epoch 3: val_loss improved from 0.01217 to 0.01160, saving model to best_checkpoint_model.h5
Epoch 4/100
Epoch 4: val_loss improved from 0.01160 to 0.01073, saving model to best_checkpoint_model.h5
Epoch 5/100
Epoch 5: val_loss did not improve from 0.01073
Epoch 6/100
Epoch 6: val_loss improved from 0.01073 to 0.00988, saving model to best_checkpoint_model.h5
Epoch 7/100
Epoch 7: val_loss improved from 0.00988 to 0.00958, saving model to best_checkpoint_model.h5
Epoch 8/100
Epoch 8: val_loss improved from 0.00958 to 0.00875, saving model to best_checkpoint_model.h5
Epoch 9/100
Epoch 9: val_loss improved from 0.00875 to 0.00813, saving model to best_checkpoint_model.h5
Epoch 10/100
Epoch 10: val_loss improved from 0.00813 to 0.00770, saving model to best_checkpoint_model.h

In [11]:
import plotly.graph_objects as go

# Plot the per-epoch training and validation loss recorded by model.fit.
fig = go.Figure()
for curve in ('loss', 'val_loss'):
    fig.add_trace(go.Scattergl(y=history.history[curve], name=curve))
fig.update_layout(
    title="<b>Loss of Model</b>",
    xaxis_title='Epoch',
    yaxis_title='Loss',
    template='seaborn',
)
fig.show()

## 4.4.7 모델 예측

In [12]:
# Load the weights stored in 'best_checkpoint_model.h5' (the file the
# checkpoint callback writes whenever val_loss improves) into the model.
model.load_weights('best_checkpoint_model.h5')

In [13]:
from tensorflow.keras.preprocessing import timeseries_dataset_from_array

window_length = 7

# Same windowing as the training set, applied to the test split:
# the window starting at row i is paired with 'Close' at row i + window_length.
x_test = test_df.values
y_test = test_df['Close'].iloc[window_length:]

test_ds = timeseries_dataset_from_array(
    data=x_test,
    targets=y_test,
    sequence_length=window_length,
    batch_size=32,
)

# Pull a single batch to check the tensor shapes.
for inputs, targets in test_ds.take(1):
    pass

print("Input shape:", inputs.numpy().shape)
print("Target shape:", targets.numpy().shape)

Input shape: (32, 7, 5)
Target shape: (32,)


In [14]:
# Ground-truth 'Close' values aligned with the test windows. The first
# `window_length` rows only serve as input context for the first window, so
# they are dropped. Uses `window_length` with explicit positional .iloc
# slicing instead of a hard-coded 7, matching how y_test is built.
test_numpy = test_df['Close'].iloc[window_length:].to_numpy()
pred = model.predict(test_ds)

# Both should have the same number of rows; pred has a trailing axis of size 1.
print(test_numpy.shape)
print(pred.shape)

(114,)
(114, 1)


In [16]:
import plotly.graph_objects as go

# Dates for the plotted points: the trailing rows of the test split that have
# a target, so the axis titled 'Date' actually shows dates.
dates = test_df.index[len(test_df) - len(test_numpy):]

# Both series are on the min-max-normalized scale of the test split.
fig = go.Figure()
fig.add_trace(go.Scattergl(x=dates, y=test_numpy, name='actual'))
# Flatten (n, 1) predictions with reshape(-1) rather than a hard-coded length,
# so the cell keeps working when the test period (and thus n) changes.
fig.add_trace(go.Scattergl(x=dates, y=pred.reshape(-1), name='prediction'))
fig.update_layout(title="<b>네이버 주식 종가 예측</b>", xaxis_title='Date',yaxis_title='Close', template='seaborn')
fig.show()