In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Conv1D, Dropout
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam

In [3]:
# 데이터 - CSV 파일 가져오기
from google.colab import files
uploaded = files.upload()

csv_df = pd.read_csv('000660_SK하이닉스.csv')  # CSV 파일명 입력
csv_df

Saving 000660_SK하이닉스.csv to 000660_SK하이닉스.csv


Unnamed: 0.1,Unnamed: 0,Date,Open,High,Low,Close,Volume,Change,BPS,PER,PBR,EPS,DIV,DPS,금리,환율
0,0,2015-01-02,47950,48400,47650,47750,1155944,0.000000,18399,11.80,2.60,4045,0.00,0,2.0,1093.599976
1,1,2015-01-05,48050,48400,47500,47650,1409146,-0.002094,18399,11.78,2.59,4045,0.00,0,2.0,1111.000000
2,2,2015-01-06,47000,47600,46900,47500,1661196,-0.003148,18399,11.74,2.58,4045,0.00,0,2.0,1108.500000
3,3,2015-01-07,46700,47600,46700,47400,1821266,-0.002105,18399,11.72,2.58,4045,0.00,0,2.0,1097.300049
4,4,2015-01-08,48250,50000,47900,49950,5006675,0.053797,18399,12.35,2.71,4045,0.00,0,2.0,1097.800049
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2114,2114,2023-08-02,123300,124800,118700,119500,3765478,-0.044764,92004,36.86,1.30,3242,1.00,1200,3.5,1289.050049
2115,2115,2023-08-03,119000,121300,117200,120000,3498537,0.004184,92004,37.01,1.30,3242,1.00,1200,3.5,1295.369995
2116,2116,2023-08-04,118300,120900,118100,120100,2375384,0.000833,92004,37.05,1.31,3242,1.00,1200,3.5,1296.400024
2117,2117,2023-08-07,120600,124900,120500,121900,3724826,0.014988,92004,37.60,1.32,3242,0.98,1200,3.5,1303.050049


In [4]:
csv_df = csv_df.loc[:, ['Date', 'Close', 'BPS', 'EPS', '금리', '환율']]
csv_df = csv_df.set_index('Date') # 날짜를 index로 바꿈
csv_df

Unnamed: 0_level_0,Close,BPS,EPS,금리,환율
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-02,47750,18399,4045,2.0,1093.599976
2015-01-05,47650,18399,4045,2.0,1111.000000
2015-01-06,47500,18399,4045,2.0,1108.500000
2015-01-07,47400,18399,4045,2.0,1097.300049
2015-01-08,49950,18399,4045,2.0,1097.800049
...,...,...,...,...,...
2023-08-02,119500,92004,3242,3.5,1289.050049
2023-08-03,120000,92004,3242,3.5,1295.369995
2023-08-04,120100,92004,3242,3.5,1296.400024
2023-08-07,121900,92004,3242,3.5,1303.050049


In [5]:
csv_df.dropna() # 결측치 처리

Unnamed: 0_level_0,Close,BPS,EPS,금리,환율
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-02,47750,18399,4045,2.0,1093.599976
2015-01-05,47650,18399,4045,2.0,1111.000000
2015-01-06,47500,18399,4045,2.0,1108.500000
2015-01-07,47400,18399,4045,2.0,1097.300049
2015-01-08,49950,18399,4045,2.0,1097.800049
...,...,...,...,...,...
2023-08-02,119500,92004,3242,3.5,1289.050049
2023-08-03,120000,92004,3242,3.5,1295.369995
2023-08-04,120100,92004,3242,3.5,1296.400024
2023-08-07,121900,92004,3242,3.5,1303.050049


In [6]:
# 날짜 빼고 정규화 작업 진행 -> MinMaxScaler 사용
scaler = MinMaxScaler()

# 정규화 수행 -> 정규화된 데이터프레임은 scaled_df
scale_cols = ['Close', 'BPS', 'EPS', '금리', '환율']
scaled_df = scaler.fit_transform(csv_df[scale_cols]) # 정규화된 데이터는 넘파이 형태
scaled_df = pd.DataFrame(scaled_df, columns = scale_cols) # Pandas DataFrame 형태로 변경

# 테스트
scaled_df

Unnamed: 0,Close,BPS,EPS,금리,환율
0,0.179226,0.0,0.057063,0.5,0.102171
1,0.178411,0.0,0.057063,0.5,0.146760
2,0.177189,0.0,0.057063,0.5,0.140353
3,0.176375,0.0,0.057063,0.5,0.111652
4,0.197149,0.0,0.057063,0.5,0.112934
...,...,...,...,...,...
2114,0.763747,1.0,0.015483,1.0,0.603029
2115,0.767821,1.0,0.015483,1.0,0.619225
2116,0.768635,1.0,0.015483,1.0,0.621864
2117,0.783299,1.0,0.015483,1.0,0.638905


In [7]:
# 날짜를 뺐으니 다시 날짜를 index 로 붙여줍니다
scaled_csv_df = scaled_df.set_index(csv_df.index)
scaled_csv_df

Unnamed: 0_level_0,Close,BPS,EPS,금리,환율
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-02,0.179226,0.0,0.057063,0.5,0.102171
2015-01-05,0.178411,0.0,0.057063,0.5,0.146760
2015-01-06,0.177189,0.0,0.057063,0.5,0.140353
2015-01-07,0.176375,0.0,0.057063,0.5,0.111652
2015-01-08,0.197149,0.0,0.057063,0.5,0.112934
...,...,...,...,...,...
2023-08-02,0.763747,1.0,0.015483,1.0,0.603029
2023-08-03,0.767821,1.0,0.015483,1.0,0.619225
2023-08-04,0.768635,1.0,0.015483,1.0,0.621864
2023-08-07,0.783299,1.0,0.015483,1.0,0.638905


In [8]:
# 입력데이터, 정답(예측하는)데이터를 정의
# 입력데이터 -> 이전날까지의 종가와 변수들
# 정답데이터 -> 다음날의 종가

feature_cols = ['Close', 'BPS', 'EPS', '금리', '환율']
label_cols = ['Close']

# 입력데이터, 정답데이터 프레임 -> feature_df, label_df
feature_df = scaled_csv_df[feature_cols]
label_df = scaled_csv_df[label_cols]

print(feature_df)
print(label_df)


# DataFrame을 Numpy 형태로 저장
feature_np = feature_df.to_numpy()
label_np = label_df.to_numpy()

               Close  BPS       EPS   금리        환율
Date                                              
2015-01-02  0.179226  0.0  0.057063  0.5  0.102171
2015-01-05  0.178411  0.0  0.057063  0.5  0.146760
2015-01-06  0.177189  0.0  0.057063  0.5  0.140353
2015-01-07  0.176375  0.0  0.057063  0.5  0.111652
2015-01-08  0.197149  0.0  0.057063  0.5  0.112934
...              ...  ...       ...  ...       ...
2023-08-02  0.763747  1.0  0.015483  1.0  0.603029
2023-08-03  0.767821  1.0  0.015483  1.0  0.619225
2023-08-04  0.768635  1.0  0.015483  1.0  0.621864
2023-08-07  0.783299  1.0  0.015483  1.0  0.638905
2023-08-08  0.756415  1.0  0.015483  1.0  0.650950

[2119 rows x 5 columns]
               Close
Date                
2015-01-02  0.179226
2015-01-05  0.178411
2015-01-06  0.177189
2015-01-07  0.176375
2015-01-08  0.197149
...              ...
2023-08-02  0.763747
2023-08-03  0.767821
2023-08-04  0.768635
2023-08-07  0.783299
2023-08-08  0.756415

[2119 rows x 1 columns]


In [9]:
# sequence_dataset 만들기
def make_sequence_dataset(feature, label, window_size):

  feature_list = []   # 생성될 feature list
  label_list = []     # 생성될 label list

  for i in range(len(feature)-window_size): # range는 전체값에서 window_size를 뺀 값

    feature_list.append(feature[i:i+window_size]) # feature list 에 i번째서 부터 window size 만큼의 입력데이터를 추가
    label_list.append(label[i+window_size]) # label list 에 그 다음 번째('window_size + 1' 번째)의 정답데이터를 추가

  return np.array(feature_list), np.array(label_list)



---



### **5일치 예측** -> 출력층 units수 = 5

In [10]:
window_size = 80  # window_size 만큼의 입력데이터를 이용해 바로 다음 값에 오는 Close 값을 예측

X, y = make_sequence_dataset(feature_np, label_np, window_size) # X에는 np.array(feature_list), y에는 np.array(label_list) 가 대입됨

print(X.shape, y.shape)

(2039, 80, 5) (2039, 1)


In [11]:
# 모델 훈련을 위한 준비 -> 훈련을 하고, 제대로 훈련이 됐는지 테스트
split = int(len(X)*0.7) # 테스트 데이터로 분리 -> train:test = 7:3

# 훈련data 는 전체 데이터의 70퍼센트
X_train = X[0:split]
y_train = y[0:split]

# 테스트data 는 전체 데이터의 30퍼센트
X_test = X[split:]
y_test = y[split:]

# shape를 출력 -> 분할된 데이터 값으로 출력됨
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1427, 80, 5) (1427, 1)
(612, 80, 5) (612, 1)


In [12]:
#LSTM 모델 구축
model = Sequential()

# 1차원 feature map 생성
model.add(Conv1D(filters=32, kernel_size=5,
           padding="causal",
           activation="relu",
           input_shape=[window_size, 5]))# input_shape = (40,5) -> 다음값 예측을 위한 이전 40개(window_size)의 값과 5개의 특성을 입력으로 넣습니다

# LSTM layer
model.add(LSTM(units = 16, activation = 'tanh'))
model.add(Dense(units = 16, activation = 'sigmoid'))
model.add(Dense(units = 5)) # 출력층 -> 출력층의 unit 수를 조절해서 5일치, 10일치, 20일치, 30일치 등등 예측이 가능!
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 80, 32)            832       
                                                                 
 lstm (LSTM)                 (None, 16)                3136      
                                                                 
 dense (Dense)               (None, 16)                272       
                                                                 
 dense_1 (Dense)             (None, 5)                 85        
                                                                 
Total params: 4,325
Trainable params: 4,325
Non-trainable params: 0
_________________________________________________________________


In [13]:
# 모델 컴파일
loss = Huber()
optimizer = Adam(0.0005)
model.compile(loss=loss, optimizer=optimizer, metrics=['mse']) # 손실 함수는 Huber, 옵티마이저는 Adam,  평가지표는 mse로 설정

# 조기종료 설정 -> earlystopping은 10번의 epoch통안 loss 개선이 없다면 학습을 멈춤
earlystopping = EarlyStopping(monitor='loss', patience=10)

# 모델 학습 -> epoch은 100번 진행
model.fit(X_train, y_train, epochs=100, batch_size=32, callbacks=[earlystopping])

# 주가 예측 -> test 데이터를 이용하여 학습된 LSTM모델을 테스트
predictions = model.predict(X_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [14]:
#예측 비교하기
print(predictions)

[[0.82537913 0.8390823  0.8385793  0.8268516  0.8260046 ]
 [0.82642287 0.84015447 0.83967704 0.82798445 0.82714385]
 [0.82752824 0.84130347 0.8408171  0.82923913 0.8284435 ]
 ...
 [0.70081407 0.7076856  0.70679885 0.7072922  0.7161156 ]
 [0.69849044 0.70551014 0.70454973 0.7049894  0.713967  ]
 [0.6978309  0.7050954  0.7039415  0.7044925  0.7135731 ]]


In [15]:
print(y_test) #predictions 와 대충 값이 일치해야함

[[0.87372709]
 [0.90224033]
 [0.9185336 ]
 [0.89816701]
 [1.        ]
 [0.94297352]
 [0.96741344]
 [0.98778004]
 [0.94704684]
 [0.93075356]
 [0.89409369]
 [0.90224033]
 [0.87372709]
 [0.90631365]
 [0.93075356]
 [0.90224033]
 [0.93482688]
 [0.93075356]
 [0.94704684]
 [0.91446029]
 [0.91446029]
 [0.89002037]
 [0.87780041]
 [0.87372709]
 [0.89002037]
 [0.86558045]
 [0.88594705]
 [0.86965377]
 [0.93482688]
 [0.9389002 ]
 [0.95519348]
 [0.95519348]
 [0.9592668 ]
 [0.96334012]
 [0.93075356]
 [0.91038697]
 [0.92668024]
 [0.90631365]
 [0.91038697]
 [0.91038697]
 [0.91446029]
 [0.9185336 ]
 [0.86965377]
 [0.87372709]
 [0.86965377]
 [0.86150713]
 [0.89002037]
 [0.84928717]
 [0.84928717]
 [0.83299389]
 [0.86150713]
 [0.86558045]
 [0.84114053]
 [0.84521385]
 [0.84928717]
 [0.79226069]
 [0.76374745]
 [0.74745418]
 [0.75560081]
 [0.74745418]
 [0.77596741]
 [0.78818737]
 [0.78818737]
 [0.76374745]
 [0.79226069]
 [0.79226069]
 [0.81262729]
 [0.80855397]
 [0.82484725]
 [0.83706721]
 [0.81670061]
 [0.84

In [16]:
# 실제값 변환
# 실제값으로 변화시키기 위해서 기존 데이터에서 종가의 최댓값과 최솟값을 가져옵니다
close_min = csv_df['Close'].min()
close_max = csv_df['Close'].max()

print(csv_df['Close'].min())
print(csv_df['Close'].max())

25750
148500


In [17]:
# MinMaxScaler이용해서 실제값으로 역변환
scaler2 = MinMaxScaler()
scaled_df2 = scaler2.fit_transform(csv_df[['Close']])

# MinMaxScaler에 정규화에 사용한 최솟값과 최댓값을 설정
scaler2.data_min_ = close_min  # 정규화에 사용한 최솟값
scaler2.data_max_ = close_max  # 정규화에 사용한 최댓값

# 예측한 출력값을 실제값으로 역변환
original_pred_values = scaler2.inverse_transform(predictions)

# 역변환된 예측값 출력
original_pred_values

array([[127065.28 , 128747.35 , 128685.6  , 127246.03 , 127142.06 ],
       [127193.41 , 128878.96 , 128820.36 , 127385.086, 127281.91 ],
       [127329.086, 129019.99 , 128960.3  , 127539.1  , 127441.44 ],
       ...,
       [111774.93 , 112618.41 , 112509.56 , 112570.12 , 113653.19 ],
       [111489.7  , 112351.375, 112233.484, 112287.445, 113389.45 ],
       [111408.75 , 112300.46 , 112158.82 , 112226.46 , 113341.1  ]],
      dtype=float32)

In [18]:
# 테스트 data의 실제값 -> 정규화시켰던 값을 split했었기 때문에 원본 데이터에서 다시 분리해서 가져옴

# original_label_df -> 데이터셋의 실제 종가값
original_label_df = csv_df[label_cols]

# 다시 테스트용만큼의 값으로 분리
original_y_train = original_label_df[0:split]
original_y_test = original_label_df[split:]
original_y_test = original_y_test[window_size:] # window_size만큼의 데이터 제거(예측에 사용하였으므로)

# 역변환된 실제값 출력(종가)
original_y_test

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2021-02-19,133000
2021-02-22,136500
2021-02-23,138500
2021-02-24,136000
2021-02-25,148500
...,...
2023-08-02,119500
2023-08-03,120000
2023-08-04,120100
2023-08-07,121900


In [19]:
# 실제로 에측하기
# feature_df 에서 가장 최근의 값을 window_size 만큼 가져옴
pred_feature = feature_df.tail(window_size)

pred_feature

Unnamed: 0_level_0,Close,BPS,EPS,금리,환율
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-04-13,0.512831,0.978126,0.571976,1.0,0.689593
2023-04-14,0.517719,0.978126,0.571976,1.0,0.634754
2023-04-17,0.511202,0.978126,0.571976,1.0,0.639751
2023-04-18,0.503870,0.978126,0.571976,1.0,0.672270
2023-04-19,0.508758,0.978126,0.571976,1.0,0.671835
...,...,...,...,...,...
2023-08-02,0.763747,1.000000,0.015483,1.0,0.603029
2023-08-03,0.767821,1.000000,0.015483,1.0,0.619225
2023-08-04,0.768635,1.000000,0.015483,1.0,0.621864
2023-08-07,0.783299,1.000000,0.015483,1.0,0.638905


In [20]:
pred_feature_list = []
pred_feature_list.append(pred_feature)
pred_feature = np.array(pred_feature_list)
print(pred_feature.shape)

(1, 80, 5)


In [21]:
# 5일 예측 -> 5일까지의 예측값 5개 출력
predictions_5d = model.predict(pred_feature)
pred_values_5d = scaler2.inverse_transform(predictions_5d)
print(pred_values_5d)
print(len(pred_values_5d[0]))

[[110901.8   111866.555 111678.414 111762.086 112924.16 ]]
5


In [67]:
# 예측값 추가해서 DataFrame으로 만들기
# 2020년 부터 현재까지의 종가값을 제시하고 앞에 5일 예측값을 추가
pred5dCsvDf = csv_df[['Close']].loc['2020-01-01':,:]

for i in range (len(pred_values_5d[0])):
  pred5dCsvDf.loc[(i+1),:]=[pred_values_5d[0,i]]

pred5dCsvDf

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2020-01-02,94700.000000
2020-01-03,94500.000000
2020-01-06,94300.000000
2020-01-07,94000.000000
2020-01-08,97400.000000
...,...
1,110901.796875
2,111866.554688
3,111678.414062
4,111762.085938


In [23]:
# DataFrame을 csv파일로 만들기
pred5dCsvDf.to_csv('5d_predict_filename.csv', header=False, index=True)



---



### **10일치 예측**

In [24]:
window_size = 80

X, y = make_sequence_dataset(feature_np, label_np, window_size) # X에는 np.array(feature_list), y에는 np.array(label_list) 가 대입됨

print(X.shape, y.shape)

(2039, 80, 5) (2039, 1)


In [25]:
# 모델 훈련을 위한 준비 -> 훈련을 하고, 제대로 훈련이 됐는지 테스트
split = int(len(X)*0.7) # 테스트 데이터로 분리 -> train:test = 7:3

# 훈련data 는 전체 데이터의 70퍼센트
X_train = X[0:split]
y_train = y[0:split]

# 테스트data 는 전체 데이터의 30퍼센트
X_test = X[split:]
y_test = y[split:]

# shape를 출력 -> 분할된 데이터 값으로 출력됨
print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(1427, 80, 5) (1427, 1)
(612, 80, 5) (612, 1)


In [26]:
#LSTM 모델 구축
model = Sequential()

# 1차원 feature map 생성
model.add(Conv1D(filters=32, kernel_size=5,
           padding="causal",
           activation="relu",
           input_shape=[window_size, 5]))# input_shape = (40,5) -> 다음값 예측을 위한 이전 40개(window_size)의 값과 5개의 특성을 입력으로 넣습니다

# LSTM layer
model.add(LSTM(units = 16, activation = 'tanh'))
model.add(Dense(units = 16, activation = 'sigmoid'))
model.add(Dense(units = 10)) # 출력층 -> 출력층의 unit 수를 조절해서 5일치, 10일치, 20일치, 30일치 등등 예측이 가능!
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_1 (Conv1D)           (None, 80, 32)            832       
                                                                 
 lstm_1 (LSTM)               (None, 16)                3136      
                                                                 
 dense_2 (Dense)             (None, 16)                272       
                                                                 
 dense_3 (Dense)             (None, 10)                170       
                                                                 
Total params: 4,410
Trainable params: 4,410
Non-trainable params: 0
_________________________________________________________________


In [27]:
# 모델 컴파일
from tensorflow.keras.losses import Huber
from tensorflow.keras.optimizers import Adam
loss = Huber()
optimizer = Adam(0.0005)
model.compile(loss=loss, optimizer=optimizer, metrics=['mse']) # 손실 함수는 Huber, 옵티마이저는 Adam,  평가지표는 mse로 설정

# 조기종료 설정 -> earlystopping은 10번의 epoch통안 loss 개선이 없다면 학습을 멈춤
earlystopping = EarlyStopping(monitor='loss', patience=10)

# 모델 학습 -> epoch은 100번 진행
model.fit(X_train, y_train, epochs=100, batch_size=32, callbacks=[earlystopping])

# 주가 예측 -> test 데이터를 이용하여 학습된 LSTM모델을 테스트
predictions = model.predict(X_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [28]:
#예측 비교하기
print(predictions)

[[0.8451492  0.85424805 0.83937144 ... 0.84604055 0.8568356  0.8519946 ]
 [0.84545505 0.85468864 0.83959365 ... 0.8461873  0.8566859  0.85250473]
 [0.8463606  0.85521793 0.8399694  ... 0.8465744  0.85691166 0.8530834 ]
 ...
 [0.73477304 0.68747944 0.68731445 ... 0.71238273 0.67746055 0.6867638 ]
 [0.7349618  0.68779975 0.6879873  ... 0.71266836 0.677711   0.68722695]
 [0.73631567 0.68882203 0.6892557  ... 0.7139507  0.6785853  0.68842614]]


In [29]:
print(y_test) #predictions 와 대충 값이 일치해야함

[[0.87372709]
 [0.90224033]
 [0.9185336 ]
 [0.89816701]
 [1.        ]
 [0.94297352]
 [0.96741344]
 [0.98778004]
 [0.94704684]
 [0.93075356]
 [0.89409369]
 [0.90224033]
 [0.87372709]
 [0.90631365]
 [0.93075356]
 [0.90224033]
 [0.93482688]
 [0.93075356]
 [0.94704684]
 [0.91446029]
 [0.91446029]
 [0.89002037]
 [0.87780041]
 [0.87372709]
 [0.89002037]
 [0.86558045]
 [0.88594705]
 [0.86965377]
 [0.93482688]
 [0.9389002 ]
 [0.95519348]
 [0.95519348]
 [0.9592668 ]
 [0.96334012]
 [0.93075356]
 [0.91038697]
 [0.92668024]
 [0.90631365]
 [0.91038697]
 [0.91038697]
 [0.91446029]
 [0.9185336 ]
 [0.86965377]
 [0.87372709]
 [0.86965377]
 [0.86150713]
 [0.89002037]
 [0.84928717]
 [0.84928717]
 [0.83299389]
 [0.86150713]
 [0.86558045]
 [0.84114053]
 [0.84521385]
 [0.84928717]
 [0.79226069]
 [0.76374745]
 [0.74745418]
 [0.75560081]
 [0.74745418]
 [0.77596741]
 [0.78818737]
 [0.78818737]
 [0.76374745]
 [0.79226069]
 [0.79226069]
 [0.81262729]
 [0.80855397]
 [0.82484725]
 [0.83706721]
 [0.81670061]
 [0.84

In [30]:
# MinMaxScaler이용해서 실제값으로 역변환
scaler2 = MinMaxScaler()
scaled_df2 = scaler2.fit_transform(csv_df[['Close']])

# MinMaxScaler에 정규화에 사용한 최솟값과 최댓값을 설정
scaler2.data_min_ = close_min  # 정규화에 사용한 최솟값
scaler2.data_max_ = close_max  # 정규화에 사용한 최댓값

# 예측한 출력값을 실제값으로 역변환
original_pred_values = scaler2.inverse_transform(predictions)

# 역변환된 예측값 출력
original_pred_values

array([[129492.07 , 130608.945, 128782.836, ..., 129601.48 , 130926.56 ,
        130332.336],
       [129529.6  , 130663.02 , 128810.12 , ..., 129619.49 , 130908.19 ,
        130394.95 ],
       [129640.766, 130728.   , 128856.234, ..., 129667.01 , 130935.9  ,
        130465.98 ],
       ...,
       [115943.39 , 110138.1  , 110117.85 , ..., 113194.984, 108908.28 ,
        110050.26 ],
       [115966.56 , 110177.42 , 110200.445, ..., 113230.05 , 108939.03 ,
        110107.11 ],
       [116132.75 , 110302.91 , 110356.14 , ..., 113387.45 , 109046.34 ,
        110254.31 ]], dtype=float32)

In [31]:
# 테스트 data의 실제값 -> 정규화시켰던 값을 split했었기 때문에 원본 데이터에서 다시 분리해서 가져옴

# original_label_df -> 데이터셋의 실제 종가값
original_label_df = csv_df[label_cols]

# 다시 테스트용만큼의 값으로 분리
original_y_train = original_label_df[0:split]
original_y_test = original_label_df[split:]
original_y_test = original_y_test[window_size:] # window_size만큼의 데이터 제거(예측에 사용하였으므로)

# 역변환된 실제값 출력(종가)
original_y_test

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2021-02-19,133000
2021-02-22,136500
2021-02-23,138500
2021-02-24,136000
2021-02-25,148500
...,...
2023-08-02,119500
2023-08-03,120000
2023-08-04,120100
2023-08-07,121900


In [32]:
# 실제로 에측하기
# feature_df 에서 가장 최근의 값을 window_size 만큼 가져옴
pred_feature = feature_df.tail(window_size)

pred_feature

Unnamed: 0_level_0,Close,BPS,EPS,금리,환율
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-04-13,0.512831,0.978126,0.571976,1.0,0.689593
2023-04-14,0.517719,0.978126,0.571976,1.0,0.634754
2023-04-17,0.511202,0.978126,0.571976,1.0,0.639751
2023-04-18,0.503870,0.978126,0.571976,1.0,0.672270
2023-04-19,0.508758,0.978126,0.571976,1.0,0.671835
...,...,...,...,...,...
2023-08-02,0.763747,1.000000,0.015483,1.0,0.603029
2023-08-03,0.767821,1.000000,0.015483,1.0,0.619225
2023-08-04,0.768635,1.000000,0.015483,1.0,0.621864
2023-08-07,0.783299,1.000000,0.015483,1.0,0.638905


In [33]:
pred_feature_list = []
pred_feature_list.append(pred_feature)
pred_feature = np.array(pred_feature_list)
print(pred_feature.shape)

(1, 80, 5)


In [34]:
# 10일 예측 -> 10일까지의 예측값 10개 출력
predictions_10d = model.predict(pred_feature)
pred_values_10d = scaler2.inverse_transform(predictions_10d)
print(pred_values_10d)
print(len(pred_values_10d[0]))

[[116041.07  110173.33  110249.95  116785.59  108374.74  113025.41
  117624.53  113247.445 108910.97  110103.68 ]]
10


In [65]:
# 예측값 추가해서 CSV파일로 만들기
# 2020년 부터 현재까지의 종가값을 제시하고 앞에 10일 예측값을 추가
pred10dCsvDf = csv_df[['Close']].loc['2020-01-01':,:]

for i in range (len(pred_values_10d[0])):
  pred10dCsvDf.loc[(i+1),:]=[pred_values_10d[0,i]]

pred10dCsvDf

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2020-01-02,94700.000000
2020-01-03,94500.000000
2020-01-06,94300.000000
2020-01-07,94000.000000
2020-01-08,97400.000000
...,...
6,113025.406250
7,117624.531250
8,113247.445312
9,108910.968750


In [36]:
pred10dCsvDf.to_csv('10d_predict_filename.csv', header=False, index=True)



---



In [55]:
#현재 종가값만 따로 csv파일로 저장

predNowCsvDf = csv_df[['Close']].loc['2020-01-01':,:]

predNowCsvDf.to_csv('Data_filename.csv', header=False, index=True)

In [68]:
pred5dCsvDf = pred5dCsvDf.reset_index()
day_five = pred5dCsvDf.values
day_five = day_five.tolist()
day_five

[['2020-01-02', 94700.0],
 ['2020-01-03', 94500.0],
 ['2020-01-06', 94300.0],
 ['2020-01-07', 94000.0],
 ['2020-01-08', 97400.0],
 ['2020-01-09', 99000.0],
 ['2020-01-10', 98900.0],
 ['2020-01-13', 100500.0],
 ['2020-01-14', 100500.0],
 ['2020-01-15', 98200.0],
 ['2020-01-16', 99200.0],
 ['2020-01-17', 99000.0],
 ['2020-01-20', 99900.0],
 ['2020-01-21', 99300.0],
 ['2020-01-22', 101000.0],
 ['2020-01-23', 98700.0],
 ['2020-01-28', 96300.0],
 ['2020-01-29', 97900.0],
 ['2020-01-30', 94000.0],
 ['2020-01-31', 93500.0],
 ['2020-02-03', 94700.0],
 ['2020-02-04', 97100.0],
 ['2020-02-05', 97700.0],
 ['2020-02-06', 100500.0],
 ['2020-02-07', 99300.0],
 ['2020-02-10', 98800.0],
 ['2020-02-11', 99800.0],
 ['2020-02-12', 100000.0],
 ['2020-02-13', 102000.0],
 ['2020-02-14', 104500.0],
 ['2020-02-17', 105000.0],
 ['2020-02-18', 102000.0],
 ['2020-02-19', 103500.0],
 ['2020-02-20', 104000.0],
 ['2020-02-21', 103000.0],
 ['2020-02-24', 99500.0],
 ['2020-02-25', 98000.0],
 ['2020-02-26', 94600.0],


In [66]:
pred10dCsvDf = pred10dCsvDf.reset_index()
day_ten = pred10dCsvDf.values
day_ten = day_ten.tolist()
day_ten

[['2020-01-02', 94700.0],
 ['2020-01-03', 94500.0],
 ['2020-01-06', 94300.0],
 ['2020-01-07', 94000.0],
 ['2020-01-08', 97400.0],
 ['2020-01-09', 99000.0],
 ['2020-01-10', 98900.0],
 ['2020-01-13', 100500.0],
 ['2020-01-14', 100500.0],
 ['2020-01-15', 98200.0],
 ['2020-01-16', 99200.0],
 ['2020-01-17', 99000.0],
 ['2020-01-20', 99900.0],
 ['2020-01-21', 99300.0],
 ['2020-01-22', 101000.0],
 ['2020-01-23', 98700.0],
 ['2020-01-28', 96300.0],
 ['2020-01-29', 97900.0],
 ['2020-01-30', 94000.0],
 ['2020-01-31', 93500.0],
 ['2020-02-03', 94700.0],
 ['2020-02-04', 97100.0],
 ['2020-02-05', 97700.0],
 ['2020-02-06', 100500.0],
 ['2020-02-07', 99300.0],
 ['2020-02-10', 98800.0],
 ['2020-02-11', 99800.0],
 ['2020-02-12', 100000.0],
 ['2020-02-13', 102000.0],
 ['2020-02-14', 104500.0],
 ['2020-02-17', 105000.0],
 ['2020-02-18', 102000.0],
 ['2020-02-19', 103500.0],
 ['2020-02-20', 104000.0],
 ['2020-02-21', 103000.0],
 ['2020-02-24', 99500.0],
 ['2020-02-25', 98000.0],
 ['2020-02-26', 94600.0],


In [58]:
import requests

data = {
    'day_five': day_five,
    'day_ten': day_ten
}

server_url = 'https://616d-39-118-146-59.ngrok-free.app/prediction'

response = requests.post(server_url, json=data)  # POST 요청으로 변경, 헤더는 자동으로 설정됨

if response.status_code == 200:
    print('성공')
else:
    print('실패:', response.status_code)

TypeError: ignored