In [16]:
import os
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
import matplotlib.pyplot as plt

from tqdm import tqdm
from keras.models import load_model
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler

from tensorflow.keras import optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.activations import softmax, sigmoid
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Dropout, PReLU, LeakyReLU, Input, BatchNormalization, Activation


In [2]:
# tf gpu 메모리 관련 코드 
gpus = tf.config.list_physical_devices(device_type = 'GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

In [3]:
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [9]:
with open('./pickle/pickle_df/complete/20221206_1년_10개_대형주.pkl', 'rb') as f:
    stock_df = pickle.load(f) # 10개 대형주 df
with open('./pickle/pickle_corr/', 'rb') as f:
    col_list = pickle.load(f) # 상관 계수에 따른 컬럼 리스트

In [10]:
# 0.5까지 0, 0.6이상 부터 1 로 labeling
stock_df['pct_label'] = np.where(stock_df['pct_label'].values > 0.5 , 1, 0)
stock_df['pct_label'].value_counts()

0    274942
1    197498
Name: pct_label, dtype: int64

In [11]:
# label 원 핫 인코딩
ohe = OneHotEncoder(sparse=False)
y_stock_df = ohe.fit_transform(stock_df[['pct_label']])

# value 컬럼 설정
col_list = list(col_list.index) # col_list의 인덱스를 리스트로 생성
X_stock_df = stock_df.drop(['pct_label'],axis=1)
X_stock_df = X_stock_df.set_index(['날짜'])
X_stock_df = X_stock_df[col_list] # col_list에 있는 컬럼들만 사용

# values에 nan값이 있나 확인
print(list(X_stock_df.isnull().sum())) 

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [12]:
min_abs_scaler = MaxAbsScaler()
X_stock_sc = min_abs_scaler.fit_transform(X_stock_df)

X_train, X_test, y_train, y_test = train_test_split(X_stock_sc, y_stock_df
                                                    , test_size=0.3, shuffle=True
                                                    , random_state=42, stratify=y_stock_df)

X_train = X_train.reshape(X_train.shape[0], X_stock_df.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_stock_df.shape[1], 1)
print('훈련 데이터의 크기 :', X_train.shape, y_train.shape)
print('테스트 데이터의 크기 :', X_test.shape, y_test.shape)

훈련 데이터의 크기 : (330708, 69, 1) (330708, 2)
테스트 데이터의 크기 : (141732, 69, 1) (141732, 2)


In [13]:
modelpath="./model/lstm_30_64_5_0.001_0.07_대형주_범위(0,1)_6개월.hdf5"
model_pred = load_model(modelpath)
print("\n Test Accuracy: %.4f" % (model_pred.evaluate(X_test, y_test)[1]))
# 예측 레이블 확인
predict = model_pred.predict(X_test)
pred_X = tf.argmax(predict,1).numpy()
np.unique(pred_X,return_counts=True)


 Test Accuracy: 0.6379


(array([0, 1], dtype=int64), array([62122, 79610], dtype=int64))

In [14]:
with open('./pickle/pickle_df/update/20221207_10개_대형주_update.pkl', 'rb') as f:
    pred_df = pickle.load(f)

In [15]:
pred_df['pct_label'] = np.where(pred_df['pct_label'].values > 0.5 , 1, 0)
# pred_df['pct_label'] = np.where(pred_df['pct_label'].values > 4 , 5, pred_df['pct_label'].values)
pred_df['pct_label'].value_counts()

0    298
1     83
Name: pct_label, dtype: int64

In [None]:
y_pred_df = ohe.transform(pred_df[['pct_label']])
y_pred_df.shape

(381, 2)

In [None]:
# col_list = list(col_list.index)
X_pred_df = pred_df.drop(['pct_label'],axis=1)
X_pred_df = X_pred_df.set_index(['날짜'])
X_pred_df = X_pred_df[col_list]
X_pred_sc = min_abs_scaler.fit_transform(X_pred_df)
X_pred = X_pred_sc.reshape(X_pred_sc.shape[0], model_pred.input.shape[1], 1)

In [None]:
predict_ = model_pred.predict(X_pred)



In [None]:
pred = tf.argmax(predict_,1).numpy()
test = tf.argmax(y_pred_df,1).numpy()
np.unique(pred,return_counts=True), np.unique(test,return_counts=True)

((array([0], dtype=int64), array([381], dtype=int64)),
 (array([0, 1], dtype=int64), array([298,  83], dtype=int64)))

In [None]:
acc = accuracy_score(test, pred)
print('accuracy_score : ',acc)

accuracy_score :  0.7821522309711286


In [None]:
# from tensorflow import keras
# # 전이 학습할 모델 불러오기
# modelpath="./model/lstm_30_64_5_0.001_0.07_대형주_범위(0,1)_6개월.hdf5"
# transfer_model = tf.keras.models.load_model(
#     modelpath, custom_objects=None, compile=True, options=None
# )
# # keras.models.load_model(modelpath)
# transfer_model.trainable = False
# transfer_model.input.shape

TensorShape([None, 69, 1])