In [None]:
# Stateful stacked recurrent neural network (LSTM)
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
tf.keras.backend.clear_session()

from keras.utils import np_utils

import pandas as pd
from sklearn import datasets
from keras.backend import tensorflow_backend as K
from keras import regularizers
import os
from keras import backend as K
import matplotlib.pyplot as plt
import glob


from pandas import DataFrame   
from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder, StandardScaler
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, LSTM, Bidirectional, Dropout
from math import sqrt
from matplotlib import pyplot
from numpy import array
 
# date-time parsing function for loading the dataset
def parser(x):
    """Parse a year-digit/month string into a ``datetime``.

    The literal prefix ``'190'`` is prepended to *x* and the result is parsed
    with the ``'%Y-%m'`` format, so ``'1-01'`` becomes January 1901.

    Args:
        x: String of the form ``'<year digit>-<month>'``.

    Returns:
        The parsed ``datetime`` object.
    """
    stamp = '190' + x
    return datetime.strptime(stamp, '%Y-%m')
 
# convert time series into supervised learning problem
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table.

    Each output row holds ``n_in`` lagged observations followed by ``n_out``
    current/future observations of every variable.

    Args:
        data: Anything ``DataFrame`` accepts: a flat list, a list of rows,
            a 1-D or 2-D array, a Series or a DataFrame.
        n_in: Number of lag steps (columns named ``var<j>(t-<i>)``).
        n_out: Number of forecast steps (columns named ``var<j>(t)`` and
            ``var<j>(t+<i>)``).
        dropnan: When true, rows containing NaN (created by shifting) are
            removed.

    Returns:
        A DataFrame with ``(n_in + n_out) * n_vars`` columns.
    """
    df = DataFrame(data)
    # Take the variable count from the frame itself so that flat lists,
    # lists of rows, 1-D arrays and Series are all handled consistently.
    n_vars = df.shape[1]
    cols, names = [], []

    # Input sequence: t-n_in, ..., t-1
    for lag in range(n_in, 0, -1):
        cols.append(df.shift(lag))
        names += ['var%d(t-%d)' % (j + 1, lag) for j in range(n_vars)]

    # Forecast sequence: t, t+1, ..., t+n_out-1
    for step in range(n_out):
        cols.append(df.shift(-step))
        if step == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, step) for j in range(n_vars)]

    agg = concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg
 
# create a differenced series (used to make the data stationary)
def difference(dataset, interval=1):
    """Return the lag-``interval`` differences of *dataset* as a Series.

    Element ``k`` of the result is
    ``dataset[k + interval] - dataset[k]``, so the result is ``interval``
    items shorter than the input.

    Args:
        dataset: Indexable sequence supporting ``len`` and subtraction of
            its items.
        interval: Distance between the two observations being subtracted.

    Returns:
        A pandas ``Series`` holding the differences.
    """
    deltas = [
        dataset[pos] - dataset[pos - interval]
        for pos in range(interval, len(dataset))
    ]
    return Series(deltas)
 
# transform series into train and test sets for supervised learning
def prepare_data(series, n_test, n_lag, n_seq):
    """Difference, scale and window a series, then split it into train/test.

    Steps: first-order differencing via ``difference``, min-max scaling to
    the range ``(-1, 1)``, conversion to a supervised table via
    ``series_to_supervised`` with ``n_lag`` inputs and ``n_seq`` outputs,
    and finally a split where the last ``n_test`` rows become the test set.

    Args:
        series: pandas object whose ``.values`` hold the observations.
        n_test: Number of trailing supervised rows reserved for testing.
        n_lag: Number of lagged inputs per row.
        n_seq: Number of forecast steps per row.

    Returns:
        ``(scaler, train, test)``: the fitted ``MinMaxScaler`` and the two
        row blocks of the supervised array.
    """
    # Remove trend: work on step-to-step changes, shaped as one column.
    differenced = difference(series.values, 1).values
    differenced = differenced.reshape(len(differenced), 1)

    # Min-max normalisation of the differenced values to [-1, 1].
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled = scaler.fit_transform(differenced)
    scaled = scaled.reshape(len(scaled), 1)

    # Turn the differenced, normalised values into lag/forecast rows.
    table = series_to_supervised(scaled, n_lag, n_seq).values

    split = -n_test
    return scaler, table[0:split], table[split:]
 
# fit an LSTM network to training data
def fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch, n_neurons):
    """Build and train the stacked stateful LSTM forecaster.

    The first ``n_lag`` columns of *train* are the inputs and the remaining
    columns are the targets. Inputs are reshaped to
    ``(samples, 1, n_lag)``: one time step with ``n_lag`` features.

    Args:
        train: 2-D array of supervised rows (lags followed by targets).
        n_lag: Number of leading input columns in *train*.
        n_seq: Unused here; the output width is taken from the target block.
        n_batch: Batch size, fixed into the model via ``batch_input_shape``.
        nb_epoch: Number of passes over the training data.
        n_neurons: Units in the first LSTM layer.

    Returns:
        The trained Keras ``Sequential`` model.
    """
    X, y = train[:, 0:n_lag], train[:, n_lag:]
    X = X.reshape(X.shape[0], 1, X.shape[1])

    # Network: two stateful LSTM layers, a bidirectional LSTM, and a dense
    # layer emitting one value per forecast step.
    # NOTE(review): stateful layers with a fixed batch size presumably need
    # the number of training rows to be a multiple of n_batch -- confirm.
    model = Sequential()
    model.add(LSTM(n_neurons,
                   batch_input_shape=(n_batch, X.shape[1], X.shape[2]),
                   return_sequences=True, stateful=True,
                   recurrent_dropout=0.2))
    model.add(LSTM(90, return_sequences=True, stateful=True,
                   recurrent_dropout=0.2))
    model.add(Bidirectional(LSTM(60, return_sequences=False)))
    model.add(Dense(y.shape[1]))

    model.compile(loss='mse', optimizer='RMSprop')
    # summary() prints the table itself and returns None, so it must not be
    # wrapped in print() (that emitted a stray "None" line).
    model.summary()

    # Train one epoch at a time without shuffling, clearing the carried
    # LSTM state after every pass over the data.
    for epoch in range(nb_epoch):
        print("Total epoch: %d / %d" % (epoch + 1, nb_epoch))
        model.fit(X, y, epochs=1, batch_size=n_batch, verbose=1, shuffle=False)
        model.reset_states()
    return model
 
# make one forecast with an LSTM,
def forecast_lstm(model, X, n_batch):
    """Produce one multi-step forecast from a single lag vector.

    Args:
        model: Object exposing ``predict(inputs, batch_size=...)``.
        X: 1-D array of lagged inputs; reshaped to ``(1, 1, len(X))``.
        n_batch: Batch size forwarded to ``predict``.

    Returns:
        A list with the values of the first row of the prediction.
    """
    sample = X.reshape(1, 1, len(X))
    prediction = model.predict(sample, batch_size=n_batch)
    return list(prediction[0, :])
 
# make a forecast for every row of the test set with the fitted LSTM
def make_forecasts(model, n_batch, train, test, n_lag, n_seq):
    """Forecast every row of *test* with ``forecast_lstm``.

    Only the first ``n_lag`` columns of each test row are used as model
    input. ``train`` and ``n_seq`` are accepted but not used.

    Args:
        model: Fitted model passed through to ``forecast_lstm``.
        n_batch: Batch size passed through to ``forecast_lstm``.
        train: Unused.
        test: 2-D array of supervised rows.
        n_lag: Number of leading input columns in each row.
        n_seq: Unused.

    Returns:
        A list with one forecast (itself a list) per test row.
    """
    return [forecast_lstm(model, row[0:n_lag], n_batch) for row in test]
 
# invert differenced forecast
def inverse_difference(last_ob, forecast):
    """Undo differencing by cumulatively summing onto a starting value.

    Args:
        last_ob: Observation that precedes the first forecast step.
        forecast: Non-empty sequence of differenced values.

    Returns:
        A list in which item ``k`` is ``last_ob`` plus the sum of the first
        ``k + 1`` forecast values.
    """
    running = forecast[0] + last_ob
    restored = [running]
    for delta in forecast[1:]:
        running = delta + running
        restored.append(running)
    return restored
 
# inverse data transform on forecasts
def inverse_transform(series, forecasts, scaler, n_test):
    """Map scaled, differenced forecasts back to the original scale.

    For forecast number ``i`` the scaling is undone with
    ``scaler.inverse_transform`` and the differencing is undone with
    ``inverse_difference``, starting from the observation at position
    ``len(series) - n_test + i - 1`` of ``series.values``.

    Args:
        series: pandas object holding the original observations.
        forecasts: Sequence of forecasts, each a sequence of scaled values.
        scaler: Fitted scaler exposing ``inverse_transform``.
        n_test: Offset from the end of *series* used to locate the anchor
            observation of the first forecast.

    Returns:
        A list with one restored forecast per input forecast.
    """
    base = len(series) - n_test - 1
    restored = []
    for offset, raw in enumerate(forecasts):
        # The scaler expects a 2-D array, so present the forecast as one row.
        row = array(raw)
        row = row.reshape(1, len(row))
        unscaled = scaler.inverse_transform(row)[0, :]
        anchor = series.values[base + offset]
        restored.append(inverse_difference(anchor, unscaled))
    return restored
 
# evaluate the RMSE for each forecast time step
def evaluate_forecasts(test, forecasts, n_lag, n_seq):
    """Compute the RMSE of the forecasts at every forecast step.

    Args:
        test: Sequence of actual value rows, one per forecast.
        forecasts: Sequence of predicted value rows, aligned with *test*.
        n_lag: Unused.
        n_seq: Number of forecast steps to evaluate.

    Returns:
        A list of ``n_seq`` floats; item ``i`` is the RMSE over all rows at
        step ``t+i+1``. Previously the values were computed and discarded,
        so callers that ignore the return value are unaffected.
    """
    rmses = []
    for step in range(n_seq):
        actual = [row[step] for row in test]
        predicted = [forecast[step] for forecast in forecasts]
        rmses.append(sqrt(mean_squared_error(actual, predicted)))
    return rmses
 
# plot the forecasts in the context of the original dataset
def plot_forecasts(series, forecasts, n_test, y):
    """Plot the series, a horizontal reference line and every forecast.

    Args:
        series: pandas object holding the original observations; its index
            supplies the x tick labels.
        forecasts: Sequence of forecasts already on the original scale.
        n_test: Offset from the end of *series* at which the first forecast
            is anchored (same convention as ``inverse_transform``).
        y: Height of the red horizontal reference line.
    """
    plt.figure(figsize=(25, 5))

    # One tick every 500 samples, labelled with the index value found at
    # that position so the number of labels matches the number of ticks.
    tick_positions = np.arange(0, series.shape[0], step=500)
    plt.xticks(tick_positions, series.index[tick_positions], rotation=90)

    pyplot.plot(series.values)
    # The reference line does not depend on the forecasts: draw it once.
    plt.axhline(y=y, xmin=0.02, xmax=0.98, color='red')

    for i, forecast in enumerate(forecasts):
        off_s = len(series) - n_test + i - 1
        off_e = off_s + len(forecast) + 1
        xaxis = list(range(off_s, off_e))
        # Start each forecast line at the last observed value so it joins
        # the plotted series.
        yaxis = [series.values[off_s]] + list(forecast)
        pyplot.plot(xaxis, yaxis, color='orange')

    # Show the figure from inside the function; this call used to sit at
    # module level, where it ran on import and never after plotting.
    pyplot.show()
 

In [None]:
def read_file(directory='../../', skip_last=300):
    """Load CSV files from a directory into one concatenated DataFrame.

    Args:
        directory: Folder searched (non-recursively) for ``*.csv`` files.
        skip_last: Number of files at the end of the glob result that are
            left out. The order returned by ``glob`` is not sorted here.

    Returns:
        A DataFrame with the rows of all selected files and a fresh
        0..n-1 index. Columns are named ``DATE``, ``EA1`` ... ``EA113``.

    Raises:
        ValueError: If no file remains after skipping ``skip_last`` files.
    """
    files1 = glob.glob(os.path.join(directory, '*.csv'))
    print(len(files1))

    keep = max(len(files1) - skip_last, 0)
    selected = files1[:keep]
    if not selected:
        raise ValueError(
            "No CSV files to load from %r (found %d, skipping the last %d)"
            % (directory, len(files1), skip_last)
        )

    column_names = ["DATE"] + ["EA%d" % i for i in range(1, 114)]

    # error_bad_lines=False skips malformed lines; header=0 treats the first
    # row of each file as the header row being replaced by `names`.
    # NOTE(review): pandas 1.3 deprecated error_bad_lines in favour of
    # on_bad_lines='skip'; switch once the minimum pandas version allows.
    # NOTE(review): usecols selects 198 column positions while only 114
    # names are supplied -- confirm this matches the CSV layout.
    frames = [
        pd.read_csv(path, error_bad_lines=False, header=0, index_col=False,
                    usecols=[*range(0, 198)], names=column_names)
        for path in selected
    ]
    return pd.concat(frames, axis=0, ignore_index=True)

In [None]:
def test_api():
    """Train the LSTM on one column of the loaded data and forecast ahead.

    Rows 27000-32999 of column 89 of the frame returned by ``read_file`` are
    smoothed with an exponentially weighted mean, used to train a stateful
    LSTM with batch size 100, and then forecast with a batch-size-1 copy of
    the network. The copy is also saved to ``model1.h5``.

    Returns:
        ``(date1, x1, origin_x, a1)`` where ``date1`` and ``x1`` are the
        dates and smoothed values from position 5000 of the window onward,
        ``origin_x`` is the full unsmoothed window, and ``a1`` is the
        forecast for the single test row on the original scale.
        NOTE(review): ``origin_x`` is returned unsliced while ``date1`` and
        ``x1`` are cut at 5000 -- confirm this is what callers expect.
    """
    xy = read_file()
    date = xy.iloc[27000:33000, 0:1].values.ravel()
    value = xy.iloc[27000:33000, 89:90].values.ravel()

    origin_x = pd.DataFrame({xy.columns[89]: value}, index=date)

    # Exponential moving average (EMA) to smooth the raw signal.
    x1 = origin_x.ewm(170).mean()

    n_lag = 299      # number of lagged inputs used for modelling
    n_seq = 300      # number of forecast steps
    n_test = 1
    n_epochs = 10
    n_batch = 100
    n_neurons = 100

    # Difference and rescale the data, convert it to a supervised learning
    # table and split it into train and test rows.
    scaler, train, test = prepare_data(x1, n_test, n_lag, n_seq)

    # Fit the model with the training batch size.
    model = fit_lstm(train, n_lag, n_seq, n_batch, n_epochs, n_neurons)

    # Rebuild the same architecture with batch size 1 so single rows can be
    # forecast, then copy the trained weights across.
    n_batch = 1
    X1, y1 = train[:, 0:n_lag], train[:, n_lag:]
    X1 = X1.reshape(X1.shape[0], 1, X1.shape[1])

    new_model = Sequential()
    new_model.add(LSTM(n_neurons,
                       batch_input_shape=(n_batch, X1.shape[1], X1.shape[2]),
                       return_sequences=True, stateful=True, dropout=0.2))
    new_model.add(LSTM(90, return_sequences=True, stateful=True, dropout=0.2))
    new_model.add(Bidirectional(LSTM(60, return_sequences=False)))
    new_model.add(Dense(y1.shape[1]))
    new_model.set_weights(model.get_weights())
    new_model.compile(loss='mse', optimizer='RMSprop')
    new_model.save('model1.h5')

    # Forecast once. The model is stateful, so running the same forecast a
    # second time would start from the state left by the first run.
    forecasts = make_forecasts(new_model, n_batch, train, test, n_lag, n_seq)

    # inverse_transform anchors forecast i at
    # series[len(series) - offset + i - 1]. The first test row's targets
    # start n_test + n_seq - 1 differenced steps before the end of the
    # series, so the offset must scale with n_seq rather than being a
    # constant.
    anchor_offset = n_test + n_seq - 1
    forecasts = inverse_transform(x1, forecasts, scaler, anchor_offset)
    actual = [row[n_lag:] for row in test]
    actual = inverse_transform(x1, actual, scaler, anchor_offset)

    # Evaluate the forecasts against the actual values.
    evaluate_forecasts(actual, forecasts, n_lag, n_seq)

    # Each restored step is a one-element array because x1 is a
    # single-column DataFrame; unwrap it to a scalar.
    a1 = [step[0] for step in forecasts[0]]

    x1 = x1.to_numpy().flatten().tolist()
    origin_x = origin_x.to_numpy().flatten().tolist()

    date1 = date.tolist()[5000:]
    x1 = x1[5000:]

    return date1, x1, origin_x, a1