In [10]:
import keras
from keras import layers
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import copy

from math import sqrt
from numpy import concatenate
from matplotlib import pyplot
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn import svm, metrics

In [11]:
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of shifted columns.

    Args:
        data: Observations in any form ``pd.DataFrame`` accepts (flat list,
            list of rows, 1-D or 2-D array, Series, DataFrame).
        n_in: Number of lagged steps (t-n_in ... t-1) emitted as input columns.
        n_out: Number of steps (t ... t+n_out-1) emitted as output columns.
        dropnan: When true, rows left incomplete by the shifting are dropped.

    Returns:
        DataFrame whose columns are the shifted copies of every variable,
        named ``var<j>(t-<i>)``, ``var<j>(t)`` and ``var<j>(t+<i>)``.
    """
    df = pd.DataFrame(data)
    # Count the variables on the frame that is actually shifted. Guessing from
    # the input type mislabels a list of multi-column rows and fails on 1-D
    # arrays, which have no second axis.
    n_vars = df.shape[1]

    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += ['var%d(t-%d)' % (j + 1, i) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += ['var%d(t)' % (j + 1) for j in range(n_vars)]
        else:
            names += ['var%d(t+%d)' % (j + 1, i) for j in range(n_vars)]

    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values introduced by the shifts
    if dropnan:
        agg = agg.dropna()
    return agg

In [12]:
def data_preprocessing(step_size, csv_path='data_2019v1.csv', n_train=304 * 24):
    """Load the CSV, min-max scale it, window it and split it into train/test.

    Args:
        step_size: Number of lagged time steps used as inputs per sample.
        csv_path: CSV file to read; row 0 is the header, column 0 the index.
            Defaults to the file name that was previously hard-coded.
        n_train: Number of leading supervised rows that form the training
            split; all remaining rows form the test split. Defaults to the
            previously hard-coded ``304 * 24``.

    Returns:
        Tuple ``(train, test, scaler)``: two 2-D arrays of supervised rows
        (lagged inputs followed by the values at time t) and the fitted
        ``MinMaxScaler`` needed to undo the scaling.
    """
    raw = pd.read_csv(csv_path, header=0, index_col=0)

    # ensure all data is float
    values = raw.values.astype('float32')

    # normalize every column to [0, 1]
    # NOTE(review): the scaler is fit on all rows, including those that end up
    # in the test split - confirm this leakage is acceptable.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(values)

    # frame as supervised learning: `step_size` past steps predict one step
    reframed = series_to_supervised(scaled, step_size, 1)
    print(reframed.shape)

    # split into train and test sets by row position (no shuffling)
    supervised = reframed.values
    train = supervised[:n_train, :]
    test = supervised[n_train:, :]

    return train, test, scaler

In [13]:
# Window geometry shared by the model definitions and the reshaping cells.
feature_num = 11  # variables per time step fed to the generator
step_size = 5     # lagged time steps per sample

In [14]:
# Generator: maps a (step_size, feature_num) window to one predicted value.
generator_input = keras.Input(shape=(step_size,feature_num))
# First LSTM keeps the full sequence so the second LSTM can consume it.
x = layers.LSTM(75,return_sequences=True)(generator_input)
#x = layers.Dropout(0.2)(x)
# Second LSTM returns only its final state (25 units).
x = layers.LSTM(25)(x)
# Single-unit regression head followed by a LeakyReLU activation.
x = layers.Dense(1)(x)
x = layers.LeakyReLU()(x)
generator = keras.models.Model(generator_input, x)
generator.summary()

Model: "functional_7"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         [(None, 5, 11)]           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 5, 75)             26100     
_________________________________________________________________
lstm_3 (LSTM)                (None, 25)                10100     
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 26        
_________________________________________________________________
leaky_re_lu_4 (LeakyReLU)    (None, 1)                 0         
Total params: 36,226
Trainable params: 36,226
Non-trainable params: 0
_________________________________________________________________


In [15]:
# Discriminator: scores a series of step_size + 1 scalar values.
# The Dense layers act on the last axis only, so the model emits one sigmoid
# score per position, i.e. an output of shape (batch, step_size + 1, 1).
discriminator_input = layers.Input(shape=(step_size+1,1))
y = layers.Dense(72)(discriminator_input)
y = layers.LeakyReLU(alpha=0.05)(y)
y = layers.Dense(100)(y)
y = layers.LeakyReLU(alpha=0.05)(y)
y = layers.Dense(10)(y)
y = layers.LeakyReLU(alpha=0.05)(y)
# Sigmoid output to pair with the binary cross-entropy loss used at compile time.
y = layers.Dense(1,activation='sigmoid')(y)
discriminator = keras.models.Model(discriminator_input, y)
discriminator.summary()

Model: "functional_9"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 6, 1)]            0         
_________________________________________________________________
dense_6 (Dense)              (None, 6, 72)             144       
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 6, 72)             0         
_________________________________________________________________
dense_7 (Dense)              (None, 6, 100)            7300      
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 6, 100)            0         
_________________________________________________________________
dense_8 (Dense)              (None, 6, 10)             1010      
_________________________________________________________________
leaky_re_lu_7 (LeakyReLU)    (None, 6, 10)            

In [16]:
# Compile the discriminator on its own so it can be trained directly with
# `discriminator.train_on_batch`. `learning_rate` replaces the legacy `lr`
# keyword, which newer Keras releases deprecate.
# NOTE(review): `decay` is also a legacy argument in recent Keras versions -
# confirm against the installed release.
discriminator_optimizer = keras.optimizers.RMSprop(learning_rate=8e-4, clipvalue=1.0, decay=1e-8)
discriminator.compile(optimizer=discriminator_optimizer, loss='binary_crossentropy')

In [17]:
# Freeze the discriminator BEFORE the stacked model is compiled, so that
# training `gan` only updates the generator. This line must stay after the
# discriminator's own compile call and before `gan.compile`.
discriminator.trainable = False

# Stacked model: input window -> generator -> discriminator score.
# NOTE(review): the generator emits shape (batch, 1) while the discriminator
# was declared for (batch, step_size + 1, 1); inside `gan` it therefore scores
# only the predicted value, not the history + prediction series it is trained
# on directly - confirm this is intended.
gan_input = keras.Input(shape=(step_size, feature_num))
gan_output = discriminator(generator(gan_input))
gan = keras.models.Model(gan_input, gan_output)

# `learning_rate` replaces the legacy `lr` keyword.
gan_optimizer = keras.optimizers.RMSprop(learning_rate=4e-4, clipvalue=1.0, decay=1e-8)
gan.compile(optimizer=gan_optimizer, loss='binary_crossentropy')



In [18]:
# Load, scale and window the data; `scaler` is kept to undo the scaling later.
# Prints the shape of the supervised frame as a side effect.
train,test,scaler = data_preprocessing(step_size)

(7671, 66)


In [19]:
n_obs = step_size * feature_num
train_X, train_Y = train[:, :n_obs], train[:, -1] #choose the first feature,namely 'open price'
#print(train_X[:3])
#print(train_Y[:3])

In [20]:
test_X, test_Y = test[:, :n_obs], test[:, -1]  # !!!!!!!!!!!!!!!
#print(train_X.shape, len(train_X), train_Y.shape) #train_X 应为 280*30,train_Y 应为 280*6
#print(test_X.shape, len(test_X), test_Y.shape)

In [21]:
# reshape input to be 3D [samples, timesteps, features]
trainX = train_X.reshape((train_X.shape[0], step_size, feature_num))
testX = test_X.reshape((test_X.shape[0], step_size, feature_num))
#print(trainX.shape, len(trainX), train_Y.shape) 
#print(testX.shape, len(testX), test_Y.shape) 

In [22]:
# One training step per training sample. The count has to equal the number of
# training rows: the loop indexes trainX[step], and the collected predictions
# are later concatenated column-wise with train_X.
iterations = len(trainX)
batch_size = 1  # the training loop feeds exactly one window per step
start = 0
final = []      # collects the generator's prediction for every training step

In [None]:
# Adversarial training, one training window per step.
# Start from an empty list so re-running this cell does not append to the
# predictions of a previous run.
final = []

# Targets that never change are built once instead of on every inner pass.
# Discriminator labels: 1 for the generated series, 0 for the real one, with
# one label per position of the step_size + 1 long series.
labels = np.concatenate([np.ones((1, step_size + 1, 1)), np.zeros((1, step_size + 1, 1))])
# Generator targets: the "real" label, so the generator is pushed to fool the
# (frozen) discriminator.
misleading_targets = np.zeros((batch_size, 1))

for step in range(iterations):
    temp_X = trainX[step].reshape(batch_size, step_size, feature_num)
    temp_Y = train_Y[step].reshape(batch_size, 1)
    predictions = generator.predict(temp_X)

    # History of the first variable over the window, as a (step_size, 1) column.
    history = trainX[step][:, :1]
    # Fake series = history + predicted value; real series = history + true value.
    input_f = np.concatenate([history, predictions], 0)
    input_r = np.concatenate([history, temp_Y], 0)
    d_inputs = np.stack([input_f, input_r])

    # Train the discriminator.
    for i in range(25):
        d_loss = discriminator.train_on_batch(d_inputs, labels)

    # Train the generator through the gan model, in which the discriminator
    # weights are frozen.
    for i in range(5):
        a_loss = gan.train_on_batch(temp_X, [misleading_targets])

    # Keep the prediction made before this step's weight updates.
    final.append(predictions[0])



In [None]:
# The scaler was fit on feature_num columns, so values can only be
# inverse-transformed as full-width rows. Column 0 carries the value of
# interest; the other feature_num - 1 columns are filler copied from the tail
# of train_X and are identical in both arrays.
train_filler = train_X[:, train_X.shape[1] - (feature_num - 1):]

# New names are used instead of overwriting the `final` list, so this cell can
# be re-run without failing.
train_pred_rows = np.concatenate((np.array(final), train_filler), axis=1)
final2 = np.concatenate((np.reshape(train_Y, (-1, 1)), train_filler), axis=1)

int1 = scaler.inverse_transform(train_pred_rows)  # column 0: generator predictions
int2 = scaler.inverse_transform(final2)           # column 0: true targets

In [None]:
# Overlay column 0 of both inverse-scaled arrays on one set of axes.
fig, ax = pyplot.subplots()
ax.plot(int1[:, 0], label='train_generator')
ax.plot(int2[:, 0], label='train_discriminator')
ax.legend()
pyplot.show()

In [None]:
# calculate RMSE on the target column only. Every other column of int1/int2 is
# identical filler added for inverse scaling; averaging over them as well
# shrinks the reported error.
rmse = sqrt(mean_squared_error(int2[:, 0], int1[:, 0]))
print('Training RMSE: %.3f' % rmse)

# Mean absolute percentage error (as a fraction) on the same column.
MAPE_O1 = np.mean(np.abs((int2[:,0] - int1[:,0]) / int2[:,0]))
print('Mtraining result:',MAPE_O1)

In [None]:
# Predict every held-out window in one batched call; the number of test rows
# comes from testX itself instead of a hard-coded count.
test_predictions = generator.predict(testX)

# Rebuild full-width rows so the scaler can be inverted: column 0 carries the
# value of interest, the remaining feature_num - 1 columns are filler copied
# from the tail of test_X and are identical in both arrays.
test_filler = test_X[:, test_X.shape[1] - (feature_num - 1):]
final = np.concatenate((test_predictions, test_filler), axis=1)
final2 = np.concatenate((np.reshape(test_Y, (-1, 1)), test_filler), axis=1)
int1 = scaler.inverse_transform(final)   # column 0: generator predictions
int2 = scaler.inverse_transform(final2)  # column 0: true targets

# plot history
pyplot.plot(int1[:,0], label='test_generator')
pyplot.plot(int2[:,0], label='test_discriminator')
pyplot.legend()
pyplot.show()

In [None]:
# calculate RMSE on the target column only. Every other column of int1/int2 is
# identical filler added for inverse scaling; averaging over them as well
# shrinks the reported error.
rmse = sqrt(mean_squared_error(int2[:, 0], int1[:, 0]))
print('Test RMSE: %.3f' % rmse)

# Mean absolute percentage error (as a fraction) on the same column.
MAPE_O2 = np.mean(np.abs((int2[:,0] - int1[:,0]) / int2[:,0]))
print('Mtesting result:',MAPE_O2)