[toc]

# Tensorflow2 利用 lstm 实现股票预测

## 导入数据

In [1]:
import tensorflow as tf
import numpy as np
import tushare as ts
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

seq_length = 20
input_dim = 5
output_dim = 1

stock_data = ts.get_k_data('600000', start='2016-01-01', end='2018-11-20')
features = ['open', 'high', 'low', 'volume', 'close']
target = ['close']
data = stock_data[features].values # shape = (685, 5)
target = stock_data[target].values # shape = (685, 1)

本接口即将停止更新，请尽快使用Pro版接口：https://tushare.pro/document/2


## 划分验证集

这里不用 sklearn.model_selection.train_test_split，因为这会导致数据穿越

In [2]:
val_rate = 0.1
sample_size = data.shape[0]
train_size = int(sample_size * (1 - val_rate))
x_train, x_val, y_train, y_val = data[:train_size], data[train_size:], target[:train_size], target[:train_size]

print(f"train size: {train_size}, val sizse: {sample_size - train_size}")

train size: 616, val sizse: 69


## 对数据进行标准化

In [3]:
x_scaler = MinMaxScaler()
y_scaler = MinMaxScaler()

x_train_scaled = x_scaler.fit_transform(x_train)
y_train_scaled = y_scaler.fit_transform(y_train)
x_val_scaled = x_scaler.transform(x_val)
y_val_scaled = y_scaler.transform(y_val)

## 取得序列数据

In [4]:
def get_sequence_data(x, y):
    n = x.shape[0]
    sample_size = n - (seq_length + 1)
    x_sequence_sample = np.array([x[i:i+seq_length] for i in range(sample_size)])
    y_sequence_sample = np.array([y[i+seq_length] for i in range(sample_size)])
    return x_sequence_sample, y_sequence_sample

In [5]:
x_train_sequence, y_train_sequence = get_sequence_data(x_train_scaled, y_train_scaled)
x_val_sequence, y_val_sequence = get_sequence_data(x_val_scaled, y_val_scaled)

print(f"x_train_sequence shape: {x_train_sequence.shape}, y_train_sequence shape: {y_train_sequence.shape}")
print(f"x_val_sequence shape: {x_val_sequence.shape}, y_val_sequence shape: {y_val_sequence.shape}")

x_train_sequence shape: (595, 20, 5), y_train_sequence shape: (595, 1)
x_val_sequence shape: (48, 20, 5), y_val_sequence shape: (48, 1)


## 搭建网络

In [7]:
from tensorflow.keras.layers import Input, Dense, LSTM
from tensorflow.keras.models import Model

# 构建神经网络层 1层LSTM层+3层Dense层
lstm_input = Input(shape=(seq_length, input_dim), name='lstm_input') # input层不包括batch的shape
lstm_output = LSTM(128, activation='tanh', dropout=0.5)(lstm_input)  # LSTM网络
Dense_output_1 = Dense(64, activation='relu')(lstm_output)  
Dense_output_2 = Dense(16, activation='relu')(Dense_output_1)  
predictions = Dense(output_dim, activation='tanh')(Dense_output_2) 
model = Model(inputs=lstm_input, outputs=predictions)
def customLoss(y, yhat):
    return tf.reduce_mean((tf.math.abs(y - yhat)/(y+1e-7)))

earlyStopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=100, verbose=1, mode='auto')
model.compile(optimizer=tf.keras.optimizers.Adam(0.001), loss=customLoss)
model.fit(x_train_sequence, y_train_sequence, validation_data=(x_val_sequence, y_val_sequence), batch_size=32, epochs=1000, callbacks=[earlyStopping])

Train on 595 samples, validate on 48 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/100

Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000
Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch 143/1000
Epoch 144/1000
Epo

Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000
Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000
Epoch 217/1000
Epoch 218/

Epoch 227/1000
Epoch 228/1000
Epoch 229/1000
Epoch 230/1000
Epoch 231/1000
Epoch 232/1000
Epoch 233/1000
Epoch 234/1000
Epoch 235/1000
Epoch 236/1000
Epoch 237/1000
Epoch 238/1000
Epoch 239/1000
Epoch 240/1000
Epoch 241/1000
Epoch 242/1000
Epoch 243/1000
Epoch 244/1000
Epoch 245/1000
Epoch 246/1000
Epoch 247/1000
Epoch 248/1000
Epoch 249/1000
Epoch 250/1000
Epoch 251/1000
Epoch 252/1000
Epoch 253/1000
Epoch 254/1000
Epoch 255/1000
Epoch 00255: early stopping


<tensorflow.python.keras.callbacks.History at 0x133c63c88>

## 查看在 training data 上的效果

In [None]:
y_pred = y_scaler.inverse_transform(model.predict(x_train_sequence))
y_train_truth = y_scaler.inverse_transform(y_train_sequence)
plt.plot(y_pred, label="prediction")
plt.plot(y_train_truth, label="ground truth")
plt.legend()

## 查看在 validation data 上的效果

In [None]:
y_pred = y_scaler.inverse_transform(model.predict(x_val_sequence))
y_val_truth = y_scaler.inverse_transform(y_val_sequence)
plt.plot(y_pred, label="prediction")
plt.plot(y_val_truth, label="ground truth")
plt.legend()

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tushare as ts
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

#参数设置/parameter setting
timesteps = seq_length = 20 #时间窗/window length
data_dim = 5 #输入数据维度/dimension of input data
output_dim = 1 #输出数据维度/dimension of output data

#数据准备/data preparation 
#变量选取Open,High,Low,Close,Volume，以浦发银行股票为例
stock_data = ts.get_k_data('600000',start='2016-01-01',end='2018-11-20')
xy = stock_data[['open','close','high','low','volume']]
xy = np.array(xy.values)

#切分训练集合测试集/split to train and testing
train_size = int(len(xy) * 0.9)
test_size = len(xy) - train_size
xy_train, xy_test = np.array(xy[0:train_size]),np.array(xy[train_size:len(xy)])

#对training set进行预处理
scaler = MinMaxScaler()
xy_train_new = scaler.fit_transform(xy_train)
x_new = xy_train_new[:,0:5]
y_new = xy_train_new[:,1]

x = x_new
y = y_new
dataX = []
dataY = []
for i in range(0, len(y) - seq_length):
    _x = x[i:i + seq_length]
    _y = y[i + seq_length]  # Next close price
    print(_x, "->", _y)
    dataX.append(_x)
    dataY.append(_y)

#处理数据shape,准备进入神经网络层
x_real = np.vstack(dataX).reshape(-1,seq_length,data_dim)
y_real= np.vstack(dataY).reshape(-1,output_dim)
print(x_real.shape)
print(y_real.shape)
dataX = x_real
dataY = y_real

trainX, trainY = dataX, dataY

#对test set进行预处理，这里用了training的scaler
xy_test_new = scaler.transform(xy_test)
x_new = xy_test_new[:,0:5]
y_new = xy_test_new[:,1]

x = x_new
y = y_new
dataX = []
dataY = []
for i in range(0, len(y) - seq_length):
    _x = x[i:i + seq_length]
    _y = y[i + seq_length]  # Next price change
    print(_x, "->", _y)
    dataX.append(_x)
    dataY.append(_y)

#处理数据shape,准备进入神经网络层
x_real = np.vstack(dataX).reshape(-1,seq_length,data_dim)
y_real= np.vstack(dataY).reshape(-1,output_dim)
print(x_real.shape)
print(y_real.shape)
dataX = x_real
dataY = y_real

testX, testY = dataX, dataY

from tensorflow.keras.layers import Input, Dense, LSTM
from tensorflow.keras.models import Model

# 构建神经网络层 1层LSTM层+3层Dense层
lstm_input = Input(shape=(seq_length, data_dim), name='lstm_input')#shape: 形状元组（整型）不包括batch size。表示了预期的输入将是一批（seq_len,data_dim）的向量。
lstm_output = LSTM(128, activation='tanh', dropout=0.5)(lstm_input)#LSTM网络
#units: Positive integer,dimensionality of the output space.
#dropout: Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs.
Dense_output_1 = Dense(64, activation='relu')(lstm_output)#全连接网络
Dense_output_2 = Dense(16, activation='relu')(Dense_output_1)#全连接网络
predictions = Dense(output_dim, activation='tanh')(Dense_output_2)#全连接网络

model = Model(inputs=lstm_input, outputs=predictions)
#This model will include all layers required in the computation of output given input.
model.compile(optimizer='adam', loss='mse', metrics=['mse'])
#Configures the model for training.
#optimizer: String (name of optimizer) or optimizer instance. See optimizers.
#loss: String (name of objective function) or objective function.The loss value will be minimized by the model.
#metrics: List of metrics to be evaluated by the model during training and testing. Typically you will use  metrics=['accuracy'].
model.fit(trainX, trainY, batch_size=len(trainX), epochs=100, verbose=2)
#Trains the model for a given number of epochs (iterations on a dataset).
#verbose: Integer. 0, 1, or 2. Verbosity mode. 0 = silent, 1 = progress bar, 2 = one line per epoch.

# 保存模型
model.save('model.h5')


查看在 training data 上的效果

In [None]:
trainPredict = model.predict(trainX)
trainPredict1 = trainPredict * scaler.data_range_[1] + scaler.data_min_[1]
trainY1 = trainY * scaler.data_range_[1] + scaler.data_min_[1]
plt.plot(trainY1,color='blue', label='ground truth')
plt.plot(trainPredict1,color='orange', label="prediction")
plt.legend()
plt.title("train")
plt.show()
print(max(abs(trainPredict1 - trainY1)))

## 预测

In [None]:


# 载入模型
from tensorflow.keras.models import load_model
model = load_model('model.h5')

testPredict = model.predict(testX)
testPredict1 = testPredict * scaler.data_range_[1] + scaler.data_min_[1]
testY1 = testY * scaler.data_range_[1] + scaler.data_min_[1]
plt.plot(testY1,color='blue', label="ground truth")
plt.plot(testPredict1,color='orange', label="prediction")
plt.legend()
plt.title("val")
plt.show()
print(max(abs(testPredict1 - testY1)))