In [10]:
# 导入必要的库
import numpy as np
import keras
from keras import Sequential
from keras.layers import LSTM
from keras.layers import Dense, Dropout
import pandas as pd
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tool import evalIndicator

ModuleNotFoundError: No module named 'keras.wrappers'

In [7]:
# Load the rainfall-runoff record; "Date" is parsed to datetime so it can be used as the index.
# NOTE(review): this is an absolute, machine-specific path (the last recorded run of this cell
# raised FileNotFoundError) — point it at a configurable data directory.
df = pd.read_csv("/Users/datou/PycharmProjects/model/HengyangSta/HY_data/衡阳站降雨径流.csv", parse_dates=["Date"])
# Keep an independent copy that still has "Date" as a regular column; later cells slice the
# dates from it. A real copy (not an alias) protects it from any later in-place change to df.
df_copy = df.copy()
# The former `df.reset_index(drop=True)` discarded its result and was a no-op, so it is removed.
# Index the modelling frame by date.
df = df.set_index('Date')
df

FileNotFoundError: [Errno 2] No such file or directory: '/Users/datou/PycharmProjects/model/HengyangSta/HY_data/衡阳站降雨径流.csv'

In [None]:
# Look-back window length: number of past rows passed to createXY for each sample
# and used as the time-step dimension of the LSTM input.
n_past=15

In [None]:
# Chronological split: the last 30% of the rows are held out for testing.
test_split = round(len(df) * 0.30)
# Use explicit positions: `df[:-test_split]` silently yields an EMPTY training frame
# when test_split rounds to 0, because `-0` is the same as `0`.
split_at = len(df) - test_split
df_for_training = df.iloc[:split_at]
# Start the test frame n_past rows early so that the first test sample has a full
# look-back window; clamp at 0 for very short inputs.
df_for_testing = df.iloc[max(split_at - n_past, 0):]
df_for_training.shape

In [None]:
# Min-max normalisation to the range [0, 1].
# The scaler is fitted on the training rows only and then applied unchanged to the test rows.
scaler = MinMaxScaler(feature_range=(0, 1)).fit(df_for_training)
df_for_training_scaled = scaler.transform(df_for_training)
df_for_testing_scaled = scaler.transform(df_for_testing)

In [None]:
# Build the supervised data set; n_past is the look-back window length (time steps)
def createXY(dataset, n_past, target_col=0):
    """Cut a 2-D series into sliding-window samples for sequence regression.

    For every row index ``i >= n_past`` the rows ``[i - n_past, i)`` (all columns)
    form one input window, and the value at ``(i, target_col)`` is its target.

    Parameters
    ----------
    dataset : array-like of shape (n_rows, n_features)
        Anything ``np.asarray`` can turn into a 2-D array (ndarray, nested list,
        DataFrame, ...).
    n_past : int
        Window length; must be >= 1.
    target_col : int, default 0
        Column whose value is used as the prediction target.

    Returns
    -------
    tuple of np.ndarray
        ``(X, y)`` where ``X`` has shape ``(n_rows - n_past, n_past, n_features)``
        and ``y`` has shape ``(n_rows - n_past,)``. When ``n_rows <= n_past`` both
        are empty, but ``X`` keeps its 3-D shape ``(0, n_past, n_features)``.

    Raises
    ------
    ValueError
        If ``dataset`` is not 2-D or ``n_past`` is smaller than 1.
    """
    data = np.asarray(dataset)
    if data.ndim != 2:
        raise ValueError("dataset must be 2-D (rows, features), got %d-D" % data.ndim)
    if n_past < 1:
        raise ValueError("n_past must be >= 1, got %r" % (n_past,))

    n_rows, n_features = data.shape
    if n_rows <= n_past:
        # np.array([]) would collapse to shape (0,); keep the rank consumers expect.
        empty_x = np.empty((0, n_past, n_features), dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_x, empty_y

    targets = range(n_past, n_rows)
    dataX = [data[i - n_past:i, :] for i in targets]
    dataY = [data[i, target_col] for i in targets]
    return np.array(dataX), np.array(dataY)


In [None]:
"""
网络中的第一层必须定义预期输入数。输入必须是三维的，由Samples、Timesteps和Features组成。
Samples：数据中的行
Timesteps：特征的过去观测值
features：数据中的列
"""
# The window length is n_past: the previous n_past rows (all columns, target included)
# are used to predict the target value of the following row.
trainX, trainY = createXY(df_for_training_scaled, n_past)
trainX.shape, trainY.shape

In [None]:
# Build the test windows with the same look-back length as the training windows.
# df_for_testing was cut n_past rows early, so the first windows reach back into the
# end of the training period.
testX, testY = createXY(df_for_testing_scaled, n_past)
testX.shape, testY.shape

In [None]:
# Build the model
def build_model(n_timesteps=None, n_features=2, learning_rate=1e-3):
    """Build and compile the stacked-LSTM regression network.

    Architecture: ``LSTM(50, return_sequences=True) -> LSTM(100) -> Dropout(0.01)
    -> Dense(1)``, compiled with mean-squared-error loss and the Adam optimizer.

    Parameters
    ----------
    n_timesteps : int, optional
        Number of time steps per input window. Defaults to the notebook-level
        ``n_past`` so that calling ``build_model()`` with no arguments behaves as before.
    n_features : int, default 2
        Number of input columns per time step.
    learning_rate : float, default 1e-3
        Learning rate passed to Adam.

    Returns
    -------
    keras.Sequential
        The compiled model.
    """
    if n_timesteps is None:
        # Resolved at call time so a later change of n_past is picked up.
        n_timesteps = n_past

    # 1. Define the network as a sequential stack of layers.
    grid_model = Sequential()
    # The first layer declares the expected input shape as (time steps, features).
    # When LSTM layers are stacked, every layer except the last must return the whole
    # sequence (return_sequences=True) so the next LSTM still receives 3-D input.
    grid_model.add(LSTM(50, return_sequences=True, input_shape=(n_timesteps, n_features)))
    # Second LSTM returns only its final output, which feeds the dense output layer.
    grid_model.add(LSTM(100))
    grid_model.add(Dropout(0.01))
    # Fully connected layer producing the single predicted value.
    grid_model.add(Dense(1))

    # 2. Compile the network: choose the optimizer and the loss it minimises.
    optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
    grid_model.compile(loss='mse', optimizer=optimizer)
    return grid_model


In [None]:
# Grid search with cross-validation over the training hyper-parameters;
# the best model is taken from the fitted search afterwards.
parameters = dict(batch_size=[16, 20], epochs=[16, 20])
# Keras verbosity: 2 reduces the log to one line per epoch, 1 shows a progress bar,
# and 0 (not 1) turns the output off completely.
# grid_model = KerasRegressor(build_fn=build_model, verbose=1, validation_data=(testX, testY))
grid_model = KerasRegressor(build_fn=build_model, verbose=1)
grid_search = GridSearchCV(grid_model, parameters, cv=5)

In [None]:
"""
训练网络
"""
# Run the grid search on the training windows (fit returns the fitted search object).
grid_search = grid_search.fit(trainX, trainY)
# Keep the best estimator found by the search in my_model
my_model = grid_search.best_estimator_

In [None]:
"""
进行预测：测试集
"""
# Predictions for the test windows; still in the scaler's normalised units.
prediction = my_model.predict(testX)

In [None]:
# Hyper-parameter combination selected by the grid search.
grid_search.best_params_

In [None]:
"""
进行预测：训练集
"""
# Predictions for the training windows; still in the scaler's normalised units.
train_predict=my_model.predict(trainX)

In [None]:
# Undo the scaling of the test predictions.
# inverse_transform needs a two-column input here, so every prediction is duplicated
# into both columns; only column 0 (the target column used by createXY) is kept.
prediction_copies_array = np.repeat(prediction, 2, axis=-1)
pred = scaler.inverse_transform(
    prediction_copies_array.reshape(len(prediction), 2)
)[:, 0]
pred.shape

In [None]:
# Same inverse scaling for the observed test targets:
# duplicate into two columns, invert, keep column 0.
original_copies_array = np.repeat(testY, 2, axis=-1)
original = scaler.inverse_transform(
    original_copies_array.reshape(len(testY), 2)
)[:, 0]
original.shape

In [None]:
# Inverse scaling of the TRAINING-set predictions
# (duplicate into two columns, invert, keep column 0).
prediction_copies_array = np.repeat(train_predict, 2, axis=-1)
train_pred = scaler.inverse_transform(
    prediction_copies_array.reshape(len(train_predict), 2)
)[:, 0]
train_pred.shape

In [None]:
# Inverse scaling of the observed TRAINING targets
# (duplicate into two columns, invert, keep column 0).
train_original_copies_array = np.repeat(trainY, 2, axis=-1)
train_original = scaler.inverse_transform(
    train_original_copies_array.reshape(len(trainY), 2)
)[:, 0]
train_original.shape

In [None]:
# Wrap the simulated test runoff in a one-column frame named "Qsim".
pred = pd.DataFrame(pred, columns=['Qsim'])
pred

In [None]:
# Wrap the simulated training runoff in a one-column frame named "Qsim".
train_pred = pd.DataFrame(train_pred, columns=['Qsim'])
train_pred

In [None]:
# Wrap the observed test runoff in a one-column frame named "Qobs".
original = pd.DataFrame(original, columns=['Qobs'])
original

In [None]:
# Wrap the observed training runoff in a one-column frame named "Qobs".
train_original = pd.DataFrame(train_original, columns=['Qobs'])
train_original

In [None]:
# df_copy=df_copy[-(test_split-30):]
# Inspect the un-indexed frame kept at load time; the next cells take the dates from it.
df_copy

In [None]:
# Rows of the raw frame that belong to the test period (the last test_split rows).
test_df = df_copy.iloc[-test_split:]
test_df

In [None]:
# Rows of the raw frame that have a training target: the first n_past rows are skipped
# because they only serve as look-back history, and the test rows are cut off.
train_df = df_copy.iloc[n_past:-test_split]
train_df

In [None]:
# Keep only the dates of the test rows, renumbered from 0 so they line up
# positionally with the prediction frames.
test_df = test_df['Date'].reset_index(drop=True)
test_df

In [None]:
# Keep only the dates of the training rows, renumbered from 0 so they line up
# positionally with the prediction frames.
train_df = train_df['Date'].reset_index(drop=True)
train_df

In [None]:
# Put date, observed and simulated runoff of the test period side by side,
# index the result by date and write it to CSV.
test_df_full = (
    pd.concat([test_df, original, pred], axis=1)
    .reset_index(drop=True)
    .set_index('Date')
)
test_df_full.to_csv('/Users/datou/PycharmProjects/model/HengyangSta/HY_data/HY_TestLSTM.csv')
test_df_full

In [None]:
# Put date, observed and simulated runoff of the training period side by side,
# index the result by date and write it to CSV.
train_df_full = (
    pd.concat([train_df, train_original, train_pred], axis=1)
    .reset_index(drop=True)
    .set_index('Date')
)
train_df_full.to_csv('/Users/datou/PycharmProjects/model/HengyangSta/HY_data/HY_TrainLSTM.csv')
train_df_full

In [None]:
# Evaluate the test-period simulation against the observations
# (the original note describes this as computing the Nash efficiency coefficient).
# NOTE(review): the return value is discarded here — confirm that eval() reports its result itself.
evalIndicator.eval(test_df_full["Qsim"], test_df_full['Qobs'])
# Plot observed vs. simulated runoff for the test period.
plt.figure(figsize=(12,5))
plt.plot(test_df_full['Qobs'], color='red', label='Qobs')
plt.plot(test_df_full['Qsim'], color='blue', label='Qsim',linestyle="--")
plt.title(' LSTM--')
plt.xlabel('Time')
plt.ylabel('Runoff(m3/s)')
# The legend has to be drawn BEFORE savefig; otherwise the PNG written to disk has no legend.
plt.legend()
plt.savefig("/Users/datou/PycharmProjects/model/HengyangSta/HY_picture/HY_TestLSTM.png")
plt.show()

In [None]:
# Evaluate the training-period simulation against the observations
# (the original note describes this as computing the Nash efficiency coefficient).
# NOTE(review): the return value is discarded here — confirm that eval() reports its result itself.
evalIndicator.eval(train_df_full['Qsim'], train_df_full['Qobs'])
# Plot observed vs. simulated runoff for the training period.
plt.figure(figsize=(12,5))
plt.plot(train_df_full['Qobs'], color='red', label='Qobs')
plt.plot(train_df_full['Qsim'], color='blue', label='Qsim',linestyle="--")
# NOTE(review): the title ends in "R2: " with no value appended — left unchanged.
plt.title(' LSTM--' + 'R2: ' )
plt.xlabel('Time')
plt.ylabel('Runoff(m3/s)')
# The legend has to be drawn BEFORE savefig; otherwise the PNG written to disk has no legend.
plt.legend()
plt.savefig("/Users/datou/PycharmProjects/model/HengyangSta/HY_picture/HY_TrainLSTM.png")
plt.show()

In [None]:
# # 预测一些未来值
# # 从主 df 数据集中获取我们在开始时加载的最后 30 个值
# df_past = df.iloc[-30:, :]
# # df_30_days_past.tail()
# df_future = pd.read_csv("test.csv", parse_dates=["Date"], index_col=[0])
# Qobs_future = df_future["Q"]
# # 剔除目标列
# df_future["Q"] = 0
# df_future = df_future[["Q", "P"]]
# df_past = df_past.values
# df_future

In [None]:
# # 对数据进行缩放
# old_scaled_array = scaler.transform(df_past)
# new_scaled_array = scaler.transform(df_future)
# new_scaled_df = pd.DataFrame(new_scaled_array)
# new_scaled_df.iloc[:, 0] = np.nan
# full_df = pd.concat([pd.DataFrame(old_scaled_array), new_scaled_df]).reset_index().drop(["index"], axis=1)
# full_df

In [None]:
# 使用特征值进行预测
# full_df_scaled_array = full_df.values
# all_data = []
# time_step = 30
# for i in range(time_step, len(full_df_scaled_array)):
#     data_x = []
#     data_x.append(
#         full_df_scaled_array[i - time_step:i, 0:full_df_scaled_array.shape[1]])
#     data_x = np.array(data_x)
#     prediction = my_model.predict(data_x)
#     all_data.append(prediction)
#     full_df.iloc[i, 0] = prediction

In [None]:
# 对预测进行逆变换
# new_array = np.array(all_data)
# new_array = new_array.reshape(-1, 1)
# prediction_copies_array = np.repeat(new_array, 2, axis=-1)
# y_pred_future = scaler.inverse_transform(np.reshape(prediction_copies_array, (len(new_array), 2)))[:, 0]
# y_pred_future = pd.DataFrame(y_pred_future)
# y_pred_future

In [None]:
# Qobs_future=pd.DataFrame(Qobs_future)
# Qobs_future.reset_index(inplace=True)
# Qobs_future

In [None]:
# df_future_full=pd.concat([Qobs_future,y_pred_future],axis=1)
# df_future_full.columns=['Date',"Qobs","Qsim"]
# df_future_full.reset_index(drop=True)
# df_future_full=df_future_full.set_index('Date')


In [None]:
# 绘图进行数据展示
# nash_futrue = evalIndicator.eval(df_future_full['Qsim'], df_future_full['Qobs'])
# plt.plot(df_future_full['Qsim'], color='green', label='Qsim')
# plt.plot(df_future_full['Qobs'], color='yellow', label='Qobs')
# plt.title('Verify Prediction-' + 'NSE: ' + format(nash_futrue, '.3f'))
# plt.xlabel('Time')
# plt.ylabel('Runoff(m3/s)')
# plt.legend()
# plt.show()