In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
from scikeras.wrappers import KerasRegressor
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.losses import mean_squared_error
from sklearn.model_selection import train_test_split
from keras import regularizers
from tools import calculate_nse, calculate_mse, calculate_rmse, calculate_mae, calculate_mape, calculate_si

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

2023-11-27 06:59:14.951156: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-27 06:59:14.994932: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-27 06:59:14.994967: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-27 06:59:14.994997: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-27 06:59:15.003305: I tensorflow/core/platform/cpu_feature_g

In [2]:
# 读取数据集
datalabel210103 = pd.read_excel('fakoulabeldata/202101-03.xls')
datalabel210406 = pd.read_excel('fakoulabeldata/202104-06.xls')
datalabel210709 = pd.read_excel('fakoulabeldata/202107-09.xls')
datalabel220103 = pd.read_excel('fakoulabeldata/202201-03.xls')
datalabel220406 = pd.read_excel('fakoulabeldata/202204-06.xls')
datalabel220709 = pd.read_excel('fakoulabeldata/202207-09.xls')
datalabel221012 = pd.read_excel('fakoulabeldata/202210-12.xls')
datalabel230103 = pd.read_excel('fakoulabeldata/202301-03.xls')
datalabel230406 = pd.read_excel('fakoulabeldata/202304-06.xls')
datalabel230709 = pd.read_excel('fakoulabeldata/202307-09.xls')

frame = (datalabel210103.iloc[:,:65333],datalabel210406.iloc[:,:65529],datalabel210709.iloc[:,:65530],
       datalabel220103.iloc[:,:65531],datalabel220406.iloc[:,:65525],datalabel220709.iloc[:,:65517],
       datalabel221012.iloc[:,:65522],datalabel230103.iloc[:,:65503],datalabel230406.iloc[:,:65495],
       datalabel230709.iloc[:,:65534])

all_data = pd.concat(frame, ignore_index=True)
selected_data = all_data.iloc[:,1]
selected_data = np.r_[selected_data].reshape(-1, 1)

# 归一化数据
scaler = MinMaxScaler(feature_range=(0, 1))
selected_data = scaler.fit_transform(selected_data)

lookback = 8

# 将数据转换为适用于LSTM的输入格式
def create_dataset(dataset):
    X, y = [], []
    for i in range(len(dataset) - lookback):
        X.append(dataset[i:i+lookback])
        y.append(dataset[i+lookback])
    return np.array(X), np.array(y).reshape(-1, 1)

# 将训练集转换为LSTM的输入格式
X, y = create_dataset(selected_data)
X = np.reshape(X, (X.shape[0], -1))

train_size = int(len(X)*0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]

np.random.seed(2023)
shuffle_indices = np.random.permutation(len(X_train))
X_train = X_train[shuffle_indices]
y_train = y_train[shuffle_indices]

In [3]:
# 定义LSTM模型

def create_lstm_model(lookback):
    model = Sequential()
    model.add(LSTM(units=32, input_shape=(lookback, 1)))
    model.add(Dropout(0.2)) 
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model
lstm_model = create_lstm_model(lookback=lookback)
lstm_model.fit(X_train, y_train, epochs=20, batch_size=48, verbose=0)


2023-11-27 06:59:26.995727: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 18923 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:20:00.0, compute capability: 8.6
2023-11-27 06:59:30.673330: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:442] Loaded cuDNN version 8905
2023-11-27 06:59:32.082232: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f00d427e980 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-11-27 06:59:32.082301: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3090, Compute Capability 8.6
2023-11-27 06:59:32.098794: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-11-27 06:59:32.267809: I ./tensorflow/compiler/jit/device_compiler.h:186] Compile

<keras.src.callbacks.History at 0x7f03ccdb0290>

In [4]:
# 进行预测
predictions = lstm_model.predict(X_test)
# 反归一化预测结果
predictions = scaler.inverse_transform(predictions.reshape(-1, 1))
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))

# 计算评价指标
nse = calculate_nse(y_test[:,0], predictions[:,0])
mse = calculate_mse(y_test[:,0], predictions[:,0])
mae = calculate_mae(y_test[:,0], predictions[:,0])
rmse = calculate_rmse(y_test, predictions)
si = calculate_si(y_test[:,0], predictions[:,0])
mape = calculate_mape(y_test[:,0], predictions[:,0])

# 输出结果
print("NSE:", nse)
print("MSE:", mse)
print("MAE:", mae)
print("RMSE:", rmse)
print("SI:", si)
print("MAPE:", mape)

 472/4094 [==>...........................] - ETA: 12s

NSE: 0.9997625959632451
MSE: 2424.821914314401
MAE: 32.93622089724322
RMSE: 49.2424807896028
SI: 0.011718867955212566
MAPE: 1.350800846030144


In [5]:
test_datalabel = pd.read_csv('selected_data.csv').iloc[:,0]
test_datalabel = np.r_[test_datalabel].reshape(-1, 1)
test_scaler = MinMaxScaler(feature_range=(0, 1))
test_data = test_scaler.fit_transform(test_datalabel)
test_dataX, test_datay = create_dataset(test_data)
test_dataX = np.reshape(test_dataX, (test_dataX.shape[0], -1))
test_predictions = lstm_model.predict(test_dataX)

# 反归一化预测结果
test_predictions = test_scaler.inverse_transform(test_predictions.reshape(-1, 1))
test_datay = test_scaler.inverse_transform(test_datay.reshape(-1, 1))

# 计算评价指标
nse = calculate_nse(test_datay[:,0], test_predictions[:,0])
mae = calculate_mae(test_datay[:,0], test_predictions[:,0])
rmse = calculate_rmse(test_datay[:,0], test_predictions[:,0])
mse = calculate_mse(test_datay[:,0], test_predictions[:,0])
si = calculate_si(test_datay[:,0], test_predictions[:,0])
mape = calculate_mape(test_datay[:,0], test_predictions[:,0])

# 输出结果
print("NSE:", nse)
print("MAE:", mae)
print("RMSE:", rmse)
print("MSE:", mse)
print("SI:", si)
print("MAPE:", mape)

NSE: 0.9997207325010112
MAE: 32.93975505717233
RMSE: 52.75423056933316
MSE: 2783.008842962365
SI: 0.01183849771990747
MAPE: 1.4203313907396728
