In [1]:
import os
import warnings
import pandas as pd
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from tools import calculate_nse, calculate_mse, calculate_rmse, calculate_mae, calculate_mape, calculate_si

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
warnings.filterwarnings("ignore")
import tensorflow as tf
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)


2023-11-28 00:36:03.631643: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-11-28 00:36:03.678033: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-28 00:36:03.678066: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-28 00:36:03.678110: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-11-28 00:36:03.687164: I tensorflow/core/platform/cpu_feature_g

In [2]:
# 读取数据集
datalabel210103 = pd.read_excel('fakoulabeldata/202101-03.xls')
datalabel210406 = pd.read_excel('fakoulabeldata/202104-06.xls')
datalabel210709 = pd.read_excel('fakoulabeldata/202107-09.xls')
datalabel220103 = pd.read_excel('fakoulabeldata/202201-03.xls')
datalabel220406 = pd.read_excel('fakoulabeldata/202204-06.xls')
datalabel220709 = pd.read_excel('fakoulabeldata/202207-09.xls')
datalabel221012 = pd.read_excel('fakoulabeldata/202210-12.xls')
datalabel230103 = pd.read_excel('fakoulabeldata/202301-03.xls')
datalabel230406 = pd.read_excel('fakoulabeldata/202304-06.xls')
datalabel230709 = pd.read_excel('fakoulabeldata/202307-09.xls')

frame = (datalabel210103.iloc[:,:65333],datalabel210406.iloc[:,:65529],datalabel210709.iloc[:,:65530],
       datalabel220103.iloc[:,:65531],datalabel220406.iloc[:,:65525],datalabel220709.iloc[:,:65517],
       datalabel221012.iloc[:,:65522],datalabel230103.iloc[:,:65503],datalabel230406.iloc[:,:65495],
       datalabel230709.iloc[:,:65534])

all_data = pd.concat(frame, ignore_index=True)
selected_data = all_data.iloc[:,1]
selected_data = np.r_[selected_data].reshape(-1, 1)

# 归一化数据
scaler = MinMaxScaler(feature_range=(0, 1))
selected_data = scaler.fit_transform(selected_data)
lookback = 8
# 将数据转换为适用于LSTM的输入格式
def create_dataset(dataset):
    X, y = [], []
    for i in range(len(dataset) - lookback):
        X.append(dataset[i:i+lookback])
        y.append(dataset[i+lookback])
    return np.array(X), np.array(y).reshape(-1, 1)

In [3]:
X, y = create_dataset(selected_data)
X = np.reshape(X, (X.shape[0], -1))

train_size = int(len(X)*0.8)
X_train, y_train = X[:train_size], y[:train_size]
X_test, y_test = X[train_size:], y[train_size:]
np.random.seed(2023)
shuffle_indices = np.random.permutation(len(X_train))
X_train = X_train[shuffle_indices]
y_train = y_train[shuffle_indices]

In [16]:
# 创建AdaBoost回归器模型
model = AdaBoostRegressor(DecisionTreeRegressor(), n_estimators=5)

# 训练模型
model.fit(X_train, y_train)

In [17]:
# 预测
X_test, y_test = X[train_size:], y[train_size:]

predictions = model.predict(X_test)

# 反归一化预测结果
predictions = scaler.inverse_transform(predictions.reshape(-1, 1))
y_test = scaler.inverse_transform(y_test.reshape(-1, 1))
# 计算评价指标
nse = calculate_nse(y_test[:,0], predictions[:,0])
mse = calculate_mse(y_test[:,0], predictions[:,0])
mae = calculate_mae(y_test[:,0], predictions[:,0])
rmse = calculate_rmse(y_test, predictions)
si = calculate_si(y_test[:,0], predictions[:,0])
mape = calculate_mape(y_test[:,0], predictions[:,0])

# 输出结果
print("NSE:", nse)
print("MSE:", mse)
print("MAE:", mae)
print("RMSE:", rmse)
print("SI:", si)
print("MAPE:", mape)

NSE: 0.9998239835207737
MSE: 1797.8153275843808
MAE: 19.112354606396458
RMSE: 42.400652442909184
SI: 0.006800268939303403
MAPE: 0.7480988840940312


In [18]:
test_datalabel = pd.read_csv('selected_data.csv').iloc[:,0]
test_datalabel = np.r_[test_datalabel].reshape(-1, 1)
test_scaler = MinMaxScaler(feature_range=(0, 1))
test_data = test_scaler.fit_transform(test_datalabel)
test_dataX, test_datay = create_dataset(test_data)
test_dataX = np.reshape(test_dataX, (test_dataX.shape[0], -1))
test_predictions = model.predict(test_dataX)

# 反归一化预测结果
test_predictions = test_scaler.inverse_transform(test_predictions.reshape(-1, 1))
test_datay = test_scaler.inverse_transform(test_datay.reshape(-1, 1))

# 计算评价指标
nse = calculate_nse(test_datay[:,0], test_predictions[:,0])
mae = calculate_mae(test_datay[:,0], test_predictions[:,0])
rmse = calculate_rmse(test_datay[:,0], test_predictions[:,0])
mse = calculate_mse(test_datay[:,0], test_predictions[:,0])
si = calculate_si(test_datay[:,0], test_predictions[:,0])
mape = calculate_mape(test_datay[:,0], test_predictions[:,0])

# 输出结果
print("NSE:", nse)
print("MAE:", mae)
print("RMSE:", rmse)
print("MSE:", mse)
print("SI:", si)
print("MAPE:", mape)

NSE: 0.9997613270224214
MAE: 19.97875309856599
RMSE: 48.76954692042784
MSE: 2378.4687068238127
SI: 0.007180333387223171
MAPE: 0.8459835071482515
