In [None]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt


# 合并 CSV 文件的函数
def merge_csv_files(directory, target_csv_file):
    """Concatenate every CSV named ``target_csv_file`` found under ``directory``.

    The directory tree is walked recursively; each file whose name equals
    ``target_csv_file`` is read with ``pd.read_csv`` and the resulting frames
    are stacked row-wise with a fresh 0..n-1 index.

    Sub-directories are visited in sorted (lexicographic) name order so that
    the row order of the result is reproducible across machines and file
    systems. This matters because the merged rows are later consumed as a
    sequence.

    Args:
        directory: Root directory to search.
        target_csv_file: Exact file name to look for in every directory.

    Returns:
        A single ``pandas.DataFrame`` with the rows of all matching files.

    Raises:
        ValueError: If no file named ``target_csv_file`` exists under
            ``directory``.
    """
    frames = []
    for root, dirs, files in os.walk(directory):
        # Sorting in place tells os.walk which order to descend in; without
        # it the traversal order is whatever the file system returns.
        dirs.sort()
        # A file name is unique within one directory, so a membership test
        # is equivalent to scanning the list.
        if target_csv_file in files:
            frames.append(pd.read_csv(os.path.join(root, target_csv_file)))

    if not frames:
        # pd.concat([]) would raise an uninformative ValueError; keep the
        # exception type but say what was actually missing.
        raise ValueError(
            f"No file named {target_csv_file!r} found under {directory!r}"
        )
    return pd.concat(frames, ignore_index=True)


# 数据预处理函数
def create_dataset_with_history(X, y, history_steps=50, forecast_horizon=10):
    """Build sliding-window samples from aligned input/output sequences.

    For every time step ``i`` in
    ``range(history_steps, len(X) - forecast_horizon + 1)`` one sample is
    produced:

    * input: the previous ``history_steps`` rows of ``y``
      (``y[i - history_steps:i]``) with the current row ``X[i]`` repeated on
      every history step and appended along the feature axis;
    * target: the next ``forecast_horizon`` rows of ``y``
      (``y[i:i + forecast_horizon]``), i.e. a window, not just ``y[i]``.

    Args:
        X: Input features, shape ``(n_rows, n_x)``. A 1-D sequence is treated
            as a single feature column.
        y: Output values, shape ``(n_rows, n_y)``. A 1-D sequence is treated
            as a single output column. Must have at least ``len(X)`` rows.
        history_steps: Number of past ``y`` rows in each input window.
        forecast_horizon: Number of ``y`` rows in each target window.

    Returns:
        Tuple ``(X_data, y_data)`` of ``float32`` arrays with shapes
        ``(n_samples, history_steps, n_y + n_x)`` and
        ``(n_samples, forecast_horizon, n_y)``. When the data is too short to
        form a single window, ``n_samples`` is 0 but the trailing dimensions
        are still present.

    Raises:
        ValueError: If ``y`` has fewer rows than ``X``.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    # Promote 1-D sequences to a single column so the feature-axis
    # concatenation below is well defined.
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    if y.ndim == 1:
        y = y.reshape(-1, 1)
    if len(y) < len(X):
        # The loop bound is derived from len(X); a shorter y would yield
        # truncated (ragged) windows.
        raise ValueError(
            f"y has {len(y)} rows but X has {len(X)}; y must cover every row of X"
        )

    X_data, y_data = [], []
    for i in range(history_steps, len(X) - forecast_horizon + 1):
        # Repeat the current input row so it lines up with each history step.
        current_x = np.tile(X[i], (history_steps, 1))

        # The history_steps outputs immediately preceding step i.
        past_y = y[i - history_steps:i]

        # Per-step features are [past y values, current X values].
        X_data.append(np.hstack((past_y, current_x)))

        # Target is the forecast window starting at step i.
        y_data.append(y[i:i + forecast_horizon])

    if not X_data:
        # np.array([]) would collapse to shape (0,); keep the trailing
        # dimensions so downstream shape logic still works.
        n_x, n_y = X.shape[1], y.shape[1]
        return (
            np.empty((0, history_steps, n_y + n_x), dtype=np.float32),
            np.empty((0, forecast_horizon, n_y), dtype=np.float32),
        )

    return np.array(X_data, dtype=np.float32), np.array(y_data, dtype=np.float32)


# Load the data: gather every import/export CSV under ./data and join the two
# merged tables side by side on their row index.
directory_path = './data'
Import_csv_file = 'Importdata_4ms.csv'
Exoport_csv_file = 'Exportdata_4ms.csv'

Import_merged_data = merge_csv_files(directory_path, Import_csv_file)
Exoport_merged_data = merge_csv_files(directory_path, Exoport_csv_file)
merged_data = Import_merged_data.merge(
    Exoport_merged_data,
    left_index=True,
    right_index=True,
)

# Pick the input and output columns by position.
# NOTE(review): column index 6 is used as neither input nor output - confirm
# that skipping it is intentional.
headers = list(merged_data.columns)
X = merged_data[headers[:6]].to_numpy()  # input columns 0..5
y = merged_data[headers[7:14]].to_numpy()  # output columns 7..13

# Normalize inputs and outputs to the [0, 1] range.
# NOTE(review): both scalers are fitted on every row, including rows that end
# up in the test split - consider fitting on the training portion only.
scaler_X = MinMaxScaler(feature_range=(0, 1))
scaler_y = MinMaxScaler(feature_range=(0, 1))

X_scaled = scaler_X.fit_transform(X)
y_scaled = scaler_y.fit_transform(y)

# Build the sliding-window samples.
history_steps = 50
forecast_horizon = 10
X_data, y_data = create_dataset_with_history(X_scaled, y_scaled, history_steps, forecast_horizon)

# Split into train and test sets chronologically (first 80% / last 20%).
# Consecutive windows overlap almost entirely, so a shuffled split would put
# near-duplicates of every test window into the training set and make the
# validation metrics look better than they are.
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2, shuffle=False)

# Build the LSTM model: a single 100-unit LSTM layer that reads one input
# window and a dense layer that emits the whole forecast window flattened to
# forecast_horizon * n_outputs values.
# (An earlier, deeper variant stacked 128/64/32-unit LSTM layers with 0.2
# dropout after each; it is no longer used.)
model = Sequential([
    LSTM(
        units=100,
        return_sequences=False,
        input_shape=(X_train.shape[1], X_train.shape[2]),
    ),
    Dense(units=forecast_horizon * y_scaled.shape[1]),
])
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae', 'mse'])


# Train the model. The network outputs one flat vector per sample, so the
# (horizon, n_outputs) target windows are flattened to match.
flat_target_dim = forecast_horizon * y_scaled.shape[1]
y_train_flat = y_train.reshape(-1, flat_target_dim)
y_test_flat = y_test.reshape(-1, flat_target_dim)

history = model.fit(
    X_train,
    y_train_flat,
    validation_data=(X_test, y_test_flat),
    batch_size=64,
    epochs=40,
)

# Save the trained model.
model.save('./predict_model_lstm.h5')

# Predict on the test windows.
y_pred = model.predict(X_test)

# Undo the normalization. The scaler works on rows of n_outputs values, so
# both arrays are flattened to (n_samples * forecast_horizon, n_outputs);
# each row is one (sample, horizon step) pair.
y_pred_reshaped = y_pred.reshape(-1, y.shape[1])
y_test_reshaped = y_test.reshape(-1, y.shape[1])
y_pred_rescaled = scaler_y.inverse_transform(y_pred_reshaped)
y_test_rescaled = scaler_y.inverse_transform(y_test_reshaped)

# Plot actual vs. predicted values, one figure per output column.
feature_names = headers[7:14]
for i in range(y.shape[1]):
    # Create the figure inside the loop: plt.show() finishes the current
    # figure, so a single figure created up front would give only the first
    # feature the intended size.
    plt.figure(figsize=(12, 6))
    plt.plot(y_test_rescaled[:, i], label=f'Actual {feature_names[i]}')
    plt.plot(y_pred_rescaled[:, i], label=f'Predicted {feature_names[i]}', linestyle='--')
    plt.legend()
    plt.title(f'Feature: {feature_names[i]}')
    plt.show()


Epoch 1/40


  super().__init__(**kwargs)


937/937 ━━━━━━━━━━━━━━━━━━━━ 16s 16ms/step - loss: 0.0239 - mae: 0.0623 - mse: 0.0239 - val_loss: 1.5969e-04 - val_mae: 0.0081 - val_mse: 1.5969e-04
Epoch 2/40
937/937 ━━━━━━━━━━━━━━━━━━━━ 15s 16ms/step - loss: 7.5210e-05 - mae: 0.0044 - mse: 7.5210e-05 - val_loss: 7.4488e-05 - val_mae: 0.0032 - val_mse: 7.4488e-05
Epoch 3/40
937/937 ━━━━━━━━━━━━━━━━━━━━ 15s 16ms/step - loss: 8.8859e-05 - mae: 0.0040 - mse: 8.8859e-05 - val_loss: 1.0665e-04 - val_mae: 0.0056 - val_mse: 1.0665e-04
Epoch 4/40
937/937 ━━━━━━━━━━━━━━━━━━━━ 15s 16ms/step - loss: 1.0497e-04 - mae: 0.0037 - mse: 1.0497e-04 - val_loss: 1.0252e-04 - val_mae: 0.0052 - val_mse: 1.0252e-04
Epoch 5/40
937/937 ━━━━━━━━━━━━━━━━━━━━ 15s 16ms/step - loss: 5.9914e-05 - mae: 0.0031 - mse: 5.9914e-05 - val_loss: 6.2420e-05 - val_mae: 0.0021 - val_mse: 6.2420e-05
Epoch 6/4