In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np

# 检查是否有可用的 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# 读取数据
data = pd.read_csv("../../data/dataset.csv")
data['target_class'] = pd.qcut(data['Cs'], q=10, labels=False)
X = data.drop(['Cs', 'target_class'], axis=1)
y = data['Cs']
stratify_column = data['target_class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=21, stratify=stratify_column)

# 数据标准化
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 将数据转换为张量
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).to(device)

# Create DataLoader for batch processing
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 20
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [3]:
from torch_function import MAPE_Loss, RMSE_Loss
# 定义 LSTM 模型
class LSTMRegressor(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, dropout=0.3):
        super(LSTMRegressor, self).__init__()
        self.hidden_sizes = hidden_sizes
        self.num_layers = len(hidden_sizes)

        # Defining multiple LSTM layers with configurable hidden sizes
        self.lstm_layers = nn.ModuleList()
        for i in range(self.num_layers):
            input_dim = input_size if i == 0 else hidden_sizes[i-1]
            lstm_dropout = dropout if self.num_layers > 1 and i < self.num_layers - 1 else 0
            self.lstm_layers.append(nn.LSTM(input_dim, hidden_sizes[i], batch_first=True, dropout=lstm_dropout))

        # Fully connected layer for output
        self.fc = nn.Linear(hidden_sizes[-1], output_size)

    def forward(self, x, seq_lengths=None):
        # Initial hidden and cell state for each layer
        h0 = [torch.zeros(1, x.size(0), hidden_size).to(x.device) for hidden_size in self.hidden_sizes]
        c0 = [torch.zeros(1, x.size(0), hidden_size).to(x.device) for hidden_size in self.hidden_sizes]

        # Forward propagate through each LSTM layer
        out = x
        for i, lstm in enumerate(self.lstm_layers):
            if seq_lengths is not None:
                packed_input = nn.utils.rnn.pack_padded_sequence(out, seq_lengths, batch_first=True, enforce_sorted=False)
                packed_output, (h, c) = lstm(packed_input, (h0[i], c0[i]))
                out, _ = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
            else:
                out, (h, c) = lstm(out, (h0[i], c0[i]))

        # Decode the hidden state of the last time step using average pooling
        out = torch.mean(out, dim=1)
        out = self.fc(out)
        return out

# Set model parameters
input_size = X_train_scaled.shape[1]  # Number of features per time step
hidden_sizes = [64, 64]  # LSTM hidden sizes
output_size = 1

model = LSTMRegressor(input_size, hidden_sizes, output_size)

# Loss and optimizer
# criterion = nn.MSELoss().to(device)
# criterion = MAPE_Loss().to(device)
criterion = RMSE_Loss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.002)

model.to(device)

# Reshape input for LSTM (batch_size, sequence_length, input_size)
def reshape_for_lstm(X):
    return X.unsqueeze(1)  # Add a sequence length dimension of 1



In [4]:
# Training the model
num_epochs = 3000
best_loss = float('inf')
cumulative_loss = 0.0
patience = 30  # 允许的最大连续未改进 epoch 数
epochs_without_improvement = 0  # 连续未改进的 epoch 数
model.train()

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        X_batch = reshape_for_lstm(X_batch)

        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs.squeeze(), y_batch)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * X_batch.size(0)
        cumulative_loss += loss.item()

    if (epoch + 1) % 10 == 0:
        average_loss = cumulative_loss / 10
        print(f'Epoch {epoch+1}, Average Loss: {average_loss:.4f}')
        cumulative_loss = 0.0  # Reset cumulative loss

    # 计算验证损失
    model.eval()
    with torch.no_grad():
        val_loss = 0.0
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            X_batch = reshape_for_lstm(X_batch)
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(), y_batch)
            val_loss += loss.item() * X_batch.size(0)

        val_loss /= len(test_loader.dataset)

    # 判断验证损失是否改善
    if val_loss < best_loss:
        best_loss = val_loss
        epochs_without_improvement = 0  # 重置计数器
        # 保存最佳模型
        torch.save(model.state_dict(), "lstm_best_model.pth")
    else:
        epochs_without_improvement += 1

    # 如果验证损失在一定次数的 epoch 内没有改进，则停止训练
    if epochs_without_improvement >= patience:
        print(f"Early stopping at epoch {epoch + 1}")
        break

Epoch 10, Average Loss: 2072.9601
Epoch 20, Average Loss: 1472.7975
Epoch 30, Average Loss: 1118.3476
Epoch 40, Average Loss: 911.6968
Epoch 50, Average Loss: 777.2453
Epoch 60, Average Loss: 681.1015
Epoch 70, Average Loss: 589.1796
Epoch 80, Average Loss: 499.4675
Epoch 90, Average Loss: 430.2941
Epoch 100, Average Loss: 375.4505
Epoch 110, Average Loss: 337.4149
Epoch 120, Average Loss: 309.2961
Epoch 130, Average Loss: 286.1842
Epoch 140, Average Loss: 270.2180
Epoch 150, Average Loss: 258.9182
Epoch 160, Average Loss: 251.4347
Epoch 170, Average Loss: 242.0240
Epoch 180, Average Loss: 234.8365
Epoch 190, Average Loss: 224.9844
Epoch 200, Average Loss: 220.4263
Epoch 210, Average Loss: 210.3852
Epoch 220, Average Loss: 206.6671
Epoch 230, Average Loss: 205.0047
Epoch 240, Average Loss: 203.2370
Epoch 250, Average Loss: 193.1271
Epoch 260, Average Loss: 188.1166
Epoch 270, Average Loss: 186.0072
Epoch 280, Average Loss: 177.5227
Epoch 290, Average Loss: 174.1834
Epoch 300, Average L

In [5]:
from function import calculate_metrics, metrics_to_dataframe

# 加载最佳模型的状态字典
model.load_state_dict(torch.load("lstm_best_model.pth", weights_only=True))

# 将模型设置为评估模式
model.eval()
model.to(device)

with torch.no_grad():
    # 准备训练数据
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)

    train_predictions = []
    y_train_true = []
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        X_batch = reshape_for_lstm(X_batch)
        outputs = model(X_batch)
        train_predictions.append(outputs.cpu().numpy())
        y_train_true.append(y_batch.cpu().numpy())

    train_predictions = np.concatenate(train_predictions, axis=0)
    y_train_true = np.concatenate(y_train_true, axis=0)

    # 计算训练集的指标
    train_metrics = calculate_metrics(y_train_true, train_predictions)
    print("训练集指标:", train_metrics)

    # 准备测试数据
    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    test_predictions = []
    y_test_true = []
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        X_batch = reshape_for_lstm(X_batch)
        outputs = model(X_batch)
        test_predictions.append(outputs.cpu().numpy())
        y_test_true.append(y_batch.cpu().numpy())

    test_predictions = np.concatenate(test_predictions, axis=0)
    y_test_true = np.concatenate(y_test_true, axis=0)

    # 计算测试集的指标
    test_metrics = calculate_metrics(y_test_true, test_predictions)
    print("测试集指标:", test_metrics)

    # 将结果转换为DataFrame
    lstm_metrics = metrics_to_dataframe(
        y_train_true, train_predictions,
        y_test_true, test_predictions, "LSTM").round(3)
    lstm_metrics.to_csv('LSTM_metrics.csv', index=False)
    print(lstm_metrics)


训练集指标: (0.975204586982727, 3.1265738, 2.972951903939247, 6.2861323)
测试集指标: (0.9539355039596558, 5.4024215, 6.594595313072205, 8.51768)
  model  R2_train  MAE_train  MAPE_train  RMSE_train  R2_test  MAE_test  \
0  LSTM     0.975      3.127       2.973       6.286    0.954     5.402   

   MAPE_test  RMSE_test  
0      6.595      8.518  


In [7]:
# 保存预测结果
lstm_train = pd.DataFrame({'Actual': y_train_true, 'Predicted': train_predictions.squeeze()})
lstm_test = pd.DataFrame({'Actual': y_test_true, 'Predicted': test_predictions.squeeze()})
lstm_train.to_csv('lstm_train.csv', index=False)
lstm_test.to_csv('lstm_test.csv', index=False)