In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from function import split_data
from sklearn.preprocessing import StandardScaler

# 检查是否有可用的 GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [2]:
# 读取数据
data = pd.read_csv("../../data/dataset_reduced.csv")
X_train, X_test, y_train, y_test = split_data(data, 'Cs')

# 数据标准化
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 将数据转换为张量
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).to(device)

# Create DataLoader for batch processing
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [3]:
from torch_function import MAPE_Loss, RMSE_Loss
# 定义 LSTM 模型
class LSTMRegressor(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, dropout=0.28):
        super(LSTMRegressor, self).__init__()
        self.hidden_sizes = hidden_sizes
        self.num_layers = len(hidden_sizes)

        # Defining multiple LSTM layers with configurable hidden sizes
        self.lstm_layers = nn.ModuleList()
        for i in range(self.num_layers):
            input_dim = input_size if i == 0 else hidden_sizes[i-1]
            lstm_dropout = dropout if self.num_layers > 1 and i < self.num_layers - 1 else 0
            self.lstm_layers.append(nn.LSTM(input_dim, hidden_sizes[i], batch_first=True, dropout=lstm_dropout))

        # Fully connected layer for output
        self.fc = nn.Linear(hidden_sizes[-1], output_size)

    def forward(self, x, seq_lengths=None):
        # Initial hidden and cell state for each layer
        h0 = [torch.zeros(1, x.size(0), hidden_size).to(x.device) for hidden_size in self.hidden_sizes]
        c0 = [torch.zeros(1, x.size(0), hidden_size).to(x.device) for hidden_size in self.hidden_sizes]

        # Forward propagate through each LSTM layer
        out = x
        for i, lstm in enumerate(self.lstm_layers):
            if seq_lengths is not None:
                packed_input = nn.utils.rnn.pack_padded_sequence(out, seq_lengths, batch_first=True, enforce_sorted=False)
                packed_output, (h, c) = lstm(packed_input, (h0[i], c0[i]))
                out, _ = nn.utils.rnn.pad_packed_sequence(packed_output, batch_first=True)
            else:
                out, (h, c) = lstm(out, (h0[i], c0[i]))

        # Decode the hidden state of the last time step using average pooling
        out = torch.mean(out, dim=1)
        out = self.fc(out)
        return out

# Set model parameters
input_size = X_train_scaled.shape[1]  # Number of features per time step
hidden_sizes = [64, 64]  # LSTM hidden sizes
output_size = 1

model = LSTMRegressor(input_size, hidden_sizes, output_size)

# Loss and optimizer
criterion = RMSE_Loss().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.002)

model.to(device)

# Reshape input for LSTM (batch_size, sequence_length, input_size)
def reshape_for_lstm(X):
    return X.unsqueeze(1)  # Add a sequence length dimension of 1



In [4]:
# Training the model
num_epochs = 3000
best_loss = float('inf')
patience = 100  # 允许的最大连续未改进 epoch 数
epochs_without_improvement = 0  # 连续未改进的 epoch 数

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        X_batch = reshape_for_lstm(X_batch)

        # Forward pass
        outputs = model(X_batch)
        loss = criterion(outputs.squeeze(), y_batch)

        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # 打印每10个 epoch 的损失
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}")

    # 计算验证损失
    model.eval()
    with torch.no_grad():
        val_loss = 0.0
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)
            X_batch = reshape_for_lstm(X_batch)
            outputs = model(X_batch)
            loss = criterion(outputs.squeeze(), y_batch)
            val_loss += loss.item() * X_batch.size(0)

        val_loss /= len(test_loader.dataset)

    # 判断验证损失是否改善
    if val_loss < best_loss:
        best_loss = val_loss
        epochs_without_improvement = 0  # 重置计数器
        # 保存最佳模型
        torch.save(model.state_dict(), "lstm_best_model.pth")
    else:
        epochs_without_improvement += 1

    # 如果验证损失在一定次数的 epoch 内没有改进，则停止训练
    if epochs_without_improvement >= patience:
        print(f"Early stopping at epoch {epoch + 1}")
        break

Epoch [10/3000], Loss: 77.9750
Epoch [20/3000], Loss: 62.9137
Epoch [30/3000], Loss: 51.4748
Epoch [40/3000], Loss: 42.7763
Epoch [50/3000], Loss: 37.7576
Epoch [60/3000], Loss: 33.6896
Epoch [70/3000], Loss: 30.0957
Epoch [80/3000], Loss: 27.1722
Epoch [90/3000], Loss: 24.2342
Epoch [100/3000], Loss: 20.5811
Epoch [110/3000], Loss: 17.7743
Epoch [120/3000], Loss: 15.9934
Epoch [130/3000], Loss: 15.0194
Epoch [140/3000], Loss: 13.6799
Epoch [150/3000], Loss: 12.6443
Epoch [160/3000], Loss: 12.0691
Epoch [170/3000], Loss: 11.1754
Epoch [180/3000], Loss: 10.5502
Epoch [190/3000], Loss: 10.0584
Epoch [200/3000], Loss: 9.6915
Epoch [210/3000], Loss: 9.5556
Epoch [220/3000], Loss: 9.4715
Epoch [230/3000], Loss: 9.1901
Epoch [240/3000], Loss: 8.8240
Epoch [250/3000], Loss: 8.6781
Epoch [260/3000], Loss: 8.2789
Epoch [270/3000], Loss: 8.3101
Epoch [280/3000], Loss: 8.3272
Epoch [290/3000], Loss: 7.8744
Epoch [300/3000], Loss: 7.6760
Epoch [310/3000], Loss: 7.5668
Epoch [320/3000], Loss: 7.708

In [5]:
from function import metrics_to_dataframe

# 加载最佳模型的状态字典
model.load_state_dict(torch.load("lstm_best_model.pth", weights_only=True))

# 将模型设置为评估模式
model.eval()
model.to(device)

with torch.no_grad():
    train_predictions = []
    y_train_true = []
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        X_batch = reshape_for_lstm(X_batch)
        outputs = model(X_batch)
        train_predictions.append(outputs.cpu().numpy())
        y_train_true.append(y_batch.cpu().numpy())

    train_predictions = np.concatenate(train_predictions, axis=0)
    y_train_true = np.concatenate(y_train_true, axis=0)

    test_predictions = []
    y_test_true = []
    for X_batch, y_batch in test_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        X_batch = reshape_for_lstm(X_batch)
        outputs = model(X_batch)
        test_predictions.append(outputs.cpu().numpy())
        y_test_true.append(y_batch.cpu().numpy())

    test_predictions = np.concatenate(test_predictions, axis=0)
    y_test_true = np.concatenate(y_test_true, axis=0)

    # 将结果转换为DataFrame
    lstm_metrics = metrics_to_dataframe(
        y_train_true, train_predictions,
        y_test_true, test_predictions, "LSTM").round(3)
    lstm_metrics.to_csv('LSTM_metrics.csv', index=False)

lstm_metrics

Unnamed: 0,model,R2_train,MAE_train,MAPE_train,RMSE_train,R2_test,MAE_test,MAPE_test,RMSE_test
0,LSTM,0.986,2.433,2.552,4.83,0.96,4.845,8.748,8.322


In [6]:
# 保存预测结果
lstm_train = pd.DataFrame({'Actual': y_train_true, 'Predicted': train_predictions.squeeze()})
lstm_test = pd.DataFrame({'Actual': y_test_true, 'Predicted': test_predictions.squeeze()})
lstm_train.to_csv('lstm_train.csv', index=False)
lstm_test.to_csv('lstm_test.csv', index=False)