In [5]:
len([
    "ap_from_ap_1_mean_ant_rssi", "ap_from_ap_2_mean_ant_rssi", 
    "sta_to_ap_0_mean_ant_rssi", "sta_to_ap_1_mean_ant_rssi", "sta_to_ap_2_mean_ant_rssi", 
    "sta_from_ap_0_mean_ant_rssi", "sta_from_ap_1_mean_ant_rssi", "sta_from_ap_2_mean_ant_rssi", 
    "sta_from_sta_1_rssi", "sta_from_sta_2_rssi",
    "ap_from_ap_1_sum_ant_rssi", "ap_from_ap_2_sum_ant_rssi", 
    "sta_to_ap_0_sum_ant_rssi", "sta_to_ap_1_sum_ant_rssi", "sta_to_ap_2_sum_ant_rssi", 
    "sta_from_ap_0_sum_ant_rssi", "sta_from_ap_1_sum_ant_rssi", "sta_from_ap_2_sum_ant_rssi", 
    "ap_from_ap_1_max_ant_rssi", "ap_from_ap_2_max_ant_rssi", 
    "sta_to_ap_0_max_ant_rssi", "sta_to_ap_1_max_ant_rssi", "sta_to_ap_2_max_ant_rssi", 
    "sta_from_ap_0_max_ant_rssi", "sta_from_ap_1_max_ant_rssi", "sta_from_ap_2_max_ant_rssi"
])

26

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.model_selection import train_test_split


# 定义Transformer回归模型
class TransformerRegressor(nn.Module):
    def __init__(self, input_dim, model_dim=64, num_heads=4, num_layers=2, dropout=0.1):
        super(TransformerRegressor, self).__init__()
        self.model_dim = model_dim

        self.input_fc = nn.Linear(input_dim, model_dim)
        self.pos_encoder = PositionalEncoding(model_dim)
        encoder_layer = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads, dropout=dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.regressor = nn.Linear(model_dim, 1)

    def forward(self, src):
        # src shape: [batch_size, input_dim]
        src = self.input_fc(src) * np.sqrt(self.model_dim)  # [batch_size, model_dim]
        src = self.pos_encoder(src)  # [batch_size, model_dim]
        src = src.unsqueeze(1)  # Transformer expects [sequence_length, batch_size, model_dim]
        memory = self.transformer_encoder(src)  # [sequence_length, batch_size, model_dim]
        memory = memory.squeeze(0)  # [batch_size, model_dim]
        output = self.regressor(memory)  # [batch_size, 1]
        return output.squeeze(1)  # [batch_size]


class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-np.log(10000.0) / d_model))

        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)

        pe = pe.unsqueeze(0)  # [1, max_len, d_model]
        self.register_buffer("pe", pe)

    def forward(self, x):
        # x shape: [batch_size, model_dim]
        x = x + self.pe[:, : x.size(1), :]
        return x


# 封装为类似于XGBRegressor的类
class PyTorchTransformerRegressor(BaseEstimator, RegressorMixin):
    def __init__(self, model_dim=64, num_heads=4, num_layers=2, dropout=0.1, lr=1e-3, batch_size=32, epochs=100, device=None, verbose=False):
        self.model_dim = model_dim
        self.num_heads = num_heads
        self.num_layers = num_layers
        self.dropout = dropout
        self.lr = lr
        self.batch_size = batch_size
        self.epochs = epochs
        self.verbose = verbose
        self.device = device if device else ("cuda" if torch.cuda.is_available() else "cpu")
        self.model = None
        self.optimizer = None
        self.criterion = None

    def fit(self, X, y, X_val=None, y_val=None):
        X = torch.tensor(X, dtype=torch.float32)
        y = torch.tensor(y, dtype=torch.float32)

        if X_val is not None and y_val is not None:
            X_val = torch.tensor(X_val, dtype=torch.float32)
            y_val = torch.tensor(y_val, dtype=torch.float32)
            val_dataset = TensorDataset(X_val, y_val)
            val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False)
        else:
            val_loader = None

        dataset = TensorDataset(X, y)
        loader = DataLoader(dataset, batch_size=self.batch_size, shuffle=True)

        input_dim = X.shape[1]
        self.model = TransformerRegressor(input_dim, self.model_dim, self.num_heads, self.num_layers, self.dropout).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        self.criterion = nn.MSELoss()

        for epoch in range(1, self.epochs + 1):
            self.model.train()
            epoch_loss = 0
            for batch_X, batch_y in loader:
                batch_X, batch_y = batch_X.to(self.device), batch_y.to(self.device)
                self.optimizer.zero_grad()
                outputs = self.model(batch_X)
                loss = self.criterion(outputs, batch_y)
                loss.backward()
                self.optimizer.step()
                epoch_loss += loss.item() * batch_X.size(0)

            epoch_loss /= len(loader.dataset)

            if self.verbose:
                if val_loader:
                    val_loss = self.evaluate(val_loader)
                    print(f"Epoch {epoch}/{self.epochs}, Training Loss: {epoch_loss:.4f}, Validation Loss: {val_loss:.4f}")
                else:
                    print(f"Epoch {epoch}/{self.epochs}, Training Loss: {epoch_loss:.4f}")

        return self

    def evaluate(self, loader):
        self.model.eval()
        total_loss = 0
        with torch.no_grad():
            for batch_X, batch_y in loader:
                batch_X, batch_y = batch_X.to(self.device), batch_y.to(self.device)
                outputs = self.model(batch_X)
                loss = self.criterion(outputs, batch_y)
                total_loss += loss.item() * batch_X.size(0)
        total_loss /= len(loader.dataset)
        return total_loss

    def predict(self, X):
        self.model.eval()
        X = torch.tensor(X, dtype=torch.float32).to(self.device)
        with torch.no_grad():
            outputs = self.model(X)
        return outputs.cpu().numpy()


# 示例使用代码
if __name__ == "__main__":
    # 生成一些示例数据
    from sklearn.datasets import make_regression
    from sklearn.preprocessing import StandardScaler

    random_state_num = 42
    X, y = make_regression(n_samples=1000, n_features=20, noise=0.1, random_state=random_state_num)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state_num)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 初始化并训练模型
    model = PyTorchTransformerRegressor(model_dim=64, num_heads=4, num_layers=2, dropout=0.1, lr=1e-3, batch_size=32, epochs=100, verbose=True)
    model.fit(X_train_scaled, y_train, X_val=X_test_scaled, y_val=y_test)

    # 对测试集进行预测
    y_pred = model.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)
    print(f"Mean Squared Error: {mse}")

    # 对比曲线图：真实值 vs 预测值
    plt.figure(figsize=(10, 6))
    plt.plot(np.arange(len(y_test)), y_test, label="true seq_time", color="blue", marker="o")
    plt.plot(np.arange(len(y_pred)), y_pred, label="pred seq_time", color="red", marker="x")
    plt.xlabel("Test Samples")
    plt.ylabel("seq_time")
    plt.title("True vs Predicted seq_time")
    plt.legend()
    plt.show()



RuntimeError: The size of tensor a (32) must match the size of tensor b (64) at non-singleton dimension 1