In [2]:
import pandas as pd

# 加载 Excel 文件数据（请确保路径正确）
data = pd.read_excel("database.xlsx")

# 打印所有列名，确认实际的列名称
print("Excel 文件中的列名：")
print(data.columns.tolist())


Excel 文件中的列名：
['Paper No', 'Specimen', 'conformity_tbec2018', 'tw', 'lw', 'hw', 'M/(V.lw)', 'hw/lw', 'P/(Ag.fc)', 'fc', 'Agb', 'Ag', 'Agb/Ag', 'ρbl.fybl', 'ρsh.fysh', 'ρl.fyl', 'ρt.fyt', 'v_test', 'failure_mode']


In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.utils import to_categorical

# 1. 从 Excel 文件加载数据
# 请确保database.xlsx与该脚本在同一目录下
data = pd.read_excel("database.xlsx")

# 查看数据基本信息，确认列名称
print("数据预览：")
print(data.head())

# 2. 分离特征和目标变量
# 假设输入特征列名称如下（根据实际数据调整）
feature_cols = ['tw', 'lw', 'hw', 'M/(V.lw)', 'hw/lw', 'P/(Ag.fc)', 'fc', 'Agb', 'Ag', 'Agb/Ag', 'ρbl.fybl', 'ρsh.fysh', 'ρl.fyl', 'ρt.fyt', 'v_test']

# 目标变量列名称（请根据实际情况修改）
target_col = "failure_mode"

# 获取输入特征 X 和目标变量 y
X = data[feature_cols].values
y = data[target_col].values

# 3. 对输入特征进行归一化处理，将特征缩放到[-1, 1]区间
scaler = MinMaxScaler(feature_range=(-1, 1))
X_scaled = scaler.fit_transform(X)

# 4. 对目标变量进行独热编码
# 首先对目标类别进行编码（如果目标为字符串形式）
y_encoded = pd.get_dummies(y).values
# 或者使用to_categorical（如果先将目标转换为数值型标签）
# from sklearn.preprocessing import LabelEncoder
# le = LabelEncoder()
# y_int = le.fit_transform(y)
# y_encoded = to_categorical(y_int)

# 5. 划分训练集和测试集（例如80%训练，20%测试）
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# 输出数据维度以确认处理无误
print("训练集样本数：", X_train.shape[0])
print("测试集样本数：", X_test.shape[0])
print("输入特征数：", X_train.shape[1])
print("目标类别数：", y_train.shape[1])

# 保存预处理后的数据（可选）
np.save("X_train.npy", X_train)
np.save("X_test.npy", X_test)
np.save("y_train.npy", y_train)
np.save("y_test.npy", y_test)


数据预览：
                      Paper No                   Specimen  \
0  Abdulridha & Palermo (2017)                      W1-SR   
1      [114] Adajar et al.1995                       RCW1   
2                          NaN                       RCW3   
3             [98] Adebar,2007  High-Rise Core Wall (265)   
4            [26] Alarcon,2014                    W1 (49)   

   conformity_tbec2018     tw    lw       hw  M/(V.lw)     hw/lw  P/(Ag.fc)  \
0                  0.0  150.0  1000   2200.0      2.20  2.200000       0.00   
1                  0.0  150.0  1400   2000.0      1.43  1.428571       0.01   
2                  0.0  150.0  1400   2000.0      1.43  1.428571       0.01   
3                  0.0  127.0  1625  12000.0      7.38  7.384615       0.10   
4                  0.0  100.0   700   1600.0      2.50  2.285714       0.15   

     fc      Agb        Ag    Agb/Ag   ρbl.fybl  ρsh.fysh    ρl.fyl    ρt.fyt  \
0  30.5  30000.0  150000.0  0.200000   5.652500  7.395000  2.847500  3.

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# -----------------------------
# 1. 数据加载与预处理
# -----------------------------
# 从 Excel 文件加载数据
data = pd.read_excel("database.xlsx")

# 输出数据预览，检查列名称
print("数据预览：")
print(data.head())

# 定义特征和目标变量的列名称（请根据实际数据修改）
feature_cols = ['tw', 'lw', 'hw', 'M/(V.lw)', 'hw/lw', 'P/(Ag.fc)', 'fc', 'Agb', 'Ag', 'Agb/Ag', 'ρbl.fybl', 'ρsh.fysh', 'ρl.fyl', 'ρt.fyt', 'v_test']
target_col = "failure_mode"

# 分离特征和目标变量
X = data[feature_cols].values
y = data[target_col].values

# 对输入特征进行归一化处理，将特征缩放到[-1, 1]区间
scaler = MinMaxScaler(feature_range=(-1, 1))
X_scaled = scaler.fit_transform(X)

# 对目标变量进行独热编码（若目标为字符串类型）
y_encoded = pd.get_dummies(y).values

# 划分训练集和测试集（80%训练，20%测试），使用 stratify 保持类别比例
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# 输出数据维度以确认处理无误
print("训练集样本数：", X_train.shape[0])
print("测试集样本数：", X_test.shape[0])
print("输入特征数：", X_train.shape[1])
print("目标类别数：", y_train.shape[1])

# -----------------------------
# 2. 构建基础模型函数
# -----------------------------
def build_base_model(input_dim, output_dim):
    """
    构建一个包含5层隐藏层的深度神经网络模型
    参数:
        input_dim: 输入特征数
        output_dim: 输出类别数
    返回:
        model: 编译后的Keras模型
    """
    model = Sequential()
    # 第一隐藏层，40个神经元，激活函数Tanh
    model.add(Dense(40, activation='tanh', input_dim=input_dim))
    model.add(Dropout(0.02))
    # 第二隐藏层，80个神经元
    model.add(Dense(80, activation='tanh'))
    # 第三隐藏层，60个神经元
    model.add(Dense(60, activation='tanh'))
    # 第四隐藏层，40个神经元
    model.add(Dense(40, activation='tanh'))
    # 第五隐藏层，25个神经元
    model.add(Dense(25, activation='tanh'))
    # 输出层，输出类别数（采用softmax激活，适用于多分类任务）
    model.add(Dense(output_dim, activation='softmax'))
    
    # 使用Adam优化器，学习率设置为0.01（可根据需要调节）
    model.compile(optimizer=Adam(learning_rate=0.01), 
                  loss='categorical_crossentropy', 
                  metrics=['accuracy'])
    return model

# -----------------------------
# 3. 模型平均集成 (MAE)
# -----------------------------
ensemble_size = 5  # 可根据需要调整集成模型的数量
input_dim = X_train.shape[1]
output_dim = y_train.shape[1]
base_models = []

# 训练多个基础模型，每个模型因随机初始化及数据拆分不同会有一定差异
for i in range(ensemble_size):
    print(f"训练第 {i+1} 个子模型...")
    model = build_base_model(input_dim, output_dim)
    # 此处训练轮次和批次大小可根据实际情况进行调节
    model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1, verbose=0)
    base_models.append(model)

# 定义集成预测函数：对每个子模型的预测结果取平均
def ensemble_predict(models, X):
    predictions = [model.predict(X) for model in models]
    avg_prediction = np.mean(predictions, axis=0)
    return avg_prediction

# -----------------------------
# 4. 模型评估
# -----------------------------
# 在测试集上获得集成模型预测结果
ensemble_output = ensemble_predict(base_models, X_test)
y_pred = np.argmax(ensemble_output, axis=1)
y_true = np.argmax(y_test, axis=1)

# 计算准确率
acc = accuracy_score(y_true, y_pred)
print("集成模型准确率：", acc)

# 输出混淆矩阵
conf_mat = confusion_matrix(y_true, y_pred)
print("混淆矩阵：")
print(conf_mat)

# 输出详细的分类报告，包括精度、召回率、F1分数等
class_report = classification_report(y_true, y_pred)
print("分类报告：")
print(class_report)


数据预览：
                      Paper No                   Specimen  \
0  Abdulridha & Palermo (2017)                      W1-SR   
1      [114] Adajar et al.1995                       RCW1   
2                          NaN                       RCW3   
3             [98] Adebar,2007  High-Rise Core Wall (265)   
4            [26] Alarcon,2014                    W1 (49)   

   conformity_tbec2018     tw    lw       hw  M/(V.lw)     hw/lw  P/(Ag.fc)  \
0                  0.0  150.0  1000   2200.0      2.20  2.200000       0.00   
1                  0.0  150.0  1400   2000.0      1.43  1.428571       0.01   
2                  0.0  150.0  1400   2000.0      1.43  1.428571       0.01   
3                  0.0  127.0  1625  12000.0      7.38  7.384615       0.10   
4                  0.0  100.0   700   1600.0      2.50  2.285714       0.15   

     fc      Agb        Ag    Agb/Ag   ρbl.fybl  ρsh.fysh    ρl.fyl    ρt.fyt  \
0  30.5  30000.0  150000.0  0.200000   5.652500  7.395000  2.847500  3.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [9]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# 设备设置
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -----------------------------
# 1. 数据加载与预处理
# -----------------------------
data = pd.read_excel("database.xlsx")
print("数据预览：")
print(data.head())

# 定义特征和目标变量的列名（请根据实际数据修改）
feature_cols = ['tw', 'lw', 'hw', 'M/(V.lw)', 'hw/lw', 'P/(Ag.fc)', 'fc', 'Agb', 'Ag', 'Agb/Ag', 'ρbl.fybl', 'ρsh.fysh', 'ρl.fyl', 'ρt.fyt', 'v_test']
target_col = "failure_mode"

X = data[feature_cols].values
y = data[target_col].values

# 归一化特征到[-1,1]
scaler = MinMaxScaler(feature_range=(-1, 1))
X_scaled = scaler.fit_transform(X)

# 对目标变量进行独热编码
y_encoded = pd.get_dummies(y).values

# 划分训练集和测试集（80%训练，20%测试）
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# 将数据转换为Tensor
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
# 注意：这里将one-hot目标转换为类别标签（整数）以便使用CrossEntropyLoss
y_train_labels = torch.tensor(np.argmax(y_train, axis=1), dtype=torch.long).to(device)
y_test_labels = torch.tensor(np.argmax(y_test, axis=1), dtype=torch.long).to(device)

# -----------------------------
# 2. 自定义 Dataset 与 DataLoader
# -----------------------------
class FailureModeDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y  # 这里y为类别标签
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = FailureModeDataset(X_train_tensor, y_train_labels)
test_dataset = FailureModeDataset(X_test_tensor, y_test_labels)

# 计算训练集各类别样本数量，进而计算类别权重
unique_classes, counts = np.unique(y_train_labels.cpu().numpy(), return_counts=True)
class_weights = 1. / counts
weights = [class_weights[label] for label in y_train_labels.cpu().numpy()]
weights = torch.DoubleTensor(weights)
sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)

batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# -----------------------------
# 3. 定义5层神经网络（改进版）
# -----------------------------
class DeepNeuralNetwork(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DeepNeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 40)
        self.fc2 = nn.Linear(40, 80)
        self.fc3 = nn.Linear(80, 60)
        self.fc4 = nn.Linear(60, 40)
        self.fc5 = nn.Linear(40, 25)
        self.output = nn.Linear(25, output_dim)
        self.activation = nn.ReLU()  # 使用ReLU激活函数
        self.dropout = nn.Dropout(0.2)  # 提高Dropout比例
    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.dropout(x)
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.activation(self.fc4(x))
        x = self.activation(self.fc5(x))
        x = self.output(x)
        return x  # 注意：这里返回的原始logits，交叉熵内部会处理softmax

# -----------------------------
# 4. 训练多个子模型（MAE）
# -----------------------------
ensemble_size = 5
input_dim = X_train.shape[1]
output_dim = len(unique_classes)  # 输出类别数
models = []

# 计算类别权重Tensor，用于损失函数
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)

for i in range(ensemble_size):
    print(f"训练第 {i+1} 个子模型...")
    model = DeepNeuralNetwork(input_dim, output_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    # 使用带权重的交叉熵损失
    criterion = nn.CrossEntropyLoss(weight=class_weights_tensor)
    # 学习率调度器：每30个epoch降低学习率
    scheduler = StepLR(optimizer, step_size=30, gamma=0.5)
    epochs = 150
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * X_batch.size(0)
        scheduler.step()
        # 每30个epoch打印一次平均损失
        if (epoch+1) % 30 == 0:
            avg_loss = running_loss / len(train_dataset)
            print(f"子模型 {i+1}, Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")
    models.append(model)

# -----------------------------
# 5. 集成预测与模型评估
# -----------------------------
def ensemble_predict(models, loader):
    all_preds = []
    all_labels = []
    for X_batch, y_batch in loader:
        batch_preds = []
        for model in models:
            model.eval()
            with torch.no_grad():
                outputs = model(X_batch)
                # 取softmax后预测类别
                probs = torch.softmax(outputs, dim=1)
                batch_preds.append(probs.cpu().numpy())
        avg_preds = np.mean(batch_preds, axis=0)
        all_preds.append(avg_preds)
        all_labels.append(y_batch.cpu().numpy())
    return np.concatenate(all_preds), np.concatenate(all_labels)

ensemble_output, y_true = ensemble_predict(models, test_loader)
y_pred = np.argmax(ensemble_output, axis=1)

acc = accuracy_score(y_true, y_pred)
print("集成模型准确率：", acc)

conf_mat = confusion_matrix(y_true, y_pred)
print("混淆矩阵：")
print(conf_mat)

class_report = classification_report(y_true, y_pred)
print("分类报告：")
print(class_report)


数据预览：
                      Paper No                   Specimen  \
0  Abdulridha & Palermo (2017)                      W1-SR   
1      [114] Adajar et al.1995                       RCW1   
2                          NaN                       RCW3   
3             [98] Adebar,2007  High-Rise Core Wall (265)   
4            [26] Alarcon,2014                    W1 (49)   

   conformity_tbec2018     tw    lw       hw  M/(V.lw)     hw/lw  P/(Ag.fc)  \
0                  0.0  150.0  1000   2200.0      2.20  2.200000       0.00   
1                  0.0  150.0  1400   2000.0      1.43  1.428571       0.01   
2                  0.0  150.0  1400   2000.0      1.43  1.428571       0.01   
3                  0.0  127.0  1625  12000.0      7.38  7.384615       0.10   
4                  0.0  100.0   700   1600.0      2.50  2.285714       0.15   

     fc      Agb        Ag    Agb/Ag   ρbl.fybl  ρsh.fysh    ρl.fyl    ρt.fyt  \
0  30.5  30000.0  150000.0  0.200000   5.652500  7.395000  2.847500  3.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
