In [1]:
import pandas as pd

# Read the spreadsheet; the path is resolved relative to the notebook's working directory
data = pd.read_excel("database.xlsx")

# Echo the header names so the column names referenced later can be checked by eye
column_names = data.columns.to_list()
print("Column names in the Excel file:")
print(column_names)

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Device setup: use the GPU when PyTorch can see one, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# -----------------------------
# 1. Data Loading and Preprocessing
# -----------------------------
# `data` was already read from database.xlsx at the top of this cell; reuse it
# instead of parsing the spreadsheet a second time.
print("Data Preview:")
print(data.head())

# Define feature columns and target column (modify as needed)
feature_cols = ['tw', 'lw', 'hw', 'M/(V.lw)', 'hw/lw', 'P/(Ag.fc)', 'fc', 'Agb', 'Ag', 'Agb/Ag', 'ρbl.fybl', 'ρsh.fysh', 'ρl.fyl', 'ρt.fyt', 'v_test']
target_col = "failure_mode"

# Fail early with a readable message if the spreadsheet layout has changed
model_cols = feature_cols + [target_col]
missing_cols = [col for col in model_cols if col not in data.columns]
if missing_cols:
    raise KeyError(f"Columns missing from the Excel file: {missing_cols}")

# Rows with a missing feature or label cannot be used: NaN features propagate to
# NaN losses, and pd.get_dummies encodes a NaN label as an all-zero row that the
# argmax below would silently turn into class 0.
n_incomplete = int(data[model_cols].isna().any(axis=1).sum())
if n_incomplete:
    print(f"Dropping {n_incomplete} row(s) with missing feature/target values")
data_model = data.dropna(subset=model_cols)

X = data_model[feature_cols].values
y = data_model[target_col].values

# One-hot encode the target variable (one column per distinct class value)
y_encoded = pd.get_dummies(y).values

# Split data into training (80%) and testing (20%) sets BEFORE scaling, so that the
# scaler never sees test-set statistics. The partition depends only on the targets,
# the sample count and random_state, so it is the same one obtained when splitting
# pre-scaled features.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Normalize features to the range [-1,1] using min/max learned from the training
# rows only; test rows may therefore fall slightly outside [-1, 1].
scaler = MinMaxScaler(feature_range=(-1, 1))
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Whole dataset scaled with the train-fitted scaler; kept so that any code relying
# on the name `X_scaled` keeps working.
X_scaled = scaler.transform(X)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)

# Convert one-hot encoded targets to class labels (integers) for CrossEntropyLoss
y_train_labels = torch.tensor(np.argmax(y_train, axis=1), dtype=torch.long).to(device)
y_test_labels = torch.tensor(np.argmax(y_test, axis=1), dtype=torch.long).to(device)

# -----------------------------
# 2. Custom Dataset and DataLoader
# -----------------------------
class FailureModeDataset(Dataset):
    """Map-style dataset that pairs each feature row with its target.

    Args:
        X: Indexable, sized collection of feature rows.
        y: Indexable, sized collection of targets aligned with ``X``
            (this notebook passes integer class labels).

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of samples.
    """

    def __init__(self, X, y):
        # A length mismatch would otherwise only surface later as an index error
        # (or silently ignore trailing targets), so reject it up front.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y  # Target variable as class labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

train_dataset = FailureModeDataset(X_train_tensor, y_train_labels)
test_dataset = FailureModeDataset(X_test_tensor, y_test_labels)

# Compute class weights based on training set distribution.
# Counts are taken over EVERY class index (one per one-hot column), not only the
# labels that happen to occur in the training split. That keeps
# `class_weights[label]` a valid lookup and `len(unique_classes)` equal to the true
# number of classes even if some class is absent from the training rows.
train_label_array = y_train_labels.cpu().numpy()
num_classes = y_encoded.shape[1]
unique_classes = np.arange(num_classes)
counts = np.bincount(train_label_array, minlength=num_classes)

# Inverse-frequency weight per class; a class with no training samples gets weight 0
# instead of a division-by-zero infinity.
class_weights = np.zeros(num_classes, dtype=np.float64)
np.divide(1.0, counts, out=class_weights, where=counts > 0)

# Per-sample sampling weight = weight of that sample's class (vectorised lookup)
weights = torch.as_tensor(class_weights[train_label_array], dtype=torch.double)
sampler = WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)

batch_size = 32
# `sampler` and `shuffle` are mutually exclusive; the sampler decides the draw order
train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# -----------------------------
# 3. Define 5-Layer Neural Network (Improved Version)
# -----------------------------
class DeepNeuralNetwork(nn.Module):
    """Fully connected classifier with five hidden ReLU layers and a linear output.

    Layer widths: ``input_dim -> 40 -> 80 -> 60 -> 40 -> 25 -> output_dim``.
    Dropout is applied once, after the first hidden layer.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of classes (length of the returned logit vector).
        dropout_rate: Probability passed to ``nn.Dropout``. Defaults to 0.2.
    """

    def __init__(self, input_dim, output_dim, dropout_rate=0.2):
        super().__init__()
        # Sub-modules are created in a fixed order so that parameter initialisation
        # consumes the random number generator in the same sequence on every run.
        self.fc1 = nn.Linear(input_dim, 40)
        self.fc2 = nn.Linear(40, 80)
        self.fc3 = nn.Linear(80, 60)
        self.fc4 = nn.Linear(60, 40)
        self.fc5 = nn.Linear(40, 25)
        self.output = nn.Linear(25, output_dim)
        self.activation = nn.ReLU()  # Using ReLU activation function
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Return raw logits of shape ``(batch, output_dim)`` for a batch ``x``."""
        x = self.activation(self.fc1(x))
        x = self.dropout(x)  # only active in training mode
        for hidden in (self.fc2, self.fc3, self.fc4, self.fc5):
            x = self.activation(hidden(x))
        # Return raw logits, CrossEntropyLoss will handle softmax
        return self.output(x)

# -----------------------------
# 4. Train Multiple Models (MAE)
# -----------------------------
ensemble_size = 5
input_dim = X_train.shape[1]
output_dim = len(unique_classes)  # Number of output classes
models = []

# Training hyperparameters (loop-invariant, so defined once)
epochs = 150
learning_rate = 0.01
lr_step_size = 30   # halve the learning rate every 30 epochs
lr_gamma = 0.5
log_every = 30      # print the average loss every 30 epochs
base_seed = 42      # model i is trained with seed base_seed + i

# Compute class weights tensor for loss function
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float32).to(device)

# train_loader already draws samples with inverse-class-frequency probabilities
# (WeightedRandomSampler), so batches are roughly class-balanced. Weighting the loss
# by the same inverse frequencies on top of that corrects the imbalance twice and
# over-emphasises the minority classes. Set this to True only if train_loader is
# changed to plain (unweighted) shuffling.
use_weighted_loss = False
criterion = nn.CrossEntropyLoss(
    weight=class_weights_tensor if use_weighted_loss else None
)

for i in range(ensemble_size):
    print(f"Training model {i+1}...")
    # Seed per ensemble member: makes weight initialisation, dropout masks and the
    # sampler's draws reproducible, while still giving every member a different
    # random stream (members must differ for the ensemble to be useful).
    torch.manual_seed(base_seed + i)
    model = DeepNeuralNetwork(input_dim, output_dim).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # Learning rate scheduler: Reduce LR every 30 epochs
    scheduler = StepLR(optimizer, step_size=lr_step_size, gamma=lr_gamma)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            # loss is a batch mean; scale by batch size to accumulate a per-sample sum
            running_loss += loss.item() * X_batch.size(0)
        scheduler.step()
        # Print average loss every 30 epochs
        if (epoch+1) % log_every == 0:
            avg_loss = running_loss / len(train_dataset)
            print(f"Model {i+1}, Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}")
    models.append(model)

# -----------------------------
# 5. Ensemble Prediction & Model Evaluation
# -----------------------------
def ensemble_predict(models, loader):
    """Average the softmax probabilities of several models over a data loader.

    Every model is switched to evaluation mode (and left in it) before predicting.

    Args:
        models: Iterable of modules mapping a feature batch to logits of shape
            ``(batch, n_classes)``.
        loader: Iterable yielding ``(X_batch, y_batch)`` tensor pairs.

    Returns:
        Tuple ``(probs, labels)`` of NumPy arrays: ``probs`` holds the per-sample
        class probabilities averaged across models, ``labels`` the targets, both
        in the order the loader yielded them.

    Raises:
        ValueError: If ``models`` is empty or ``loader`` yields no batches.
    """
    models = list(models)
    if not models:
        raise ValueError("ensemble_predict requires at least one model")

    # Switch to eval mode once, instead of once per model per batch
    for model in models:
        model.eval()

    all_preds = []
    all_labels = []
    with torch.no_grad():
        for X_batch, y_batch in loader:
            # Apply softmax to get class probabilities; stack to (n_models, batch, classes)
            member_probs = torch.stack(
                [torch.softmax(model(X_batch), dim=1) for model in models]
            )
            # Average on the tensor's device, then copy to the host once per batch
            all_preds.append(member_probs.mean(dim=0).cpu().numpy())
            all_labels.append(y_batch.cpu().numpy())

    if not all_preds:
        raise ValueError("ensemble_predict received a loader that yielded no batches")
    return np.concatenate(all_preds), np.concatenate(all_labels)

ensemble_output, y_true = ensemble_predict(models, test_loader)
# Predicted class = index of the highest averaged probability
y_pred = np.argmax(ensemble_output, axis=1)

acc = accuracy_score(y_true, y_pred)
print("Ensemble Model Accuracy:", acc)

# Integer label k corresponds to the k-th column produced by pd.get_dummies(y),
# so those column names recover the original class names for the report.
class_names = [str(name) for name in pd.get_dummies(y).columns]
class_ids = np.arange(len(class_names))

# Passing `labels` keeps the matrix/report shape fixed even when a class is missing
# from the test targets or is never predicted.
conf_mat = confusion_matrix(y_true, y_pred, labels=class_ids)
print("Confusion Matrix:")
print(conf_mat)

# zero_division=0 reports 0.0 for classes that are never predicted instead of
# emitting an undefined-metric warning for each affected average.
class_report = classification_report(
    y_true,
    y_pred,
    labels=class_ids,
    target_names=class_names,
    zero_division=0,
)
print("Classification Report:")
print(class_report)


Column names in the Excel file:
['Paper No', 'Specimen', 'conformity_tbec2018', 'tw', 'lw', 'hw', 'M/(V.lw)', 'hw/lw', 'P/(Ag.fc)', 'fc', 'Agb', 'Ag', 'Agb/Ag', 'ρbl.fybl', 'ρsh.fysh', 'ρl.fyl', 'ρt.fyt', 'v_test', 'failure_mode']
Data Preview:
                      Paper No                   Specimen  \
0  Abdulridha & Palermo (2017)                      W1-SR   
1      [114] Adajar et al.1995                       RCW1   
2                          NaN                       RCW3   
3             [98] Adebar,2007  High-Rise Core Wall (265)   
4            [26] Alarcon,2014                    W1 (49)   

   conformity_tbec2018     tw    lw       hw  M/(V.lw)     hw/lw  P/(Ag.fc)  \
0                  0.0  150.0  1000   2200.0      2.20  2.200000       0.00   
1                  0.0  150.0  1400   2000.0      1.43  1.428571       0.01   
2                  0.0  150.0  1400   2000.0      1.43  1.428571       0.01   
3                  0.0  127.0  1625  12000.0      7.38  7.384615       0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
