In [3]:
# Step 1: Import libraries
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import TensorDataset, DataLoader

In [4]:
# Read the raw CSV and immediately discard the columns that are neither model
# inputs nor the label used below ('Failure Type').
df = pd.read_csv("predictive_maintenance.csv").drop(
    columns=["UDI", "Product ID", "Target"]
)

# Replace the 'Failure Type' values with integer class ids. The fitted encoder is
# kept in `failure_encoder` so the ids can be mapped back to the original values.
failure_encoder = LabelEncoder()
failure_encoder.fit(df["Failure Type"])
df["Failure Type"] = failure_encoder.transform(df["Failure Type"])
df

Unnamed: 0,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Failure Type
0,M,298.1,308.6,1551,42.8,0,1
1,L,298.2,308.7,1408,46.3,3,1
2,L,298.1,308.5,1498,49.4,5,1
3,L,298.2,308.6,1433,39.5,7,1
4,L,298.2,308.7,1408,40.0,9,1
...,...,...,...,...,...,...,...
9995,M,298.8,308.4,1604,29.5,14,1
9996,H,298.9,308.4,1632,31.8,17,1
9997,M,299.0,308.6,1645,33.4,22,1
9998,H,299.0,308.7,1408,48.5,25,1


In [5]:
# One-hot encode the categorical 'Type' column into Type_* indicator columns.
# The guard keeps the cell re-runnable: after the first run 'Type' no longer exists,
# and calling get_dummies(columns=['Type']) again would raise KeyError.
if "Type" in df.columns:
    df = pd.get_dummies(df, columns=["Type"], drop_first=False, dtype=int)
elif not any(str(col).startswith("Type_") for col in df.columns):
    # Neither the raw column nor its encoded form is present: fail loudly instead
    # of silently passing an un-encoded frame downstream.
    raise KeyError("Neither 'Type' nor its one-hot 'Type_*' columns are present in df")
df

Unnamed: 0,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Failure Type,Type_H,Type_L,Type_M
0,298.1,308.6,1551,42.8,0,1,0,0,1
1,298.2,308.7,1408,46.3,3,1,0,1,0
2,298.1,308.5,1498,49.4,5,1,0,1,0
3,298.2,308.6,1433,39.5,7,1,0,1,0
4,298.2,308.7,1408,40.0,9,1,0,1,0
...,...,...,...,...,...,...,...,...,...
9995,298.8,308.4,1604,29.5,14,1,0,0,1
9996,298.9,308.4,1632,31.8,17,1,1,0,0
9997,299.0,308.6,1645,33.4,22,1,0,0,1
9998,299.0,308.7,1408,48.5,25,1,1,0,0


In [6]:
# Step 3: Preprocessing
X = df.drop(columns=['Failure Type'])
y = df['Failure Type']

# Train-test split FIRST, on the unscaled features, so that held-out rows never
# contribute to the scaling statistics (fitting the scaler on all rows leaks
# test-set information into training).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features: fit on the training split only, then apply the same
# transform to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full feature matrix under the train-fitted scaler, kept under its original name
# for any code that still refers to it.
X_scaled = scaler.transform(X)

# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
# Float column-vector targets; the training cell rebuilds the targets with the
# dtype/shape required by the loss it selects.
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Create DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [None]:
import numpy as np
import torch.nn.functional as F

# Take the number of classes from the fitted label encoder rather than from the
# labels that happen to land in the training split: if a rare class were missing
# from y_train, the output layer would be too small and the encoded targets would
# fall outside the valid class-index range.
n_classes = len(failure_encoder.classes_)
print(f"Detected n_classes = {n_classes}")

# Number of input features seen by the first linear layer.
input_dim = X_train.shape[1]

# Define model: last layer size depends on n_classes.
class ClassificationModel(nn.Module):
    """Fully connected classifier: input_dim -> 64 -> 32 -> output_dim.

    The network returns raw logits. No sigmoid/softmax is applied here because the
    losses used with it (CrossEntropyLoss / BCEWithLogitsLoss) expect logits.

    Args:
        input_dim: number of input features per sample.
        output_dim: number of output logits per sample.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        # The attribute names below become the state_dict keys.
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the logits for a batch `x` of shape (N, input_dim)."""
        # ReLU follows each hidden layer; the output layer is left linear.
        for hidden_layer in (self.fc1, self.fc2):
            x = self.relu(hidden_layer(x))
        return self.fc3(x)

# Prepare model, loss, targets and loaders according to the number of classes.

# Seed PyTorch before anything random happens: weight initialisation and the
# shuffling DataLoader both draw from the global RNG, so without a seed the
# reported losses/accuracy change on every run even though the split is seeded.
torch.manual_seed(42)

if n_classes > 2:
    # Multi-class: logits of shape (N, n_classes), targets are class indices
    # stored as a 1-D LongTensor of shape (N,).
    model = ClassificationModel(input_dim, n_classes)
    criterion = nn.CrossEntropyLoss()
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
    y_test_tensor = torch.tensor(y_test.values, dtype=torch.long)
else:
    # Binary: a single logit per sample with BCEWithLogitsLoss (more stable than
    # BCELoss + sigmoid); targets are floats of shape (N, 1).
    model = ClassificationModel(input_dim, 1)
    criterion = nn.BCEWithLogitsLoss()
    y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
    y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Datasets and loaders are built once, from whichever target tensors were chosen.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Pick the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)

# The DataLoader yields CPU tensors; each batch is moved to `device` in the loop.
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 100
n_train_samples = len(train_loader.dataset)
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        # The model returns logits. CrossEntropyLoss takes (N, C) logits with (N,)
        # class indices; BCEWithLogitsLoss takes (N, 1) logits with (N, 1) floats.
        loss = criterion(model(X_batch), y_batch)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the batch size so the epoch figure is a
        # per-sample average even when the last batch is smaller.
        running_loss += loss.item() * X_batch.size(0)

    epoch_loss = running_loss / n_train_samples

    # Report the first epoch and then every tenth one.
    should_report = epoch == 0 or (epoch + 1) % 10 == 0
    if should_report:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")

# Evaluation: score the whole test set in one forward pass without gradients.
model.eval()
with torch.no_grad():
    X_test_device = X_test_tensor.to(device)
    outputs = model(X_test_device)  # logits
    true = y_test_tensor.cpu()

    if n_classes <= 2:
        # Binary head: (N, 1) logits -> sigmoid probability -> threshold at 0.5.
        probs = torch.sigmoid(outputs).cpu()
        preds = (probs >= 0.5).float()
        accuracy = (preds.eq(true).sum().float() / true.shape[0]).item()
    else:
        # Multi-class head: (N, C) logits -> index of the largest logit.
        preds = outputs.argmax(dim=1).cpu()
        accuracy = preds.eq(true).float().mean().item()

    print(f"Test Accuracy: {accuracy:.4f}")

Detected n_classes = 6
Epoch [1/100], Loss: 0.3726
Epoch [10/100], Loss: 0.0700
Epoch [20/100], Loss: 0.0577
Epoch [30/100], Loss: 0.0525
Epoch [40/100], Loss: 0.0474
Epoch [50/100], Loss: 0.0443
Epoch [60/100], Loss: 0.0412
Epoch [70/100], Loss: 0.0393
Epoch [80/100], Loss: 0.0354
Epoch [90/100], Loss: 0.0340
Epoch [100/100], Loss: 0.0321
Test Accuracy: 0.9830


In [8]:
# Save checkpoint but do NOT pickle the scaler/encoder objects.
import numpy as np

checkpoint_path = "model_checkpoint.pth"

# var_ is a numpy array when present; convert it to a plain list like mean/scale so
# the checkpoint holds only tensors and built-in Python types (a raw ndarray would
# be pickled as an object, defeating the "no pickled objects" goal of this cell).
scaler_var = getattr(scaler, "var_", None)

checkpoint = {
    "epoch": epochs,
    "model_state_dict": model.state_dict(),
    "optimizer_state_dict": optimizer.state_dict(),
    # Save scaler parameters instead of the object
    "scaler_state": {
        "mean": scaler.mean_.tolist(),
        "scale": scaler.scale_.tolist(),
        "var": scaler_var.tolist() if scaler_var is not None else None,   # optional
        "n_features_in": int(getattr(scaler, "n_features_in_", scaler.mean_.shape[0]))
    },
    # Save label encoder classes_ so we can reconstruct LabelEncoder without pickling it
    "label_encoder_classes": failure_encoder.classes_.tolist() if 'failure_encoder' in globals() else None,
    "n_classes": n_classes,
    "input_dim": input_dim,
    "feature_columns": list(X.columns)  # important for raw-DataFrame inference
}

torch.save(checkpoint, checkpoint_path)
print(f"Saved checkpoint to {checkpoint_path}")

Saved checkpoint to model_checkpoint.pth


### Inference Model

In [17]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd

# ---------- 1) Load checkpoint ----------
# NOTE(review): weights_only=False allows torch.load to unpickle arbitrary Python
# objects, so only load checkpoint files produced by this notebook / a trusted source.
checkpoint_path = "model_checkpoint.pth"
checkpoint = torch.load(checkpoint_path, map_location="cpu", weights_only=False)

# ---------- 2) Rebuild the model ----------
class ClassificationModel(nn.Module):
    """Inference-side copy of the classifier: input_dim -> 64 -> 32 -> output_dim.

    The layer attribute names (fc1, fc2, fc3) are the keys that
    `load_state_dict` matches against the saved weights, so they must not change.
    The forward pass returns raw logits.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, output_dim)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return the logits for a batch `x` of shape (N, input_dim)."""
        first_hidden = self.relu(self.fc1(x))
        second_hidden = self.relu(self.fc2(first_hidden))
        return self.fc3(second_hidden)

input_dim = checkpoint["input_dim"]
n_classes = checkpoint["n_classes"]
# The training cell builds a single-logit head when n_classes <= 2 and an
# n_classes-wide head otherwise; mirror that rule here so the saved weights fit
# the rebuilt network (always using n_classes would fail for a binary checkpoint).
output_dim = n_classes if n_classes > 2 else 1
model = ClassificationModel(input_dim, output_dim)
model.load_state_dict(checkpoint["model_state_dict"])
model.eval()

# ---------- 3) Rebuild scaler ----------
# Only the saved statistics are needed; no scaler object is reconstructed.
scaler_state = checkpoint["scaler_state"]
mean = np.array(scaler_state["mean"])
scale = np.array(scaler_state["scale"])
var = scaler_state.get("var", None)  # optional; not needed by scale_input

# Standardise raw features with the statistics restored from the checkpoint.
def scale_input(X_raw):
    """Return `X_raw` centred by the saved `mean` and divided by the saved `scale`.

    `mean` and `scale` are the module-level arrays loaded from the checkpoint.
    """
    centred = X_raw - mean
    return centred / scale

# ---------- 4) Rebuild label encoder ----------
# Class names saved at training time, indexed by encoded label id. The saving cell
# writes None here when no label encoder was available.
failure_classes = checkpoint["label_encoder_classes"]
def decode_label(label_id):
    """Translate an encoded class id into its saved class name.

    Args:
        label_id: integer class index predicted by the model.

    Returns:
        The entry of the module-level `failure_classes` list at `label_id`, or
        `str(label_id)` when the checkpoint carried no class names (the list is
        None), so decoding degrades gracefully instead of raising TypeError.

    Raises:
        IndexError: if `label_id` is outside the saved class list.
    """
    if failure_classes is None:
        return str(label_id)
    return failure_classes[label_id]

# ---------- 5) Prepare new sample ----------
# Must match the same columns and order as during training
feature_columns = checkpoint["feature_columns"]

# Example input sample (replace with your own data)
new_sample = pd.DataFrame([{
    "Air temperature [K]": 299,
    "Process temperature [K]": 309,
    "Rotational speed [rpm]": 2861,
    "Torque [Nm]": 4.5,
    "Tool wear [min]": 143,
    "Type_H": 0,
    "Type_L": 1,
    "Type_M": 0
}])[feature_columns]  # enforce column order

# ---------- 6) Scale and convert to tensor ----------
X_input = torch.tensor(scale_input(new_sample.values), dtype=torch.float32)

# ---------- 7) Predict ----------
with torch.no_grad():
    outputs = model(X_input)
    if outputs.shape[1] == 1:
        # Single-logit (binary) head: an argmax over one column is always 0, so
        # threshold the sigmoid probability at 0.5 as the evaluation code does.
        predicted = (torch.sigmoid(outputs) >= 0.5).long().squeeze(1)
    else:
        # Multi-class head: the class id is the index of the largest logit.
        predicted = torch.argmax(outputs, dim=1)
    predicted_label_id = predicted.item()

# ---------- 8) Decode label ----------
predicted_failure_type = decode_label(predicted_label_id)
print(f"Predicted Failure Type (numeric): {predicted_label_id}")
print(f"Predicted Failure Type (string): {predicted_failure_type}")

Predicted Failure Type (numeric): 3
Predicted Failure Type (string): Power Failure
