# ⚙️ Predictive Maintenance: Failure Classification with SMOTE
**Goal:** Predict machine failures from sensor data, handling class imbalance with SMOTE and classifying with a Random Forest.

**Libraries:** Pandas, NumPy, Scikit-learn, imbalanced-learn, Matplotlib, Seaborn, joblib


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.preprocessing import MinMaxScaler
import joblib

from imblearn.over_sampling import SMOTE

# Notebook-wide presentation: floats shown by pandas are rendered with four
# decimal places, and Matplotlib figures use the 'ggplot' style sheet.
def _format_float(value):
    """Return *value* formatted with exactly four decimal places."""
    return '%.4f' % value


pd.set_option('display.float_format', _format_float)
plt.style.use('ggplot')

In [None]:
# Read the dataset CSV into a DataFrame; the 'Timestamp' column is parsed
# as datetimes at load time.
DATA_PATH = 'Normalized_and_Cleaned_Predictive_Maintenance_Data.csv'
df = pd.read_csv(DATA_PATH, parse_dates=['Timestamp'])

# Report (rows, columns), then preview the first rows as the cell output.
print("Data shape:", df.shape)
df.head()

In [None]:
# Split the frame into target and features. The feature matrix excludes the
# target itself plus the 'MachineID' and 'Timestamp' columns.
TARGET_COLUMN = 'failure'
NON_FEATURE_COLUMNS = [TARGET_COLUMN, 'MachineID', 'Timestamp']

y = df[TARGET_COLUMN]
X = df.drop(columns=NON_FEATURE_COLUMNS)

# Per-class row counts before any resampling is applied.
print("Class distribution before SMOTE:", y.value_counts(), sep="\n")

In [None]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both splits; the fixed seed makes the split
# reproducible.
split_options = {'test_size': 0.2, 'stratify': y, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Report the (rows, columns) shape of each feature split.
for label, frame in (("Training size:", X_train), ("Test size:", X_test)):
    print(label, frame.shape)

In [None]:
# Resample with SMOTE using only the training split, so the held-out test
# split is never touched by synthetic samples.
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X_train, y_train)

# Per-class row counts of the resampled training labels.
class_counts = pd.Series(y_resampled).value_counts()
print("Class distribution after SMOTE:", class_counts, sep="\n")

In [None]:
# Fit a 100-tree Random Forest (fixed seed) on the SMOTE-resampled training
# data. `fit` returns the estimator itself, so the call can be chained.
rf_params = {'n_estimators': 100, 'random_state': 42}
model = RandomForestClassifier(**rf_params).fit(X_resampled, y_resampled)

# Hard class predictions for the untouched test split.
y_pred_smote = model.predict(X_test)

In [None]:
# Score the test-split predictions: compute both summaries first, then
# print them under their headings.
cm = confusion_matrix(y_test, y_pred_smote)
report = classification_report(y_test, y_pred_smote, digits=4)

print("Confusion Matrix:")
print(cm)

print("\nClassification Report:")
print(report)

In [None]:
# ROC analysis on the test split, using the predicted probability from the
# second column of predict_proba as the score.
y_probs_smote = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_probs_smote)
roc_auc = roc_auc_score(y_test, y_probs_smote)
curve_label = f"AUC = {roc_auc:.4f}"

plt.figure(figsize=(6, 5))
plt.plot(fpr, tpr, label=curve_label)
# Dashed diagonal: the reference line from (0, 0) to (1, 1).
plt.plot([0, 1], [0, 1], 'k--')
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve After SMOTE")
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Persist the fitted classifier to disk with joblib so it can be reloaded
# later without retraining.
MODEL_PATH = 'rf_failure_prediction_model.pkl'
joblib.dump(model, MODEL_PATH)