In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import mutual_info_classif, RFE
from sklearn.ensemble import RandomForestClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, classification_report

Accuracy: 0.9705
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.98      1939
           1       0.52      0.43      0.47        61

    accuracy                           0.97      2000
   macro avg       0.75      0.71      0.73      2000
weighted avg       0.97      0.97      0.97      2000

Predicted Maintenance Category: 0




In [7]:
# Load dataset
df = pd.read_csv("/content/ai4i2020.csv")
df.head()

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Machine failure,TWF,HDF,PWF,OSF,RNF
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,0,0,0,0,0
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,0,0,0,0,0
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,0,0,0,0,0
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,0,0,0,0,0
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,0,0,0,0,0


In [8]:
# Preprocessing
df.drop(columns=['UDI', 'Product ID'], inplace=True)  # Drop irrelevant columns
df['Type'] = df['Type'].astype('category').cat.codes  # Encode categorical variable
X = df.drop(columns=['Machine failure'])  # Features
y = df['Machine failure']  # Target variable

In [9]:
# Feature Selection: Mutual Information & Recursive Feature Elimination
mi = mutual_info_classif(X, y)
mi_rank = np.argsort(mi)[::-1]  # Rank features by mutual information
selector = RFE(RandomForestClassifier(n_estimators=100, random_state=42), n_features_to_select=5)
selector.fit(X.iloc[:, mi_rank], y)
selected_features = X.columns[selector.support_]

In [10]:
# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X[selected_features], y, test_size=0.2, random_state=42)

In [11]:
# Apply LDA
lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train)
y_pred = lda.predict(X_test)

In [12]:
# Model Evaluation
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.9705
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.99      0.98      1939
           1       0.52      0.43      0.47        61

    accuracy                           0.97      2000
   macro avg       0.75      0.71      0.73      2000
weighted avg       0.97      0.97      0.97      2000



In [13]:
# Integration Simulation: Predict new data point
new_data = np.array([X_test.iloc[0]])  # Simulating real-time input
y_new_pred = lda.predict(new_data)
print("Predicted Maintenance Category:", y_new_pred[0])

Predicted Maintenance Category: 0




In [14]:
from sklearn.ensemble import RandomForestClassifier

# Initialize RandomForest model
rf_model = RandomForestClassifier(n_estimators=200, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

# Evaluate Model
print("🔹 RandomForest Classifier Results:")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))

# Predict new data point
new_data = np.array([X_test.iloc[0]])
y_new_pred_rf = rf_model.predict(new_data)
print("Predicted Maintenance Category (RF):", y_new_pred_rf[0])


🔹 RandomForest Classifier Results:
Accuracy: 0.977
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      1939
           1       0.67      0.48      0.56        61

    accuracy                           0.98      2000
   macro avg       0.83      0.73      0.77      2000
weighted avg       0.97      0.98      0.98      2000

Predicted Maintenance Category (RF): 0




In [16]:
from sklearn.ensemble import GradientBoostingClassifier

# Initialize GradientBoosting model
gb_model = GradientBoostingClassifier(n_estimators=200, learning_rate=0.1, max_depth=5, random_state=42)
gb_model.fit(X_train, y_train)
y_pred_gb = gb_model.predict(X_test)

# Evaluate Model
print("🔹 GradientBoosting Classifier Results:")
print("Accuracy:", accuracy_score(y_test, y_pred_gb))
print(classification_report(y_test, y_pred_gb))

# Predict new data point
new_data = np.array([X_test.iloc[0]])
y_new_pred_gb = gb_model.predict(new_data)
print("Predicted Maintenance Category (GradientBoosting):", y_new_pred_gb[0])


🔹 GradientBoosting Classifier Results:
Accuracy: 0.978
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1939
           1       0.67      0.56      0.61        61

    accuracy                           0.98      2000
   macro avg       0.83      0.77      0.80      2000
weighted avg       0.98      0.98      0.98      2000

Predicted Maintenance Category (GradientBoosting): 0




In [17]:
from sklearn.neural_network import MLPClassifier

# Initialize MLP model
mlp_model = MLPClassifier(hidden_layer_sizes=(50, 50), activation='relu', solver='adam', max_iter=500, random_state=42)
mlp_model.fit(X_train, y_train)
y_pred_mlp = mlp_model.predict(X_test)

# Evaluate Model
print("🔹 MLP Classifier Results:")
print("Accuracy:", accuracy_score(y_test, y_pred_mlp))
print(classification_report(y_test, y_pred_mlp))

# Predict new data point
new_data = np.array([X_test.iloc[0]])
y_new_pred_mlp = mlp_model.predict(new_data)
print("Predicted Maintenance Category (MLP):", y_new_pred_mlp[0])


🔹 MLP Classifier Results:
Accuracy: 0.96
              precision    recall  f1-score   support

           0       0.98      0.98      0.98      1939
           1       0.38      0.48      0.42        61

    accuracy                           0.96      2000
   macro avg       0.68      0.73      0.70      2000
weighted avg       0.96      0.96      0.96      2000

Predicted Maintenance Category (MLP): 0




In [19]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV
import scipy.stats as st

# Define parameter distribution (fewer combinations to speed up)
param_dist = {
    'n_estimators': st.randint(100, 300),  # Number of trees
    'learning_rate': st.uniform(0.01, 0.2),  # Learning rate range
    'max_depth': st.randint(3, 7),  # Tree depth
    'min_samples_split': st.randint(2, 10),  # Min samples to split a node
    'min_samples_leaf': st.randint(1, 4)  # Min samples in a leaf node
}

# Initialize model
gb_model = GradientBoostingClassifier(random_state=42)

# Use RandomizedSearchCV (runs faster than GridSearchCV)
random_search = RandomizedSearchCV(estimator=gb_model, param_distributions=param_dist,
                                   n_iter=20, cv=3, scoring='accuracy', n_jobs=-1, verbose=2, random_state=42)
random_search.fit(X_train, y_train)

# Get best parameters
best_params = random_search.best_params_
print("🔹 Best Hyperparameters:", best_params)

# Train Gradient Boosting with best parameters
optimized_gb = GradientBoostingClassifier(**best_params, random_state=42)
optimized_gb.fit(X_train, y_train)
y_pred_optimized_gb = optimized_gb.predict(X_test)

# Evaluate the optimized model
print("🚀 Optimized Gradient Boosting Classifier Results:")
print("Accuracy:", accuracy_score(y_test, y_pred_optimized_gb))
print(classification_report(y_test, y_pred_optimized_gb))


Fitting 3 folds for each of 20 candidates, totalling 60 fits
🔹 Best Hyperparameters: {'learning_rate': np.float64(0.08997219434305109), 'max_depth': 6, 'min_samples_leaf': 3, 'min_samples_split': 7, 'n_estimators': 289}
🚀 Optimized Gradient Boosting Classifier Results:
Accuracy: 0.9795
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1939
           1       0.70      0.57      0.63        61

    accuracy                           0.98      2000
   macro avg       0.84      0.78      0.81      2000
weighted avg       0.98      0.98      0.98      2000



In [20]:
import joblib

# Save the model
joblib.dump(optimized_gb, "predictive_maintenance_model.pkl")
print("Model saved successfully!")


Model saved successfully!


In [21]:
import joblib
import numpy as np

# After training your model
joblib.dump(optimized_gb, "predictive_maintenance_model.pkl", compress=3)

# Check if it works
model = joblib.load("predictive_maintenance_model.pkl")
print("Model is working fine!")


Model is working fine!
