In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the predictive-maintenance dataset; the relative path is resolved
# against the notebook's current working directory.
df = pd.read_csv(filepath_or_buffer='predictive_maintenance.csv')

# Show a five-row preview to confirm the columns were parsed as expected.
print(df.head(n=5))

   UDI Product ID Type  Air temperature [K]  Process temperature [K]  \
0    1     M14860    M                298.1                    308.6   
1    2     L47181    L                298.2                    308.7   
2    3     L47182    L                298.1                    308.5   
3    4     L47183    L                298.2                    308.6   
4    5     L47184    L                298.2                    308.7   

   Rotational speed [rpm]  Torque [Nm]  Tool wear [min]  Target Failure Type  
0                    1551         42.8                0       0   No Failure  
1                    1408         46.3                3       0   No Failure  
2                    1498         49.4                5       0   No Failure  
3                    1433         39.5                7       0   No Failure  
4                    1408         40.0                9       0   No Failure  


Model Creation

In [5]:
import joblib
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# ---------------------------------------------------------------------------
# Features and target
# ---------------------------------------------------------------------------
# 'Target' is the label being predicted. 'Failure Type' is dropped as well:
# it describes the failure outcome itself (every Target == 0 row in the
# preview reads 'No Failure'), so keeping it would leak the label into the
# features. 'UDI' and 'Product ID' look like per-row identifiers
# (TODO confirm against the dataset description); one-hot encoding
# 'Product ID' would add one dummy column per distinct ID and let the model
# memorise rows instead of learning from the sensor readings.
NON_FEATURE_COLUMNS = ['Target', 'Failure Type', 'UDI', 'Product ID']

# Convert the remaining categorical variables to dummy variables.
X = pd.get_dummies(df.drop(columns=NON_FEATURE_COLUMNS))
y = df['Target']

# ---------------------------------------------------------------------------
# Train / test split
# ---------------------------------------------------------------------------
# Split BEFORE any fitting so that no statistic of the test rows (means,
# variances, principal axes) can influence training. X_train / X_test hold
# unscaled features; scaling and PCA are applied inside the pipelines below.
# stratify=y keeps the class ratio of 'Target' the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# ---------------------------------------------------------------------------
# Random forest with hyperparameter tuning
# ---------------------------------------------------------------------------
# Scaling and PCA (retain 95% of variance) are pipeline steps, so
# GridSearchCV re-fits them on the training part of every CV fold and the
# validation part never contributes to the preprocessing.
rfc = RandomForestClassifier(random_state=42)
rf_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=0.95)),
    ('rf', rfc),
])

# Keys carry the 'rf__' prefix because they address the pipeline's 'rf' step.
# 'auto' is intentionally absent from max_features: for classifiers it was an
# alias of 'sqrt', and scikit-learn >= 1.3 rejects it.
param_grid = {
    'rf__n_estimators': [100, 200, 300],
    'rf__max_features': ['sqrt', 'log2'],
    'rf__max_depth': [4, 6, 8],
    'rf__criterion': ['gini', 'entropy'],
}

cv_rfc = GridSearchCV(estimator=rf_pipeline, param_grid=param_grid, cv=5)
cv_rfc.fit(X_train, y_train)

# The refitted best pipeline (scaler -> PCA -> forest), trained on X_train.
best_model = cv_rfc.best_estimator_

# Expose the fitted preprocessing steps under their usual names so they can
# still be inspected (e.g. pca.explained_variance_ratio_).
scaler = best_model.named_steps['scaler']
pca = best_model.named_steps['pca']

# ---------------------------------------------------------------------------
# Model evaluation
# ---------------------------------------------------------------------------
y_pred = best_model.predict(X_test)
print("Best Parameters:", cv_rfc.best_params_)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# ---------------------------------------------------------------------------
# Ensemble learning: soft-voting over the tuned forest and an SVM
# ---------------------------------------------------------------------------
# The SVM gets its own scaler/PCA steps so it sees the same kind of input as
# the forest. random_state fixes the internal cross-validation that
# probability=True uses, which keeps the ensemble reproducible.
svm = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=0.95)),
    ('svc', SVC(probability=True, random_state=42)),
])

# VotingClassifier clones and fits every estimator itself, so the SVM does
# not need a separate fit call and best_model is left untouched.
ensemble = VotingClassifier(
    estimators=[('rf', best_model), ('svm', svm)],
    voting='soft',
)
ensemble.fit(X_train, y_train)
y_ensemble_pred = ensemble.predict(X_test)
print("Ensemble Accuracy:", accuracy_score(y_test, y_ensemble_pred))

# ---------------------------------------------------------------------------
# Save the best model for deployment
# ---------------------------------------------------------------------------
# The whole pipeline is persisted, so the artifact carries its fitted scaler
# and PCA and can score dummy-encoded feature rows directly. Inputs at
# inference time must have the same columns, in the same order, as X.
joblib.dump(best_model, 'deploy_model.joblib')
