# Machine Learning Predictive Maintenance Classifier Beta
### Created By: Griffin Brown
8/22/2024

### Imports

In [240]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

### Processing

In [241]:
# Load the raw predictive-maintenance dataset and preview its first rows
df = pd.read_csv(filepath_or_buffer="predictive_maintenance.csv")

df.head(n=5)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [242]:
# Remove the identifier columns and 'Failure Type' before modelling
# (presumably 'Failure Type' is dropped because it would leak the target -- confirm)
df.drop(columns=['Product ID', 'UDI', 'Failure Type'], inplace=True)

# Helper that one-hot encodes every object / categorical column of a frame
def preprocess_dataframe(data, drop_first=True, dummy_na=True):
    """Return a copy of ``data`` with object/category columns one-hot encoded.

    Each object or categorical column is replaced by dummy columns named
    ``<column>_<level>``, which are appended after the remaining columns in
    the order the encoded columns appeared. All other columns are passed
    through unchanged. The input frame is never modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to encode.
    drop_first : bool, default True
        Drop the first level of every encoded column (k-1 dummies for k
        levels), forwarded to ``pd.get_dummies``.
    dummy_na : bool, default True
        Add a ``<column>_nan`` indicator for every encoded column, even when
        the column has no missing values (it is then all False), forwarded
        to ``pd.get_dummies``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the encoded columns.
    """
    # Work on a copy so the caller's frame keeps its original dtypes/columns
    encoded = data.copy()

    categorical_cols = encoded.select_dtypes(include=['object', 'category']).columns.to_list()

    for col in categorical_cols:
        # Casting to 'category' first makes the dummy levels follow the
        # (sorted) category order, for object and category inputs alike.
        dummies = pd.get_dummies(
            encoded[col].astype('category'),
            drop_first=drop_first,
            dummy_na=dummy_na,
            prefix=col,
        )
        encoded = pd.concat([encoded.drop(col, axis=1), dummies], axis=1)

    return encoded

# Replace df with its one-hot encoded version
df = df.pipe(preprocess_dataframe)

In [243]:
# Preview the encoded frame
df.head(n=5)

Unnamed: 0,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Type_L,Type_M,Type_nan
0,298.1,308.6,1551,42.8,0,0,False,True,False
1,298.2,308.7,1408,46.3,3,0,True,False,False
2,298.1,308.5,1498,49.4,5,0,True,False,False
3,298.2,308.6,1433,39.5,7,0,True,False,False
4,298.2,308.7,1408,40.0,9,0,True,False,False


In [244]:
# Separate the predictors (X) from the label column (y)
X = df.drop(columns=['Target'])
y = df['Target']

X.head(n=5)

Unnamed: 0,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Type_L,Type_M,Type_nan
0,298.1,308.6,1551,42.8,0,False,True,False
1,298.2,308.7,1408,46.3,3,True,False,False
2,298.1,308.5,1498,49.4,5,True,False,False
3,298.2,308.6,1433,39.5,7,True,False,False
4,298.2,308.7,1408,40.0,9,True,False,False


### Feature Engineering

In [245]:
# Two derived features, each computed row-wise from existing sensor columns
# Torque-to-speed ratio
X['tts_ratio'] = X['Torque [Nm]'].div(X['Rotational speed [rpm]'])
# Process temperature x tool wear interaction
# (column name spelling is kept as-is because recorded outputs use it)
X['temp_wear_interation'] = X['Process temperature [K]'].mul(X['Tool wear [min]'])

X.head(n=5)

Unnamed: 0,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Type_L,Type_M,Type_nan,tts_ratio,temp_wear_interation
0,298.1,308.6,1551,42.8,0,False,True,False,0.027595,0.0
1,298.2,308.7,1408,46.3,3,True,False,False,0.032884,926.1
2,298.1,308.5,1498,49.4,5,True,False,False,0.032977,1542.5
3,298.2,308.6,1433,39.5,7,True,False,False,0.027565,2160.2
4,298.2,308.7,1408,40.0,9,True,False,False,0.028409,2778.3


In [246]:
# Stratified 70/30 hold-out split with a fixed seed so the split is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [247]:
# Sanity-check the split sizes, one shape per line
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep='\n')

(7000, 10)
(3000, 10)
(7000,)
(3000,)


### Create Pipeline

In [248]:
# Feature standardizer; used as the first step of the pipeline assembled below
scaler = StandardScaler()

In [249]:
# Fills missing entries with a constant. No fill_value is given, and the fitted
# statistics_ printed further down are all 0. Because this step runs after the
# scaler in the pipeline, a 0 fill should correspond to each feature's training
# mean (assuming the scaler centres the data -- confirm).
imputer = SimpleImputer(missing_values=pd.NA, strategy='constant')

In [250]:
# Logistic regression with library-default hyperparameters; final pipeline step.
# NOTE(review): the target is heavily imbalanced (the training confusion matrix
# below shows 237 failures out of 7000 rows) -- consider class_weight='balanced'.
lr = LogisticRegression()

In [251]:
# Create pipeline: scale -> impute -> logistic regression.
# make_pipeline names the steps after their classes ('simpleimputer',
# 'logisticregression'), which is how they are looked up in later cells.
# In production, this will have a couple more steps.
pipe1 = make_pipeline(scaler, imputer, lr)

In [252]:
# Fit every pipeline step on the training split only
pipe1.fit(X_train, y_train)

### Scores of Model

In [253]:
# Accuracy of the fitted pipeline on the training set
accuracy_score(y_train, pipe1.predict(X_train))

0.9722857142857143

In [254]:
# Accuracy of the fitted pipeline on the held-out test set
accuracy_score(y_test, pipe1.predict(X_test))

0.9676666666666667

In [255]:
# Fill value the fitted imputer uses for each of the 10 features
pipe1.named_steps['simpleimputer'].statistics_

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [256]:
# Fitted logistic-regression coefficients, one per (scaled) feature
pipe1.named_steps['logisticregression'].coef_

array([[ 1.55369379, -1.03558685,  2.06392551, -0.66316023,  0.44837724,
         0.34938715,  0.03368063,  0.        ,  3.48527564,  0.42115303]])

In [257]:
# Confusion matrix of the training-set predictions
# (rows: true class, columns: predicted class)
y_train_pred = pipe1.predict(X_train)
conf_matrix = confusion_matrix(y_true=y_train, y_pred=y_train_pred)
print(conf_matrix)

[[6742   21]
 [ 173   64]]


In [258]:
# Accuracy plus support-weighted precision / recall / F1 on the training predictions
# NOTE(review): with average='weighted' these scores are dominated by the
# majority class -- weighted recall comes out identical to accuracy in the output
# below, while the confusion matrix shows only 64 of 237 failures are caught.
# Consider also reporting the failure class on its own (average='binary').
accuracy = accuracy_score(y_train, y_train_pred)
precision, recall, f1 = (
    scorer(y_train, y_train_pred, average='weighted')
    for scorer in (precision_score, recall_score, f1_score)
)

print(
    f'Accuracy: {accuracy}',
    f'Precision: {precision}',
    f'Recall: {recall}',
    f'F1: {f1}',
    sep='\n',
)

Accuracy: 0.9722857142857143
Precision: 0.9674642580676522
Recall: 0.9722857142857143
F1: 0.9658984450706989


In [259]:
# ROC AUC on the training set, scored with the second column of predict_proba;
# flagged by the author as an area that could be improved
roc_auc = roc_auc_score(y_true=y_train, y_score=pipe1.predict_proba(X_train)[:, 1])

print(f'ROC_AUC: {roc_auc}')

ROC_AUC: 0.9226743181283615


In [260]:
# 10-fold cross-validated accuracy of the whole pipeline on the training split
cv_scores = cross_val_score(estimator=pipe1, X=X_train, y=y_train, scoring='accuracy', cv=10)

print(
    f'Fold Accuracy Scores: {cv_scores}',
    f'Average Accuracy Score: {cv_scores.mean()}',
    sep='\n',
)

Fold Accuracy Scores: [0.96285714 0.97285714 0.97       0.97142857 0.97       0.97428571
 0.97857143 0.97714286 0.97428571 0.97      ]
Average Accuracy Score: 0.9721428571428573


In [261]:
# Rank the features by the absolute size of their logistic-regression coefficient
coefs = np.abs(pipe1.named_steps['logisticregression'].coef_[0])
feature_importance = pd.Series(coefs, index=X_train.columns).sort_values(ascending=False)

print(feature_importance)

tts_ratio                  3.485276
Rotational speed [rpm]     2.063926
Air temperature [K]        1.553694
Process temperature [K]    1.035587
Torque [Nm]                0.663160
Tool wear [min]            0.448377
temp_wear_interation       0.421153
Type_L                     0.349387
Type_M                     0.033681
Type_nan                   0.000000
dtype: float64
