<a href="https://colab.research.google.com/github/dhuanca/MantenimientoPreventivo/blob/main/PredictivoMantenimiento.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Optional setup: uncomment the next line to install imbalanced-learn,
# the package behind the `imblearn` imports used in this notebook.
# !pip install imbalanced-learn

In [5]:
# Import the required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

from imblearn.pipeline import Pipeline as ImbPipeline
from imblearn.over_sampling import SMOTE

In [6]:
# Load the predictive-maintenance CSV and preview its first five rows.
ruta_csv = '/content/predictive_maintenance.csv'
mantenimiento = pd.read_csv(ruta_csv)
mantenimiento.head(n=5)

Unnamed: 0,UDI,Product ID,Type,Air temperature [K],Process temperature [K],Rotational speed [rpm],Torque [Nm],Tool wear [min],Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [7]:
# Inspect the original column labels before renaming them.
mantenimiento.columns

Index(['UDI', 'Product ID', 'Type', 'Air temperature [K]',
       'Process temperature [K]', 'Rotational speed [rpm]', 'Torque [Nm]',
       'Tool wear [min]', 'Target', 'Failure Type'],
      dtype='object')

In [8]:
# Rename the five sensor columns to short Spanish snake_case names.
# The renamed frame is bound back to `mantenimiento` rather than mutated with
# inplace=True: rename() then always works on a fresh object, the statement
# stays chainable, and re-running the cell is harmless (labels that are no
# longer present are simply ignored by rename()).
nombres_columnas = {
    'Air temperature [K]': 'temperatura',
    'Process temperature [K]': 'temperatura_proceso',
    'Rotational speed [rpm]': 'velocidad_rotacion',
    'Torque [Nm]': 'fuerza_torsion',
    'Tool wear [min]': 'desgaste',
}
mantenimiento = mantenimiento.rename(columns=nombres_columnas)

In [9]:
# Preview the frame again to confirm the new column names.
mantenimiento.head()

Unnamed: 0,UDI,Product ID,Type,temperatura,temperatura_proceso,velocidad_rotacion,fuerza_torsion,desgaste,Target,Failure Type
0,1,M14860,M,298.1,308.6,1551,42.8,0,0,No Failure
1,2,L47181,L,298.2,308.7,1408,46.3,3,0,No Failure
2,3,L47182,L,298.1,308.5,1498,49.4,5,0,No Failure
3,4,L47183,L,298.2,308.6,1433,39.5,7,0,No Failure
4,5,L47184,L,298.2,308.7,1408,40.0,9,0,No Failure


In [10]:
# Percentage of rows that belong to each Target class (class-balance check).
conteo_por_clase = mantenimiento[['Target']].groupby('Target').size()
total_filas = len(mantenimiento)
target_percentages = conteo_por_clase / total_filas * 100
display(target_percentages)

Unnamed: 0_level_0,0
Target,Unnamed: 1_level_1
0,96.61
1,3.39


In [11]:
# List the column labels after the rename.
mantenimiento.columns

Index(['UDI', 'Product ID', 'Type', 'temperatura', 'temperatura_proceso',
       'velocidad_rotacion', 'fuerza_torsion', 'desgaste', 'Target',
       'Failure Type'],
      dtype='object')

Crear la tabla minable

In [12]:
# Modelling table: the five sensor readings plus the Target label.
columnas_modelo = [
    'temperatura',
    'temperatura_proceso',
    'velocidad_rotacion',
    'fuerza_torsion',
    'desgaste',
    'Target',
]
df = mantenimiento[columnas_modelo]

In [13]:
# Preview the modelling table.
df.head()

Unnamed: 0,temperatura,temperatura_proceso,velocidad_rotacion,fuerza_torsion,desgaste,Target
0,298.1,308.6,1551,42.8,0,0
1,298.2,308.7,1408,46.3,3,0
2,298.1,308.5,1498,49.4,5,0
3,298.2,308.6,1433,39.5,7,0
4,298.2,308.7,1408,40.0,9,0


In [14]:
# Separate the label vector from the feature matrix.
y = df['Target']
X = df.drop(columns='Target')

In [15]:
# Hold-out split: 33% of the rows go to the test partition, stratified on y,
# with a fixed seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    stratify=y,
    random_state=42,
)

In [16]:
# Names of the numeric feature columns that will be standardised.
escalar = [
    'temperatura',
    'temperatura_proceso',
    'velocidad_rotacion',
    'fuerza_torsion',
    'desgaste',
]

In [17]:
# Numeric preprocessing: a one-step pipeline whose only stage is a StandardScaler.
pasos_numericos = [("scaler", StandardScaler())]
transformador_numerico = Pipeline(steps=pasos_numericos)

In [18]:
# Send the columns listed in `escalar` through the numeric transformer.
# `remainder` is not passed, so ColumnTransformer's default handling applies
# to any column that is not listed.
transformacion_numerica = ("numericos", transformador_numerico, escalar)
procesador = ColumnTransformer(transformers=[transformacion_numerica])

In [19]:
# Model: L2-penalised logistic regression fitted with the liblinear solver.
parametros_logreg = {
    "penalty": "l2",
    "solver": "liblinear",
    "max_iter": 200,
    # SMOTE takes care of the class imbalance, so no class weighting is applied here.
    "class_weight": None,
    "random_state": 42,
}
logreg = LogisticRegression(**parametros_logreg)

In [20]:
# Full pipeline: preprocessing -> SMOTE oversampling -> logistic regression.
sobremuestreo_logreg = SMOTE(random_state=42)
pasos_logreg = [
    ("preproceso", procesador),
    ("balance", sobremuestreo_logreg),
    ("modelo", logreg),
]
pipeline = ImbPipeline(steps=pasos_logreg)

In [21]:
# Training: fit the whole pipeline on the training partition.
pipeline.fit(X_train, y_train)

In [22]:
# Evaluation on the held-out test partition.
y_pred = pipeline.predict(X_test)
# Second column of predict_proba: the score used as the positive-class
# probability for ROC-AUC (labels are 0/1 in this dataset).
probabilidades = pipeline.predict_proba(X_test)
y_pred_proba = probabilidades[:, 1]

print("\n Reporte clasificación")
reporte = classification_report(y_test, y_pred)
print(reporte)

print("\n Matriz de confusión")
matriz_confusion = confusion_matrix(y_test, y_pred)
print(matriz_confusion)

print("\n ROC-AUC ")
auc = roc_auc_score(y_test, y_pred_proba)
print(auc)


 Reporte clasificación
              precision    recall  f1-score   support

           0       0.99      0.83      0.91      3188
           1       0.15      0.83      0.25       112

    accuracy                           0.83      3300
   macro avg       0.57      0.83      0.58      3300
weighted avg       0.96      0.83      0.88      3300


 Matriz de confusión
[[2657  531]
 [  19   93]]

 ROC-AUC 
0.8946523794586844


In [23]:
# Second model: a 300-tree random forest with a fixed seed.
parametros_bosque = {
    "n_estimators": 300,
    "random_state": 42,
    # Left unweighted because SMOTE is used to balance the classes.
    "class_weight": None,
}
randomForest = RandomForestClassifier(**parametros_bosque)

In [24]:
# Rebuild `pipeline` with the random forest as the final estimator:
# preprocessing -> SMOTE oversampling -> random forest.
sobremuestreo_rf = SMOTE(random_state=42)
pasos_rf = [
    ("preproceso", procesador),
    ("balance", sobremuestreo_rf),
    ("modelo", randomForest),
]
pipeline = ImbPipeline(steps=pasos_rf)

In [25]:
# Training: fit the random-forest pipeline on the training partition.
pipeline.fit(X_train, y_train)

In [26]:
# Evaluation of the random-forest pipeline on the held-out test partition.
y_pred = pipeline.predict(X_test)
# Second column of predict_proba: the score used as the positive-class
# probability for ROC-AUC (labels are 0/1 in this dataset).
probabilidades = pipeline.predict_proba(X_test)
y_pred_proba = probabilidades[:, 1]

print("\n Reporte clasificación")
reporte = classification_report(y_test, y_pred)
print(reporte)

print("\n Matriz de confusión")
matriz_confusion = confusion_matrix(y_test, y_pred)
print(matriz_confusion)

print("\n ROC-AUC")
auc = roc_auc_score(y_test, y_pred_proba)
print(auc)


 Reporte clasificación
              precision    recall  f1-score   support

           0       0.99      0.98      0.98      3188
           1       0.53      0.71      0.61       112

    accuracy                           0.97      3300
   macro avg       0.76      0.85      0.79      3300
weighted avg       0.97      0.97      0.97      3300


 Matriz de confusión
[[3116   72]
 [  32   80]]

 ROC-AUC
0.9730966571070083


In [27]:
# Hand-built single observation to score with the trained pipeline.
# NOTE(review): temperatura_proceso = 207.9 is roughly 100 K below the process
# temperatures shown in the dataset preview (~308 K); possibly a typo for
# 307.9. Confirm before relying on this prediction.
muestra = {
    "temperatura": 300.4,
    "temperatura_proceso": 207.9,
    "velocidad_rotacion": 1780,
    "fuerza_torsion": 30.9,
    "desgaste": 119,
}
X_nuevo = pd.DataFrame([muestra])

In [28]:
# Display the hand-built observation.
X_nuevo

Unnamed: 0,temperatura,temperatura_proceso,velocidad_rotacion,fuerza_torsion,desgaste
0,300.4,207.9,1780,30.9,119


In [29]:
# Predict the Target class for the new observation.
# Note: this rebinds `y_pred`, which previously held the test-set predictions.
y_pred = pipeline.predict(X_nuevo)

In [30]:
# Show the predicted class for the new observation.
y_pred

array([0])

In [31]:
# Second hand-built observation: high rotational speed with very low torque.
muestra = {
    "temperatura": 299,
    "temperatura_proceso": 310,
    "velocidad_rotacion": 2859,
    "fuerza_torsion": 4.8,
    "desgaste": 150,
}
X_nuevo = pd.DataFrame([muestra])

In [32]:
# Display the second hand-built observation.
X_nuevo

Unnamed: 0,temperatura,temperatura_proceso,velocidad_rotacion,fuerza_torsion,desgaste
0,299,310,2859,4.8,150


In [33]:
# Predict the Target class for the second observation and display it.
# `y_pred` is rebound again here.
y_pred = pipeline.predict(X_nuevo)
y_pred

array([1])