<a href="https://colab.research.google.com/github/NancyAmer0/PM-time-series/blob/main/PM_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#Libraries

In [22]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.compose import ColumnTransformer
import xgboost as xgb
from xgboost import  XGBRegressor
from scipy import stats
import joblib

#Pipeline

In [23]:
categorical_features = ['flight_phase']
numerical_features = ['flight_cycle',	'egt_probe_average',	'fuel_flw',	'core_spd',	'zpn12p',	'vib_n1_#1_bearing',	'vib_n2_#1_bearing',	'vib_n2_turbine_frame']
numerical_features_for_outlier = ['egt_probe_average', 'fuel_flw', 'core_spd',
                                  'zpn12p', 'vib_n1_#1_bearing', 'vib_n2_#1_bearing',
                                  'vib_n2_turbine_frame'] #'corrected_fan_spd'

In [24]:
# Custom Transformer for outliers removal: OutlierRemoverGroup
class OutlierRemoverGroup(BaseEstimator, TransformerMixin):
    def __init__(self, groupby_col, columns, threshold=2.5):
        self.groupby_col = groupby_col
        self.columns = columns
        self.threshold = threshold

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X_clean = X.copy()
        z_scores = X_clean.groupby(self.groupby_col)[self.columns].transform(
            lambda x: np.abs(stats.zscore(x, nan_policy='omit'))
        )
        mask = (z_scores <= self.threshold).all(axis=1)
        return X_clean[mask].reset_index(drop=True)

In [25]:
#encoding categorical column
categorical_pipeline = Pipeline(steps=[
    ('onehot', OneHotEncoder(handle_unknown='ignore', drop='first'))
])

In [26]:
#merging the transformers together
col_transformer = ColumnTransformer(
    transformers=[
        ('num', 'passthrough', numerical_features),
        ('cat', categorical_pipeline, categorical_features)
    ]
)

In [27]:
xgb_model = XGBRegressor(random_state=42)
pipefinal = make_pipeline(col_transformer, xgb_model)

#Data Importing

In [28]:
df = pd.read_csv('engines2_data_cleaned_no_outliers.csv')#Data/

In [29]:
#applying the above outlierRemover
X = df[['flight_cycle', 'flight_phase', 'egt_probe_average', 'fuel_flw', 'core_spd', 'zpn12p', 'vib_n1_#1_bearing', 'vib_n2_#1_bearing', 'vib_n2_turbine_frame']]
Y= df['RUL']
df_all = X.copy()
df_all['RUL'] = Y
remover = OutlierRemoverGroup(groupby_col='flight_phase', columns=numerical_features_for_outlier)
df_filtered = remover.fit_transform(df_all)

#Data Splitting

In [30]:
X = df_filtered.drop(columns='RUL')
Y = df_filtered['RUL']
# Initial 80/20 split
X_train, X_temp, y_train, y_temp = train_test_split(X, Y, test_size=0.2, random_state=42)
# Further split temp into validation and test sets
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

In [31]:
pipefinal.fit(X_train, y_train)

In [33]:
# Predict and evaluate on the test set
y_pred = pipefinal.predict(X_test)
from sklearn.metrics import mean_absolute_error, mean_squared_error
mae = mean_absolute_error(y_test, y_pred)
rmse = mean_squared_error(y_test, y_pred)
print(f"MAE: {mae}, RMSE: {rmse}")

MAE: 125.16436767578125, RMSE: 27350.759765625


In [34]:
# Save the Pipeline
joblib.dump(pipefinal, 'predictive_maintenance_pipeline.pkl')


['predictive_maintenance_pipeline.pkl']