In [1]:
# from google.colab import drive
# drive.mount('/content/drive')

In [2]:
import warnings
warnings.filterwarnings("ignore")

# DATASET INITIALIZATION

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Load training and test data
train_data = pd.read_csv('/kaggle/input/turbofan-dataset/train_FD002.txt', delim_whitespace=True, header=None)
test_data = pd.read_csv('/kaggle/input/turbofan-dataset/test_FD002.txt', delim_whitespace=True, header=None)
rul_data = pd.read_csv('/kaggle/input/turbofan-dataset/RUL_FD002.txt', delim_whitespace=True, header=None)

In [4]:
column_names = ["engine_id" , "time_in_cycles" , "altitude" , "mach_no" , "throttle_angle" , "fan_inlet_temp" , "LPC_outlet_temp" , "HPC_outlet_temp" , "LPT_outlet_temp" , "fan_inlet_pressure" , "bypass_duct_pressure" , "HPC_outlet_pressure" , "fan_speed" , "core_speed" , "engine_pressure_ratio" , "HPC_outlet_static_pressure" , "fuel_ps30_ratio" , "corrected_fan_speed" , "corrected_core_speed" , "bypass_ratio" , "burner_fuel_air_ratio" , "bleed_enthalpy" , "demanded_fan_speed" , "demanded_corrected_fan_speed" , "HPT_coolant_bleed" , "LPT_coolant_bleed"]
train_data.columns = column_names
test_data.columns = column_names

In [5]:
train_data['RUL'] = train_data.groupby('engine_id')['time_in_cycles'].transform(max) - train_data['time_in_cycles']

# REMOVING OUTLIERS

In [6]:
import pandas as pd

# Calculate Q1 (25th percentile) and Q3 (75th percentile) for each feature in the train data
Q1_train = train_data.iloc[:, 2:-1].quantile(0.25)  # Exclude 'engine_id', 'time_in_cycles', and 'RUL'
Q3_train = train_data.iloc[:, 2:-1].quantile(0.75)
IQR_train = Q3_train - Q1_train

# Calculate Q1 (25th percentile) and Q3 (75th percentile) for each feature in the test data
Q1_test = test_data.iloc[:, 2:-1].quantile(0.25)  # Exclude 'engine_id', 'time_in_cycles', and 'RUL'
Q3_test = test_data.iloc[:, 2:-1].quantile(0.75)
IQR_test = Q3_test - Q1_test

# Determine the lower and upper bounds for outliers in the train data
lower_bound_train = Q1_train - 1.5 * IQR_train
upper_bound_train = Q3_train + 1.5 * IQR_train

# Determine the lower and upper bounds for outliers in the test data
lower_bound_test = Q1_test - 1.5 * IQR_test
upper_bound_test = Q3_test + 1.5 * IQR_test

# Identify outliers in the train data
outliers_train = ((train_data.iloc[:, 2:-1] < lower_bound_train) | (train_data.iloc[:, 2:-1] > upper_bound_train)).any(axis=1)

# Identify outliers in the test data
outliers_test = ((test_data.iloc[:, 2:-1] < lower_bound_test) | (test_data.iloc[:, 2:-1] > upper_bound_test)).any(axis=1)

# Filter out outliers
train_data_cleaned = train_data[~outliers_train]
test_data_cleaned = test_data[~outliers_test]

print("Original train data shape:", train_data.shape)
print("Cleaned train data shape:", train_data_cleaned.shape)

print("Original test data shape:", test_data.shape)
print("Cleaned test data shape:", test_data_cleaned.shape)


Original train data shape: (53759, 27)
Cleaned train data shape: (44067, 27)
Original test data shape: (33991, 26)
Cleaned test data shape: (28505, 26)


# SCALING

In [7]:
from sklearn.preprocessing import StandardScaler

# Normalize the data
scaler = StandardScaler()
train_data_cleaned.iloc[:, 2:-1] = scaler.fit_transform(train_data_cleaned.iloc[:, 2:-1])
test_data_cleaned.iloc[:, 2:] = scaler.transform(test_data_cleaned.iloc[:, 2:])

# MLP

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Select features and target
features = train_data_cleaned.columns[2:-1]
X_train = train_data_cleaned[features]
y_train = train_data_cleaned['RUL']

In [9]:
# Split the training data into training and validation sets
X_train_mlp, X_val_mlp, y_train_mlp, y_val_mlp = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Define the MLP Regressor model
mlp_model = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', max_iter=500, random_state=42)

In [10]:
# Train the MLP Regressor model
mlp_model.fit(X_train_mlp, y_train_mlp)

# Extract the last cycle for each engine in the test set
last_cycle_indices_mlp = test_data_cleaned.groupby('engine_id')['time_in_cycles'].idxmax()
X_test_last_cycles_mlp = test_data_cleaned.loc[last_cycle_indices_mlp, features]

In [11]:
# Make predictions for the last cycles of each engine
y_pred_last_cycles_mlp = mlp_model.predict(X_test_last_cycles_mlp)

# True RUL values from the provided RUL file
true_rul_mlp = rul_data.values.flatten()

In [12]:
# Calculate RMSE and R² Score
mse_mlp = mean_squared_error(true_rul_mlp, y_pred_last_cycles_mlp)
rmse_mlp = np.sqrt(mse_mlp)
r2_mlp = r2_score(true_rul_mlp, y_pred_last_cycles_mlp)
mae_mlp = mean_absolute_error(true_rul_mlp, y_pred_last_cycles_mlp)

print(f'MLP Regressor - Root Mean Squared Error (RMSE): {rmse_mlp}')
print(f'MLP Regressor - R² Score: {r2_mlp}')
print(f'MLP Regressor - Mean Absolute Error (MAE): {mae_mlp}')

MLP Regressor - Root Mean Squared Error (RMSE): 30.91384789431448
MLP Regressor - R² Score: 0.6695665454234958
MLP Regressor - Mean Absolute Error (MAE): 22.778436203552854


# BFE ON MLP

In [13]:
# Features and target
features = train_data_cleaned.columns[2:-1]
X_train = train_data_cleaned[features]
y_train = train_data_cleaned['RUL']

# Split the training data into training and validation sets
X_train_mlp, X_val_mlp, y_train_mlp, y_val_mlp = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [14]:
# Define the MLP Regressor model
mlp_model = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', max_iter=500, random_state=42)

# Function to train and evaluate the model
def train_and_evaluate_model(X_train, y_train, X_test, y_test, features):
    mlp_model.fit(X_train, y_train)
    y_pred = mlp_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    return rmse, r2, mae


In [15]:
# Extract the last cycle for each engine in the test set
last_cycle_indices_mlp = test_data_cleaned.groupby('engine_id')['time_in_cycles'].idxmax()
X_test_last_cycles_mlp = test_data_cleaned.loc[last_cycle_indices_mlp, features]

# True RUL values from the provided RUL file
true_rul_mlp = rul_data.values.flatten()

In [16]:
# Perform backward feature elimination
remaining_features = list(features)
best_rmse = float('inf')
best_features = remaining_features.copy()

while len(remaining_features) > 0:
    worst_feature = None
    for feature in remaining_features:
        # Create a temporary list of features without the current feature
        temp_features = remaining_features.copy()
        temp_features.remove(feature)
        
        # Select training and testing data with the temporary feature list
        X_train_temp = X_train_mlp[temp_features]
        X_test_temp = X_test_last_cycles_mlp[temp_features]
        
        # Train and evaluate the model
        rmse, r2, mae = train_and_evaluate_model(X_train_temp, y_train_mlp, X_test_temp, true_rul_mlp, temp_features)
        
        # Check if this feature is the worst
        if rmse < best_rmse:
            best_rmse = rmse
            worst_feature = feature
            best_r2 = r2
            best_mae = mae
    
    # Remove the worst feature
    if worst_feature is not None:
        remaining_features.remove(worst_feature)
        best_features = remaining_features.copy()
    else:
        break


In [17]:
# Print the results
print(f'Best features: {best_features}')
print(f'Best RMSE: {best_rmse}')
print(f'Best R²: {best_r2}')
print(f'Best MAE: {best_mae}')

Best features: ['altitude', 'mach_no', 'throttle_angle', 'fan_inlet_temp', 'LPC_outlet_temp', 'HPC_outlet_temp', 'LPT_outlet_temp', 'fan_inlet_pressure', 'bypass_duct_pressure', 'fan_speed', 'core_speed', 'engine_pressure_ratio', 'HPC_outlet_static_pressure', 'fuel_ps30_ratio', 'corrected_fan_speed', 'corrected_core_speed', 'bypass_ratio', 'burner_fuel_air_ratio', 'bleed_enthalpy', 'demanded_fan_speed', 'demanded_corrected_fan_speed', 'HPT_coolant_bleed']
Best RMSE: 29.833465397488652
Best R²: 0.6922590532470386
Best MAE: 21.568424851907714
