In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from scipy import stats
import numpy as np

# Load training and test data
train_data = pd.read_csv('/content/drive/MyDrive/CMAPSSData/train_FD002.txt', delim_whitespace=True, header=None)
test_data = pd.read_csv('/content/drive/MyDrive/CMAPSSData/test_FD002.txt', delim_whitespace=True, header=None)
rul_data = pd.read_csv('/content/drive/MyDrive/CMAPSSData/RUL_FD002.txt', delim_whitespace=True, header=None)

In [2]:
# Assign column names
column_names = ["engine_id", "time_in_cycles", "altitude", "mach_no", "throttle_angle", "fan_inlet_temp", "LPC_outlet_temp", "HPC_outlet_temp", "LPT_outlet_temp", "fan_inlet_pressure", "bypass_duct_pressure", "HPC_outlet_pressure", "fan_speed", "core_speed", "engine_pressure_ratio", "HPC_outlet_static_pressure", "fuel_ps30_ratio", "corrected_fan_speed", "corrected_core_speed", "bypass_ratio", "burner_fuel_air_ratio", "bleed_enthalpy", "demanded_fan_speed", "demanded_corrected_fan_speed", "HPT_coolant_bleed", "LPT_coolant_bleed"]
train_data.columns = column_names
test_data.columns = column_names

In [3]:
train_data.columns

Index(['engine_id', 'time_in_cycles', 'altitude', 'mach_no', 'throttle_angle',
       'fan_inlet_temp', 'LPC_outlet_temp', 'HPC_outlet_temp',
       'LPT_outlet_temp', 'fan_inlet_pressure', 'bypass_duct_pressure',
       'HPC_outlet_pressure', 'fan_speed', 'core_speed',
       'engine_pressure_ratio', 'HPC_outlet_static_pressure',
       'fuel_ps30_ratio', 'corrected_fan_speed', 'corrected_core_speed',
       'bypass_ratio', 'burner_fuel_air_ratio', 'bleed_enthalpy',
       'demanded_fan_speed', 'demanded_corrected_fan_speed',
       'HPT_coolant_bleed', 'LPT_coolant_bleed'],
      dtype='object')

In [4]:
test_data.columns

Index(['engine_id', 'time_in_cycles', 'altitude', 'mach_no', 'throttle_angle',
       'fan_inlet_temp', 'LPC_outlet_temp', 'HPC_outlet_temp',
       'LPT_outlet_temp', 'fan_inlet_pressure', 'bypass_duct_pressure',
       'HPC_outlet_pressure', 'fan_speed', 'core_speed',
       'engine_pressure_ratio', 'HPC_outlet_static_pressure',
       'fuel_ps30_ratio', 'corrected_fan_speed', 'corrected_core_speed',
       'bypass_ratio', 'burner_fuel_air_ratio', 'bleed_enthalpy',
       'demanded_fan_speed', 'demanded_corrected_fan_speed',
       'HPT_coolant_bleed', 'LPT_coolant_bleed'],
      dtype='object')

In [5]:
# Calculate RUL for training data
train_data['RUL'] = train_data.groupby('engine_id')['time_in_cycles'].transform(max) - train_data['time_in_cycles']

In [6]:
# Calculate Z-scores for sensor measurements and operational settings
z_scores = np.abs(stats.zscore(train_data.iloc[:, 2:-1]))

# Set a threshold for Z-score
threshold = 3

# Identify outliers
outliers = (z_scores > threshold).any(axis=1)

# Drop outliers
train_data_cleaned = train_data[~outliers]

In [7]:
# Calculate Z-scores for test data
z_scores_test = np.abs(stats.zscore(test_data.iloc[:, 2:]))

# Identify outliers in test data
outliers_test = (z_scores_test > threshold).any(axis=1)

# Drop outliers from test data
test_data_cleaned = test_data[~outliers_test]

In [8]:
# Normalize the data
scaler = StandardScaler()
train_data_cleaned.iloc[:, 2:-1] = scaler.fit_transform(train_data_cleaned.iloc[:, 2:-1])

In [9]:
# Separate features and target variable
X = train_data_cleaned.iloc[:, 2:-1]
y = train_data_cleaned['RUL']

In [11]:
import statsmodels.api as sm

# Add constant term for intercept
X = sm.add_constant(X)

# Perform backward feature elimination
def backward_elimination(X, y, significance_level=0.05):
    num_vars = len(X.columns)
    for i in range(num_vars):
        model = sm.OLS(y, X).fit()
        max_p_value = max(model.pvalues)
        if max_p_value > significance_level:
            excluded_feature = model.pvalues.idxmax()
            X = X.drop(columns=[excluded_feature])
        else:
            break
    return X.columns

# Apply backward elimination
selected_features = backward_elimination(X, y)
print("Selected features:", selected_features)


Selected features: Index(['const', 'altitude', 'throttle_angle', 'fan_inlet_temp',
       'LPC_outlet_temp', 'HPC_outlet_temp', 'LPT_outlet_temp',
       'fan_inlet_pressure', 'bypass_duct_pressure', 'HPC_outlet_pressure',
       'core_speed', 'engine_pressure_ratio', 'HPC_outlet_static_pressure',
       'fuel_ps30_ratio', 'corrected_core_speed', 'bypass_ratio',
       'bleed_enthalpy', 'demanded_fan_speed', 'demanded_corrected_fan_speed',
       'HPT_coolant_bleed', 'LPT_coolant_bleed'],
      dtype='object')


In [12]:
len(selected_features)

21

In [15]:
selected_features.shape

(20,)

In [14]:
selected_features = selected_features.drop('const')

# LR

In [16]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# # Selected features
# selected_features = ['altitude', 'throttle_angle', 'fan_inlet_temp', 'LPC_outlet_temp', 'HPC_outlet_temp',
#                      'LPT_outlet_temp', 'fan_inlet_pressure', 'bypass_duct_pressure', 'HPC_outlet_pressure',
#                      'core_speed', 'engine_pressure_ratio', 'HPC_outlet_static_pressure', 'fuel_ps30_ratio',
#                      'corrected_core_speed', 'bypass_ratio', 'bleed_enthalpy', 'demanded_fan_speed',
#                      'demanded_corrected_fan_speed', 'HPT_coolant_bleed', 'LPT_coolant_bleed']

# Select features and target
X_train_lr = train_data_cleaned[selected_features]
y_train_lr = train_data_cleaned['RUL']

# Train the Linear Regression model
model_lr = LinearRegression()
model_lr.fit(X_train_lr, y_train_lr)

# Extract the last cycle for each engine in the test set
last_cycle_indices_lr = test_data_cleaned.groupby('engine_id')['time_in_cycles'].idxmax()
X_test_last_cycles_lr = test_data_cleaned.loc[last_cycle_indices_lr, selected_features]

# Make predictions for the last cycles of each engine
y_pred_last_cycles_lr = model_lr.predict(X_test_last_cycles_lr)

# True RUL values from the provided RUL file
true_rul_lr = rul_data.values.flatten()

# Calculate RMSE, MAE, and R² Score
mse_lr = mean_squared_error(true_rul_lr, y_pred_last_cycles_lr)
rmse_lr = np.sqrt(mse_lr)
r2_lr = r2_score(true_rul_lr, y_pred_last_cycles_lr)
mae_lr = mean_absolute_error(true_rul_lr, y_pred_last_cycles_lr)

print(f'Linear Regression - Root Mean Squared Error (RMSE): {rmse_lr}')
print(f'Linear Regression - R² Score: {r2_lr}')
print(f'Linear Regression - Mean Absolute Error (MAE): {mae_lr}')


Linear Regression - Root Mean Squared Error (RMSE): 20794399.51227535
Linear Regression - R² Score: -149510139442.38278
Linear Regression - Mean Absolute Error (MAE): 20750063.558615312


# RF

In [12]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Select features and target
X_train_rf = train_data_cleaned[selected_features]
y_train_rf = train_data_cleaned['RUL']

# Train the Random Forest Regressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_rf, y_train_rf)

# Extract the last cycle for each engine in the test set
last_cycle_indices_rf = test_data_cleaned.groupby('engine_id')['time_in_cycles'].idxmax()
X_test_last_cycles_rf = test_data_cleaned.loc[last_cycle_indices_rf, selected_features]

# Make predictions for the last cycles of each engine
y_pred_last_cycles_rf = rf_model.predict(X_test_last_cycles_rf)

# True RUL values from the provided RUL file
true_rul_rf = rul_data.values.flatten()

# Calculate RMSE, MAE, and R² Score
mse_rf = mean_squared_error(true_rul_rf, y_pred_last_cycles_rf)
rmse_rf = np.sqrt(mse_rf)
r2_rf = r2_score(true_rul_rf, y_pred_last_cycles_rf)
mae_rf = mean_absolute_error(true_rul_rf, y_pred_last_cycles_rf)

print(f'Random Forest - Root Mean Squared Error (RMSE): {rmse_rf}')
print(f'Random Forest - R² Score: {r2_rf}')
print(f'Random Forest - Mean Absolute Error (MAE): {mae_rf}')


Random Forest - Root Mean Squared Error (RMSE): 76.09468617767449
Random Forest - R² Score: -1.0021035696130807
Random Forest - Mean Absolute Error (MAE): 60.53030888030887


# SVR

In [None]:
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Select features and target
X_train_svr = train_data_cleaned[selected_features]
y_train_svr = train_data_cleaned['RUL']

# Train the SVR model
svr_model = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1)
svr_model.fit(X_train_svr, y_train_svr)

# Extract the last cycle for each engine in the test set
last_cycle_indices_svr = test_data_cleaned.groupby('engine_id')['time_in_cycles'].idxmax()
X_test_last_cycles_svr = test_data_cleaned.loc[last_cycle_indices_svr, selected_features]

# Make predictions for the last cycles of each engine
y_pred_last_cycles_svr = svr_model.predict(X_test_last_cycles_svr)

# True RUL values from the provided RUL file
true_rul_svr = rul_data.values.flatten()

# Calculate metrics for the training set
y_train_pred_svr = svr_model.predict(X_train_svr)
train_mse_svr = mean_squared_error(y_train_svr, y_train_pred_svr)
train_rmse_svr = np.sqrt(train_mse_svr)
train_mae_svr = mean_absolute_error(y_train_svr, y_train_pred_svr)
train_r2_svr = r2_score(y_train_svr, y_train_pred_svr)

# Calculate metrics for the test set
test_mse_svr = mean_squared_error(true_rul_svr, y_pred_last_cycles_svr)
test_rmse_svr = np.sqrt(test_mse_svr)
test_mae_svr = mean_absolute_error(true_rul_svr, y_pred_last_cycles_svr)
test_r2_svr = r2_score(true_rul_svr, y_pred_last_cycles_svr)

# Print training metrics
print(f'Training Root Mean Squared Error (RMSE): {train_rmse_svr}')
print(f'Training Mean Absolute Error (MAE): {train_mae_svr}')
print(f'Training R² Score: {train_r2_svr}')

# Print testing metrics
print(f'Testing Root Mean Squared Error (RMSE): {test_rmse_svr}')
print(f'Testing Mean Absolute Error (MAE): {test_mae_svr}')
print(f'Testing R² Score: {test_r2_svr}')


# XGB

In [None]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Select features and target
X_train_xgb = train_data_cleaned[selected_features]
y_train_xgb = train_data_cleaned['RUL']

# Train the XGBoost model
model_xgb = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=100, max_depth=5, learning_rate=0.1)
model_xgb.fit(X_train_xgb, y_train_xgb)

# Extract the last cycle for each engine in the test set
last_cycle_indices_xgb = test_data_cleaned.groupby('engine_id')['time_in_cycles'].idxmax()
X_test_last_cycles_xgb = test_data_cleaned.loc[last_cycle_indices_xgb, selected_features]

# Make predictions for the last cycles of each engine
y_pred_last_cycles_xgb = model_xgb.predict(X_test_last_cycles_xgb)

# True RUL values from the provided RUL file
true_rul_xgb = rul_data.values.flatten()

# Calculate RMSE, MAE, and R² Score
mse_xgb = mean_squared_error(true_rul_xgb, y_pred_last_cycles_xgb)
rmse_xgb = np.sqrt(mse_xgb)
r2_xgb = r2_score(true_rul_xgb, y_pred_last_cycles_xgb)
mae_xgb = mean_absolute_error(true_rul_xgb, y_pred_last_cycles_xgb)

print(f'XGBoost - Root Mean Squared Error (RMSE): {rmse_xgb}')
print(f'XGBoost - R² Score: {r2_xgb}')
print(f'XGBoost - Mean Absolute Error (MAE): {mae_xgb}')
