In [1]:
# Mount Google Drive at /content/drive so files stored under MyDrive can be read from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# DATASET INITIALIZATION

In [2]:
import pandas as pd
import numpy as np

# Load training and test data
# The three files are read as whitespace-separated text without a header row.
# `sep=r'\s+'` is the documented equivalent of `delim_whitespace=True`, which
# pandas deprecated in 2.2; using it keeps the load free of FutureWarnings.
DATA_DIR = '/content/drive/MyDrive/CMAPSSData'
trains = pd.read_csv(f'{DATA_DIR}/train_FD002.txt', sep=r'\s+', header=None)
tests = pd.read_csv(f'{DATA_DIR}/test_FD002.txt', sep=r'\s+', header=None)
# Ground-truth remaining-useful-life values used to score the test predictions.
rul_data = pd.read_csv(f'{DATA_DIR}/RUL_FD002.txt', sep=r'\s+', header=None)

In [3]:
# Labels for the 26 header-less columns, in file order.
column_names = [
    # identifiers
    "engine_id", "time_in_cycles",
    # operating condition columns
    "altitude", "mach_no", "throttle_angle",
    # temperatures
    "fan_inlet_temp", "LPC_outlet_temp", "HPC_outlet_temp", "LPT_outlet_temp",
    # pressures
    "fan_inlet_pressure", "bypass_duct_pressure", "HPC_outlet_pressure",
    # speeds and ratios
    "fan_speed", "core_speed", "engine_pressure_ratio",
    "HPC_outlet_static_pressure", "fuel_ps30_ratio",
    "corrected_fan_speed", "corrected_core_speed",
    "bypass_ratio", "burner_fuel_air_ratio", "bleed_enthalpy",
    # demanded speeds and coolant bleeds
    "demanded_fan_speed", "demanded_corrected_fan_speed",
    "HPT_coolant_bleed", "LPT_coolant_bleed",
]

# Training and test frames share one layout, so both receive the same labels.
tests.columns = column_names
trains.columns = column_names

In [4]:
# Remaining useful life of each row: the highest cycle recorded for that engine
# minus the row's own cycle, so the last recorded cycle of every engine gets 0.
# The aggregation is named by string ('max') rather than passing the Python
# builtin `max`, which pandas >= 2.1 flags with a FutureWarning in transform().
trains['RUL'] = (
    trains.groupby('engine_id')['time_in_cycles'].transform('max')
    - trains['time_in_cycles']
)

# SCALING

In [5]:
from sklearn.preprocessing import StandardScaler

# Normalize the data
# Columns to scale: everything after the two id columns and before the trailing
# RUL target. The scaler is fitted on the training frame only and then reused
# unchanged on the test frame.
scaled_cols = trains.columns[2:-1]
scaler = StandardScaler()

# Assign by column label instead of `iloc[...] = ...`: label assignment replaces
# the selected columns with the new float arrays, whereas the positional form
# writes floats into integer-typed columns in place, which newer pandas
# deprecates (silent upcasting on setitem). Selecting the test columns by the
# same labels also guarantees both frames are scaled over identical columns.
trains[scaled_cols] = scaler.fit_transform(trains[scaled_cols])
tests[scaled_cols] = scaler.transform(tests[scaled_cols])

# LINEAR REGRESSION

In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Inputs: every column between the two id columns and the trailing RUL target.
features = trains.columns[2:-1]
X_train_lr = trains.loc[:, features]
y_train_lr = trains['RUL']

# Fit the linear model on all training rows (fit() hands back the estimator).
model_lr = LinearRegression().fit(X_train_lr, y_train_lr)

# Reference RUL values from the RUL file, flattened to one dimension.
# Assumes the file is ordered by engine_id, like the groupby result below.
true_rul_lr = rul_data.values.flatten()

# Score each test engine once, on the row holding its highest cycle number.
last_cycle_indices_lr = tests.groupby('engine_id')['time_in_cycles'].idxmax()
X_test_last_cycles_lr = tests.loc[last_cycle_indices_lr, features]
y_pred_last_cycles_lr = model_lr.predict(X_test_last_cycles_lr)

# Error metrics: RMSE (derived from MSE), MAE and R².
mse_lr = mean_squared_error(y_true=true_rul_lr, y_pred=y_pred_last_cycles_lr)
rmse_lr = np.sqrt(mse_lr)
mae_lr = mean_absolute_error(y_true=true_rul_lr, y_pred=y_pred_last_cycles_lr)
r2_lr = r2_score(y_true=true_rul_lr, y_pred=y_pred_last_cycles_lr)

print(f'Linear Regression - Root Mean Squared Error (RMSE): {rmse_lr}')
print(f'Linear Regression - R² Score: {r2_lr}')
print(f'Linear Regression - Mean Absolute Error (MAE): {mae_lr}')

Linear Regression - Root Mean Squared Error (RMSE): 33.94272528221868
Linear Regression - R² Score: 0.601644059806445
Linear Regression - Mean Absolute Error (MAE): 27.54764657003154


# RANDOM FOREST

In [7]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Inputs: every column between the two id columns and the trailing RUL target.
features = trains.columns[2:-1]
X_train_rf, y_train_rf = trains[features], trains['RUL']

# 100-tree forest with a fixed seed so repeated runs build the same trees.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_rf, y_train_rf)

# Reference RUL values from the RUL file, flattened to one dimension.
true_rul_rf = rul_data.values.flatten()

# One evaluation row per test engine: the row with its highest cycle number.
last_cycle_indices_rf = tests.groupby('engine_id')['time_in_cycles'].idxmax()
X_test_last_cycles_rf = tests.loc[last_cycle_indices_rf, features]
y_pred_last_cycles_rf = rf_model.predict(X_test_last_cycles_rf)

# Error metrics: RMSE (derived from MSE), MAE and R².
mse_rf = mean_squared_error(y_true=true_rul_rf, y_pred=y_pred_last_cycles_rf)
rmse_rf = np.sqrt(mse_rf)
mae_rf = mean_absolute_error(y_true=true_rul_rf, y_pred=y_pred_last_cycles_rf)
r2_rf = r2_score(y_true=true_rul_rf, y_pred=y_pred_last_cycles_rf)

print(f'Random Forest - Root Mean Squared Error (RMSE): {rmse_rf}')
print(f'Random Forest - R² Score: {r2_rf}')
print(f'Random Forest - Mean Absolute Error (MAE): {mae_rf}')

Random Forest - Root Mean Squared Error (RMSE): 31.424792544844056
Random Forest - R² Score: 0.6585534586197175
Random Forest - Mean Absolute Error (MAE): 23.08945945945946


# SUPPORT VECTOR REGRESSOR (SVR)

In [8]:
from sklearn.svm import SVR

# Inputs: every column between the two id columns and the trailing RUL target.
features = trains.columns[2:-1]
X_train, y_train = trains[features], trains['RUL']

# RBF-kernel support vector regressor with hand-picked hyperparameters.
svr_model = SVR(
    kernel='rbf',
    C=100,
    gamma=0.1,
    epsilon=0.1,
)
# Kept as the final expression so the fitted estimator is still the cell's output.
svr_model.fit(X_train, y_train)

In [9]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# --- Training-set fit quality -------------------------------------------------
# Predict on the same rows the SVR was fitted on.
y_train_pred = svr_model.predict(X_train)

train_mse = mean_squared_error(y_true=y_train, y_pred=y_train_pred)
train_rmse = np.sqrt(train_mse)
train_mae = mean_absolute_error(y_true=y_train, y_pred=y_train_pred)
train_r2 = r2_score(y_true=y_train, y_pred=y_train_pred)

# --- Test-set quality ---------------------------------------------------------
# Reference RUL values from the RUL file, flattened to one dimension.
true_rul = rul_data.values.flatten()

# One evaluation row per test engine: the row with its highest cycle number.
last_cycle_indices = tests.groupby('engine_id')['time_in_cycles'].idxmax()
X_test_last_cycles = tests.loc[last_cycle_indices, features]
y_pred_last_cycles = svr_model.predict(X_test_last_cycles)

test_mse = mean_squared_error(y_true=true_rul, y_pred=y_pred_last_cycles)
test_rmse = np.sqrt(test_mse)
test_mae = mean_absolute_error(y_true=true_rul, y_pred=y_pred_last_cycles)
test_r2 = r2_score(y_true=true_rul, y_pred=y_pred_last_cycles)

# --- Report: training figures first, then testing -----------------------------
print(f'Training Root Mean Squared Error (RMSE): {train_rmse}')
print(f'Training Mean Absolute Error (MAE): {train_mae}')
print(f'Training R² Score: {train_r2}')

print(f'Testing Root Mean Squared Error (RMSE): {test_rmse}')
print(f'Testing Mean Absolute Error (MAE): {test_mae}')
print(f'Testing R² Score: {test_r2}')

Training Root Mean Squared Error (RMSE): 44.92737333992474
Training Mean Absolute Error (MAE): 32.58445169661466
Training R² Score: 0.5782434060302126
Testing Root Mean Squared Error (RMSE): 30.225394340028895
Testing Mean Absolute Error (MAE): 22.571016561937093
Testing R² Score: 0.6841202172428678


# XGBOOST

In [10]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Gradient-boosted trees with a squared-error objective. This cell reuses the
# training matrix, last-cycle test rows and reference RUL vector that the
# linear-regression cell created (the *_lr variables).
model_xgb = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    max_depth=5,
    learning_rate=0.1,
)
model_xgb.fit(X_train_lr, y_train_lr)

# One prediction per test engine, taken at its last recorded cycle.
y_pred_last_cycles_xgb = model_xgb.predict(X_test_last_cycles_lr)

# Error metrics: RMSE (derived from MSE), MAE and R².
mse_xgb = mean_squared_error(y_true=true_rul_lr, y_pred=y_pred_last_cycles_xgb)
rmse_xgb = np.sqrt(mse_xgb)
mae_xgb = mean_absolute_error(y_true=true_rul_lr, y_pred=y_pred_last_cycles_xgb)
r2_xgb = r2_score(y_true=true_rul_lr, y_pred=y_pred_last_cycles_xgb)

print(f'XGBoost - Root Mean Squared Error (RMSE): {rmse_xgb}')
print(f'XGBoost - R² Score: {r2_xgb}')
print(f'XGBoost - Mean Absolute Error (MAE): {mae_xgb}')

XGBoost - Root Mean Squared Error (RMSE): 30.766703389572502
XGBoost - R² Score: 0.6727046699529016
XGBoost - Mean Absolute Error (MAE): 23.045762931287978


# POLYNOMIAL REGRESSION

In [11]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Inputs: every column between the two id columns and the trailing RUL target.
features = trains.columns[2:-1]
X_train_poly, y_train_poly = trains[features], trains['RUL']

# Expand the inputs into all polynomial terms up to degree 2; the expansion is
# fitted on the training rows and reused as-is for the test rows below.
poly = PolynomialFeatures(degree=2)
X_train_poly_transformed = poly.fit_transform(X_train_poly)

# Linear regression on the expanded terms (fit() hands back the estimator).
model_poly = LinearRegression().fit(X_train_poly_transformed, y_train_poly)

# Reference RUL values from the RUL file, flattened to one dimension.
true_rul_poly = rul_data.values.flatten()

# One evaluation row per test engine: the row with its highest cycle number.
last_cycle_indices_poly = tests.groupby('engine_id')['time_in_cycles'].idxmax()
X_test_last_cycles_poly = tests.loc[last_cycle_indices_poly, features]
X_test_last_cycles_poly_transformed = poly.transform(X_test_last_cycles_poly)
y_pred_last_cycles_poly = model_poly.predict(X_test_last_cycles_poly_transformed)

# Error metrics: RMSE (derived from MSE), MAE and R².
mse_poly = mean_squared_error(y_true=true_rul_poly, y_pred=y_pred_last_cycles_poly)
rmse_poly = np.sqrt(mse_poly)
mae_poly = mean_absolute_error(y_true=true_rul_poly, y_pred=y_pred_last_cycles_poly)
r2_poly = r2_score(y_true=true_rul_poly, y_pred=y_pred_last_cycles_poly)

print(f'Polynomial Regression - Root Mean Squared Error (RMSE) -> degree-2 : {rmse_poly}')
print(f'Polynomial Regression - R² Score -> degree-2 : {r2_poly}')
print(f'Polynomial Regression - Mean Absolute Error (MAE) -> degree-2 : {mae_poly}')

Polynomial Regression - Root Mean Squared Error (RMSE) -> degree-2 : 31.99948547953702
Polynomial Regression - R² Score -> degree-2 : 0.6459505940457477
Polynomial Regression - Mean Absolute Error (MAE) -> degree-2 : 24.19122580643096


In [12]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Inputs: every column between the two id columns and the trailing RUL target.
features3 = trains.columns[2:-1]
X_train_poly3, y_train_poly3 = trains[features3], trains['RUL']

# Expand the inputs into all polynomial terms up to degree 3; the expansion is
# fitted on the training rows and reused as-is for the test rows below.
poly3 = PolynomialFeatures(degree=3)
X_train_poly_transformed3 = poly3.fit_transform(X_train_poly3)

# Linear regression on the expanded terms (fit() hands back the estimator).
model_poly3 = LinearRegression().fit(X_train_poly_transformed3, y_train_poly3)

# Reference RUL values from the RUL file, flattened to one dimension.
true_rul_poly3 = rul_data.values.flatten()

# One evaluation row per test engine: the row with its highest cycle number.
last_cycle_indices_poly3 = tests.groupby('engine_id')['time_in_cycles'].idxmax()
X_test_last_cycles_poly3 = tests.loc[last_cycle_indices_poly3, features3]
X_test_last_cycles_poly_transformed3 = poly3.transform(X_test_last_cycles_poly3)
y_pred_last_cycles_poly3 = model_poly3.predict(X_test_last_cycles_poly_transformed3)

# Error metrics: RMSE (derived from MSE), MAE and R².
mse_poly3 = mean_squared_error(y_true=true_rul_poly3, y_pred=y_pred_last_cycles_poly3)
rmse_poly3 = np.sqrt(mse_poly3)
mae_poly3 = mean_absolute_error(y_true=true_rul_poly3, y_pred=y_pred_last_cycles_poly3)
r2_poly3 = r2_score(y_true=true_rul_poly3, y_pred=y_pred_last_cycles_poly3)

print(f'Polynomial Regression - Root Mean Squared Error (RMSE)-> degree-3 : {rmse_poly3}')
print(f'Polynomial Regression - R² Score-> degree-3 : {r2_poly3}')
print(f'Polynomial Regression - Mean Absolute Error (MAE) -> degree-3 : {mae_poly3}')

Polynomial Regression - Root Mean Squared Error (RMSE)-> degree-3 : 31.692539760057144
Polynomial Regression - R² Score-> degree-3 : 0.652710248862608
Polynomial Regression - Mean Absolute Error (MAE) -> degree-3 : 23.932731549251955
