In [None]:
import numpy as np
import pandas as pd
from xgboost import XGBRegressor

# Training Data Processing

In [None]:
# Load the FD001 training CSV from the CMAPSSData directory; later cells rely
# on its `unit_number` and `cycles` columns.
df = pd.read_csv('CMAPSSData/train_FD001.csv')
# Bare last expression so the notebook renders the frame for a quick visual check.
df

In [None]:
def piecewise_linear_rul(df, rul_max=128):
    """Attach a capped remaining-useful-life (RUL) column to ``df``.

    Within each ``unit_number`` group, a row's RUL is the group's largest
    ``cycles`` value minus the row's own ``cycles``. Any value above
    ``rul_max`` is replaced by ``rul_max``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``unit_number`` and ``cycles`` columns. It is modified
        in place: the ``RUL`` column is created or overwritten.
    rul_max : int, default 128
        Upper bound applied to the computed RUL.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    # Broadcast each unit's final cycle back onto its rows.
    last_cycle = df.groupby('unit_number')['cycles'].transform('max')
    # Linear countdown to the final cycle, flattened at rul_max.
    df['RUL'] = (last_cycle - df['cycles']).clip(upper=rul_max)
    return df


def natural_rul(df):
    """Attach an uncapped remaining-useful-life (RUL) column to ``df``.

    Within each ``unit_number`` group, a row's RUL is the group's largest
    ``cycles`` value minus the row's own ``cycles``.

    The result is written to ``df['RUL']`` in place (creating or overwriting
    the column), and the same ``df`` object is returned.
    """
    # Broadcast each unit's final cycle back onto its rows.
    last_cycle = df.groupby('unit_number')['cycles'].transform('max')
    df['RUL'] = last_cycle - df['cycles']
    return df

# Add the capped 'RUL' target to `df` in place (default cap: rul_max=128).
# As the cell's last expression, the returned frame is also displayed.
piecewise_linear_rul(df)
# Uncapped alternative: use the call below instead for the plain linear RUL.
# natural_rul(df)

In [None]:
# Feature matrix: every column except the unit identifier, the cycle counter
# and the target itself.
X_xgb = df.drop(columns=['unit_number', 'cycles', 'RUL'])
# Shift the target by one so it is strictly positive. RUL is 0 on each unit's
# final cycle, and this shift is presumably there because the 'reg:gamma'
# objective used for training requires positive labels (confirm). Predictions
# made on this scale need 1 subtracted to get back to RUL.
y_xgb = df['RUL'] + 1

# Missing-value check. `isna()` works for every column dtype, whereas
# `np.isnan` raises TypeError as soon as a non-numeric column is present.
print(X_xgb.isna().any())  # per-column flags
print(y_xgb.isna().any())  # single flag for the target

In [None]:
from sklearn.model_selection import GridSearchCV

# Base XGBoost regressor: these settings stay fixed while the grid below is searched.
xgb = XGBRegressor(
    booster='gbtree',
    objective='reg:gamma',
    gamma=0.1,
    reg_lambda=3,
    subsample=0.7,
)

# Hyperparameter values to search: 4 * 8 * 5 * 5 = 800 combinations,
# each fitted once per CV fold.
param_grid = {
    'n_estimators': [50, 250, 400, 500],
    'max_depth': [3, 5, 7, 9, 13, 15, 17, 20],
    'learning_rate': [0.001, 0.05, 0.1, 0.2, 0.3],
    'min_child_weight': [1, 3, 5, 7, 10],
}

# Exhaustive 3-fold grid search scored by negative mean squared error.
# `refit` is left at its default (True), so after the search the best
# configuration is refitted once on all of X_xgb / y_xgb.
grid_search = GridSearchCV(
    estimator=xgb,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=2,
)
grid_search.fit(X_xgb, y_xgb)

# Best hyperparameter combination found by the search.
best_params = grid_search.best_params_
print("Best Parameters:", best_params)

# Final model: reuse the estimator GridSearchCV already refitted on the full
# data. Building `XGBRegressor(**best_params)` from scratch would drop the
# fixed base settings above (objective, gamma, reg_lambda, subsample, booster),
# so the trained model would differ from the configuration that was actually
# tuned. It would also repeat a fit that has already been done.
xgb_model = grid_search.best_estimator_

In [None]:
# Names of the model input columns: everything in `df` except the unit
# identifier, the cycle counter and the target, in their original order.
non_feature_columns = {'unit_number', 'cycles', 'RUL'}
features = [name for name in df.columns if name not in non_feature_columns]

In [None]:
# Keep the `top_n` features with the highest importance in the trained model.
# (Alternative that was tried: keep every feature whose importance exceeds a
# fixed threshold such as 0.02, instead of a fixed count.)
top_n = 12  # number of best features to keep

# Feature positions ordered from most to least important.
importances = xgb_model.feature_importances_
sorted_indices = np.argsort(importances)[::-1]

# Map the leading positions back to column names.
top_indices = sorted_indices[:top_n]
selected_features = np.array(features)[top_indices]

print("Fitur yang terpilih:", selected_features)
