In [37]:
import numpy as np
import pandas as pd
from sklearn.multioutput import MultiOutputRegressor
from sklearn.model_selection import cross_val_score
import xgboost as xgb

In [38]:
# Read the dataset from 'dataset.csv' (path is relative to the notebook's
# working directory) and show it as the cell output.
data = pd.read_csv(filepath_or_buffer='dataset.csv')
data

Unnamed: 0,bf,tf,hw,tw,a,Ss,fyf,fuf,fyw,fuw,Pu,du
0,310,15,1840,14,5130,880,415,520,380,485,1417.299072,54.128886
1,190,15,1150,14,2630,590,290,415,345,450,1436.082764,22.649866
2,150,13,900,12,2100,210,415,520,415,520,988.543457,21.581579
3,250,17,1510,12,4240,1530,450,550,345,450,1635.945557,47.000324
4,220,17,1290,9,2670,960,415,520,415,520,1391.092896,38.423642
...,...,...,...,...,...,...,...,...,...,...,...,...
495,120,13,750,11,1190,490,450,550,380,485,1447.612183,13.472869
496,150,18,910,5,2560,1250,345,450,290,415,465.841492,24.202325
497,150,14,900,13,2060,1020,380,485,290,415,1758.202881,16.323189
498,120,8,710,8,1760,640,345,450,380,485,576.082519,17.450336


In [39]:
# Split the frame column-wise: the last two columns are the regression
# targets (y), every column before them is a model input (X).
X, y = data.iloc[:, :-2], data.iloc[:, -2:]
y

Unnamed: 0,Pu,du
0,1417.299072,54.128886
1,1436.082764,22.649866
2,988.543457,21.581579
3,1635.945557,47.000324
4,1391.092896,38.423642
...,...,...
495,1447.612183,13.472869
496,465.841492,24.202325
497,1758.202881,16.323189
498,576.082519,17.450336


In [40]:
# Hold out 15% of the rows as a test set; the fixed random_state keeps the
# partition identical between runs.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)

In [41]:
# Min-max scale inputs and targets. Each scaler is fitted on the training
# split only and then applied to the test split.
# NOTE: this cell overwrites X_train/X_test/y_train/y_test with their scaled
# versions, so re-run the train-test split cell before re-running this one.
from sklearn.preprocessing import MinMaxScaler

scaler_x, scaler_y = MinMaxScaler(), MinMaxScaler()

# Inputs
X_train = scaler_x.fit_transform(X_train)
X_test = scaler_x.transform(X_test)

# Targets
y_train = scaler_y.fit_transform(y_train)
y_test = scaler_y.transform(y_test)

In [42]:
# Show the test feature matrix after scaling with scaler_x.
X_test

array([[0.95652174, 0.73333333, 0.95804196, 0.4       , 0.66393443,
        0.49361702, 0.        , 0.        , 0.34375   , 0.25925926],
       [0.65217391, 0.8       , 0.66433566, 1.        , 0.28688525,
        0.15319149, 0.5625    , 0.51851852, 0.        , 0.        ],
       [0.65217391, 0.53333333, 0.65734266, 0.2       , 0.33811475,
        0.14468085, 0.5625    , 0.51851852, 0.78125   , 0.77777778],
       [0.91304348, 0.86666667, 0.8951049 , 0.6       , 0.33811475,
        0.22978723, 1.        , 1.        , 1.        , 1.        ],
       [0.08695652, 0.4       , 0.1048951 , 0.2       , 0.19467213,
        0.14042553, 0.5625    , 0.51851852, 0.        , 0.        ],
       [0.86956522, 0.93333333, 0.85314685, 0.7       , 0.7192623 ,
        0.55319149, 0.34375   , 0.25925926, 0.78125   , 0.77777778],
       [0.7826087 , 1.        , 0.77622378, 0.5       , 0.29303279,
        0.28085106, 0.78125   , 0.77777778, 0.34375   , 0.25925926],
       [0.69565217, 0.46666667, 0.7062937

In [43]:
# !pip install mealpy

In [44]:
from mealpy import FloatVar, DE
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from xgboost import XGBRegressor
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score
import joblib


def decode_hyperparams(x):
    """Turn a continuous search-space position into XGBoost keyword arguments.

    Args:
        x: sequence of three numbers ``[n_estimators, max_depth, learning_rate]``.

    Returns:
        dict with ``n_estimators`` and ``max_depth`` truncated to ``int`` and
        ``learning_rate`` as a ``float``.
    """
    return {
        "n_estimators": int(x[0]),
        "max_depth": int(x[1]),
        "learning_rate": float(x[2]),
    }


def build_model(x):
    """Build the multi-output XGBoost regressor described by position ``x``.

    The same factory is used for the search objective and for the final fit,
    so the persisted model is exactly the estimator type that was tuned.
    """
    return MultiOutputRegressor(XGBRegressor(random_state=42, **decode_hyperparams(x)))


def objective_func(x):
    """Return the mean 5-fold cross-validated RMSE on the training split.

    Args:
        x: candidate position ``[n_estimators, max_depth, learning_rate]``.

    Returns:
        Mean RMSE over the folds (lower is better). The scorer yields negated
        RMSE values, hence the sign flip.
    """
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    cv_scores_rmse = cross_val_score(build_model(x), X_train, y_train, cv=kf, scoring='neg_root_mean_squared_error')
    return -cv_scores_rmse.mean()


# Search bounds, in order: n_estimators, max_depth, learning_rate
lb = [10, 1, 0.01]
ub = [200, 10, 0.3]
problem_dict = {
    "obj_func": objective_func,
    "bounds": FloatVar(lb=lb, ub=ub),
    "minmax": "min",
    "save_population": True
}
# NOTE(review): max_early_stop (50) is larger than the epoch budget (20), so
# this early-stopping criterion presumably never triggers - confirm intent.
term_dict = {"max_early_stop": 50}

optimizer = DE.OriginalDE(epoch=20, pop_size=5)
# Seed the optimizer so the search (and the reported best solution) is
# reproducible on a fresh kernel.
optimizer.solve(problem_dict, termination=term_dict, seed=42)
print(optimizer.g_best.solution)

# Build the final model from the optimized hyper-parameters and fit it on the
# whole training split.
optimal_model = build_model(optimizer.g_best.solution)
optimal_model.fit(X_train, y_train)

# Persist the fitted scalers and the trained model for later cells
joblib.dump(scaler_x, 'scaler_x')
joblib.dump(scaler_y, 'scaler_y')
joblib.dump(optimal_model, 'XGB-DE')

print(optimizer.g_best.target.fitness)
# The raw (un-truncated) best position is stored
best_solution = np.array(optimizer.g_best.solution)
np.savetxt('best_solution_xgb.csv', best_solution, delimiter=',', comments='')
print(best_solution)



INFO:mealpy.evolutionary_based.DE.OriginalDE:Solving single objective optimization problem.
INFO:mealpy.evolutionary_based.DE.OriginalDE:>>>Problem: P, Epoch: 1, Current best: 0.0437303486895619, Global best: 0.0437303486895619, Runtime: 6.76881 seconds
INFO:mealpy.evolutionary_based.DE.OriginalDE:>>>Problem: P, Epoch: 2, Current best: 0.04369079638071912, Global best: 0.04369079638071912, Runtime: 3.39928 seconds
INFO:mealpy.evolutionary_based.DE.OriginalDE:>>>Problem: P, Epoch: 3, Current best: 0.04369079638071912, Global best: 0.04369079638071912, Runtime: 2.78742 seconds
INFO:mealpy.evolutionary_based.DE.OriginalDE:>>>Problem: P, Epoch: 4, Current best: 0.04347386108789762, Global best: 0.04347386108789762, Runtime: 5.67623 seconds
INFO:mealpy.evolutionary_based.DE.OriginalDE:>>>Problem: P, Epoch: 5, Current best: 0.04347386108789762, Global best: 0.04347386108789762, Runtime: 3.54384 seconds
INFO:mealpy.evolutionary_based.DE.OriginalDE:>>>Problem: P, Epoch: 6, Current best: 0.0434

[168.36906679   4.64296141   0.21779446]
0.04345334375569536
[168.36906679   4.64296141   0.21779446]


In [45]:
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score

# Reload the fitted scalers and the trained model from disk.
# Each scaler is bound to its own name: assigning the target scaler to
# `scaler_x` would clobber the feature scaler and leave `scaler_y` stale.
scaler_x = joblib.load('scaler_x')
scaler_y = joblib.load('scaler_y')
model = joblib.load('XGB-DE')

y_pred_train = model.predict(X_train)
y_pred_test = model.predict(X_test)


def report_metrics(label, y_true, y_pred):
    """Compute and print RMSE, MAE and R-squared for one data split.

    Args:
        label: heading printed before the metrics (e.g. "Train").
        y_true: reference target values.
        y_pred: model predictions with the same shape as ``y_true``.

    Returns:
        Tuple ``(rmse, mae, r2)``.
    """
    rmse = root_mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print(f"{label}:")
    print("Root Mean Squared Error:", rmse)
    print("Mean Absolute Error:", mae)
    print("R-squared:", r2)
    return rmse, mae, r2


# Evaluate with metrics. y_train / y_test hold the scaled targets here, so
# the reported errors are in scaled units.
rmse_train, mae_train, r2_train = report_metrics("Train", y_train, y_pred_train)
rmse_test, mae_test, r2_test = report_metrics("Test", y_test, y_pred_test)

Train:
Root Mean Squared Error: 0.0029314904954243857
Mean Absolute Error: 0.002084867314452466
R-squared: 0.9997785652465115
Test:
Root Mean Squared Error: 0.04371379082856712
Mean Absolute Error: 0.032995507659804885
R-squared: 0.9470757082644914
