# HyperXGBoost Regression Test

In [1]:
# Third-party libraries used throughout the notebook.
import pandas as pd
import plotly.express as px
import seaborn as sns

# Model wrapper exercised by this notebook.
from HyperXgboost import HyperXGBoost

# Make DataFrame.plot() render through the plotly backend.
pd.set_option("plotting.backend", "plotly")

In [2]:
# Load the seaborn "diamonds" dataset and separate the target column
# ("price") from the remaining feature columns.
data = sns.load_dataset('diamonds')
y = data['price']
X = data.drop(columns='price')

# Replace every categorical feature with its integer category codes.
X = X.assign(
    **{name: X[name].cat.codes for name in X.select_dtypes('category').columns}
)

# Report the resulting shapes as a quick sanity check.
print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")

Shape of X: (53940, 9)
Shape of y: (53940,)


In [3]:
# Instantiate the HyperXGBoost wrapper.
# n_jobs = 1 presumably limits the work to a single worker — confirm against the class.
m = HyperXGBoost(n_jobs = 1)

# Create the training and testing datasets from the encoded features and target.
# test_size = 0.2 presumably holds out 20% of the rows — TODO confirm.
# NOTE(review): no seed is passed here, so the split (and the search below) may
# differ between runs unless HyperXGBoost fixes one internally — confirm.
X_train, X_test, y_train, y_test = m.train_test_split_Xy(X,y, test_size = 0.2)

# Run the fit and predict methods.
# fit() receives both splits in the order (X_train, X_test, y_train, y_test) and
# is asked for 10 study trials; the captured output shows a study named
# "regression" with per-trial logs, followed by MAE / MSE / RMSE / R2_Score.
# predict() is called without arguments — presumably it scores the X_test given
# to fit(); verify against the class. The result is converted to a plain list
# so it can be attached as a column next to y_test later on.
study = m.fit(X_train, X_test, y_train, y_test, study_n_trials = 10)
y_pred = m.predict().tolist()


[32m[I 2023-03-31 15:17:09,027][0m A new study created in memory with name: regression[0m
[32m[I 2023-03-31 15:17:52,748][0m Trial 0 finished with value: 1270950.8692760346 and parameters: {'max_depth': 14, 'learning_rate': 0.01, 'n_estimators': 2000, 'min_child_weight': 2, 'gamma': 0.45901254214736387, 'subsample': 0.8, 'colsample_bytree': 0.19585248486435614, 'reg_alpha': 5.2062546936201155, 'reg_lambda': 4.8629809170875244}. Best is trial 0 with value: 1270950.8692760346.[0m
[32m[I 2023-03-31 15:17:57,522][0m Trial 1 finished with value: 1788167.3760298346 and parameters: {'max_depth': 8, 'learning_rate': 0.01, 'n_estimators': 200, 'min_child_weight': 6, 'gamma': 0.8489047225092643, 'subsample': 0.5, 'colsample_bytree': 0.41447424605440614, 'reg_alpha': 1.3497089769410424, 'reg_lambda': 0.6537435874758492}. Best is trial 0 with value: 1270950.8692760346.[0m
[32m[I 2023-03-31 15:18:00,081][0m Trial 2 finished with value: 1196751.151598272 and parameters: {'max_depth': 27, 

MAE: 278.13372816595717
MSE: 313247.67403304647
RMSE: 559.6853348382879
R2_Score: 0.9810628541510956


In [4]:
# Build a comparison table: the actual test targets next to the model's
# predictions, re-indexed from 0 so rows are numbered positionally.
pred_df = pd.DataFrame(y_test).reset_index(drop=True)
pred_df["y_pred"] = y_pred
pred_df.head()

Unnamed: 0,price,y_pred
0,589,621.591248
1,3881,4044.619629
2,4987,4511.502441
3,628,621.650208
4,525,555.529846


In [5]:
# Get the accuracy metrics summary from the fitted wrapper and display it.
# Per the captured output it is a table with one "Score" per metric:
# MAE, MSE, RMSE and R2_Score.
acc_metrics = m.get_metrics_summary()
acc_metrics

Unnamed: 0_level_0,Score
Accuracy_Metrics,Unnamed: 1_level_1
MAE,278.133728
MSE,313247.674033
RMSE,559.685335
R2_Score,0.981063


In [6]:
# Display the best model parameters stored on the wrapper after fit().
# Per the captured output this is a dict of hyperparameter name -> value
# (max_depth, learning_rate, n_estimators, ...).
m.best_params_

{'max_depth': 9,
 'learning_rate': 0.05,
 'n_estimators': 5000,
 'min_child_weight': 9,
 'gamma': 0.470942193041601,
 'subsample': 0.7,
 'colsample_bytree': 0.8256552984672079,
 'reg_alpha': 8.72693353229497,
 'reg_lambda': 6.888360525544349}

# Visualisations

In [7]:
# Plot the actual prices and the model's predictions for the test rows.
# The pandas plotting backend is set to plotly at the top of the notebook, so
# these keyword arguments are forwarded to plotly express: `title` names the
# figure and `labels` renames the wide-form axis/legend captions so the chart
# can be understood without reading the surrounding code.
pred_df.plot(
    title="Diamond price: actual vs. predicted (test set)",
    labels={"index": "Test sample", "value": "Price", "variable": "Series"},
)

In [8]:
# Show the optimization history plot provided by the wrapper — presumably the
# objective value per trial of the study run in fit(); confirm against HyperXGBoost.
m.plot_optimization_history()

In [9]:
# Show the parameter importance plot provided by the wrapper — presumably the
# relative importance of each tuned hyperparameter; confirm against HyperXGBoost.
m.plot_parameter_importance()

In [10]:
# Show the parameter relationships plot provided by the wrapper — presumably how
# the tuned hyperparameters relate to the objective; confirm against HyperXGBoost.
m.plot_parameter_relationships()