# HyperXGBoost Regression Test

In [9]:
from HyperXgboost import HyperXGBoost
import seaborn as sns
import pandas as pd
# Route DataFrame.plot() / Series.plot() through plotly for the rest of the notebook.
pd.options.plotting.backend = "plotly"
import plotly.express as px

In [10]:
# Fetch the diamonds dataset and split it into the 'price' target and the remaining features.
data = sns.load_dataset('diamonds')
y = data['price']
X = data.drop(columns='price')

# Replace every categorical feature with its integer category codes.
categorical_cols = X.select_dtypes('category').columns
X = X.assign(**{col: X[col].cat.codes for col in categorical_cols})

print(f"Shape of X: {X.shape}")
print(f"Shape of y: {y.shape}")

Shape of X: (53940, 9)
Shape of y: (53940,)


In [11]:
# Build the HyperXGBoost wrapper (n_jobs=4).
m = HyperXGBoost(n_jobs=4)

# Split features/target into train and test parts; test_size=0.2 is presumably
# the held-out fraction (TODO confirm against HyperXGBoost).
# NOTE(review): no seed is passed here or to fit(); if the split or the search is
# randomized, the metrics shown below will change between runs.
X_train, X_test, y_train, y_test = m.train_test_split_Xy(X, y, test_size=0.2)

# Run the fit and predict methods: the search is capped at 10 trials, and the
# predictions are converted to a plain list.
study = m.fit(
    X_train,
    X_test,
    y_train,
    y_test,
    study_n_trials=10,
)
y_pred = m.predict().tolist()


[32m[I 2023-03-31 14:31:34,868][0m A new study created in memory with name: regression[0m
[32m[I 2023-03-31 14:31:39,574][0m Trial 0 finished with value: 417587.7840993264 and parameters: {'max_depth': 6, 'learning_rate': 0.5139183356509007, 'n_estimators': 866, 'min_child_weight': 6, 'gamma': 0.3580646137726451, 'subsample': 0.8, 'colsample_bytree': 0.2789200039602764, 'reg_alpha': 4.430478002769136, 'reg_lambda': 9.422434074050956}. Best is trial 0 with value: 417587.7840993264.[0m
[32m[I 2023-03-31 14:31:47,553][0m Trial 1 finished with value: 425211.91304720065 and parameters: {'max_depth': 14, 'learning_rate': 0.4789244307008706, 'n_estimators': 192, 'min_child_weight': 7, 'gamma': 0.815563918589199, 'subsample': 0.6, 'colsample_bytree': 0.450020341579021, 'reg_alpha': 9.34665438453678, 'reg_lambda': 5.879921495839738}. Best is trial 0 with value: 417587.7840993264.[0m
[32m[I 2023-03-31 14:31:51,365][0m Trial 2 finished with value: 329140.80860256986 and parameters: {'m

MAE: 289.40478269232614
MSE: 329655.0852262899
RMSE: 574.1559763916857
R2_Score: 0.9794654329127093


In [12]:
# Put the test targets and the model predictions side by side, then renumber
# the rows from 0 so the original dataset index is dropped.
pred_df = (
    pd.DataFrame(y_test)
    .assign(y_pred=y_pred)
    .reset_index(drop=True)
)
pred_df.head()

Unnamed: 0,price,y_pred
0,12821,12229.385742
1,945,889.8172
2,4466,5156.023926
3,5937,6035.296387
4,1273,1530.565796


In [13]:
# Get Accuracy Metrics
# Summary table of the fitted model's scores; the rendered output lists
# MAE, MSE, RMSE and R2_Score.
acc_metrics = m.get_metrics_summary()
acc_metrics

Unnamed: 0_level_0,Score
Accuracy_Metrics,Unnamed: 1_level_1
MAE,289.404783
MSE,329655.085226
RMSE,574.155976
R2_Score,0.979465


In [14]:
# best model parameters
# Hyperparameter dict stored on the fitted wrapper; presumably the parameters of
# the best trial from the search run by fit() (TODO confirm against HyperXGBoost).
m.best_params_

{'max_depth': 15,
 'learning_rate': 0.3334798790066276,
 'n_estimators': 927,
 'min_child_weight': 2,
 'gamma': 0.9246380541083181,
 'subsample': 0.7,
 'colsample_bytree': 0.9104978330233338,
 'reg_alpha': 7.787471877695388,
 'reg_lambda': 7.155331145306044}

In [15]:
# Plot actual vs. predicted prices for the test rows.
# The pandas plotting backend is set to "plotly" at the top of the notebook, so
# keyword arguments given to DataFrame.plot() are forwarded to plotly express.
# "index", "value" and "variable" are the default wide-form names for the x axis,
# the y axis and the legend; relabel them so the figure is readable on its own.
pred_df.plot(
    title="Actual vs. predicted price (test set)",
    labels={"index": "Test sample", "value": "Price", "variable": "Series"},
)