# Import Libraries

In [2]:
import pandas as pd 
import numpy as np 
import xgboost as xgb
import seaborn as sns
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
# Keep a handle on seaborn's current colour palette for later plots.
# NOTE(review): this is captured before the style sheet below is applied, so it
# presumably holds the pre-'fivethirtyeight' colours — confirm that is intended.
color_pal = sns.color_palette()
# Apply matplotlib's 'fivethirtyeight' style sheet to figures drawn after this point.
plt.style.use('fivethirtyeight')
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score, roc_auc_score, make_scorer
from sklearn.model_selection import GridSearchCV 
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.pipeline import Pipeline
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from xgboost import XGBRegressor
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Model

In [3]:
# Folder (relative to the notebook) that holds the pickled inputs and outputs.
# The trailing slash matters: file names are appended by string concatenation.
directory = "../data/"

Mounted at /content/drive


In [4]:
# Restore the train/test partition that was pickled as a 4-item sequence.
# NOTE: pickle.load can execute arbitrary code; only load a file you trust.
split_path = directory + 'data_split.pkl'
with open(split_path, 'rb') as split_file:
    data_split = pickle.load(split_file)
X_train, X_test, y_train, y_test = data_split

In [5]:
# Single-step pipeline; the step name 'reg' is the prefix used for the
# 'reg__<param>' keys when addressing the regressor's hyper-parameters.
estimators = [('reg', XGBRegressor(random_state=8))]
pipe = Pipeline(estimators)

In [6]:
# Hyper-parameter ranges for the XGBoost step, keyed by bare parameter name.
xgb_param_dims = {
    'max_depth': Integer(2, 8),
    # Learning rate is sampled on a log scale.
    'learning_rate': Real(0.001, 1.0, prior='log-uniform'),
    'subsample': Real(0.5, 1.0),
    'colsample_bytree': Real(0.5, 1.0),
    'colsample_bylevel': Real(0.5, 1.0),
    'colsample_bynode': Real(0.5, 1.0),
    'reg_alpha': Real(0.0, 10.0),
    'reg_lambda': Real(0.0, 10.0),
    'gamma': Real(0.0, 10.0),
}

# Prefix every name with the pipeline step ('reg') so the search can route
# each value to the regressor inside the Pipeline.
search_space = {
    'reg__' + param_name: dimension
    for param_name, dimension in xgb_param_dims.items()
}

In [7]:
# Bayesian hyper-parameter search over the pipeline: 10 sampled
# configurations, each scored by 3-fold CV on negative MSE.
opt = BayesSearchCV(
    estimator=pipe,
    search_spaces=search_space,
    n_iter=10,
    scoring='neg_mean_squared_error',
    cv=3,
    random_state=8,
)
opt.fit(X_train, y_train)

In [8]:
# Predict on the held-out split with the fitted search object.
y_pred = opt.predict(X_test)

# Compute the three regression metrics in a fixed order: MSE, MAE, R2.
mse, mae, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error, r2_score)
)

# Report each metric on its own line as "<label> <value>".
for label, value in (("MSE:", mse), ("MAE:", mae), ("R2:", r2)):
    print(label, value)

MSE: 0.14505752909269076
MAE: 0.05906721399910874
R2: 0.8465763265909905


# Export

In [9]:
# Bundle the fitted search object, the test-set predictions and the metrics
# into one dictionary so they can be reloaded together later.
results = dict(model=opt, y_pred=y_pred, mse=mse, mae=mae, r2=r2)

# Persist the bundle next to the input data.
results_path = directory + 'xgboost_results.pkl'
with open(results_path, 'wb') as out_file:
    pickle.dump(results, out_file)