In [None]:
import os
import sys
from pathlib import Path

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D

from sklearn.datasets import make_regression
from sklearn.model_selection import (
    KFold, cross_val_score, train_test_split,
    GridSearchCV
)
import xgboost as xgb

%matplotlib ipympl
# %config InlineBackend.figure_format = 'retina'
# sns.set()

### Generate regression data

In [None]:
# Synthetic linear-regression problem: 100 samples, 2 features (both
# informative), one target, with a little Gaussian noise on top.
# `random_state` is fixed so that the data -- and therefore every score and
# plot derived from it -- is identical after Restart Kernel -> Run All.
X, y, coef = make_regression(
    n_samples=100,
    n_features=2,
    n_informative=2,
    n_targets=1,
    noise=0.1,
    coef=True,  # also return the coefficients of the underlying linear model
    random_state=17,
)
coef

In [None]:
# 3D view of the synthetic data: the two features span the horizontal plane
# and the regression target is on the vertical axis.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Marker colour encodes the second feature through the colormap.
# `facecolor` is deliberately not passed: scatter only consults it when `c`
# is absent, so combining the two is at best a no-op.
ax.scatter(
    X[:, 0], X[:, 1], y,
    s=10, c=X[:, 1], cmap=plt.cm.jet
)

# Label every axis so the figure can be read on its own.
ax.set_xlabel('X[:, 0]')
ax.set_ylabel('X[:, 1]')
ax.set_zlabel('y');

---

In [None]:
# Hold out 25% of the samples as a test set; the fixed seed keeps the
# partition the same from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=17, test_size=0.25)

In [None]:
# Baseline XGBoost regressor: 20 trees of depth 4 with squared-error loss.
params = {
    'n_estimators': 20,
    'max_depth': 4,
    'learning_rate': 0.2,
    'colsample_bytree': 1,
    'objective': 'reg:squarederror'
}
model = xgb.XGBRegressor(**params)

# 5-fold cross-validated R^2, computed on the training split only.
scores = cross_val_score(
    model, X_train, y_train,
    scoring='r2',
    cv=KFold(n_splits=5)
)
# Print the raw scores: R^2 is negative when a fold does worse than predicting
# the mean, and taking the absolute value would disguise that as a good fit.
print(scores)

# Refit on the full training split and evaluate once on the held-out test set.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
rmse = np.sqrt(np.mean((y_pred - y_test) ** 2))
r2 = model.score(X_test, y_test)
print(f"{rmse = }")
print(f"{r2 = }")

In [None]:
# Settings held fixed for every candidate in the search: depth-1 trees with
# squared-error loss.
params_base = dict(
    colsample_bytree=1,
    objective='reg:squarederror',
    max_depth=1,
)
model = xgb.XGBRegressor(**params_base)

# Hyper-parameters explored by the grid (4 learning rates x 3 ensemble sizes).
# 'max_depth' (e.g. [1, 4, 10]) is intentionally left out of the search.
param_grid = dict(
    learning_rate=[0.05, 0.2, 0.5, 0.8],
    n_estimators=[1, 2, 4],
)

# Exhaustive search with 5-fold cross-validation on the training split.
clf = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)
clf.fit(X_train, y_train)

In [None]:
# Heatmap of the mean cross-validated test score for every grid candidate.
lr_values = param_grid['learning_rate']
n_est_values = param_grid['n_estimators']

# GridSearchCV enumerates candidates with parameter names in sorted order and
# the last name varying fastest, i.e. 'learning_rate' is the outer loop and
# 'n_estimators' the inner one. Reshaping the flat score vector to
# (n_learning_rates, n_estimators) therefore puts learning rates on the rows
# and ensemble sizes on the columns.
scores = clf.cv_results_['mean_test_score'].reshape(
    len(lr_values), len(n_est_values)
)

# Draw on a fresh figure so the heatmap never lands on a previously opened
# (e.g. 3D) axes that is still current under an interactive backend.
fig, ax = plt.subplots()
sns.heatmap(
    scores,
    xticklabels=n_est_values,  # columns of `scores`
    yticklabels=lr_values,     # rows of `scores`
    ax=ax,
)
ax.set_xlabel('n_est')
ax.set_ylabel('lr')
ax.grid(False)

In [None]:
# Sanity check: for one grid candidate, the reported 'mean_test_score' should
# equal the plain average of that candidate's per-fold test scores.
i = 6  # index of the candidate to inspect
m = clf.cv_results_['mean_test_score'][i]

# Use the number of folds the search actually ran instead of repeating the
# literal from the cell that configured it.
split_scores = [
    clf.cv_results_[f"split{k}_test_score"][i]
    for k in range(clf.n_splits_)
]
np.mean(split_scores), m