# XGBoost

***XGBoost provides binary packages for some language bindings. The binary packages support the GPU algorithm (device=cuda:0) on machines with NVIDIA GPUs. Please note that training with multiple GPUs is only supported for Linux platform.***

# pip install xgboost

In [1]:
pip install xgboost

Collecting xgboost
  Downloading xgboost-2.0.3-py3-none-win_amd64.whl (99.8 MB)
     ---------------------------------------- 99.8/99.8 MB 5.8 MB/s eta 0:00:00
Installing collected packages: xgboost
Successfully installed xgboost-2.0.3
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.2.2 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from xgboost import XGBRegressor

In [6]:
from sklearn.datasets import make_regression


# Generate dataset
X, y = make_regression(n_samples=1000, n_features=2, noise=10, random_state=1)

In [7]:
from sklearn.model_selection import train_test_split


# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

In [8]:
regression = XGBRegressor()
regression.fit(X_train, y_train)

In [19]:
y_pred = regression.predict(X_test)


from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error


# Evaluate the model
print("Current model performance:")
print(f"R2 score: {r2_score(y_test, y_pred)*100}")
print(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred)*100}")
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred)*100}")

Current model performance:
R2 score: 97.75959731278184
Mean Absolute Error: 1052.7663222328606
Mean Squared Error: 18701.416712634462


# Hyperparameter Tuning

***GridSearchCV***

In [11]:
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier

# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.05, 0.1, 0.2],
    'max_depth': [3, 4, 5, 6],
}

model = XGBRegressor()


GridSearch = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1
)


In [12]:
GridSearch.fit(X_train , y_train )

 nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan nan
 nan nan nan nan nan nan nan nan nan nan nan nan]


In [13]:
GridSearch.best_params_

{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 100}

In [20]:
best_model = GridSearch.best_estimator_

y_pred_tuned = best_model.predict(X_test)


# Evaluate the tuned model
print("Tuned model performance:")
print(f"R2 score: {r2_score(y_test, y_pred_tuned)*100}")
print(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred_tuned)*100}")
print(f"Mean Squared Error: {mean_squared_error(y_test, y_pred_tuned)*100}")

Tuned model performance:
R2 score: 80.40104606282476
Mean Absolute Error: 3084.0058943129475
Mean Squared Error: 163599.2524031237
