In [2]:
import math
import matplotlib.pylab as plt
import numpy as np 
import pandas as pd 
import xgboost as xgb

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV

np.random.seed(171)

In [3]:
X_train = pd.read_csv(r"C:\Users\patri\Documents\GitHub\ECS171-Project-Group-10\Data\X_train.csv", delimiter = ',', header = None)
X_test = pd.read_csv(r"C:\Users\patri\Documents\GitHub\ECS171-Project-Group-10\Data\X_test.csv", delimiter = ',', header = None)
Y_train = pd.read_csv(r"C:\Users\patri\Documents\GitHub\ECS171-Project-Group-10\Data\Y_train.csv", header = None)
Y_test = pd.read_csv(r"C:\Users\patri\Documents\GitHub\ECS171-Project-Group-10\Data\Y_test.csv", header = None)

In [4]:
import xgboost as xgb

xgb = xgb.XGBRegressor(gpu_id = 0)
xgb.fit(X_train, Y_train)
Y_pred_xgb = xgb.predict(X_test)

print("XGB Training Score:", xgb.score(X_train, Y_train))
print("XGB Testing Score:", xgb.score(X_test, Y_test))
print("XGB Training RMSE:", mean_squared_error(Y_test, Y_pred_xgb, squared = False))
print("XGB Testing RMSE:", mean_squared_error(Y_test, Y_pred_xgb, squared = False))

XGB Training Score: 0.9725841871957128
XGB Testing Score: 0.9115570903929212
XGB Training RMSE: 10.259085956608864
XGB Testing RMSE: 10.259085956608864


In [6]:
import xgboost as xgb

xgb_paper = xgb.XGBRegressor(eta = 0.02, max_depth = 16, subsample = 0.5, n_estimators = 374, gpu_id = 0)
xgb_paper.fit(X_train, Y_train)

Y_pred_train_paper = xgb_paper.predict(X_train)
Y_pred_paper = xgb_paper.predict(X_test)

print("XGB Training Score:", xgb_paper.score(X_train, Y_train))
print("XGB Testing Score:", xgb_paper.score(X_test, Y_test))
print("XGB Training RMSE:", mean_squared_error(Y_train, Y_pred_train_paper, squared = False))
print("XGB Testing RMSE:", mean_squared_error(Y_test, Y_pred_paper, squared = False))

XGB Training Score: 0.9844637438015794
XGB Testing Score: 0.9219333469165599
XGB Training RMSE: 4.254482856073242
XGB Testing RMSE: 9.63851079304864


In [64]:
import xgboost as xgb

xgb_param = {'booster': ['gbtree', 'gblinear', 'dart'],
             'objective':['reg:squarederror'],
             'learning_rate': [0.1, 0.3, 0.5, 0.01, 0.02, 0.03],
             'gamma': [0, 1, 2, 3],
             'max_depth': [4, 6, 8, 10, 12, 14, 16, 18, 20],
             'subsample': [0.3, 0.4, 0.5, 0.6, 0.7],
             'sampling_method': ['uniform', 'gradient_based'],
             'lambda': [1, 2],
             'alpha': [0, 1],
             'tree_method': ['gpu_hist', 'exact', 'approx'],
             'gpu_id': [0],
             'n_estimators': [325, 350, 375, 400, 425]}

print("Now running RandomizedSearchCV on XGBoost Hyperparameters...\n")

xgb = xgb.XGBRegressor()
xgb_rand = RandomizedSearchCV(xgb, xgb_param, cv = 10, n_jobs = -1, verbose = 1)
xgb_rand.fit(X_train, Y_train)

print("\n========================= Hyperparameter Tuning =========================")
print("\nThe score for the best model is:", xgb_rand.best_score_)
print("\nThe best parameters are:\n", xgb_rand.best_params_)
print("=========================================================================")

Now running RandomizedSearchCV on XGBoost Hyperparameters...

Fitting 10 folds for each of 10 candidates, totalling 100 fits
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:  9.0min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 35.9min finished


The score for the best model is: 0.9244876323636424

The best parameters are:
 {'tree_method': 'gpu_hist', 'subsample': 0.4, 'sampling_method': 'gradient_based', 'objective': 'reg:squarederror', 'n_estimators': 500, 'max_depth': 10, 'learning_rate': 0.02, 'lambda': 1, 'gpu_id': 0, 'gamma': 1, 'booster': 'dart', 'alpha': 1}


In [7]:
import xgboost as xgb

xgb_opt = xgb.XGBRegressor(tree_method = 'gpu_hist', subsample = 0.4, sampling_method = 'gradient_based', objective = 'reg:squarederror', n_estimators = 500, max_depth = 10, learning_rate = 0.02, gpu_id = 0, gamma = 1, booster = 'dart', alpha = 1)
xgb_opt.fit(X_train, Y_train)

Y_pred_train_opt = xgb_paper.predict(X_train)
Y_pred_opt = xgb_paper.predict(X_test)

print("XGB Training Score:", xgb_paper.score(X_train, Y_train))
print("XGB Testing Score:", xgb_paper.score(X_test, Y_test))
print("XGB Training RMSE:", mean_squared_error(Y_train, Y_pred_train_opt, squared = False))
print("XGB Testing RMSE:", mean_squared_error(Y_test, Y_pred_opt, squared = False))

XGB Training Score: 0.9844637438015794
XGB Testing Score: 0.9219333469165599
XGB Training RMSE: 4.254482856073242
XGB Testing RMSE: 9.63851079304864
