In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

df = pd.read_csv("./Hitters.csv")
df = df.dropna()
dms = pd.get_dummies(df[["League", "Division", "NewLeague"]])
y = df["Salary"]
x_ = df.drop(["Salary", "League", "Division", "NewLeague"], axis = 1).astype("float64")
x = pd.concat([x_, dms[["League_N", "Division_W", "NewLeague_N"]]], axis = 1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20, random_state = 99)

In [3]:
from sklearn.ensemble import GradientBoostingRegressor
gbm_model = GradientBoostingRegressor().fit(x_train, y_train)

In [5]:
pred_y = gbm_model.predict(x_test)

In [6]:
from sklearn.metrics import mean_squared_error
np.sqrt(mean_squared_error(y_test, pred_y))

425.3923469843284

In [9]:
params = {"learning_rate": [0.001, 0.01, 0.1],
         "max_depth": [3, 5, 8],
         "n_estimators": [100, 200, 500],
         "subsample": [1, 0.5, 0.8],
         "loss": ["ls", "lad", "quantile"]}

In [10]:
from sklearn.model_selection import GridSearchCV
gbm_cv_model = GridSearchCV(gbm_model, params, cv = 10, n_jobs = -1).fit(x_train, y_train)

In [11]:
gbm_cv_model.best_params_

{'learning_rate': 0.1,
 'loss': 'lad',
 'max_depth': 5,
 'n_estimators': 100,
 'subsample': 0.5}

In [12]:
gbm_model_tuned = GradientBoostingRegressor(learning_rate = gbm_cv_model.best_params_["learning_rate"],
                                           loss = gbm_cv_model.best_params_["loss"],
                                           max_depth = gbm_cv_model.best_params_["max_depth"],
                                           n_estimators = gbm_cv_model.best_params_["n_estimators"],
                                           subsample = gbm_cv_model.best_params_["subsample"]).fit(x_train, y_train)

In [14]:
pred_y = gbm_model_tuned.predict(x_test)
np.sqrt(mean_squared_error(y_test, pred_y))

411.88409846372554

In [15]:
gbm_model_tuned.feature_importances_

array([0.05733241, 0.07289669, 0.05705896, 0.05791891, 0.04795917,
       0.06330094, 0.08232054, 0.05549055, 0.05339162, 0.06056886,
       0.05806888, 0.08065717, 0.03900845, 0.07919374, 0.06353446,
       0.0454794 , 0.01100184, 0.00623204, 0.00858538])