In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso

df = pd.read_csv("./Hitters.csv")
df = df.dropna()
dms = pd.get_dummies(df[["League", "Division", "NewLeague"]])
y = df["Salary"]
x_ = df.drop(["Salary", "League", "Division", "NewLeague"], axis = 1).astype("float64")
x = pd.concat([x_, dms[["League_N", "Division_W", "NewLeague_N"]]], axis = 1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.20, random_state = 99)

In [2]:
from sklearn.ensemble import RandomForestRegressor
rf_model = RandomForestRegressor(random_state = 99).fit(x_train, y_train)

In [3]:
pred_y = rf_model.predict(x_test)

In [4]:
from sklearn.metrics import mean_squared_error
np.sqrt(mean_squared_error(y_test, pred_y))

415.37039865225654

In [7]:
from sklearn.model_selection import GridSearchCV
params = {"max_depth": [5, 8, 10], "max_features": [2, 5, 10], "n_estimators": [200, 500, 2000], "min_samples_split": [2, 10, 100]}
rf_cv_model = GridSearchCV(rf_model, params, cv = 10).fit(x_train, y_train)

In [8]:
rf_cv_model.best_params_

{'max_depth': 10,
 'max_features': 2,
 'min_samples_split': 2,
 'n_estimators': 200}

In [11]:
rf_model_tuned = RandomForestRegressor(max_depth = rf_cv_model.best_params_["max_depth"],
                                max_features = rf_cv_model.best_params_["max_features"],
                                min_samples_split = rf_cv_model.best_params_["min_samples_split"],
                                n_estimators = rf_cv_model.best_params_["n_estimators"]).fit(x_train, y_train)

In [12]:
pred_y = rf_model_tuned.predict(x_test)

In [13]:
from sklearn.metrics import mean_squared_error
np.sqrt(mean_squared_error(y_test, pred_y))

405.67398330149217

In [16]:
rf_model_tuned.feature_importances_

array([0.05158537, 0.05127325, 0.04076955, 0.05060865, 0.05871384,
       0.05106565, 0.04925802, 0.09632663, 0.10382708, 0.07336384,
       0.09655853, 0.0937976 , 0.07971833, 0.04346257, 0.02372761,
       0.01948741, 0.005727  , 0.00543618, 0.00529289])