## LightGBM

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, ShuffleSplit, GridSearchCV, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import scale
from sklearn import model_selection

In [2]:
# Load the Hitters data and discard every row that has a missing value.
hit = pd.read_csv("Hitters.csv")
df = hit.copy().dropna()

# Target variable.
y = df["Salary"]

# One-hot encode the categorical columns; only the indicator columns listed
# further down are carried into the design matrix.
categorical_cols = ["League", "Division", "NewLeague"]
dms = pd.get_dummies(df[categorical_cols])

# Numeric predictors: everything except the target and the raw categoricals.
X_ = df.drop(["Salary"] + categorical_cols, axis=1).astype("float64")
X = pd.concat([X_, dms[["League_N", "Division_W", "NewLeague_N"]]], axis=1)

# Hold out 25% of the rows for testing; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [3]:
!pip install lightgbm

Collecting lightgbm
  Downloading lightgbm-2.3.1-py2.py3-none-win_amd64.whl (544 kB)
Installing collected packages: lightgbm
Successfully installed lightgbm-2.3.1


In [5]:
from lightgbm import LGBMRegressor

In [6]:
# Baseline: LightGBM regressor with all-default hyperparameters.
lgbm = LGBMRegressor()
lgbm.fit(X_train, y_train)
# fit() hands back the estimator itself, so both names refer to the fitted model.
lgbm_model = lgbm

### Prediction

In [7]:
# Predict on the held-out split. The model was fit without early stopping, so
# there is no best iteration to select; predict() is called with its defaults,
# matching how the tuned model is evaluated later in the notebook.
y_pred = lgbm_model.predict(X_test)

In [8]:
# Test-set RMSE of the default model: square root of the mean squared error.
lgbm_mse = mean_squared_error(y_test, y_pred)
np.sqrt(lgbm_mse)

363.8712087611089

### Model Tuning

In [15]:
# Display the fitted estimator to inspect the hyperparameter values it was built with.
lgbm_model

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
              importance_type='split', learning_rate=0.1, max_depth=-1,
              min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
              n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
              random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
              subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [18]:
# Candidate hyperparameter values; the search evaluates every combination.
lgbm_grid = dict(
    learning_rate=[0.01, 0.1, 0.5, 1],
    n_estimators=[20, 40, 100, 200, 500, 1000],
    max_depth=list(range(1, 9)),
    colsample_bytree=[0.4, 0.5, 0.6, 0.9, 1],
)

lgbm = LGBMRegressor()

# Exhaustive 10-fold cross-validated search, parallelised over all cores.
lgbm_cv_model = GridSearchCV(
    estimator=lgbm,
    param_grid=lgbm_grid,
    cv=10,
    n_jobs=-1,
    verbose=2,
)
lgbm_cv_model.fit(X_train, y_train)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Fitting 10 folds for each of 960 candidates, totalling 9600 fits


[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 896 tasks      | elapsed:    8.1s
[Parallel(n_jobs=-1)]: Done 2520 tasks      | elapsed:   23.9s
[Parallel(n_jobs=-1)]: Done 4784 tasks      | elapsed:   47.5s
[Parallel(n_jobs=-1)]: Done 7704 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done 9600 out of 9600 | elapsed:  1.7min finished


GridSearchCV(cv=10, error_score=nan,
             estimator=LGBMRegressor(boosting_type='gbdt', class_weight=None,
                                     colsample_bytree=1.0,
                                     importance_type='split', learning_rate=0.1,
                                     max_depth=-1, min_child_samples=20,
                                     min_child_weight=0.001, min_split_gain=0.0,
                                     n_estimators=100, n_jobs=-1, num_leaves=31,
                                     objective=None, random_state=None,
                                     reg_alpha=0.0, reg_lambda=0.0, silent=True,
                                     subsample=1.0, subsample_for_bin=200000,
                                     subsample_freq=0),
             iid='deprecated', n_jobs=-1,
             param_grid={'colsample_bytree': [0.4, 0.5, 0.6, 0.9, 1],
                         'learning_rate': [0.01, 0.1, 0.5, 1],
                         'max_depth': [1, 2, 3, 

In [19]:
# Hyperparameter combination selected by the grid search.
lgbm_cv_model.best_params_

{'colsample_bytree': 0.4,
 'learning_rate': 0.1,
 'max_depth': 5,
 'n_estimators': 40}

In [20]:
# Refit on the full training split using the hyperparameters chosen by the
# grid search. Reading them from best_params_ (instead of copying the printed
# values by hand) keeps this cell correct if the grid or the data changes.
lgbm_tuned = LGBMRegressor(**lgbm_cv_model.best_params_).fit(X_train, y_train)

In [21]:
# Test-set RMSE of the tuned model.
y_pred = lgbm_tuned.predict(X_test)
tuned_mse = mean_squared_error(y_test, y_pred)
np.sqrt(tuned_mse)

377.8415676535648

## CatBoost 

In [22]:
!pip install catboost



In [23]:
from catboost import CatBoostRegressor

In [24]:
# iterations, learning_rate, depth