# CatBoost

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.model_selection import train_test_split, GridSearchCV,cross_val_score
from skompiler import skompile
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import BaggingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor

from warnings import filterwarnings
filterwarnings('ignore')

In [2]:
hit = pd.read_csv("Hitters.csv")
df = hit.copy()
df = df.dropna()
dms = pd.get_dummies(df[['League', 'Division', 'NewLeague']])
y = df["Salary"]
X_ = df.drop(['Salary', 'League', 'Division', 'NewLeague'], axis=1).astype('float64')
X = pd.concat([X_, dms[['League_N', 'Division_W', 'NewLeague_N']]], axis=1)
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.25, 
                                                    random_state=42)

In [3]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.1.1-cp39-none-win_amd64.whl (74.0 MB)
     ---------------------------------------- 74.0/74.0 MB 2.8 MB/s eta 0:00:00
Collecting graphviz
  Downloading graphviz-0.20.1-py3-none-any.whl (47 kB)
     ---------------------------------------- 47.0/47.0 kB 1.2 MB/s eta 0:00:00
Installing collected packages: graphviz, catboost
Successfully installed catboost-1.1.1 graphviz-0.20.1


In [4]:
from catboost import CatBoostRegressor

In [5]:
catb = CatBoostRegressor()
catb_model = catb.fit(X_train, y_train)

Learning rate set to 0.031674
0:	learn: 437.6430699	total: 143ms	remaining: 2m 22s
1:	learn: 431.3923642	total: 144ms	remaining: 1m 11s
2:	learn: 424.8820360	total: 145ms	remaining: 48.2s
3:	learn: 418.2514904	total: 146ms	remaining: 36.4s
4:	learn: 412.6394021	total: 147ms	remaining: 29.3s
5:	learn: 406.6247020	total: 148ms	remaining: 24.6s
6:	learn: 400.5321206	total: 149ms	remaining: 21.2s
7:	learn: 394.6683437	total: 151ms	remaining: 18.7s
8:	learn: 388.2496484	total: 152ms	remaining: 16.7s
9:	learn: 382.9448842	total: 153ms	remaining: 15.1s
10:	learn: 377.2600080	total: 153ms	remaining: 13.8s
11:	learn: 372.4829606	total: 154ms	remaining: 12.7s
12:	learn: 366.6823437	total: 155ms	remaining: 11.8s
13:	learn: 362.6076230	total: 156ms	remaining: 11s
14:	learn: 358.0107745	total: 157ms	remaining: 10.3s
15:	learn: 353.2802665	total: 158ms	remaining: 9.7s
16:	learn: 348.5646265	total: 159ms	remaining: 9.17s
17:	learn: 343.6407912	total: 160ms	remaining: 8.71s
18:	learn: 339.2363847	tota

# Tahmin

In [6]:
y_pred = catb_model.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

351.194631344607

# Model Tuning

In [7]:
catb_grid = {
    'iterations': [200,500,1000,2000],
    'learning_rate': [0.01, 0.03, 0.05, 0.1],
    'depth': [3,4,5,6,7,8] }

In [8]:
catb = CatBoostRegressor()
catb_cv_model = GridSearchCV(catb, catb_grid, cv=5, n_jobs = -1, verbose = 2)

In [9]:
catb_cv_model.fit(X_train, y_train)

Fitting 5 folds for each of 96 candidates, totalling 480 fits
0:	learn: 422.4143448	total: 1.21ms	remaining: 1.21s
1:	learn: 404.1864276	total: 2.6ms	remaining: 1.3s
2:	learn: 386.3231718	total: 3.63ms	remaining: 1.21s
3:	learn: 370.5548032	total: 5.04ms	remaining: 1.25s
4:	learn: 354.9242038	total: 6.26ms	remaining: 1.25s
5:	learn: 342.3403984	total: 8.62ms	remaining: 1.43s
6:	learn: 328.2370070	total: 9.95ms	remaining: 1.41s
7:	learn: 317.5056526	total: 10.8ms	remaining: 1.34s
8:	learn: 306.6243511	total: 11.8ms	remaining: 1.3s
9:	learn: 297.3147023	total: 12.8ms	remaining: 1.27s
10:	learn: 288.3685892	total: 14.2ms	remaining: 1.27s
11:	learn: 281.0996220	total: 15.2ms	remaining: 1.25s
12:	learn: 273.2254898	total: 16.2ms	remaining: 1.23s
13:	learn: 266.9003385	total: 17.4ms	remaining: 1.23s
14:	learn: 261.9092500	total: 18.6ms	remaining: 1.22s
15:	learn: 256.2637350	total: 19.9ms	remaining: 1.22s
16:	learn: 250.3667935	total: 21ms	remaining: 1.21s
17:	learn: 244.8631098	total: 22ms	

GridSearchCV(cv=5,
             estimator=<catboost.core.CatBoostRegressor object at 0x00000208D93BFAF0>,
             n_jobs=-1,
             param_grid={'depth': [3, 4, 5, 6, 7, 8],
                         'iterations': [200, 500, 1000, 2000],
                         'learning_rate': [0.01, 0.03, 0.05, 0.1]},
             verbose=2)

In [10]:
catb_cv_model.best_params_

{'depth': 5, 'iterations': 1000, 'learning_rate': 0.1}

In [11]:
catb_tuned = CatBoostRegressor(iterations = 200, 
                               learning_rate = 0.01, 
                               depth = 8)

catb_tuned = catb_tuned.fit(X_train,y_train)

0:	learn: 442.4903140	total: 4.12ms	remaining: 820ms
1:	learn: 440.4621805	total: 7.4ms	remaining: 733ms
2:	learn: 438.5132091	total: 10.7ms	remaining: 703ms
3:	learn: 436.2180377	total: 14.1ms	remaining: 693ms
4:	learn: 434.0461579	total: 17.3ms	remaining: 675ms
5:	learn: 431.8437770	total: 21ms	remaining: 679ms
6:	learn: 430.1594587	total: 24.5ms	remaining: 677ms
7:	learn: 428.0941830	total: 28.3ms	remaining: 679ms
8:	learn: 426.0998774	total: 31.6ms	remaining: 671ms
9:	learn: 424.0249067	total: 32.5ms	remaining: 617ms
10:	learn: 422.1921868	total: 35.7ms	remaining: 614ms
11:	learn: 420.2506764	total: 38.8ms	remaining: 607ms
12:	learn: 418.3116383	total: 41.9ms	remaining: 603ms
13:	learn: 416.2966847	total: 46.9ms	remaining: 623ms
14:	learn: 414.5776175	total: 50ms	remaining: 617ms
15:	learn: 412.8009394	total: 53.3ms	remaining: 613ms
16:	learn: 410.9774146	total: 57.2ms	remaining: 616ms
17:	learn: 409.1047417	total: 61ms	remaining: 617ms
18:	learn: 407.6243957	total: 66ms	remaining:

In [12]:
y_pred = catb_tuned.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

369.6970696250705