# Library

In [None]:
pip install catboost



In [None]:
import pandas as pd
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error as MSE
from catboost import CatBoostRegressor
from sklearn.model_selection import GridSearchCV
import warnings 
warnings.filterwarnings('ignore')

# Data

In [None]:
# Load the Boston housing data and assemble one frame holding the 13 feature
# columns plus a 'Target' column.
#
# `datasets.load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2,
# where accessing it raises ImportError. In that case fall back to the original
# StatLib file, parsed the way the scikit-learn deprecation notice recommends.
# NOTE: the fallback needs network access.
try:
    boston_data = datasets.load_boston()
except (ImportError, AttributeError):
    raw_df = pd.read_csv(
        'http://lib.stat.cmu.edu/datasets/boston',
        sep=r'\s+',
        skiprows=22,
        header=None,
    )
    # Every record is spread over two physical rows: the even rows carry the
    # first 11 features, the odd rows carry the last 2 features and the target.
    boston_data = {
        'data': np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]]),
        'target': raw_df.values[1::2, 2],
        'feature_names': ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE',
                          'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'],
    }

data = pd.DataFrame(boston_data['data'], columns=boston_data['feature_names'])
target = pd.DataFrame(boston_data['target'], columns=['Target'])
# Column-wise concat: both frames share the default RangeIndex.
df = pd.concat([data, target], axis=1)

In [None]:
# Preview the first five rows (features plus the 'Target' column).
df.head(n=5)

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


# Split data

In [None]:
# Separate the 'Target' column from the predictor columns.
y = df['Target']
X = df.drop(columns=['Target'])

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=100,
)

# CatBoost + GridSearchCV

In [None]:
# Create the base model. verbose=0 silences CatBoost's per-iteration training
# log, which otherwise floods the cell output when the best model is refit.
model = CatBoostRegressor(verbose=0)

# Hyperparameter ranges to search (3 x 3 x 3 = 27 combinations).
cb_params = {'depth' : [6, 8, 10],
             'learning_rate' : [0.05, 0.1, 0.2],
             'iterations' : [100, 150, 200]
            }

# Exhaustive grid search with 5-fold cross-validation on the training split.
# The score is the negated RMSE, so values closer to 0 are better.
grid_cb = GridSearchCV(model, param_grid=cb_params, scoring = 'neg_root_mean_squared_error', cv=5, n_jobs=-1)
grid_cb.fit(X_train, y_train)

0:	learn: 8.5281806	total: 8.16ms	remaining: 1.62s
1:	learn: 8.0168240	total: 15.1ms	remaining: 1.49s
2:	learn: 7.5885777	total: 21.2ms	remaining: 1.39s
3:	learn: 7.1802796	total: 27.2ms	remaining: 1.33s
4:	learn: 6.8454180	total: 33.3ms	remaining: 1.3s
5:	learn: 6.5315887	total: 39.4ms	remaining: 1.27s
6:	learn: 6.2208345	total: 45.5ms	remaining: 1.25s
7:	learn: 5.9102254	total: 51.6ms	remaining: 1.24s
8:	learn: 5.6373644	total: 57.8ms	remaining: 1.23s
9:	learn: 5.4194050	total: 63.9ms	remaining: 1.21s
10:	learn: 5.2327498	total: 69.9ms	remaining: 1.2s
11:	learn: 5.0248925	total: 75.7ms	remaining: 1.19s
12:	learn: 4.7906250	total: 81.7ms	remaining: 1.18s
13:	learn: 4.5682318	total: 87.6ms	remaining: 1.16s
14:	learn: 4.3740971	total: 93.6ms	remaining: 1.15s
15:	learn: 4.2305116	total: 99.3ms	remaining: 1.14s
16:	learn: 4.0648448	total: 105ms	remaining: 1.13s
17:	learn: 3.9084069	total: 111ms	remaining: 1.12s
18:	learn: 3.7987844	total: 117ms	remaining: 1.11s
19:	learn: 3.7107208	total:

GridSearchCV(cv=5,
             estimator=<catboost.core.CatBoostRegressor object at 0x7f47d4108f10>,
             n_jobs=-1,
             param_grid={'depth': [6, 8, 10], 'iterations': [100, 150, 200],
                         'learning_rate': [0.05, 0.1, 0.2]},
             scoring='neg_root_mean_squared_error')

In [None]:
# Tabulate the cross-validation results collected by the grid search.
result_df = pd.DataFrame.from_dict(grid_cb.cv_results_)

In [None]:
# Show the parameter set, mean CV score and rank for the first five candidates.
summary_columns = ['params', 'mean_test_score', 'rank_test_score']
result_df.loc[:, summary_columns].head()

Unnamed: 0,params,mean_test_score,rank_test_score
0,"{'depth': 6, 'iterations': 100, 'learning_rate...",-3.694983,25
1,"{'depth': 6, 'iterations': 100, 'learning_rate...",-3.399156,16
2,"{'depth': 6, 'iterations': 100, 'learning_rate...",-3.404712,17
3,"{'depth': 6, 'iterations': 150, 'learning_rate...",-3.44624,18
4,"{'depth': 6, 'iterations': 150, 'learning_rate...",-3.304564,6


In [None]:
# Report the winning parameter combination and its mean cross-validated score
# (negated RMSE, per the scoring chosen for the search).
best_params = grid_cb.best_params_
best_score = grid_cb.best_score_
print('best params:', best_params)
print('best score:', best_score)

best params: {'depth': 8, 'iterations': 200, 'learning_rate': 0.1}
best score: -3.242009622894945


# Prediction

In [None]:
# Predict on the held-out test split using the fitted grid search.
pred = grid_cb.predict(X_test)

# Root mean squared error between the true targets and the predictions.
mse_value = MSE(y_test, pred)
rmse = np.sqrt(mse_value)
print("RMSE : {}".format(rmse))

RMSE : 3.203435005370735
