In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 글꼴깨짐 방지
import matplotlib
import matplotlib.font_manager as fm

import warnings

# 경고 메시지를 무시하고 숨기거나
warnings.filterwarnings(action='ignore')

fm.get_fontconfig_fonts()
font_location = 'C:/Windows/Fonts/malgun.ttf' # For Windows
font_name = fm.FontProperties(fname=font_location).get_name()
matplotlib.rc('font', family=font_name)
font_name = fm.FontProperties(fname=font_location).get_name()
matplotlib.rc('font', family=font_name)

In [2]:
df = pd.read_csv('train.csv')

In [3]:
def preprocessing(df):
    # 오류 단지코드가 존재하는 행들을  사전에 제거
    df_error =  ['C1095', 'C2051', 'C1218', 'C1894', 'C2483', 'C1502', 'C1988']
    #df_error =  ['C2335', 'C1327']
    df = df[~df['단지코드'].isin(df_error)].reset_index(drop=True)
    df.rename(columns = {'도보 10분거리 내 지하철역 수(환승노선 수 반영)':'지하철','도보 10분거리 내 버스정류장 수':'버스'},inplace=True)
    df.drop(columns=['임대보증금','임대료','자격유형','임대건물구분'],axis = 1,inplace=True)
    지역_비율 = (df.groupby(['지역'])['총세대수'].count())/(df.groupby(['지역'])['총세대수'].count().sum())*100
    지역_비율=지역_비율.reset_index(name='지역_비율')
    공급유형_비율 = (df.groupby(['공급유형'])['총세대수'].count())/(df.groupby(['공급유형'])['총세대수'].count().sum())*100
    공급유형_비율=공급유형_비율.reset_index(name='공급유형_비율')
    df = pd.merge(df,지역_비율, on='지역')
    df = pd.merge(df,공급유형_비율, on='공급유형')
    df.drop(columns=['지역','공급유형','단지코드'],axis = 1,inplace=True)
    df=df.dropna(axis=0)
    df = df[['총세대수', '전용면적', '전용면적별세대수', '공가수', '지하철', '버스', '단지내주차면수', '공급유형_비율',
       '지역_비율', '등록차량수']]
    return df

In [4]:
df = preprocessing(df)

In [5]:
#Splitting the data into train and test split
from sklearn.model_selection import train_test_split
X = df.iloc[:, 1:-1]
y = df.iloc[:,-1]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
from sklearn.metrics import explained_variance_score,mean_absolute_error,r2_score

from time import time

In [7]:
# test cycle
from catboost import CatBoostRegressor

model = CatBoostRegressor()

model = model.fit(X_train, y_train)

y_pred = model.predict(X_test)

Learning rate set to 0.0462
0:	learn: 411.9835942	total: 160ms	remaining: 2m 39s
1:	learn: 399.2638885	total: 162ms	remaining: 1m 20s
2:	learn: 386.9914956	total: 163ms	remaining: 54.3s
3:	learn: 375.1169183	total: 164ms	remaining: 40.9s
4:	learn: 363.4540440	total: 166ms	remaining: 33s
5:	learn: 352.3173797	total: 167ms	remaining: 27.7s
6:	learn: 342.1581667	total: 173ms	remaining: 24.6s
7:	learn: 332.6173845	total: 189ms	remaining: 23.5s
8:	learn: 323.3894917	total: 205ms	remaining: 22.6s
9:	learn: 314.5482799	total: 220ms	remaining: 21.8s
10:	learn: 305.4289867	total: 236ms	remaining: 21.2s
11:	learn: 296.7823077	total: 252ms	remaining: 20.7s
12:	learn: 289.6188880	total: 269ms	remaining: 20.4s
13:	learn: 281.5116338	total: 286ms	remaining: 20.1s
14:	learn: 274.0491608	total: 301ms	remaining: 19.8s
15:	learn: 267.8042729	total: 324ms	remaining: 19.9s
16:	learn: 262.2963716	total: 342ms	remaining: 19.8s
17:	learn: 256.0377489	total: 362ms	remaining: 19.7s
18:	learn: 249.8287312	total

230:	learn: 86.3610554	total: 1.36s	remaining: 4.53s
231:	learn: 86.1845122	total: 1.36s	remaining: 4.51s
232:	learn: 86.0412667	total: 1.36s	remaining: 4.49s
233:	learn: 85.7672340	total: 1.36s	remaining: 4.46s
234:	learn: 85.5399612	total: 1.36s	remaining: 4.44s
235:	learn: 85.3833871	total: 1.37s	remaining: 4.42s
236:	learn: 85.1629918	total: 1.37s	remaining: 4.4s
237:	learn: 85.0263075	total: 1.37s	remaining: 4.38s
238:	learn: 84.7590017	total: 1.37s	remaining: 4.36s
239:	learn: 84.6858638	total: 1.37s	remaining: 4.34s
240:	learn: 84.3992064	total: 1.37s	remaining: 4.32s
241:	learn: 84.3089981	total: 1.37s	remaining: 4.3s
242:	learn: 84.1218043	total: 1.38s	remaining: 4.29s
243:	learn: 83.9972145	total: 1.38s	remaining: 4.27s
244:	learn: 83.7986732	total: 1.38s	remaining: 4.25s
245:	learn: 83.7683927	total: 1.38s	remaining: 4.23s
246:	learn: 83.5673102	total: 1.38s	remaining: 4.21s
247:	learn: 83.5101244	total: 1.38s	remaining: 4.19s
248:	learn: 83.4182637	total: 1.38s	remaining: 4

407:	learn: 63.3150894	total: 2.14s	remaining: 3.1s
408:	learn: 63.2938703	total: 2.14s	remaining: 3.09s
409:	learn: 63.2052657	total: 2.14s	remaining: 3.08s
410:	learn: 63.1154762	total: 2.14s	remaining: 3.07s
411:	learn: 63.0569849	total: 2.14s	remaining: 3.06s
412:	learn: 63.0292083	total: 2.14s	remaining: 3.05s
413:	learn: 62.9205433	total: 2.15s	remaining: 3.04s
414:	learn: 62.7855384	total: 2.15s	remaining: 3.03s
415:	learn: 62.6094322	total: 2.15s	remaining: 3.02s
416:	learn: 62.4865373	total: 2.16s	remaining: 3.02s
417:	learn: 62.4033742	total: 2.17s	remaining: 3.03s
418:	learn: 62.2950972	total: 2.19s	remaining: 3.03s
419:	learn: 62.2232298	total: 2.2s	remaining: 3.04s
420:	learn: 62.1885030	total: 2.21s	remaining: 3.04s
421:	learn: 62.0742228	total: 2.22s	remaining: 3.05s
422:	learn: 61.9960975	total: 2.24s	remaining: 3.05s
423:	learn: 61.8296571	total: 2.25s	remaining: 3.06s
424:	learn: 61.7474663	total: 2.26s	remaining: 3.06s
425:	learn: 61.6780941	total: 2.27s	remaining: 3

629:	learn: 49.4749426	total: 3.1s	remaining: 1.82s
630:	learn: 49.4069578	total: 3.12s	remaining: 1.82s
631:	learn: 49.3555825	total: 3.13s	remaining: 1.82s
632:	learn: 49.3024761	total: 3.15s	remaining: 1.82s
633:	learn: 49.2680651	total: 3.16s	remaining: 1.82s
634:	learn: 49.2039823	total: 3.17s	remaining: 1.82s
635:	learn: 49.1753920	total: 3.18s	remaining: 1.82s
636:	learn: 49.1254638	total: 3.19s	remaining: 1.82s
637:	learn: 49.0848821	total: 3.21s	remaining: 1.82s
638:	learn: 49.0738809	total: 3.22s	remaining: 1.82s
639:	learn: 49.0282204	total: 3.23s	remaining: 1.82s
640:	learn: 48.8854204	total: 3.24s	remaining: 1.82s
641:	learn: 48.8244717	total: 3.25s	remaining: 1.81s
642:	learn: 48.6892383	total: 3.27s	remaining: 1.81s
643:	learn: 48.6390661	total: 3.28s	remaining: 1.81s
644:	learn: 48.5715702	total: 3.29s	remaining: 1.81s
645:	learn: 48.5101607	total: 3.31s	remaining: 1.81s
646:	learn: 48.4348167	total: 3.31s	remaining: 1.81s
647:	learn: 48.4287682	total: 3.31s	remaining: 

810:	learn: 40.6993009	total: 3.88s	remaining: 905ms
811:	learn: 40.6567610	total: 3.9s	remaining: 902ms
812:	learn: 40.6048801	total: 3.91s	remaining: 899ms
813:	learn: 40.5787498	total: 3.92s	remaining: 896ms
814:	learn: 40.5410177	total: 3.93s	remaining: 893ms
815:	learn: 40.4805616	total: 3.95s	remaining: 890ms
816:	learn: 40.4703943	total: 3.96s	remaining: 887ms
817:	learn: 40.3965207	total: 3.97s	remaining: 884ms
818:	learn: 40.3708532	total: 3.99s	remaining: 881ms
819:	learn: 40.2997361	total: 4s	remaining: 878ms
820:	learn: 40.2748991	total: 4.01s	remaining: 875ms
821:	learn: 40.2338506	total: 4.03s	remaining: 872ms
822:	learn: 40.2029516	total: 4.04s	remaining: 869ms
823:	learn: 40.1682540	total: 4.05s	remaining: 865ms
824:	learn: 40.1270283	total: 4.06s	remaining: 862ms
825:	learn: 40.0961527	total: 4.08s	remaining: 859ms
826:	learn: 40.0379300	total: 4.09s	remaining: 856ms
827:	learn: 40.0144653	total: 4.1s	remaining: 853ms
828:	learn: 39.9631242	total: 4.11s	remaining: 848m

967:	learn: 34.5248412	total: 5.1s	remaining: 169ms
968:	learn: 34.4951464	total: 5.12s	remaining: 164ms
969:	learn: 34.4730515	total: 5.13s	remaining: 159ms
970:	learn: 34.4262915	total: 5.14s	remaining: 154ms
971:	learn: 34.3936339	total: 5.15s	remaining: 148ms
972:	learn: 34.3507958	total: 5.17s	remaining: 143ms
973:	learn: 34.3234176	total: 5.18s	remaining: 138ms
974:	learn: 34.2676919	total: 5.2s	remaining: 133ms
975:	learn: 34.2527758	total: 5.2s	remaining: 128ms
976:	learn: 34.2329264	total: 5.2s	remaining: 122ms
977:	learn: 34.2090299	total: 5.2s	remaining: 117ms
978:	learn: 34.1733834	total: 5.21s	remaining: 112ms
979:	learn: 34.1524903	total: 5.21s	remaining: 106ms
980:	learn: 34.1327660	total: 5.21s	remaining: 101ms
981:	learn: 34.1040448	total: 5.21s	remaining: 95.5ms
982:	learn: 34.0529428	total: 5.21s	remaining: 90.1ms
983:	learn: 34.0223039	total: 5.21s	remaining: 84.8ms
984:	learn: 33.9883740	total: 5.21s	remaining: 79.4ms
985:	learn: 33.9572400	total: 5.21s	remaining: 

In [8]:
ev_ = explained_variance_score(y_test, y_pred)
print("\tExplained variance:", ev_)
mae_ = mean_absolute_error(y_test, y_pred)
print("\tMean absolute error:", mae_)
r2_ = r2_score(y_test, y_pred)
print("\tR2 score:", r2_)
print()

	Explained variance: 0.9814145542462404
	Mean absolute error: 36.84444338132358
	R2 score: 0.9813679019618019



In [24]:
# GridSearchCV parameters
parameters = {
    # tree의 깊이, default: 6
    'max_depth': [4, 6, 8, 10], 
    
    # 생성 가능한 최대 tree 갯수, default: 1000
    'n_estimators': [1000, 1500, 2000],
    
    # 오차 함수에서 조정하는 step의 크기, default: 자동 설정(0.03)
    'learning_rate': [0.01, 0.03, 0.1, None],
    
    # tree에 랜덤하게 부여되는 가중치, defaul: 1
    'random_strength': [1, 1.2, 1.5, 2, 4]
}

In [25]:
from sklearn.model_selection import GridSearchCV

model = CatBoostRegressor()
model = model.fit(X_train, y_train)
y_pred = model.predict(X_test)

grid = GridSearchCV(model, parameters, n_jobs=6, verbose=10)
model = grid.fit(X,y)

Learning rate set to 0.0462
0:	learn: 411.9835942	total: 4.26ms	remaining: 4.26s
1:	learn: 399.2638885	total: 8.43ms	remaining: 4.2s
2:	learn: 386.9914956	total: 11.7ms	remaining: 3.89s
3:	learn: 375.1169183	total: 14.5ms	remaining: 3.61s
4:	learn: 363.4540440	total: 17.6ms	remaining: 3.5s
5:	learn: 352.3173797	total: 20.5ms	remaining: 3.4s
6:	learn: 342.1581667	total: 23.2ms	remaining: 3.29s
7:	learn: 332.6173845	total: 26.2ms	remaining: 3.25s
8:	learn: 323.3894917	total: 29.1ms	remaining: 3.21s
9:	learn: 314.5482799	total: 31.8ms	remaining: 3.15s
10:	learn: 305.4289867	total: 34.7ms	remaining: 3.12s
11:	learn: 296.7823077	total: 37.3ms	remaining: 3.07s
12:	learn: 289.6188880	total: 40ms	remaining: 3.04s
13:	learn: 281.5116338	total: 43.1ms	remaining: 3.04s
14:	learn: 274.0491608	total: 46ms	remaining: 3.02s
15:	learn: 267.8042729	total: 48.4ms	remaining: 2.98s
16:	learn: 262.2963716	total: 51ms	remaining: 2.95s
17:	learn: 256.0377489	total: 53.5ms	remaining: 2.92s
18:	learn: 249.8287

240:	learn: 84.3992064	total: 561ms	remaining: 1.77s
241:	learn: 84.3089981	total: 564ms	remaining: 1.77s
242:	learn: 84.1218043	total: 566ms	remaining: 1.76s
243:	learn: 83.9972145	total: 568ms	remaining: 1.76s
244:	learn: 83.7986732	total: 570ms	remaining: 1.76s
245:	learn: 83.7683927	total: 572ms	remaining: 1.75s
246:	learn: 83.5673102	total: 575ms	remaining: 1.75s
247:	learn: 83.5101244	total: 577ms	remaining: 1.75s
248:	learn: 83.4182637	total: 579ms	remaining: 1.75s
249:	learn: 83.1922093	total: 592ms	remaining: 1.78s
250:	learn: 83.0358982	total: 604ms	remaining: 1.8s
251:	learn: 82.9416049	total: 615ms	remaining: 1.82s
252:	learn: 82.7584701	total: 627ms	remaining: 1.85s
253:	learn: 82.6941426	total: 638ms	remaining: 1.87s
254:	learn: 82.5500672	total: 650ms	remaining: 1.9s
255:	learn: 82.3940647	total: 662ms	remaining: 1.92s
256:	learn: 82.1953210	total: 675ms	remaining: 1.95s
257:	learn: 82.0150980	total: 686ms	remaining: 1.97s
258:	learn: 81.7586111	total: 698ms	remaining: 2

477:	learn: 58.0609928	total: 1.33s	remaining: 1.46s
478:	learn: 58.0172277	total: 1.34s	remaining: 1.46s
479:	learn: 57.9921638	total: 1.34s	remaining: 1.45s
480:	learn: 57.9292059	total: 1.34s	remaining: 1.45s
481:	learn: 57.8659021	total: 1.34s	remaining: 1.44s
482:	learn: 57.8158680	total: 1.35s	remaining: 1.44s
483:	learn: 57.7467760	total: 1.35s	remaining: 1.44s
484:	learn: 57.6110874	total: 1.35s	remaining: 1.43s
485:	learn: 57.5340488	total: 1.35s	remaining: 1.43s
486:	learn: 57.4845285	total: 1.35s	remaining: 1.43s
487:	learn: 57.4308455	total: 1.36s	remaining: 1.42s
488:	learn: 57.4123838	total: 1.36s	remaining: 1.42s
489:	learn: 57.3708701	total: 1.36s	remaining: 1.42s
490:	learn: 57.2877623	total: 1.36s	remaining: 1.41s
491:	learn: 57.1906952	total: 1.37s	remaining: 1.41s
492:	learn: 57.1392468	total: 1.37s	remaining: 1.41s
493:	learn: 57.0837972	total: 1.37s	remaining: 1.4s
494:	learn: 57.0633561	total: 1.37s	remaining: 1.4s
495:	learn: 57.0363710	total: 1.37s	remaining: 1

647:	learn: 48.4287682	total: 2.34s	remaining: 1.27s
648:	learn: 48.3999983	total: 2.35s	remaining: 1.27s
649:	learn: 48.2379316	total: 2.37s	remaining: 1.27s
650:	learn: 48.1157669	total: 2.38s	remaining: 1.28s
651:	learn: 48.0543391	total: 2.39s	remaining: 1.28s
652:	learn: 47.9705986	total: 2.41s	remaining: 1.28s
653:	learn: 47.8584839	total: 2.42s	remaining: 1.28s
654:	learn: 47.7866942	total: 2.44s	remaining: 1.28s
655:	learn: 47.7232628	total: 2.45s	remaining: 1.28s
656:	learn: 47.6671345	total: 2.47s	remaining: 1.29s
657:	learn: 47.6408722	total: 2.48s	remaining: 1.29s
658:	learn: 47.5605594	total: 2.5s	remaining: 1.29s
659:	learn: 47.5214957	total: 2.51s	remaining: 1.29s
660:	learn: 47.4727002	total: 2.52s	remaining: 1.29s
661:	learn: 47.4178049	total: 2.54s	remaining: 1.3s
662:	learn: 47.4107856	total: 2.56s	remaining: 1.3s
663:	learn: 47.3741776	total: 2.57s	remaining: 1.3s
664:	learn: 47.3332580	total: 2.58s	remaining: 1.3s
665:	learn: 47.2897835	total: 2.6s	remaining: 1.3s


816:	learn: 40.4703943	total: 4.17s	remaining: 935ms
817:	learn: 40.3965207	total: 4.19s	remaining: 932ms
818:	learn: 40.3708532	total: 4.2s	remaining: 929ms
819:	learn: 40.2997361	total: 4.22s	remaining: 926ms
820:	learn: 40.2748991	total: 4.23s	remaining: 922ms
821:	learn: 40.2338506	total: 4.24s	remaining: 919ms
822:	learn: 40.2029516	total: 4.26s	remaining: 915ms
823:	learn: 40.1682540	total: 4.27s	remaining: 912ms
824:	learn: 40.1270283	total: 4.28s	remaining: 908ms
825:	learn: 40.0961527	total: 4.29s	remaining: 905ms
826:	learn: 40.0379300	total: 4.31s	remaining: 901ms
827:	learn: 40.0144653	total: 4.32s	remaining: 898ms
828:	learn: 39.9631242	total: 4.33s	remaining: 894ms
829:	learn: 39.9078815	total: 4.35s	remaining: 891ms
830:	learn: 39.8882108	total: 4.36s	remaining: 887ms
831:	learn: 39.8618369	total: 4.38s	remaining: 884ms
832:	learn: 39.8204030	total: 4.39s	remaining: 880ms
833:	learn: 39.7907401	total: 4.4s	remaining: 876ms
834:	learn: 39.7627566	total: 4.41s	remaining: 8

0:	learn: 418.7266318	total: 2.21ms	remaining: 2.2s
1:	learn: 415.9469538	total: 3.94ms	remaining: 1.97s
2:	learn: 413.3990145	total: 5.45ms	remaining: 1.81s
3:	learn: 410.7734094	total: 6.84ms	remaining: 1.7s
4:	learn: 407.8149216	total: 8.31ms	remaining: 1.65s
5:	learn: 405.3779116	total: 9.69ms	remaining: 1.6s
6:	learn: 403.1005220	total: 11.1ms	remaining: 1.57s
7:	learn: 400.3092262	total: 12.5ms	remaining: 1.55s
8:	learn: 397.6226516	total: 14ms	remaining: 1.54s
9:	learn: 394.8631369	total: 15.5ms	remaining: 1.53s
10:	learn: 392.1944917	total: 17ms	remaining: 1.53s
11:	learn: 389.6212433	total: 18.5ms	remaining: 1.52s
12:	learn: 387.2163850	total: 19.9ms	remaining: 1.51s
13:	learn: 384.7078099	total: 21.5ms	remaining: 1.51s
14:	learn: 382.3077127	total: 22.9ms	remaining: 1.5s
15:	learn: 379.9897562	total: 24.3ms	remaining: 1.49s
16:	learn: 377.8788775	total: 25.8ms	remaining: 1.49s
17:	learn: 375.4381465	total: 27.2ms	remaining: 1.48s
18:	learn: 373.1601044	total: 28.7ms	remaining

241:	learn: 154.6046401	total: 368ms	remaining: 1.15s
242:	learn: 154.3068333	total: 370ms	remaining: 1.15s
243:	learn: 153.9298685	total: 372ms	remaining: 1.15s
244:	learn: 153.6326282	total: 373ms	remaining: 1.15s
245:	learn: 153.3238318	total: 375ms	remaining: 1.15s
246:	learn: 153.0152148	total: 376ms	remaining: 1.15s
247:	learn: 152.8000712	total: 378ms	remaining: 1.15s
248:	learn: 152.5597504	total: 379ms	remaining: 1.14s
249:	learn: 152.3370158	total: 381ms	remaining: 1.14s
250:	learn: 152.0488693	total: 382ms	remaining: 1.14s
251:	learn: 151.7224215	total: 384ms	remaining: 1.14s
252:	learn: 151.4974185	total: 386ms	remaining: 1.14s
253:	learn: 151.1955349	total: 387ms	remaining: 1.14s
254:	learn: 150.9060308	total: 389ms	remaining: 1.14s
255:	learn: 150.6124411	total: 390ms	remaining: 1.13s
256:	learn: 150.3126449	total: 392ms	remaining: 1.13s
257:	learn: 150.1128452	total: 393ms	remaining: 1.13s
258:	learn: 149.8289026	total: 394ms	remaining: 1.13s
259:	learn: 149.6004438	tota

488:	learn: 119.0898965	total: 739ms	remaining: 773ms
489:	learn: 119.0493338	total: 741ms	remaining: 771ms
490:	learn: 118.9300569	total: 743ms	remaining: 770ms
491:	learn: 118.8265909	total: 744ms	remaining: 769ms
492:	learn: 118.7870405	total: 746ms	remaining: 767ms
493:	learn: 118.7342891	total: 747ms	remaining: 766ms
494:	learn: 118.6566781	total: 749ms	remaining: 764ms
495:	learn: 118.5446799	total: 750ms	remaining: 763ms
496:	learn: 118.4744099	total: 752ms	remaining: 761ms
497:	learn: 118.3800344	total: 754ms	remaining: 760ms
498:	learn: 118.3246330	total: 755ms	remaining: 758ms
499:	learn: 118.2783503	total: 757ms	remaining: 757ms
500:	learn: 118.2330188	total: 758ms	remaining: 755ms
501:	learn: 118.1753395	total: 760ms	remaining: 754ms
502:	learn: 118.0593784	total: 761ms	remaining: 752ms
503:	learn: 117.9543047	total: 763ms	remaining: 751ms
504:	learn: 117.9129150	total: 764ms	remaining: 749ms
505:	learn: 117.7867333	total: 766ms	remaining: 747ms
506:	learn: 117.7564088	tota

737:	learn: 103.8143656	total: 1.12s	remaining: 398ms
738:	learn: 103.7380709	total: 1.12s	remaining: 397ms
739:	learn: 103.7055969	total: 1.12s	remaining: 395ms
740:	learn: 103.6775833	total: 1.13s	remaining: 394ms
741:	learn: 103.5930627	total: 1.13s	remaining: 392ms
742:	learn: 103.5288711	total: 1.13s	remaining: 391ms
743:	learn: 103.4773031	total: 1.13s	remaining: 389ms
744:	learn: 103.4156211	total: 1.13s	remaining: 388ms
745:	learn: 103.2953721	total: 1.13s	remaining: 386ms
746:	learn: 103.2426301	total: 1.14s	remaining: 385ms
747:	learn: 103.2117706	total: 1.14s	remaining: 383ms
748:	learn: 103.1824879	total: 1.14s	remaining: 382ms
749:	learn: 103.1285792	total: 1.14s	remaining: 380ms
750:	learn: 103.0844523	total: 1.14s	remaining: 379ms
751:	learn: 103.0363573	total: 1.14s	remaining: 377ms
752:	learn: 102.9933813	total: 1.14s	remaining: 375ms
753:	learn: 102.9435423	total: 1.15s	remaining: 374ms
754:	learn: 102.8735430	total: 1.15s	remaining: 372ms
755:	learn: 102.8356979	tota

982:	learn: 91.8094817	total: 2.93s	remaining: 50.6ms
983:	learn: 91.7678950	total: 2.93s	remaining: 47.6ms
984:	learn: 91.7007849	total: 2.93s	remaining: 44.6ms
985:	learn: 91.6402468	total: 2.93s	remaining: 41.6ms
986:	learn: 91.5963595	total: 2.93s	remaining: 38.6ms
987:	learn: 91.5555213	total: 2.94s	remaining: 35.7ms
988:	learn: 91.4542167	total: 2.94s	remaining: 32.7ms
989:	learn: 91.4414816	total: 2.94s	remaining: 29.7ms
990:	learn: 91.4249448	total: 2.94s	remaining: 26.7ms
991:	learn: 91.3712018	total: 2.94s	remaining: 23.7ms
992:	learn: 91.3226594	total: 2.94s	remaining: 20.7ms
993:	learn: 91.2989211	total: 2.94s	remaining: 17.8ms
994:	learn: 91.2369116	total: 2.95s	remaining: 14.8ms
995:	learn: 91.2085762	total: 2.95s	remaining: 11.8ms
996:	learn: 91.1731265	total: 2.95s	remaining: 8.87ms
997:	learn: 91.1481787	total: 2.95s	remaining: 5.91ms
998:	learn: 91.1236479	total: 2.95s	remaining: 2.96ms
999:	learn: 91.0730746	total: 2.95s	remaining: 0us


In [26]:
model

GridSearchCV(estimator=<catboost.core.CatBoostRegressor object at 0x000001492B4D59D0>,
             n_jobs=6,
             param_grid={'learning_rate': [0.01, 0.03, 0.1, None],
                         'max_depth': [4, 6, 8, 10],
                         'n_estimators': [1000, 1500, 2000],
                         'random_strength': [1, 1.2, 1.5, 2, 4]},
             verbose=10)

In [31]:
params = model.best_params_
params

{'learning_rate': 0.01,
 'max_depth': 6,
 'n_estimators': 1000,
 'random_strength': 2}

In [28]:
model.best_estimator_

<catboost.core.CatBoostRegressor at 0x1492dfb7220>

In [43]:
new_model = CatBoostRegressor(
   learning_rate=0.01, max_depth=6, n_estimators=1000, random_strength=2
)
new_model = new_model.fit(X_train, y_train)
y_pred = new_model.predict(X_test)

0:	learn: 422.2660997	total: 2.1ms	remaining: 2.1s
1:	learn: 419.5266003	total: 4.05ms	remaining: 2.02s
2:	learn: 416.9758172	total: 6.06ms	remaining: 2.01s
3:	learn: 414.6512255	total: 7.58ms	remaining: 1.89s
4:	learn: 411.8653318	total: 8.8ms	remaining: 1.75s
5:	learn: 409.3881384	total: 10ms	remaining: 1.66s
6:	learn: 407.0937763	total: 11.3ms	remaining: 1.6s
7:	learn: 404.3043907	total: 13ms	remaining: 1.61s
8:	learn: 401.6222831	total: 14.5ms	remaining: 1.6s
9:	learn: 399.0184657	total: 15.8ms	remaining: 1.56s
10:	learn: 396.4846530	total: 17.2ms	remaining: 1.55s
11:	learn: 393.9015117	total: 18.4ms	remaining: 1.52s
12:	learn: 391.5338898	total: 19.6ms	remaining: 1.49s
13:	learn: 389.0384064	total: 20.9ms	remaining: 1.47s
14:	learn: 386.6649369	total: 22.1ms	remaining: 1.45s
15:	learn: 384.1456096	total: 23.3ms	remaining: 1.44s
16:	learn: 382.0476546	total: 24.6ms	remaining: 1.42s
17:	learn: 379.7535729	total: 26ms	remaining: 1.42s
18:	learn: 377.5075141	total: 27.6ms	remaining: 1

277:	learn: 145.9986470	total: 370ms	remaining: 962ms
278:	learn: 145.7757798	total: 372ms	remaining: 961ms
279:	learn: 145.4865175	total: 373ms	remaining: 960ms
280:	learn: 145.2756775	total: 375ms	remaining: 958ms
281:	learn: 145.0466211	total: 376ms	remaining: 957ms
282:	learn: 144.8343495	total: 377ms	remaining: 956ms
283:	learn: 144.6001497	total: 379ms	remaining: 954ms
284:	learn: 144.4120801	total: 380ms	remaining: 953ms
285:	learn: 144.1873491	total: 381ms	remaining: 952ms
286:	learn: 143.9952869	total: 383ms	remaining: 951ms
287:	learn: 143.6205011	total: 384ms	remaining: 949ms
288:	learn: 143.4221509	total: 385ms	remaining: 948ms
289:	learn: 143.2037890	total: 387ms	remaining: 947ms
290:	learn: 142.9575874	total: 388ms	remaining: 945ms
291:	learn: 142.6812464	total: 389ms	remaining: 943ms
292:	learn: 142.5062671	total: 390ms	remaining: 942ms
293:	learn: 142.2456783	total: 392ms	remaining: 940ms
294:	learn: 142.0448430	total: 393ms	remaining: 939ms
295:	learn: 141.8724784	tota

559:	learn: 114.5091607	total: 741ms	remaining: 582ms
560:	learn: 114.4117612	total: 743ms	remaining: 581ms
561:	learn: 114.3279090	total: 744ms	remaining: 580ms
562:	learn: 114.2974488	total: 745ms	remaining: 579ms
563:	learn: 114.1950679	total: 747ms	remaining: 577ms
564:	learn: 114.1139049	total: 748ms	remaining: 576ms
565:	learn: 114.0682170	total: 749ms	remaining: 575ms
566:	learn: 113.9701771	total: 751ms	remaining: 573ms
567:	learn: 113.8516418	total: 752ms	remaining: 572ms
568:	learn: 113.8007580	total: 753ms	remaining: 571ms
569:	learn: 113.7634611	total: 755ms	remaining: 569ms
570:	learn: 113.6995878	total: 756ms	remaining: 568ms
571:	learn: 113.6451220	total: 757ms	remaining: 567ms
572:	learn: 113.5777449	total: 759ms	remaining: 565ms
573:	learn: 113.5184802	total: 760ms	remaining: 564ms
574:	learn: 113.4840435	total: 761ms	remaining: 563ms
575:	learn: 113.4519343	total: 762ms	remaining: 561ms
576:	learn: 113.4134107	total: 764ms	remaining: 560ms
577:	learn: 113.3508477	tota

712:	learn: 105.9767560	total: 938ms	remaining: 378ms
713:	learn: 105.9152717	total: 940ms	remaining: 377ms
714:	learn: 105.8272125	total: 941ms	remaining: 375ms
715:	learn: 105.7466058	total: 943ms	remaining: 374ms
716:	learn: 105.6941115	total: 944ms	remaining: 373ms
717:	learn: 105.6501418	total: 945ms	remaining: 371ms
718:	learn: 105.6220403	total: 947ms	remaining: 370ms
719:	learn: 105.5523734	total: 948ms	remaining: 369ms
720:	learn: 105.4976136	total: 950ms	remaining: 368ms
721:	learn: 105.4065335	total: 951ms	remaining: 366ms
722:	learn: 105.3878117	total: 953ms	remaining: 365ms
723:	learn: 105.3343675	total: 955ms	remaining: 364ms
724:	learn: 105.2884928	total: 956ms	remaining: 363ms
725:	learn: 105.1849632	total: 957ms	remaining: 361ms
726:	learn: 105.1315986	total: 959ms	remaining: 360ms
727:	learn: 105.0740633	total: 960ms	remaining: 359ms
728:	learn: 104.9927523	total: 961ms	remaining: 357ms
729:	learn: 104.9470920	total: 963ms	remaining: 356ms
730:	learn: 104.8931749	tota

988:	learn: 91.5878602	total: 1.33s	remaining: 14.8ms
989:	learn: 91.5715284	total: 1.34s	remaining: 13.6ms
990:	learn: 91.5135056	total: 1.36s	remaining: 12.3ms
991:	learn: 91.4661695	total: 1.37s	remaining: 11.1ms
992:	learn: 91.4469235	total: 1.39s	remaining: 9.78ms
993:	learn: 91.3845977	total: 1.4s	remaining: 8.46ms
994:	learn: 91.3349210	total: 1.42s	remaining: 7.12ms
995:	learn: 91.3102104	total: 1.43s	remaining: 5.75ms
996:	learn: 91.2691586	total: 1.45s	remaining: 4.35ms
997:	learn: 91.2280443	total: 1.46s	remaining: 2.92ms
998:	learn: 91.1889204	total: 1.47s	remaining: 1.48ms
999:	learn: 91.1078299	total: 1.49s	remaining: 0us


In [44]:
print("\tExplained variance:", explained_variance_score(y_test, y_pred))
print("\tMean absolute error:", mean_absolute_error(y_test, y_pred))
print("\tR2 score:", r2_score(y_test, y_pred))
print()

	Explained variance: 0.931080804891035
	Mean absolute error: 76.57144648162803
	R2 score: 0.9310625839808818



In [45]:
scores_df = pd.DataFrame(grid.cv_results_)
scores_df

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_learning_rate,param_max_depth,param_n_estimators,param_random_strength,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,split4_test_score,mean_test_score,std_test_score,rank_test_score
0,12.432544,0.108272,0.018810,0.011679,0.01,4,1000,1,"{'learning_rate': 0.01, 'max_depth': 4, 'n_est...",0.280511,0.861714,0.686899,0.766709,0.725451,0.664257,0.200501,40
1,13.936650,1.277769,0.031259,0.000012,0.01,4,1000,1.2,"{'learning_rate': 0.01, 'max_depth': 4, 'n_est...",0.289619,0.859776,0.687377,0.772512,0.719704,0.665798,0.196925,32
2,14.728018,0.951881,0.025002,0.012501,0.01,4,1000,1.5,"{'learning_rate': 0.01, 'max_depth': 4, 'n_est...",0.295861,0.856272,0.692892,0.775840,0.707161,0.665605,0.193746,34
3,14.014475,0.783011,0.020350,0.009327,0.01,4,1000,2,"{'learning_rate': 0.01, 'max_depth': 4, 'n_est...",0.326573,0.853203,0.685122,0.780834,0.713531,0.671853,0.182152,15
4,12.218783,1.199947,0.021828,0.010590,0.01,4,1000,4,"{'learning_rate': 0.01, 'max_depth': 4, 'n_est...",0.363784,0.845121,0.682998,0.782245,0.703707,0.675571,0.166244,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,140.862721,4.108252,0.025006,0.007281,,10,2000,1,"{'learning_rate': None, 'max_depth': 10, 'n_es...",0.643536,0.785456,0.689632,0.629569,0.392550,0.628149,0.129824,207
236,138.644062,3.331646,0.025820,0.013669,,10,2000,1.2,"{'learning_rate': None, 'max_depth': 10, 'n_es...",0.644854,0.783488,0.678432,0.662838,0.377862,0.629495,0.134741,203
237,139.452748,3.344479,0.021953,0.005044,,10,2000,1.5,"{'learning_rate': None, 'max_depth': 10, 'n_es...",0.621353,0.775353,0.692167,0.672767,0.437443,0.639817,0.112688,169
238,141.571976,3.159019,0.025340,0.005378,,10,2000,2,"{'learning_rate': None, 'max_depth': 10, 'n_es...",0.632886,0.773084,0.682834,0.674870,0.439973,0.640730,0.110291,159


In [46]:
import joblib

joblib.dump(new_model, 'Catboost_GridSearchCV_model.pkl')

['Catboost_GridSearchCV_model.pkl']