In [1]:
#!pip install catboost

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
import time
from catboost import Pool
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [3]:
test = pd.read_csv('./data/test.csv')

In [4]:
df = pd.read_pickle('./data/data_3.pkl')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8029111 entries, 827196 to 8856306
Data columns (total 41 columns):
 #   Column                       Dtype  
---  ------                       -----  
 0   month_id                     int8   
 1   shop_id                      int8   
 2   item_id                      int16  
 3   cnt_month                    float32
 4   city_id                      int8   
 5   cat_id                       int8   
 6   main_cat                     int8   
 7   sub_cat                      int8   
 8   year                         int16  
 9   month                        int8   
 10  sales_month                  int8   
 11  cnt_month_1                  float32
 12  cnt_month_2                  float32
 13  cnt_month_3                  float32
 14  cnt_month_6                  float32
 15  cnt_month_12                 float32
 16  item_id_mean_cnt_1           float32
 17  item_id_mean_cnt_2           float32
 18  item_id_mean_cnt_3           float32


In [6]:
df.head()

Unnamed: 0,month_id,shop_id,item_id,cnt_month,city_id,cat_id,main_cat,sub_cat,year,month,...,item_id/city_id_mean_cnt_1,item_id/city_id_mean_cnt_2,item_id/city_id_mean_cnt_3,item_id/city_id_mean_cnt_6,item_id/city_id_mean_cnt_12,shop_id/cat_id_mean_cnt_1,shop_id/cat_id_mean_cnt_2,shop_id/cat_id_mean_cnt_3,shop_id/cat_id_mean_cnt_6,shop_id/cat_id_mean_cnt_12
827196,3,25,8092,3.0,13,40,11,6,2013,4,...,1.0,2.142857,0.0,0.0,0.0,1.087262,1.049296,0.0,0.0,0.0
827197,3,25,7850,3.0,13,30,8,54,2013,4,...,3.714286,2.285714,0.0,0.0,0.0,3.654494,2.279188,0.0,0.0,0.0
827198,3,25,8051,3.0,13,66,14,47,2013,4,...,0.0,0.428571,0.0,0.0,0.0,1.827586,1.389474,0.0,0.0,0.0
827199,3,25,8088,1.0,13,55,13,4,2013,4,...,0.285714,0.0,0.0,0.0,0.0,0.599845,0.562107,0.0,0.0,0.0
827200,3,25,8089,1.0,13,55,13,4,2013,4,...,0.571429,0.0,0.0,0.0,0.0,0.599845,0.562107,0.0,0.0,0.0


In [7]:
df.columns

Index(['month_id', 'shop_id', 'item_id', 'cnt_month', 'city_id', 'cat_id',
       'main_cat', 'sub_cat', 'year', 'month', 'sales_month', 'cnt_month_1',
       'cnt_month_2', 'cnt_month_3', 'cnt_month_6', 'cnt_month_12',
       'item_id_mean_cnt_1', 'item_id_mean_cnt_2', 'item_id_mean_cnt_3',
       'item_id_mean_cnt_6', 'item_id_mean_cnt_12', 'sub_cat_mean_cnt_1',
       'sub_cat_mean_cnt_2', 'sub_cat_mean_cnt_3', 'sub_cat_mean_cnt_6',
       'sub_cat_mean_cnt_12', 'price_mean_mean_cnt_1', 'price_mean_mean_cnt_2',
       'price_mean_mean_cnt_3', 'price_mean_mean_cnt_6',
       'price_mean_mean_cnt_12', 'item_id/city_id_mean_cnt_1',
       'item_id/city_id_mean_cnt_2', 'item_id/city_id_mean_cnt_3',
       'item_id/city_id_mean_cnt_6', 'item_id/city_id_mean_cnt_12',
       'shop_id/cat_id_mean_cnt_1', 'shop_id/cat_id_mean_cnt_2',
       'shop_id/cat_id_mean_cnt_3', 'shop_id/cat_id_mean_cnt_6',
       'shop_id/cat_id_mean_cnt_12'],
      dtype='object')

In [8]:
s = ['cnt_month','cnt_month_1','cnt_month_2', 'cnt_month_3', 'cnt_month_6', 'cnt_month_12',
     'item_id_mean_cnt_1', 'item_id_mean_cnt_2', 'item_id_mean_cnt_3', 'item_id_mean_cnt_6', 'item_id_mean_cnt_12', 
     'sub_cat_mean_cnt_1', 'sub_cat_mean_cnt_2', 'sub_cat_mean_cnt_3', 'sub_cat_mean_cnt_6', 'sub_cat_mean_cnt_12', 
     'price_mean_mean_cnt_1', 'price_mean_mean_cnt_2','price_mean_mean_cnt_3', 'price_mean_mean_cnt_6','price_mean_mean_cnt_12',
     'item_id/city_id_mean_cnt_1', 'item_id/city_id_mean_cnt_2', 'item_id/city_id_mean_cnt_3','item_id/city_id_mean_cnt_6', 'item_id/city_id_mean_cnt_12',
     'shop_id/cat_id_mean_cnt_1', 'shop_id/cat_id_mean_cnt_2','shop_id/cat_id_mean_cnt_3', 'shop_id/cat_id_mean_cnt_6','shop_id/cat_id_mean_cnt_12']

In [9]:
# value clip(0~20)
df[s] = df[s].clip(0, 20)

In [10]:
# train/validation/test data split
X_train = df[df['month_id'] < 33].drop(['cnt_month'], axis=1)
Y_train = df[df['month_id'] < 33]['cnt_month']

X_valid = df[df['month_id'] == 33].drop(['cnt_month'], axis=1)
Y_valid = df[df['month_id'] == 33]['cnt_month']

X_test = df[df['month_id'] == 34].drop(['cnt_month'], axis=1)

In [11]:
model = CatBoostRegressor()

In [12]:
ts = time.time()

model.fit(
    X_train, Y_train, 
    eval_set=[(X_train, Y_train), (X_valid, Y_valid)],  # 유효성 검사 데이터 세트
    )

print('소요시간: ',time.time()-ts)

Learning rate set to 0.207073
0:	learn: 1.1471117	test: 1.1471117	test1: 1.0152585	best: 1.0152585 (0)	total: 1.16s	remaining: 19m 18s
1:	learn: 1.0866624	test: 1.0866624	test1: 0.9702679	best: 0.9702679 (1)	total: 2.2s	remaining: 18m 19s
2:	learn: 1.0456200	test: 1.0456200	test1: 0.9386596	best: 0.9386596 (2)	total: 3.29s	remaining: 18m 14s
3:	learn: 1.0161887	test: 1.0161887	test1: 0.9207340	best: 0.9207340 (3)	total: 4.57s	remaining: 18m 59s
4:	learn: 0.9912736	test: 0.9912736	test1: 0.9017455	best: 0.9017455 (4)	total: 5.9s	remaining: 19m 34s
5:	learn: 0.9757091	test: 0.9757091	test1: 0.8930655	best: 0.8930655 (5)	total: 7.16s	remaining: 19m 45s
6:	learn: 0.9614103	test: 0.9614103	test1: 0.8824695	best: 0.8824695 (6)	total: 8.41s	remaining: 19m 52s
7:	learn: 0.9527902	test: 0.9527902	test1: 0.8772420	best: 0.8772420 (7)	total: 9.53s	remaining: 19m 41s
8:	learn: 0.9461487	test: 0.9461487	test1: 0.8718173	best: 0.8718173 (8)	total: 10.6s	remaining: 19m 23s
9:	learn: 0.9412066	test: 0

77:	learn: 0.8781333	test: 0.8781333	test1: 0.8195691	best: 0.8195691 (77)	total: 1m 37s	remaining: 19m 8s
78:	learn: 0.8779391	test: 0.8779391	test1: 0.8192753	best: 0.8192753 (78)	total: 1m 38s	remaining: 19m 5s
79:	learn: 0.8776764	test: 0.8776764	test1: 0.8192791	best: 0.8192753 (78)	total: 1m 39s	remaining: 19m 1s
80:	learn: 0.8774030	test: 0.8774030	test1: 0.8189159	best: 0.8189159 (80)	total: 1m 40s	remaining: 19m
81:	learn: 0.8772407	test: 0.8772407	test1: 0.8188021	best: 0.8188021 (81)	total: 1m 41s	remaining: 18m 56s
82:	learn: 0.8770024	test: 0.8770024	test1: 0.8189936	best: 0.8188021 (81)	total: 1m 42s	remaining: 18m 53s
83:	learn: 0.8767781	test: 0.8767781	test1: 0.8189216	best: 0.8188021 (81)	total: 1m 43s	remaining: 18m 49s
84:	learn: 0.8764075	test: 0.8764075	test1: 0.8186791	best: 0.8186791 (84)	total: 1m 44s	remaining: 18m 47s
85:	learn: 0.8758768	test: 0.8758768	test1: 0.8183862	best: 0.8183862 (85)	total: 1m 45s	remaining: 18m 45s
86:	learn: 0.8755392	test: 0.875539

153:	learn: 0.8588628	test: 0.8588628	test1: 0.8100287	best: 0.8090204 (143)	total: 3m 8s	remaining: 17m 17s
154:	learn: 0.8587516	test: 0.8587516	test1: 0.8101709	best: 0.8090204 (143)	total: 3m 9s	remaining: 17m 15s
155:	learn: 0.8585197	test: 0.8585197	test1: 0.8102005	best: 0.8090204 (143)	total: 3m 11s	remaining: 17m 13s
156:	learn: 0.8583671	test: 0.8583671	test1: 0.8100142	best: 0.8090204 (143)	total: 3m 12s	remaining: 17m 12s
157:	learn: 0.8582893	test: 0.8582893	test1: 0.8098904	best: 0.8090204 (143)	total: 3m 13s	remaining: 17m 11s
158:	learn: 0.8578725	test: 0.8578725	test1: 0.8094025	best: 0.8090204 (143)	total: 3m 14s	remaining: 17m 10s
159:	learn: 0.8577686	test: 0.8577686	test1: 0.8093450	best: 0.8090204 (143)	total: 3m 15s	remaining: 17m 8s
160:	learn: 0.8576593	test: 0.8576593	test1: 0.8094024	best: 0.8090204 (143)	total: 3m 17s	remaining: 17m 8s
161:	learn: 0.8574940	test: 0.8574940	test1: 0.8094610	best: 0.8090204 (143)	total: 3m 18s	remaining: 17m 6s
162:	learn: 0.8

228:	learn: 0.8468689	test: 0.8468689	test1: 0.8104007	best: 0.8090204 (143)	total: 4m 41s	remaining: 15m 47s
229:	learn: 0.8466008	test: 0.8466008	test1: 0.8101002	best: 0.8090204 (143)	total: 4m 42s	remaining: 15m 47s
230:	learn: 0.8464541	test: 0.8464541	test1: 0.8102583	best: 0.8090204 (143)	total: 4m 44s	remaining: 15m 45s
231:	learn: 0.8463480	test: 0.8463480	test1: 0.8102178	best: 0.8090204 (143)	total: 4m 45s	remaining: 15m 44s
232:	learn: 0.8462225	test: 0.8462225	test1: 0.8102407	best: 0.8090204 (143)	total: 4m 46s	remaining: 15m 43s
233:	learn: 0.8460842	test: 0.8460842	test1: 0.8099741	best: 0.8090204 (143)	total: 4m 48s	remaining: 15m 42s
234:	learn: 0.8459524	test: 0.8459524	test1: 0.8098992	best: 0.8090204 (143)	total: 4m 49s	remaining: 15m 41s
235:	learn: 0.8458244	test: 0.8458244	test1: 0.8098821	best: 0.8090204 (143)	total: 4m 50s	remaining: 15m 40s
236:	learn: 0.8457234	test: 0.8457234	test1: 0.8099090	best: 0.8090204 (143)	total: 4m 51s	remaining: 15m 38s
237:	learn

303:	learn: 0.8382034	test: 0.8382034	test1: 0.8105982	best: 0.8085790 (277)	total: 6m 14s	remaining: 14m 17s
304:	learn: 0.8381606	test: 0.8381606	test1: 0.8105745	best: 0.8085790 (277)	total: 6m 15s	remaining: 14m 16s
305:	learn: 0.8380743	test: 0.8380743	test1: 0.8105866	best: 0.8085790 (277)	total: 6m 17s	remaining: 14m 15s
306:	learn: 0.8379810	test: 0.8379810	test1: 0.8105935	best: 0.8085790 (277)	total: 6m 18s	remaining: 14m 14s
307:	learn: 0.8379129	test: 0.8379129	test1: 0.8104504	best: 0.8085790 (277)	total: 6m 19s	remaining: 14m 12s
308:	learn: 0.8378462	test: 0.8378462	test1: 0.8104168	best: 0.8085790 (277)	total: 6m 20s	remaining: 14m 10s
309:	learn: 0.8377886	test: 0.8377886	test1: 0.8103844	best: 0.8085790 (277)	total: 6m 21s	remaining: 14m 9s
310:	learn: 0.8376493	test: 0.8376493	test1: 0.8101356	best: 0.8085790 (277)	total: 6m 22s	remaining: 14m 8s
311:	learn: 0.8375674	test: 0.8375674	test1: 0.8108486	best: 0.8085790 (277)	total: 6m 24s	remaining: 14m 7s
312:	learn: 0

378:	learn: 0.8325474	test: 0.8325474	test1: 0.8107646	best: 0.8085790 (277)	total: 7m 45s	remaining: 12m 42s
379:	learn: 0.8325015	test: 0.8325015	test1: 0.8107587	best: 0.8085790 (277)	total: 7m 46s	remaining: 12m 41s
380:	learn: 0.8324706	test: 0.8324706	test1: 0.8107110	best: 0.8085790 (277)	total: 7m 47s	remaining: 12m 40s
381:	learn: 0.8324010	test: 0.8324010	test1: 0.8107145	best: 0.8085790 (277)	total: 7m 48s	remaining: 12m 38s
382:	learn: 0.8323489	test: 0.8323489	test1: 0.8106817	best: 0.8085790 (277)	total: 7m 49s	remaining: 12m 37s
383:	learn: 0.8322858	test: 0.8322858	test1: 0.8106625	best: 0.8085790 (277)	total: 7m 51s	remaining: 12m 35s
384:	learn: 0.8322061	test: 0.8322061	test1: 0.8106317	best: 0.8085790 (277)	total: 7m 52s	remaining: 12m 34s
385:	learn: 0.8321130	test: 0.8321130	test1: 0.8106521	best: 0.8085790 (277)	total: 7m 53s	remaining: 12m 33s
386:	learn: 0.8320553	test: 0.8320553	test1: 0.8106719	best: 0.8085790 (277)	total: 7m 54s	remaining: 12m 31s
387:	learn

453:	learn: 0.8261811	test: 0.8261811	test1: 0.8085035	best: 0.8085035 (453)	total: 9m 16s	remaining: 11m 8s
454:	learn: 0.8260718	test: 0.8260718	test1: 0.8084475	best: 0.8084475 (454)	total: 9m 17s	remaining: 11m 7s
455:	learn: 0.8260238	test: 0.8260238	test1: 0.8085196	best: 0.8084475 (454)	total: 9m 18s	remaining: 11m 6s
456:	learn: 0.8259821	test: 0.8259821	test1: 0.8084971	best: 0.8084475 (454)	total: 9m 19s	remaining: 11m 4s
457:	learn: 0.8259258	test: 0.8259258	test1: 0.8084637	best: 0.8084475 (454)	total: 9m 20s	remaining: 11m 3s
458:	learn: 0.8258709	test: 0.8258709	test1: 0.8084096	best: 0.8084096 (458)	total: 9m 22s	remaining: 11m 2s
459:	learn: 0.8257676	test: 0.8257676	test1: 0.8084713	best: 0.8084096 (458)	total: 9m 23s	remaining: 11m 1s
460:	learn: 0.8257140	test: 0.8257140	test1: 0.8087433	best: 0.8084096 (458)	total: 9m 24s	remaining: 11m
461:	learn: 0.8255827	test: 0.8255827	test1: 0.8087219	best: 0.8084096 (458)	total: 9m 25s	remaining: 10m 58s
462:	learn: 0.8255176

528:	learn: 0.8206383	test: 0.8206383	test1: 0.8143810	best: 0.8084096 (458)	total: 10m 51s	remaining: 9m 40s
529:	learn: 0.8205965	test: 0.8205965	test1: 0.8144710	best: 0.8084096 (458)	total: 10m 53s	remaining: 9m 39s
530:	learn: 0.8205623	test: 0.8205623	test1: 0.8144348	best: 0.8084096 (458)	total: 10m 54s	remaining: 9m 37s
531:	learn: 0.8205380	test: 0.8205380	test1: 0.8144274	best: 0.8084096 (458)	total: 10m 55s	remaining: 9m 36s
532:	learn: 0.8204907	test: 0.8204907	test1: 0.8144054	best: 0.8084096 (458)	total: 10m 56s	remaining: 9m 35s
533:	learn: 0.8204581	test: 0.8204581	test1: 0.8143391	best: 0.8084096 (458)	total: 10m 57s	remaining: 9m 34s
534:	learn: 0.8204009	test: 0.8204009	test1: 0.8143501	best: 0.8084096 (458)	total: 10m 59s	remaining: 9m 32s
535:	learn: 0.8202977	test: 0.8202977	test1: 0.8149746	best: 0.8084096 (458)	total: 11m	remaining: 9m 31s
536:	learn: 0.8201634	test: 0.8201634	test1: 0.8149466	best: 0.8084096 (458)	total: 11m 1s	remaining: 9m 30s
537:	learn: 0.8

603:	learn: 0.8162821	test: 0.8162821	test1: 0.8167704	best: 0.8084096 (458)	total: 12m 22s	remaining: 8m 6s
604:	learn: 0.8162433	test: 0.8162433	test1: 0.8167880	best: 0.8084096 (458)	total: 12m 23s	remaining: 8m 5s
605:	learn: 0.8162168	test: 0.8162168	test1: 0.8167794	best: 0.8084096 (458)	total: 12m 24s	remaining: 8m 4s
606:	learn: 0.8161704	test: 0.8161704	test1: 0.8168586	best: 0.8084096 (458)	total: 12m 25s	remaining: 8m 2s
607:	learn: 0.8161088	test: 0.8161088	test1: 0.8168534	best: 0.8084096 (458)	total: 12m 26s	remaining: 8m 1s
608:	learn: 0.8160650	test: 0.8160650	test1: 0.8168420	best: 0.8084096 (458)	total: 12m 27s	remaining: 8m
609:	learn: 0.8160410	test: 0.8160410	test1: 0.8168714	best: 0.8084096 (458)	total: 12m 28s	remaining: 7m 58s
610:	learn: 0.8159468	test: 0.8159468	test1: 0.8168948	best: 0.8084096 (458)	total: 12m 29s	remaining: 7m 57s
611:	learn: 0.8159184	test: 0.8159184	test1: 0.8168918	best: 0.8084096 (458)	total: 12m 30s	remaining: 7m 56s
612:	learn: 0.81587

678:	learn: 0.8120911	test: 0.8120911	test1: 0.8167765	best: 0.8084096 (458)	total: 13m 51s	remaining: 6m 33s
679:	learn: 0.8120349	test: 0.8120349	test1: 0.8168214	best: 0.8084096 (458)	total: 13m 52s	remaining: 6m 31s
680:	learn: 0.8120087	test: 0.8120087	test1: 0.8168304	best: 0.8084096 (458)	total: 13m 53s	remaining: 6m 30s
681:	learn: 0.8119877	test: 0.8119877	test1: 0.8168134	best: 0.8084096 (458)	total: 13m 54s	remaining: 6m 29s
682:	learn: 0.8119200	test: 0.8119200	test1: 0.8166146	best: 0.8084096 (458)	total: 13m 56s	remaining: 6m 28s
683:	learn: 0.8118767	test: 0.8118767	test1: 0.8165813	best: 0.8084096 (458)	total: 13m 57s	remaining: 6m 26s
684:	learn: 0.8117096	test: 0.8117096	test1: 0.8169864	best: 0.8084096 (458)	total: 13m 58s	remaining: 6m 25s
685:	learn: 0.8116587	test: 0.8116587	test1: 0.8168841	best: 0.8084096 (458)	total: 13m 59s	remaining: 6m 24s
686:	learn: 0.8116078	test: 0.8116078	test1: 0.8168860	best: 0.8084096 (458)	total: 14m 1s	remaining: 6m 23s
687:	learn:

753:	learn: 0.8065725	test: 0.8065725	test1: 0.8240920	best: 0.8084096 (458)	total: 15m 24s	remaining: 5m 1s
754:	learn: 0.8062893	test: 0.8062893	test1: 0.8239026	best: 0.8084096 (458)	total: 15m 25s	remaining: 5m
755:	learn: 0.8062742	test: 0.8062742	test1: 0.8239737	best: 0.8084096 (458)	total: 15m 26s	remaining: 4m 59s
756:	learn: 0.8062446	test: 0.8062446	test1: 0.8240088	best: 0.8084096 (458)	total: 15m 28s	remaining: 4m 57s
757:	learn: 0.8062039	test: 0.8062039	test1: 0.8240184	best: 0.8084096 (458)	total: 15m 29s	remaining: 4m 56s
758:	learn: 0.8061713	test: 0.8061713	test1: 0.8240002	best: 0.8084096 (458)	total: 15m 30s	remaining: 4m 55s
759:	learn: 0.8060911	test: 0.8060911	test1: 0.8239829	best: 0.8084096 (458)	total: 15m 31s	remaining: 4m 54s
760:	learn: 0.8060553	test: 0.8060553	test1: 0.8239417	best: 0.8084096 (458)	total: 15m 32s	remaining: 4m 52s
761:	learn: 0.8060350	test: 0.8060350	test1: 0.8239126	best: 0.8084096 (458)	total: 15m 34s	remaining: 4m 51s
762:	learn: 0.8

828:	learn: 0.8022672	test: 0.8022672	test1: 0.8243791	best: 0.8084096 (458)	total: 16m 55s	remaining: 3m 29s
829:	learn: 0.8022348	test: 0.8022348	test1: 0.8243638	best: 0.8084096 (458)	total: 16m 56s	remaining: 3m 28s
830:	learn: 0.8022002	test: 0.8022002	test1: 0.8243565	best: 0.8084096 (458)	total: 16m 57s	remaining: 3m 27s
831:	learn: 0.8021070	test: 0.8021070	test1: 0.8243430	best: 0.8084096 (458)	total: 16m 59s	remaining: 3m 25s
832:	learn: 0.8020653	test: 0.8020653	test1: 0.8242602	best: 0.8084096 (458)	total: 17m	remaining: 3m 24s
833:	learn: 0.8020431	test: 0.8020431	test1: 0.8242663	best: 0.8084096 (458)	total: 17m 1s	remaining: 3m 23s
834:	learn: 0.8020157	test: 0.8020157	test1: 0.8242800	best: 0.8084096 (458)	total: 17m 2s	remaining: 3m 22s
835:	learn: 0.8019912	test: 0.8019912	test1: 0.8242630	best: 0.8084096 (458)	total: 17m 4s	remaining: 3m 20s
836:	learn: 0.8019333	test: 0.8019333	test1: 0.8249098	best: 0.8084096 (458)	total: 17m 5s	remaining: 3m 19s
837:	learn: 0.8016

903:	learn: 0.7985399	test: 0.7985399	test1: 0.8266097	best: 0.8084096 (458)	total: 18m 25s	remaining: 1m 57s
904:	learn: 0.7985176	test: 0.7985176	test1: 0.8266711	best: 0.8084096 (458)	total: 18m 27s	remaining: 1m 56s
905:	learn: 0.7984827	test: 0.7984827	test1: 0.8266757	best: 0.8084096 (458)	total: 18m 28s	remaining: 1m 54s
906:	learn: 0.7984277	test: 0.7984277	test1: 0.8265831	best: 0.8084096 (458)	total: 18m 29s	remaining: 1m 53s
907:	learn: 0.7984090	test: 0.7984090	test1: 0.8265732	best: 0.8084096 (458)	total: 18m 30s	remaining: 1m 52s
908:	learn: 0.7983843	test: 0.7983843	test1: 0.8265721	best: 0.8084096 (458)	total: 18m 31s	remaining: 1m 51s
909:	learn: 0.7983495	test: 0.7983495	test1: 0.8266656	best: 0.8084096 (458)	total: 18m 32s	remaining: 1m 50s
910:	learn: 0.7983276	test: 0.7983276	test1: 0.8266739	best: 0.8084096 (458)	total: 18m 34s	remaining: 1m 48s
911:	learn: 0.7982988	test: 0.7982988	test1: 0.8266738	best: 0.8084096 (458)	total: 18m 35s	remaining: 1m 47s
912:	learn

978:	learn: 0.7955987	test: 0.7955987	test1: 0.8269038	best: 0.8084096 (458)	total: 19m 57s	remaining: 25.7s
979:	learn: 0.7955496	test: 0.7955496	test1: 0.8269652	best: 0.8084096 (458)	total: 19m 58s	remaining: 24.5s
980:	learn: 0.7955372	test: 0.7955372	test1: 0.8269862	best: 0.8084096 (458)	total: 19m 59s	remaining: 23.2s
981:	learn: 0.7954791	test: 0.7954791	test1: 0.8269844	best: 0.8084096 (458)	total: 20m 1s	remaining: 22s
982:	learn: 0.7954447	test: 0.7954447	test1: 0.8270083	best: 0.8084096 (458)	total: 20m 2s	remaining: 20.8s
983:	learn: 0.7952390	test: 0.7952390	test1: 0.8270681	best: 0.8084096 (458)	total: 20m 3s	remaining: 19.6s
984:	learn: 0.7952080	test: 0.7952080	test1: 0.8270431	best: 0.8084096 (458)	total: 20m 4s	remaining: 18.3s
985:	learn: 0.7951972	test: 0.7951972	test1: 0.8270459	best: 0.8084096 (458)	total: 20m 6s	remaining: 17.1s
986:	learn: 0.7951794	test: 0.7951794	test1: 0.8270026	best: 0.8084096 (458)	total: 20m 7s	remaining: 15.9s
987:	learn: 0.7948792	test:

In [13]:
model.feature_importances_

array([2.33466225e+00, 3.71424383e+00, 4.26210165e+00, 1.58344082e+00,
       1.11080784e+01, 7.94912344e-01, 5.12192955e+00, 6.50866506e-02,
       5.38199587e+00, 1.32489191e+01, 1.94802719e+01, 3.48788710e+00,
       1.23032699e+00, 2.34870404e-01, 2.18394294e-02, 9.74404730e+00,
       9.95907073e-01, 3.73327785e-01, 1.46720778e-01, 4.64580149e-02,
       2.73417963e+00, 6.98071589e-01, 3.55968423e-01, 2.09239078e-01,
       2.01007130e-02, 1.78780328e+00, 6.06280685e-01, 1.86758076e-01,
       9.32738751e-02, 1.00074943e-02, 2.44448450e+00, 9.17196295e-02,
       4.15708077e-02, 1.93526752e-02, 5.11526048e-03, 6.06245261e+00,
       1.03089725e+00, 1.60420299e-01, 2.22323899e-02, 4.30445251e-02])

In [14]:
importance = model.feature_importances_

features = ['month_id', 'shop_id', 'item_id', 'cnt_month', 'city_id', 'cat_id',
            'main_cat', 'sub_cat', 'year', 'month',
            'cnt_month_1','cnt_month_2', 'cnt_month_3', 'cnt_month_6', 'cnt_month_12',
     'item_id_mean_cnt_1', 'item_id_mean_cnt_2', 'item_id_mean_cnt_3', 'item_id_mean_cnt_6', 'item_id_mean_cnt_12', 
     'sub_cat_mean_cnt_1', 'sub_cat_mean_cnt_2', 'sub_cat_mean_cnt_3', 'sub_cat_mean_cnt_6', 'sub_cat_mean_cnt_12', 
     'item_id/city_id_mean_cnt_1', 'item_id/city_id_mean_cnt_2', 'item_id/city_id_mean_cnt_3','item_id/city_id_mean_cnt_6', 'item_id/city_id_mean_cnt_12',
     'shop_id/cat_id_mean_cnt_1', 'shop_id/cat_id_mean_cnt_2','shop_id/cat_id_mean_cnt_3', 'shop_id/cat_id_mean_cnt_6','shop_id/cat_id_mean_cnt_12']

feature_importance = pd.DataFrame({'feature':features,'importance':importance})
feature_importance['ratio'] = feature_importance['importance']/feature_importance['importance'].sum()
feature_importance

ValueError: All arrays must be of the same length

In [None]:
data = feature_importance.sort_values(by='ratio', ascending=False)

plt.figure(figsize=(15,15))
sns.barplot(x=data['ratio'], y=data['feature'])
plt.xlabel('importance ratio')
plt.title('Feature Importance')

In [None]:
Y_pred = model.predict(X_valid)

In [None]:
submission = pd.DataFrame({
    "ID": X_valid.index, 
    "item_cnt_month": Y_pred
})
x = submission['ID'].head(200)
y_pred = submission['item_cnt_month'].head(200)
y_true = Y_valid.head(200)
fig = plt.figure(figsize=(25, 5))
ax = fig.add_subplot(111)
ax.plot(x, y_pred, label = 'predictive value', color = 'red')
ax.plot(x, y_true, label = 'actual value', color = 'green')

ax.legend()

plt.show()

In [None]:
Y_test = model.predict(X_test)

In [None]:
# 제출 파일 생성
submission = pd.DataFrame({
    "ID": test.index, 
    "item_cnt_month": Y_test
})
submission.to_csv('./submission/submission_CatBoost_2022030101.csv', index=False)