In [1]:
#!pip install catboost

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import seaborn as sns
import time
from catboost import Pool
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [3]:
test = pd.read_csv('./data/test.csv')

In [4]:
df = pd.read_pickle('./data/data_2.pkl')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8022925 entries, 826485 to 8849409
Data columns (total 41 columns):
 #   Column                       Dtype  
---  ------                       -----  
 0   month_id                     int8   
 1   shop_id                      int8   
 2   item_id                      int16  
 3   cnt_month                    float32
 4   city_id                      int8   
 5   cat_id                       int8   
 6   main_cat                     int8   
 7   sub_cat                      int8   
 8   year                         int16  
 9   month                        int8   
 10  sales_month                  int8   
 11  cnt_month_1                  float32
 12  cnt_month_2                  float32
 13  cnt_month_3                  float32
 14  cnt_month_6                  float32
 15  cnt_month_12                 float32
 16  item_id_mean_cnt_1           float32
 17  item_id_mean_cnt_2           float32
 18  item_id_mean_cnt_3           float32


In [6]:
df.head()

Unnamed: 0,month_id,shop_id,item_id,cnt_month,city_id,cat_id,main_cat,sub_cat,year,month,...,item_id/city_id_mean_cnt_1,item_id/city_id_mean_cnt_2,item_id/city_id_mean_cnt_3,item_id/city_id_mean_cnt_6,item_id/city_id_mean_cnt_12,shop_id/cat_id_mean_cnt_1,shop_id/cat_id_mean_cnt_2,shop_id/cat_id_mean_cnt_3,shop_id/cat_id_mean_cnt_6,shop_id/cat_id_mean_cnt_12
826485,3,25,8092,3.0,13,40,11,6,2013,4,...,1.0,2.142857,0.0,0.0,0.0,1.088162,1.049317,0.0,0.0,0.0
826486,3,25,7850,3.0,13,30,8,54,2013,4,...,3.714286,2.285714,0.0,0.0,0.0,3.672316,2.282443,0.0,0.0,0.0
826487,3,25,8051,3.0,13,66,14,47,2013,4,...,0.0,0.428571,0.0,0.0,0.0,1.827586,1.389474,0.0,0.0,0.0
826488,3,25,8088,1.0,13,55,13,4,2013,4,...,0.285714,0.0,0.0,0.0,0.0,0.598756,0.562549,0.0,0.0,0.0
826489,3,25,8089,1.0,13,55,13,4,2013,4,...,0.571429,0.0,0.0,0.0,0.0,0.598756,0.562549,0.0,0.0,0.0


In [7]:
df.columns

Index(['month_id', 'shop_id', 'item_id', 'cnt_month', 'city_id', 'cat_id',
       'main_cat', 'sub_cat', 'year', 'month', 'sales_month', 'cnt_month_1',
       'cnt_month_2', 'cnt_month_3', 'cnt_month_6', 'cnt_month_12',
       'item_id_mean_cnt_1', 'item_id_mean_cnt_2', 'item_id_mean_cnt_3',
       'item_id_mean_cnt_6', 'item_id_mean_cnt_12', 'sub_cat_mean_cnt_1',
       'sub_cat_mean_cnt_2', 'sub_cat_mean_cnt_3', 'sub_cat_mean_cnt_6',
       'sub_cat_mean_cnt_12', 'price_mean_mean_cnt_1', 'price_mean_mean_cnt_2',
       'price_mean_mean_cnt_3', 'price_mean_mean_cnt_6',
       'price_mean_mean_cnt_12', 'item_id/city_id_mean_cnt_1',
       'item_id/city_id_mean_cnt_2', 'item_id/city_id_mean_cnt_3',
       'item_id/city_id_mean_cnt_6', 'item_id/city_id_mean_cnt_12',
       'shop_id/cat_id_mean_cnt_1', 'shop_id/cat_id_mean_cnt_2',
       'shop_id/cat_id_mean_cnt_3', 'shop_id/cat_id_mean_cnt_6',
       'shop_id/cat_id_mean_cnt_12'],
      dtype='object')

In [8]:
s = ['cnt_month','cnt_month_1','cnt_month_2', 'cnt_month_3', 'cnt_month_6', 'cnt_month_12',
     'item_id_mean_cnt_1', 'item_id_mean_cnt_2', 'item_id_mean_cnt_3', 'item_id_mean_cnt_6', 'item_id_mean_cnt_12', 
     'sub_cat_mean_cnt_1', 'sub_cat_mean_cnt_2', 'sub_cat_mean_cnt_3', 'sub_cat_mean_cnt_6', 'sub_cat_mean_cnt_12', 
     'price_mean_mean_cnt_1', 'price_mean_mean_cnt_2','price_mean_mean_cnt_3', 'price_mean_mean_cnt_6','price_mean_mean_cnt_12',
     'item_id/city_id_mean_cnt_1', 'item_id/city_id_mean_cnt_2', 'item_id/city_id_mean_cnt_3','item_id/city_id_mean_cnt_6', 'item_id/city_id_mean_cnt_12',
     'shop_id/cat_id_mean_cnt_1', 'shop_id/cat_id_mean_cnt_2','shop_id/cat_id_mean_cnt_3', 'shop_id/cat_id_mean_cnt_6','shop_id/cat_id_mean_cnt_12']

In [9]:
# value clip(0~20)
df[s] = df[s].clip(0, 20)

In [10]:
# train/validation/test data split
X_train = df[df['month_id'] < 33].drop(['cnt_month'], axis=1)
Y_train = df[df['month_id'] < 33]['cnt_month']

X_valid = df[df['month_id'] == 33].drop(['cnt_month'], axis=1)
Y_valid = df[df['month_id'] == 33]['cnt_month']

X_test = df[df['month_id'] == 34].drop(['cnt_month'], axis=1)

In [11]:
model = CatBoostRegressor()

In [12]:
ts = time.time()

model.fit(
    X_train, Y_train, 
    eval_set=[(X_train, Y_train), (X_valid, Y_valid)],  # 유효성 검사 데이터 세트
    )

print('소요시간: ',time.time()-ts)

Learning rate set to 0.207046
0:	learn: 1.1452744	test: 1.1452744	test1: 1.0113145	best: 1.0113145 (0)	total: 1.14s	remaining: 19m 2s
1:	learn: 1.0843812	test: 1.0843812	test1: 0.9667822	best: 0.9667822 (1)	total: 2.14s	remaining: 17m 48s
2:	learn: 1.0429334	test: 1.0429334	test1: 0.9376476	best: 0.9376476 (2)	total: 3.17s	remaining: 17m 33s
3:	learn: 1.0112324	test: 1.0112324	test1: 0.9123128	best: 0.9123128 (3)	total: 4.25s	remaining: 17m 39s
4:	learn: 0.9896287	test: 0.9896287	test1: 0.9006498	best: 0.9006498 (4)	total: 5.22s	remaining: 17m 19s
5:	learn: 0.9746630	test: 0.9746630	test1: 0.8916584	best: 0.8916584 (5)	total: 6.25s	remaining: 17m 15s
6:	learn: 0.9622192	test: 0.9622192	test1: 0.8821958	best: 0.8821958 (6)	total: 7.39s	remaining: 17m 28s
7:	learn: 0.9541975	test: 0.9541975	test1: 0.8779316	best: 0.8779316 (7)	total: 8.53s	remaining: 17m 38s
8:	learn: 0.9476041	test: 0.9476041	test1: 0.8743650	best: 0.8743650 (8)	total: 9.64s	remaining: 17m 41s
9:	learn: 0.9396184	test: 

77:	learn: 0.8780848	test: 0.8780848	test1: 0.8207364	best: 0.8202128 (73)	total: 1m 33s	remaining: 18m 20s
78:	learn: 0.8778484	test: 0.8778484	test1: 0.8206725	best: 0.8202128 (73)	total: 1m 34s	remaining: 18m 17s
79:	learn: 0.8776727	test: 0.8776727	test1: 0.8204785	best: 0.8202128 (73)	total: 1m 35s	remaining: 18m 13s
80:	learn: 0.8772680	test: 0.8772680	test1: 0.8207009	best: 0.8202128 (73)	total: 1m 36s	remaining: 18m 16s
81:	learn: 0.8767061	test: 0.8767061	test1: 0.8204248	best: 0.8202128 (73)	total: 1m 38s	remaining: 18m 19s
82:	learn: 0.8765130	test: 0.8765130	test1: 0.8204705	best: 0.8202128 (73)	total: 1m 39s	remaining: 18m 18s
83:	learn: 0.8761751	test: 0.8761751	test1: 0.8205078	best: 0.8202128 (73)	total: 1m 40s	remaining: 18m 16s
84:	learn: 0.8759233	test: 0.8759233	test1: 0.8202902	best: 0.8202128 (73)	total: 1m 41s	remaining: 18m 14s
85:	learn: 0.8757336	test: 0.8757336	test1: 0.8199155	best: 0.8199155 (85)	total: 1m 42s	remaining: 18m 12s
86:	learn: 0.8754661	test: 0

153:	learn: 0.8600928	test: 0.8600928	test1: 0.8156495	best: 0.8153417 (132)	total: 3m 5s	remaining: 16m 57s
154:	learn: 0.8597171	test: 0.8597171	test1: 0.8157090	best: 0.8153417 (132)	total: 3m 6s	remaining: 16m 55s
155:	learn: 0.8595692	test: 0.8595692	test1: 0.8155507	best: 0.8153417 (132)	total: 3m 7s	remaining: 16m 53s
156:	learn: 0.8593406	test: 0.8593406	test1: 0.8155247	best: 0.8153417 (132)	total: 3m 8s	remaining: 16m 52s
157:	learn: 0.8591803	test: 0.8591803	test1: 0.8156827	best: 0.8153417 (132)	total: 3m 9s	remaining: 16m 50s
158:	learn: 0.8590472	test: 0.8590472	test1: 0.8157046	best: 0.8153417 (132)	total: 3m 10s	remaining: 16m 48s
159:	learn: 0.8589202	test: 0.8589202	test1: 0.8155405	best: 0.8153417 (132)	total: 3m 12s	remaining: 16m 48s
160:	learn: 0.8586769	test: 0.8586769	test1: 0.8146720	best: 0.8146720 (160)	total: 3m 13s	remaining: 16m 48s
161:	learn: 0.8584158	test: 0.8584158	test1: 0.8143813	best: 0.8143813 (161)	total: 3m 15s	remaining: 16m 49s
162:	learn: 0.8

228:	learn: 0.8491142	test: 0.8491142	test1: 0.8209865	best: 0.8137711 (163)	total: 4m 35s	remaining: 15m 28s
229:	learn: 0.8490073	test: 0.8490073	test1: 0.8212127	best: 0.8137711 (163)	total: 4m 36s	remaining: 15m 26s
230:	learn: 0.8489445	test: 0.8489445	test1: 0.8212585	best: 0.8137711 (163)	total: 4m 37s	remaining: 15m 25s
231:	learn: 0.8487427	test: 0.8487427	test1: 0.8212989	best: 0.8137711 (163)	total: 4m 39s	remaining: 15m 23s
232:	learn: 0.8486526	test: 0.8486526	test1: 0.8213271	best: 0.8137711 (163)	total: 4m 40s	remaining: 15m 22s
233:	learn: 0.8485780	test: 0.8485780	test1: 0.8212775	best: 0.8137711 (163)	total: 4m 41s	remaining: 15m 20s
234:	learn: 0.8484989	test: 0.8484989	test1: 0.8212041	best: 0.8137711 (163)	total: 4m 42s	remaining: 15m 19s
235:	learn: 0.8483464	test: 0.8483464	test1: 0.8210025	best: 0.8137711 (163)	total: 4m 43s	remaining: 15m 17s
236:	learn: 0.8482993	test: 0.8482993	test1: 0.8210069	best: 0.8137711 (163)	total: 4m 44s	remaining: 15m 15s
237:	learn

303:	learn: 0.8392378	test: 0.8392378	test1: 0.8192240	best: 0.8137711 (163)	total: 6m 8s	remaining: 14m 2s
304:	learn: 0.8391622	test: 0.8391622	test1: 0.8191683	best: 0.8137711 (163)	total: 6m 9s	remaining: 14m 1s
305:	learn: 0.8389891	test: 0.8389891	test1: 0.8189428	best: 0.8137711 (163)	total: 6m 11s	remaining: 14m 1s
306:	learn: 0.8387822	test: 0.8387822	test1: 0.8190273	best: 0.8137711 (163)	total: 6m 12s	remaining: 14m
307:	learn: 0.8386996	test: 0.8386996	test1: 0.8189890	best: 0.8137711 (163)	total: 6m 13s	remaining: 13m 59s
308:	learn: 0.8386424	test: 0.8386424	test1: 0.8189506	best: 0.8137711 (163)	total: 6m 14s	remaining: 13m 58s
309:	learn: 0.8386054	test: 0.8386054	test1: 0.8189430	best: 0.8137711 (163)	total: 6m 16s	remaining: 13m 56s
310:	learn: 0.8385384	test: 0.8385384	test1: 0.8189445	best: 0.8137711 (163)	total: 6m 17s	remaining: 13m 55s
311:	learn: 0.8384865	test: 0.8384865	test1: 0.8188748	best: 0.8137711 (163)	total: 6m 18s	remaining: 13m 54s
312:	learn: 0.83838

378:	learn: 0.8318661	test: 0.8318661	test1: 0.8333124	best: 0.8137711 (163)	total: 7m 40s	remaining: 12m 34s
379:	learn: 0.8318175	test: 0.8318175	test1: 0.8332830	best: 0.8137711 (163)	total: 7m 41s	remaining: 12m 32s
380:	learn: 0.8317431	test: 0.8317431	test1: 0.8351491	best: 0.8137711 (163)	total: 7m 42s	remaining: 12m 31s
381:	learn: 0.8315388	test: 0.8315388	test1: 0.8352878	best: 0.8137711 (163)	total: 7m 43s	remaining: 12m 30s
382:	learn: 0.8314667	test: 0.8314667	test1: 0.8354614	best: 0.8137711 (163)	total: 7m 45s	remaining: 12m 29s
383:	learn: 0.8313668	test: 0.8313668	test1: 0.8354244	best: 0.8137711 (163)	total: 7m 46s	remaining: 12m 27s
384:	learn: 0.8313079	test: 0.8313079	test1: 0.8354533	best: 0.8137711 (163)	total: 7m 47s	remaining: 12m 26s
385:	learn: 0.8312580	test: 0.8312580	test1: 0.8353891	best: 0.8137711 (163)	total: 7m 48s	remaining: 12m 25s
386:	learn: 0.8311763	test: 0.8311763	test1: 0.8352410	best: 0.8137711 (163)	total: 7m 49s	remaining: 12m 24s
387:	learn

453:	learn: 0.8260519	test: 0.8260519	test1: 0.8348220	best: 0.8137711 (163)	total: 9m 9s	remaining: 11m
454:	learn: 0.8260124	test: 0.8260124	test1: 0.8348767	best: 0.8137711 (163)	total: 9m 10s	remaining: 10m 59s
455:	learn: 0.8259797	test: 0.8259797	test1: 0.8348753	best: 0.8137711 (163)	total: 9m 11s	remaining: 10m 57s
456:	learn: 0.8259451	test: 0.8259451	test1: 0.8348866	best: 0.8137711 (163)	total: 9m 12s	remaining: 10m 56s
457:	learn: 0.8259000	test: 0.8259000	test1: 0.8348337	best: 0.8137711 (163)	total: 9m 13s	remaining: 10m 55s
458:	learn: 0.8258519	test: 0.8258519	test1: 0.8348350	best: 0.8137711 (163)	total: 9m 14s	remaining: 10m 53s
459:	learn: 0.8258139	test: 0.8258139	test1: 0.8347480	best: 0.8137711 (163)	total: 9m 15s	remaining: 10m 52s
460:	learn: 0.8257636	test: 0.8257636	test1: 0.8347672	best: 0.8137711 (163)	total: 9m 17s	remaining: 10m 51s
461:	learn: 0.8257334	test: 0.8257334	test1: 0.8346868	best: 0.8137711 (163)	total: 9m 18s	remaining: 10m 50s
462:	learn: 0.8

528:	learn: 0.8201998	test: 0.8201998	test1: 0.8330558	best: 0.8137711 (163)	total: 10m 40s	remaining: 9m 30s
529:	learn: 0.8201497	test: 0.8201497	test1: 0.8331356	best: 0.8137711 (163)	total: 10m 42s	remaining: 9m 29s
530:	learn: 0.8200655	test: 0.8200655	test1: 0.8331563	best: 0.8137711 (163)	total: 10m 43s	remaining: 9m 28s
531:	learn: 0.8200145	test: 0.8200145	test1: 0.8331547	best: 0.8137711 (163)	total: 10m 44s	remaining: 9m 27s
532:	learn: 0.8199012	test: 0.8199012	test1: 0.8329386	best: 0.8137711 (163)	total: 10m 46s	remaining: 9m 26s
533:	learn: 0.8198473	test: 0.8198473	test1: 0.8330517	best: 0.8137711 (163)	total: 10m 47s	remaining: 9m 24s
534:	learn: 0.8198139	test: 0.8198139	test1: 0.8330595	best: 0.8137711 (163)	total: 10m 48s	remaining: 9m 23s
535:	learn: 0.8197625	test: 0.8197625	test1: 0.8330439	best: 0.8137711 (163)	total: 10m 49s	remaining: 9m 22s
536:	learn: 0.8197253	test: 0.8197253	test1: 0.8330167	best: 0.8137711 (163)	total: 10m 50s	remaining: 9m 21s
537:	learn

603:	learn: 0.8154509	test: 0.8154509	test1: 0.8338983	best: 0.8137711 (163)	total: 12m 12s	remaining: 8m
604:	learn: 0.8154193	test: 0.8154193	test1: 0.8339694	best: 0.8137711 (163)	total: 12m 13s	remaining: 7m 58s
605:	learn: 0.8153848	test: 0.8153848	test1: 0.8339741	best: 0.8137711 (163)	total: 12m 14s	remaining: 7m 57s
606:	learn: 0.8153245	test: 0.8153245	test1: 0.8341186	best: 0.8137711 (163)	total: 12m 15s	remaining: 7m 56s
607:	learn: 0.8152498	test: 0.8152498	test1: 0.8350406	best: 0.8137711 (163)	total: 12m 16s	remaining: 7m 55s
608:	learn: 0.8152004	test: 0.8152004	test1: 0.8350668	best: 0.8137711 (163)	total: 12m 18s	remaining: 7m 54s
609:	learn: 0.8151471	test: 0.8151471	test1: 0.8352930	best: 0.8137711 (163)	total: 12m 19s	remaining: 7m 52s
610:	learn: 0.8151084	test: 0.8151084	test1: 0.8352938	best: 0.8137711 (163)	total: 12m 20s	remaining: 7m 51s
611:	learn: 0.8150492	test: 0.8150492	test1: 0.8351944	best: 0.8137711 (163)	total: 12m 22s	remaining: 7m 50s
612:	learn: 0.

678:	learn: 0.8111805	test: 0.8111805	test1: 0.8359964	best: 0.8137711 (163)	total: 13m 43s	remaining: 6m 29s
679:	learn: 0.8109862	test: 0.8109862	test1: 0.8360054	best: 0.8137711 (163)	total: 13m 44s	remaining: 6m 27s
680:	learn: 0.8109488	test: 0.8109488	test1: 0.8359730	best: 0.8137711 (163)	total: 13m 45s	remaining: 6m 26s
681:	learn: 0.8108756	test: 0.8108756	test1: 0.8358310	best: 0.8137711 (163)	total: 13m 46s	remaining: 6m 25s
682:	learn: 0.8108244	test: 0.8108244	test1: 0.8358535	best: 0.8137711 (163)	total: 13m 47s	remaining: 6m 24s
683:	learn: 0.8106613	test: 0.8106613	test1: 0.8357081	best: 0.8137711 (163)	total: 13m 49s	remaining: 6m 23s
684:	learn: 0.8106061	test: 0.8106061	test1: 0.8357208	best: 0.8137711 (163)	total: 13m 50s	remaining: 6m 21s
685:	learn: 0.8105745	test: 0.8105745	test1: 0.8357252	best: 0.8137711 (163)	total: 13m 51s	remaining: 6m 20s
686:	learn: 0.8105304	test: 0.8105304	test1: 0.8358897	best: 0.8137711 (163)	total: 13m 52s	remaining: 6m 19s
687:	learn

753:	learn: 0.8076113	test: 0.8076113	test1: 0.8369440	best: 0.8137711 (163)	total: 15m 13s	remaining: 4m 58s
754:	learn: 0.8075910	test: 0.8075910	test1: 0.8368952	best: 0.8137711 (163)	total: 15m 15s	remaining: 4m 57s
755:	learn: 0.8075474	test: 0.8075474	test1: 0.8370078	best: 0.8137711 (163)	total: 15m 16s	remaining: 4m 55s
756:	learn: 0.8075100	test: 0.8075100	test1: 0.8371417	best: 0.8137711 (163)	total: 15m 17s	remaining: 4m 54s
757:	learn: 0.8074618	test: 0.8074618	test1: 0.8369943	best: 0.8137711 (163)	total: 15m 19s	remaining: 4m 53s
758:	learn: 0.8074379	test: 0.8074379	test1: 0.8369468	best: 0.8137711 (163)	total: 15m 20s	remaining: 4m 52s
759:	learn: 0.8073983	test: 0.8073983	test1: 0.8370362	best: 0.8137711 (163)	total: 15m 21s	remaining: 4m 50s
760:	learn: 0.8073748	test: 0.8073748	test1: 0.8370062	best: 0.8137711 (163)	total: 15m 22s	remaining: 4m 49s
761:	learn: 0.8073499	test: 0.8073499	test1: 0.8369900	best: 0.8137711 (163)	total: 15m 23s	remaining: 4m 48s
762:	learn

828:	learn: 0.8037028	test: 0.8037028	test1: 0.8381943	best: 0.8137711 (163)	total: 16m 46s	remaining: 3m 27s
829:	learn: 0.8036523	test: 0.8036523	test1: 0.8381796	best: 0.8137711 (163)	total: 16m 47s	remaining: 3m 26s
830:	learn: 0.8036151	test: 0.8036151	test1: 0.8381328	best: 0.8137711 (163)	total: 16m 48s	remaining: 3m 25s
831:	learn: 0.8035669	test: 0.8035669	test1: 0.8381044	best: 0.8137711 (163)	total: 16m 49s	remaining: 3m 23s
832:	learn: 0.8035450	test: 0.8035450	test1: 0.8380457	best: 0.8137711 (163)	total: 16m 50s	remaining: 3m 22s
833:	learn: 0.8035135	test: 0.8035135	test1: 0.8382048	best: 0.8137711 (163)	total: 16m 52s	remaining: 3m 21s
834:	learn: 0.8034814	test: 0.8034814	test1: 0.8382220	best: 0.8137711 (163)	total: 16m 52s	remaining: 3m 20s
835:	learn: 0.8034509	test: 0.8034509	test1: 0.8382981	best: 0.8137711 (163)	total: 16m 53s	remaining: 3m 18s
836:	learn: 0.8034296	test: 0.8034296	test1: 0.8382660	best: 0.8137711 (163)	total: 16m 55s	remaining: 3m 17s
837:	learn

903:	learn: 0.8004036	test: 0.8004036	test1: 0.8417465	best: 0.8137711 (163)	total: 18m 16s	remaining: 1m 56s
904:	learn: 0.8003828	test: 0.8003828	test1: 0.8417148	best: 0.8137711 (163)	total: 18m 18s	remaining: 1m 55s
905:	learn: 0.8003403	test: 0.8003403	test1: 0.8416201	best: 0.8137711 (163)	total: 18m 19s	remaining: 1m 54s
906:	learn: 0.8003253	test: 0.8003253	test1: 0.8416338	best: 0.8137711 (163)	total: 18m 20s	remaining: 1m 52s
907:	learn: 0.8002959	test: 0.8002959	test1: 0.8415635	best: 0.8137711 (163)	total: 18m 21s	remaining: 1m 51s
908:	learn: 0.8002752	test: 0.8002752	test1: 0.8416884	best: 0.8137711 (163)	total: 18m 22s	remaining: 1m 50s
909:	learn: 0.8002451	test: 0.8002451	test1: 0.8417618	best: 0.8137711 (163)	total: 18m 23s	remaining: 1m 49s
910:	learn: 0.8002243	test: 0.8002243	test1: 0.8418024	best: 0.8137711 (163)	total: 18m 24s	remaining: 1m 47s
911:	learn: 0.8002064	test: 0.8002064	test1: 0.8418158	best: 0.8137711 (163)	total: 18m 26s	remaining: 1m 46s
912:	learn

978:	learn: 0.7970665	test: 0.7970665	test1: 0.8488005	best: 0.8137711 (163)	total: 19m 49s	remaining: 25.5s
979:	learn: 0.7970510	test: 0.7970510	test1: 0.8487795	best: 0.8137711 (163)	total: 19m 50s	remaining: 24.3s
980:	learn: 0.7970357	test: 0.7970357	test1: 0.8487884	best: 0.8137711 (163)	total: 19m 52s	remaining: 23.1s
981:	learn: 0.7970097	test: 0.7970097	test1: 0.8487725	best: 0.8137711 (163)	total: 19m 53s	remaining: 21.9s
982:	learn: 0.7969912	test: 0.7969912	test1: 0.8487293	best: 0.8137711 (163)	total: 19m 54s	remaining: 20.7s
983:	learn: 0.7969641	test: 0.7969641	test1: 0.8487205	best: 0.8137711 (163)	total: 19m 56s	remaining: 19.5s
984:	learn: 0.7969446	test: 0.7969446	test1: 0.8487462	best: 0.8137711 (163)	total: 19m 57s	remaining: 18.2s
985:	learn: 0.7969177	test: 0.7969177	test1: 0.8487441	best: 0.8137711 (163)	total: 19m 58s	remaining: 17s
986:	learn: 0.7968573	test: 0.7968573	test1: 0.8488594	best: 0.8137711 (163)	total: 20m	remaining: 15.8s
987:	learn: 0.7968409	tes

In [13]:
model.feature_importances_

array([1.87839497e+00, 3.06085430e+00, 1.82604015e+00, 1.53620643e+00,
       9.94895226e+00, 1.39122654e+00, 6.05319221e+00, 0.00000000e+00,
       4.51304366e+00, 1.34916305e+01, 2.43588912e+01, 3.98817777e+00,
       1.31231863e+00, 2.05236319e-01, 5.15605578e-03, 1.02446170e+01,
       7.87085237e-01, 2.31471942e-01, 3.04346641e-02, 1.21903538e-02,
       2.32442034e+00, 3.61213351e-01, 1.24418931e-01, 1.49795465e-02,
       0.00000000e+00, 1.62122252e+00, 5.16397139e-01, 1.57796382e-01,
       3.73346817e-02, 0.00000000e+00, 2.37067789e+00, 2.97709667e-02,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 6.47685640e+00,
       9.17305132e-01, 1.60718892e-01, 1.17675728e-02, 0.00000000e+00])

In [14]:
importance = model.feature_importances_

features = ['month_id', 'shop_id', 'item_id', 'cnt_month', 'city_id', 'cat_id',
            'main_cat', 'sub_cat', 'year', 'month',
            'cnt_month_1','cnt_month_2', 'cnt_month_3', 'cnt_month_6', 'cnt_month_12',
     'item_id_mean_cnt_1', 'item_id_mean_cnt_2', 'item_id_mean_cnt_3', 'item_id_mean_cnt_6', 'item_id_mean_cnt_12', 
     'sub_cat_mean_cnt_1', 'sub_cat_mean_cnt_2', 'sub_cat_mean_cnt_3', 'sub_cat_mean_cnt_6', 'sub_cat_mean_cnt_12', 
     'item_id/city_id_mean_cnt_1', 'item_id/city_id_mean_cnt_2', 'item_id/city_id_mean_cnt_3','item_id/city_id_mean_cnt_6', 'item_id/city_id_mean_cnt_12',
     'shop_id/cat_id_mean_cnt_1', 'shop_id/cat_id_mean_cnt_2','shop_id/cat_id_mean_cnt_3', 'shop_id/cat_id_mean_cnt_6','shop_id/cat_id_mean_cnt_12']

feature_importance = pd.DataFrame({'feature':features,'importance':importance})
feature_importance['ratio'] = feature_importance['importance']/feature_importance['importance'].sum()
feature_importance

ValueError: All arrays must be of the same length

In [None]:
data = feature_importance.sort_values(by='ratio', ascending=False)

plt.figure(figsize=(15,15))
sns.barplot(x=data['ratio'], y=data['feature'])
plt.xlabel('importance ratio')
plt.title('Feature Importance')

In [None]:
Y_pred = model.predict(X_valid)

In [None]:
submission = pd.DataFrame({
    "ID": X_valid.index, 
    "item_cnt_month": Y_pred
})
x = submission['ID'].head(200)
y_pred = submission['item_cnt_month'].head(200)
y_true = Y_valid.head(200)
fig = plt.figure(figsize=(25, 5))
ax = fig.add_subplot(111)
ax.plot(x, y_pred, label = 'predictive value', color = 'red')
ax.plot(x, y_true, label = 'actual value', color = 'green')

ax.legend()

plt.show()

In [None]:
Y_test = model.predict(X_test)

In [None]:
# 제출 파일 생성
submission = pd.DataFrame({
    "ID": test.index, 
    "item_cnt_month": Y_test
})
submission.to_csv('./submission/submission_CatBoost_2022030101.csv', index=False)