In [1]:
import numpy as np
import pandas as pd
# category variable
category = ['cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7', 'cat8', 'cat9']

# continuous variable
continuous = ['cont0', 'cont1', 'cont2', 'cont3', 'cont4', 'cont5', 'cont6', 
              'cont7', 'cont8', 'cont9', 'cont10', 'cont11', 'cont12', 'cont13']

In [8]:
features = ['cat1', 'cont11', 'cont13', 'cont0', 'cont2', 'cat8', 'cont8', 'cont9', 'cat3', 'cat6', 'cat5']
#features = ['cat1', 'cont11', 'cont13', 'cont0', 'cont2', 'cat8', 'cont8']

train_dataset = pd.read_csv('../input/tabular-playground-series-feb-2021/train.csv')
test_data = pd.read_csv('../input/tabular-playground-series-feb-2021/test.csv')
dataset = pd.concat([train_dataset, test_data])

# あきらかな外れ値は削除
dataset = dataset[dataset['id'] != 166042]

# idとtargetは避難させておく
id = dataset['id']
target = dataset['target']
# 避難させたので遠慮なく削除
dataset = dataset.drop(columns=['id', 'target'])
# 相関が高いものだけを使用
dataset = dataset.loc[:,features]

# 重要度を確認するためにはLabelEncodingが有効
# CatBoost, LightGBM, LassoにおいてOne-Hotのほうが1%程度RMSEの結果が優れていた
from sklearn.preprocessing import LabelEncoder, RobustScaler
encoder = LabelEncoder()
scaler = RobustScaler()
import scipy.stats as stat
feature_cat = []
feature_num = []
for x in features:
    # もしpandas.seriesが文字列ならOne-Hot Encodingとおもったけど、catboostを使う場合は
    # モデルの生成時にカテゴリ変数を指定できるので、LabelEncodingを採用
    if dataset[x].dtype == object:
        feature_cat.append(x)
        #dataset[x] = encoder.fit_transform(dataset[x])
        #dataset = pd.get_dummies(dataset, columns=[x], drop_first=True)
    else:
        feature_num.append(x)
    
# 量的変数はスケーリング
dataset[feature_num] = scaler.fit_transform(dataset.loc[:,feature_num].values)

# データセットにidとtargetを元に戻して
dataset = pd.concat([id,dataset,target], axis=1)
# targetのあるなしでtrainとtestを分割
train = dataset.loc[dataset['target'].notnull(), :]
test  = dataset.loc[dataset['target'].isnull(), :]

X = train.drop(columns=['id', 'target'])
y = train['target']
X_prediction = test.drop(columns=['id', 'target'])
prediction_id = test.loc[:,'id']

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [14]:
from sklearn.preprocessing import RobustScaler, StandardScaler
from sklearn.linear_model import LinearRegression, Lasso, ElasticNet, BayesianRidge, LassoLarsIC
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import lightgbm as lgb
import xgboost as xgb
from sklearn.kernel_ridge import KernelRidge
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.model_selection import cross_val_score
from catboost import CatBoostRegressor

from sklearn.ensemble import StackingRegressor

kfold = KFold(n_splits=5, shuffle=True, random_state=42)

def cross_val(model):
    score = cross_val_score(model, X, y, scoring='neg_mean_squared_error', cv=kfold)
    print(f'{type(model).__name__} score = : {score.mean():.8f}')


regressors = [LinearRegression(), Lasso(alpha=0.005),
              #RandomForestRegressor(n_estimators = 100, criterion='mae', random_state=0, bootstrap=True),
              lgb.LGBMRegressor(objective='regression', num_leaves=5, learning_rate=0.05, n_estimators=720, bagging_fraction=0.8, verbose=10),
              GradientBoostingRegressor(n_estimators = 3000, learning_rate=0.05, max_depth=4, max_features='sqrt', min_samples_leaf=15, min_samples_split=10, loss='RMSE', random_state=0),
              xgb.XGBRegressor(colsample_bytree=0.463, gamma=0.0468, learning_rate=0.05, max_depth=3, min_child_weight=1.7817, n_estimators=2200, reg_alpha=0.4640, reg_lambda=0.8571, subsample=0.5213, silent=1, random_state=0, nthread=-1)
             ]

#for x in regressors:
#    cross_val(x)

In [7]:
from sklearn.model_selection import GridSearchCV
'''
light_grid = {'max_depth': [3, 5, 8],
             'learning_rate': [0.001, 0.0001, 0.0001],
             'n_estimators': [10, 100, 200, 500],
              'num_leaves': [100, 300, 500]
             }
lgb_grid_search = GridSearchCV(lgb.LGBMRegressor(), light_grid, cv=kfold, scoring='neg_mean_squared_error', n_jobs=-1, verbose=2)
lgb_grid_search.fit(X,y)
lgb_param = lgb_grid_search.best_params_
print(lgb_param)

grad_grid_parameters = [
    {'loss': ['ls', 'huber'],
     'learning_rate': [0.0001, 0.005, 0.01],
     'n_estimators': [10, 100, 200, 500],
     'max_depth':[3, 5, 7, 9, 10]
    }
]

grad_grid_search = GridSearchCV(GradientBoostingRegressor(), grad_grid_parameters, cv=kfold, scoring='neg_mean_squared_error', n_jobs = -1, verbose=2)
grad_grid_search.fit(X, y)
grad_param = grad_grid_search.best_params_
print(grad_param)
'''

cat_grid_parameters = [
    {'depth': [3, 5, 7, 9, 10],
     'learning_rate': [0.0001, 0.005, 0.01],
     'l2_leaf_reg': [1,5,7,10],
     'iterations': [500, 1000]
    }
]

cat_grid_search = GridSearchCV(CatBoostRegressor(), cat_grid_parameters, cv=kfold, scoring='neg_mean_squared_error', n_jobs = -1, verbose=2)
cat_grid_search.fit(X, y)
cat_param = cat_grid_search.best_params_
print(grad_param)

Fitting 5 folds for each of 120 candidates, totalling 600 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.


KeyboardInterrupt: 

In [25]:
from sklearn.base import BaseEstimator, TransformerMixin, RegressorMixin, clone
from sklearn.metrics import r2_score, mean_squared_error

class AveragingModels(BaseEstimator, RegressorMixin, TransformerMixin):
    def __init__(self, models):
        self.models = models
    
    def fit(self, X, y, X_test, y_test):
        self.models_ = [clone(x) for x in self.models]

        for model in self.models_:
            # CatBoostに関しては、eval_setが必須のためここでエラーが発生する。
            # その場合はexceptでfit処理を行う
            print(f'************ RUNNING : {model} ************')
            try:
                model.fit(X, y)
            except:
                model.fit(X, y, eval_set=(X_test, y_test))
                
            y_pred = model.predict(X_test)
            print(f'{model} RMSE: {mean_squared_error(y_test, y_pred)}')
            
        return self
    
    def predict(self, X):
        predictions = np.column_stack(
            [model.predict(X) for model in self.models_]
            )
        return np.mean(predictions, axis=1)

lgb_tuned = lgb.LGBMRegressor(learning_rate=0.001, max_depth=8, n_estimators=500, num_leaves=300)
lasso = Lasso(alpha=0.0005, random_state=0)
gboost = GradientBoostingRegressor(n_estimators=100, learning_rate=0.0001, max_depth=5, max_features='sqrt', 
                                   min_samples_leaf=15, min_samples_split=10, loss='huber', random_state=0)
xgb_model = xgb.XGBRegressor(learning_rate=0.0001, max_depth=3, random_state=0, nthread=-1)
cat = CatBoostRegressor(iterations=1000, use_best_model=True, eval_metric='RMSE')

averaged_models = AveragingModels(models = (cat, lasso, lgb_tuned))
averaged_models.fit(X_train, y_train, X_test, y_test)

# 予想してみる
from sklearn.metrics import mean_squared_error
y_pred = averaged_models.predict(X_test)

************ RUNNING : <catboost.core.CatBoostRegressor object at 0x7fdcc5f58890> ************
Learning rate set to 0.5
0:	learn: 0.8761419	test: 0.8764754	best: 0.8764754 (0)	total: 40.9ms	remaining: 4.05s
1:	learn: 0.8709719	test: 0.8708975	best: 0.8708975 (1)	total: 80.2ms	remaining: 3.93s
2:	learn: 0.8682746	test: 0.8677823	best: 0.8677823 (2)	total: 121ms	remaining: 3.92s
3:	learn: 0.8660799	test: 0.8657195	best: 0.8657195 (3)	total: 159ms	remaining: 3.82s
4:	learn: 0.8649015	test: 0.8647458	best: 0.8647458 (4)	total: 193ms	remaining: 3.67s
5:	learn: 0.8640391	test: 0.8641089	best: 0.8641089 (5)	total: 231ms	remaining: 3.63s
6:	learn: 0.8634856	test: 0.8637367	best: 0.8637367 (6)	total: 269ms	remaining: 3.58s
7:	learn: 0.8626769	test: 0.8628568	best: 0.8628568 (7)	total: 309ms	remaining: 3.55s
8:	learn: 0.8620206	test: 0.8621279	best: 0.8621279 (8)	total: 348ms	remaining: 3.52s
9:	learn: 0.8614802	test: 0.8617162	best: 0.8617162 (9)	total: 393ms	remaining: 3.53s
10:	learn: 0.86083

96:	learn: 0.8487323	test: 0.8583324	best: 0.8577541 (49)	total: 3.65s	remaining: 113ms
97:	learn: 0.8486100	test: 0.8583881	best: 0.8577541 (49)	total: 3.69s	remaining: 75.2ms
98:	learn: 0.8485588	test: 0.8583931	best: 0.8577541 (49)	total: 3.71s	remaining: 37.5ms
99:	learn: 0.8484389	test: 0.8583727	best: 0.8577541 (49)	total: 3.76s	remaining: 0us

bestTest = 0.8577540867
bestIteration = 49

Shrink model to first 50 iterations.
<catboost.core.CatBoostRegressor object at 0x7fdcc5f58890> RMSE: 0.7357420732823325
************ RUNNING : Lasso(alpha=0.0005, random_state=0) ************
Lasso(alpha=0.0005, random_state=0) RMSE: 0.7610232932037936
************ RUNNING : LGBMRegressor(learning_rate=0.001, max_depth=8, n_estimators=500,
              num_leaves=300) ************
LGBMRegressor(learning_rate=0.001, max_depth=8, n_estimators=500,
              num_leaves=300) RMSE: 0.7644274502327526


In [12]:
from catboost import CatBoostRegressor
from sklearn.metrics import r2_score, mean_squared_error

from sklearn.model_selection import StratifiedKFold, KFold
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
'''
from sklearn.model_selection import GridSearchCV
param_cat = {'depth':[6,8,10],
            'learning_rate':[0.005, 0.001],
            'l2_leaf_reg':[1,4,9],
            'iterations':[5000],
            'cat_features':[feature_cat],
            'eval_metric':['RMSE'],
             'task_type': ['GPU']
            }

grid_result = GridSearchCV(estimator=CatBoostRegressor(),param_grid=param_cat, cv=kfold, scoring='neg_mean_squared_error', n_jobs = -1, verbose=2)
grid_result.fit(X_train, y_train)
grid_param = grid_result.best_params_
print(grid_param)

'''
#cat = CatBoostRegressor(cat_features=feature_cat, iterations=3000, use_best_model=True, depth=10, eval_metric='RMSE', l2_leaf_reg=1, learning_rate=0.005, early_stopping_rounds=3)
cat = CatBoostRegressor(iterations=3000, od_type='Iter', grow_policy='SymmetricTree', subsample=0.8, use_best_model=True, depth=10, eval_metric='RMSE', l2_leaf_reg=1, learning_rate=0.005, early_stopping_rounds=3)
cat.fit(X_train, y_train, cat_features=feature_cat, eval_set = (X_test, y_test))
y_pred = cat.predict(X_test)
print(f'RMSE: {mean_squared_error(y_test, y_pred)}')

0:	learn: 0.8868219	test: 0.8877613	best: 0.8877613 (0)	total: 268ms	remaining: 13m 24s
1:	learn: 0.8866325	test: 0.8875721	best: 0.8875721 (1)	total: 511ms	remaining: 12m 45s
2:	learn: 0.8864462	test: 0.8873847	best: 0.8873847 (2)	total: 740ms	remaining: 12m 19s
3:	learn: 0.8862608	test: 0.8872017	best: 0.8872017 (3)	total: 998ms	remaining: 12m 27s
4:	learn: 0.8860862	test: 0.8870260	best: 0.8870260 (4)	total: 1.22s	remaining: 12m 8s
5:	learn: 0.8859048	test: 0.8868473	best: 0.8868473 (5)	total: 1.44s	remaining: 11m 58s
6:	learn: 0.8857201	test: 0.8866668	best: 0.8866668 (6)	total: 1.66s	remaining: 11m 51s
7:	learn: 0.8855374	test: 0.8864880	best: 0.8864880 (7)	total: 1.94s	remaining: 12m 3s
8:	learn: 0.8853589	test: 0.8863117	best: 0.8863117 (8)	total: 2.15s	remaining: 11m 54s
9:	learn: 0.8851797	test: 0.8861387	best: 0.8861387 (9)	total: 2.44s	remaining: 12m 11s
10:	learn: 0.8850040	test: 0.8859669	best: 0.8859669 (10)	total: 2.7s	remaining: 12m 12s
11:	learn: 0.8848310	test: 0.8857

93:	learn: 0.8744288	test: 0.8757927	best: 0.8757927 (93)	total: 23.7s	remaining: 12m 11s
94:	learn: 0.8743447	test: 0.8757113	best: 0.8757113 (94)	total: 23.9s	remaining: 12m 11s
95:	learn: 0.8742560	test: 0.8756233	best: 0.8756233 (95)	total: 24.2s	remaining: 12m 11s
96:	learn: 0.8741644	test: 0.8755368	best: 0.8755368 (96)	total: 24.4s	remaining: 12m 11s
97:	learn: 0.8740785	test: 0.8754575	best: 0.8754575 (97)	total: 24.7s	remaining: 12m 11s
98:	learn: 0.8739898	test: 0.8753779	best: 0.8753779 (98)	total: 24.9s	remaining: 12m 10s
99:	learn: 0.8739030	test: 0.8752975	best: 0.8752975 (99)	total: 25.2s	remaining: 12m 10s
100:	learn: 0.8738195	test: 0.8752187	best: 0.8752187 (100)	total: 25.4s	remaining: 12m 9s
101:	learn: 0.8737289	test: 0.8751344	best: 0.8751344 (101)	total: 25.7s	remaining: 12m 8s
102:	learn: 0.8736419	test: 0.8750521	best: 0.8750521 (102)	total: 25.9s	remaining: 12m 9s
103:	learn: 0.8735613	test: 0.8749760	best: 0.8749760 (103)	total: 26.2s	remaining: 12m 9s
104:	l

183:	learn: 0.8684499	test: 0.8702609	best: 0.8702609 (183)	total: 45.9s	remaining: 11m 42s
184:	learn: 0.8684024	test: 0.8702184	best: 0.8702184 (184)	total: 46.1s	remaining: 11m 41s
185:	learn: 0.8683589	test: 0.8701803	best: 0.8701803 (185)	total: 46.4s	remaining: 11m 41s
186:	learn: 0.8683079	test: 0.8701383	best: 0.8701383 (186)	total: 46.6s	remaining: 11m 41s
187:	learn: 0.8682571	test: 0.8700957	best: 0.8700957 (187)	total: 46.9s	remaining: 11m 41s
188:	learn: 0.8682103	test: 0.8700541	best: 0.8700541 (188)	total: 47.1s	remaining: 11m 40s
189:	learn: 0.8681653	test: 0.8700125	best: 0.8700125 (189)	total: 47.3s	remaining: 11m 39s
190:	learn: 0.8681206	test: 0.8699715	best: 0.8699715 (190)	total: 47.5s	remaining: 11m 39s
191:	learn: 0.8680762	test: 0.8699325	best: 0.8699325 (191)	total: 47.8s	remaining: 11m 39s
192:	learn: 0.8680316	test: 0.8698924	best: 0.8698924 (192)	total: 48.1s	remaining: 11m 38s
193:	learn: 0.8679845	test: 0.8698500	best: 0.8698500 (193)	total: 48.3s	remaini

273:	learn: 0.8650445	test: 0.8673189	best: 0.8673189 (273)	total: 1m 8s	remaining: 11m 22s
274:	learn: 0.8650173	test: 0.8672964	best: 0.8672964 (274)	total: 1m 8s	remaining: 11m 22s
275:	learn: 0.8649871	test: 0.8672715	best: 0.8672715 (275)	total: 1m 9s	remaining: 11m 22s
276:	learn: 0.8649558	test: 0.8672458	best: 0.8672458 (276)	total: 1m 9s	remaining: 11m 22s
277:	learn: 0.8649293	test: 0.8672247	best: 0.8672247 (277)	total: 1m 9s	remaining: 11m 21s
278:	learn: 0.8648983	test: 0.8672019	best: 0.8672019 (278)	total: 1m 9s	remaining: 11m 21s
279:	learn: 0.8648713	test: 0.8671775	best: 0.8671775 (279)	total: 1m 10s	remaining: 11m 21s
280:	learn: 0.8648409	test: 0.8671544	best: 0.8671544 (280)	total: 1m 10s	remaining: 11m 20s
281:	learn: 0.8648101	test: 0.8671290	best: 0.8671290 (281)	total: 1m 10s	remaining: 11m 20s
282:	learn: 0.8647848	test: 0.8671074	best: 0.8671074 (282)	total: 1m 10s	remaining: 11m 20s
283:	learn: 0.8647555	test: 0.8670829	best: 0.8670829 (283)	total: 1m 11s	re

362:	learn: 0.8628517	test: 0.8655519	best: 0.8655519 (362)	total: 1m 31s	remaining: 11m 3s
363:	learn: 0.8628292	test: 0.8655378	best: 0.8655378 (363)	total: 1m 31s	remaining: 11m 3s
364:	learn: 0.8628088	test: 0.8655240	best: 0.8655240 (364)	total: 1m 31s	remaining: 11m 2s
365:	learn: 0.8627854	test: 0.8655048	best: 0.8655048 (365)	total: 1m 32s	remaining: 11m 2s
366:	learn: 0.8627647	test: 0.8654896	best: 0.8654896 (366)	total: 1m 32s	remaining: 11m 2s
367:	learn: 0.8627380	test: 0.8654683	best: 0.8654683 (367)	total: 1m 32s	remaining: 11m 2s
368:	learn: 0.8627165	test: 0.8654562	best: 0.8654562 (368)	total: 1m 32s	remaining: 11m 2s
369:	learn: 0.8626944	test: 0.8654410	best: 0.8654410 (369)	total: 1m 33s	remaining: 11m 1s
370:	learn: 0.8626725	test: 0.8654254	best: 0.8654254 (370)	total: 1m 33s	remaining: 11m 1s
371:	learn: 0.8626546	test: 0.8654127	best: 0.8654127 (371)	total: 1m 33s	remaining: 11m 1s
372:	learn: 0.8626355	test: 0.8653974	best: 0.8653974 (372)	total: 1m 33s	remain

451:	learn: 0.8612489	test: 0.8643486	best: 0.8643486 (451)	total: 1m 54s	remaining: 10m 45s
452:	learn: 0.8612304	test: 0.8643343	best: 0.8643343 (452)	total: 1m 54s	remaining: 10m 44s
453:	learn: 0.8612109	test: 0.8643194	best: 0.8643194 (453)	total: 1m 55s	remaining: 10m 44s
454:	learn: 0.8611936	test: 0.8643070	best: 0.8643070 (454)	total: 1m 55s	remaining: 10m 44s
455:	learn: 0.8611816	test: 0.8642979	best: 0.8642979 (455)	total: 1m 55s	remaining: 10m 44s
456:	learn: 0.8611635	test: 0.8642861	best: 0.8642861 (456)	total: 1m 55s	remaining: 10m 44s
457:	learn: 0.8611467	test: 0.8642764	best: 0.8642764 (457)	total: 1m 56s	remaining: 10m 43s
458:	learn: 0.8611270	test: 0.8642617	best: 0.8642617 (458)	total: 1m 56s	remaining: 10m 43s
459:	learn: 0.8611133	test: 0.8642496	best: 0.8642496 (459)	total: 1m 56s	remaining: 10m 43s
460:	learn: 0.8610926	test: 0.8642361	best: 0.8642361 (460)	total: 1m 56s	remaining: 10m 43s
461:	learn: 0.8610762	test: 0.8642271	best: 0.8642271 (461)	total: 1m 

540:	learn: 0.8599459	test: 0.8634355	best: 0.8634355 (540)	total: 2m 17s	remaining: 10m 24s
541:	learn: 0.8599272	test: 0.8634236	best: 0.8634236 (541)	total: 2m 17s	remaining: 10m 24s
542:	learn: 0.8599157	test: 0.8634169	best: 0.8634169 (542)	total: 2m 17s	remaining: 10m 23s
543:	learn: 0.8599047	test: 0.8634087	best: 0.8634087 (543)	total: 2m 18s	remaining: 10m 23s
544:	learn: 0.8598983	test: 0.8634022	best: 0.8634022 (544)	total: 2m 18s	remaining: 10m 22s
545:	learn: 0.8598876	test: 0.8633944	best: 0.8633944 (545)	total: 2m 18s	remaining: 10m 22s
546:	learn: 0.8598709	test: 0.8633824	best: 0.8633824 (546)	total: 2m 18s	remaining: 10m 22s
547:	learn: 0.8598553	test: 0.8633721	best: 0.8633721 (547)	total: 2m 19s	remaining: 10m 22s
548:	learn: 0.8598400	test: 0.8633651	best: 0.8633651 (548)	total: 2m 19s	remaining: 10m 21s
549:	learn: 0.8598248	test: 0.8633527	best: 0.8633527 (549)	total: 2m 19s	remaining: 10m 21s
550:	learn: 0.8598077	test: 0.8633392	best: 0.8633392 (550)	total: 2m 

629:	learn: 0.8588540	test: 0.8627108	best: 0.8627108 (629)	total: 2m 40s	remaining: 10m 3s
630:	learn: 0.8588407	test: 0.8627034	best: 0.8627034 (630)	total: 2m 40s	remaining: 10m 2s
631:	learn: 0.8588306	test: 0.8626960	best: 0.8626960 (631)	total: 2m 40s	remaining: 10m 2s
632:	learn: 0.8588213	test: 0.8626888	best: 0.8626888 (632)	total: 2m 41s	remaining: 10m 2s
633:	learn: 0.8588102	test: 0.8626826	best: 0.8626826 (633)	total: 2m 41s	remaining: 10m 2s
634:	learn: 0.8587981	test: 0.8626741	best: 0.8626741 (634)	total: 2m 41s	remaining: 10m 1s
635:	learn: 0.8587859	test: 0.8626665	best: 0.8626665 (635)	total: 2m 41s	remaining: 10m 1s
636:	learn: 0.8587718	test: 0.8626555	best: 0.8626555 (636)	total: 2m 42s	remaining: 10m 1s
637:	learn: 0.8587612	test: 0.8626478	best: 0.8626478 (637)	total: 2m 42s	remaining: 10m 1s
638:	learn: 0.8587499	test: 0.8626405	best: 0.8626405 (638)	total: 2m 42s	remaining: 10m 1s
639:	learn: 0.8587387	test: 0.8626319	best: 0.8626319 (639)	total: 2m 42s	remain

719:	learn: 0.8578899	test: 0.8620807	best: 0.8620807 (719)	total: 3m 3s	remaining: 9m 42s
720:	learn: 0.8578806	test: 0.8620752	best: 0.8620752 (720)	total: 3m 4s	remaining: 9m 41s
721:	learn: 0.8578730	test: 0.8620696	best: 0.8620696 (721)	total: 3m 4s	remaining: 9m 41s
722:	learn: 0.8578624	test: 0.8620626	best: 0.8620626 (722)	total: 3m 4s	remaining: 9m 41s
723:	learn: 0.8578576	test: 0.8620588	best: 0.8620588 (723)	total: 3m 4s	remaining: 9m 41s
724:	learn: 0.8578487	test: 0.8620542	best: 0.8620542 (724)	total: 3m 5s	remaining: 9m 40s
725:	learn: 0.8578404	test: 0.8620484	best: 0.8620484 (725)	total: 3m 5s	remaining: 9m 40s
726:	learn: 0.8578301	test: 0.8620413	best: 0.8620413 (726)	total: 3m 5s	remaining: 9m 40s
727:	learn: 0.8578209	test: 0.8620338	best: 0.8620338 (727)	total: 3m 5s	remaining: 9m 40s
728:	learn: 0.8578104	test: 0.8620268	best: 0.8620268 (728)	total: 3m 6s	remaining: 9m 40s
729:	learn: 0.8578000	test: 0.8620210	best: 0.8620210 (729)	total: 3m 6s	remaining: 9m 39s

809:	learn: 0.8570563	test: 0.8616031	best: 0.8616031 (809)	total: 3m 27s	remaining: 9m 20s
810:	learn: 0.8570477	test: 0.8616002	best: 0.8616002 (810)	total: 3m 27s	remaining: 9m 20s
811:	learn: 0.8570409	test: 0.8615958	best: 0.8615958 (811)	total: 3m 27s	remaining: 9m 20s
812:	learn: 0.8570299	test: 0.8615894	best: 0.8615894 (812)	total: 3m 28s	remaining: 9m 20s
813:	learn: 0.8570189	test: 0.8615836	best: 0.8615836 (813)	total: 3m 28s	remaining: 9m 20s
814:	learn: 0.8570094	test: 0.8615766	best: 0.8615766 (814)	total: 3m 29s	remaining: 9m 20s
815:	learn: 0.8569997	test: 0.8615693	best: 0.8615693 (815)	total: 3m 29s	remaining: 9m 20s
816:	learn: 0.8569927	test: 0.8615636	best: 0.8615636 (816)	total: 3m 29s	remaining: 9m 20s
817:	learn: 0.8569829	test: 0.8615583	best: 0.8615583 (817)	total: 3m 29s	remaining: 9m 20s
818:	learn: 0.8569727	test: 0.8615499	best: 0.8615499 (818)	total: 3m 30s	remaining: 9m 19s
819:	learn: 0.8569642	test: 0.8615472	best: 0.8615472 (819)	total: 3m 30s	remain

899:	learn: 0.8563315	test: 0.8612135	best: 0.8612135 (899)	total: 3m 50s	remaining: 8m 57s
900:	learn: 0.8563257	test: 0.8612093	best: 0.8612093 (900)	total: 3m 50s	remaining: 8m 57s
901:	learn: 0.8563162	test: 0.8612060	best: 0.8612060 (901)	total: 3m 50s	remaining: 8m 56s
902:	learn: 0.8563060	test: 0.8612010	best: 0.8612010 (902)	total: 3m 51s	remaining: 8m 56s
903:	learn: 0.8562963	test: 0.8611971	best: 0.8611971 (903)	total: 3m 51s	remaining: 8m 56s
904:	learn: 0.8562860	test: 0.8611907	best: 0.8611907 (904)	total: 3m 51s	remaining: 8m 56s
905:	learn: 0.8562852	test: 0.8611896	best: 0.8611896 (905)	total: 3m 51s	remaining: 8m 55s
906:	learn: 0.8562759	test: 0.8611847	best: 0.8611847 (906)	total: 3m 52s	remaining: 8m 55s
907:	learn: 0.8562707	test: 0.8611808	best: 0.8611808 (907)	total: 3m 52s	remaining: 8m 55s
908:	learn: 0.8562629	test: 0.8611770	best: 0.8611770 (908)	total: 3m 52s	remaining: 8m 55s
909:	learn: 0.8562515	test: 0.8611712	best: 0.8611712 (909)	total: 3m 52s	remain

989:	learn: 0.8556717	test: 0.8608791	best: 0.8608791 (989)	total: 4m 11s	remaining: 8m 31s
990:	learn: 0.8556654	test: 0.8608777	best: 0.8608777 (990)	total: 4m 11s	remaining: 8m 30s
991:	learn: 0.8556591	test: 0.8608716	best: 0.8608716 (991)	total: 4m 12s	remaining: 8m 30s
992:	learn: 0.8556512	test: 0.8608690	best: 0.8608690 (992)	total: 4m 12s	remaining: 8m 30s
993:	learn: 0.8556416	test: 0.8608627	best: 0.8608627 (993)	total: 4m 12s	remaining: 8m 30s
994:	learn: 0.8556327	test: 0.8608603	best: 0.8608603 (994)	total: 4m 13s	remaining: 8m 29s
995:	learn: 0.8556259	test: 0.8608578	best: 0.8608578 (995)	total: 4m 13s	remaining: 8m 29s
996:	learn: 0.8556209	test: 0.8608545	best: 0.8608545 (996)	total: 4m 13s	remaining: 8m 29s
997:	learn: 0.8556159	test: 0.8608510	best: 0.8608510 (997)	total: 4m 13s	remaining: 8m 28s
998:	learn: 0.8556038	test: 0.8608476	best: 0.8608476 (998)	total: 4m 13s	remaining: 8m 28s
999:	learn: 0.8555958	test: 0.8608448	best: 0.8608448 (999)	total: 4m 14s	remain

1077:	learn: 0.8550921	test: 0.8606025	best: 0.8606025 (1077)	total: 4m 33s	remaining: 8m 7s
1078:	learn: 0.8550807	test: 0.8605986	best: 0.8605986 (1078)	total: 4m 33s	remaining: 8m 7s
1079:	learn: 0.8550762	test: 0.8605971	best: 0.8605971 (1079)	total: 4m 33s	remaining: 8m 6s
1080:	learn: 0.8550663	test: 0.8605947	best: 0.8605947 (1080)	total: 4m 34s	remaining: 8m 6s
1081:	learn: 0.8550583	test: 0.8605931	best: 0.8605931 (1081)	total: 4m 34s	remaining: 8m 6s
1082:	learn: 0.8550555	test: 0.8605899	best: 0.8605899 (1082)	total: 4m 34s	remaining: 8m 5s
1083:	learn: 0.8550433	test: 0.8605875	best: 0.8605875 (1083)	total: 4m 34s	remaining: 8m 5s
1084:	learn: 0.8550379	test: 0.8605843	best: 0.8605843 (1084)	total: 4m 35s	remaining: 8m 5s
1085:	learn: 0.8550345	test: 0.8605823	best: 0.8605823 (1085)	total: 4m 35s	remaining: 8m 5s
1086:	learn: 0.8550269	test: 0.8605786	best: 0.8605786 (1086)	total: 4m 35s	remaining: 8m 4s
1087:	learn: 0.8550167	test: 0.8605744	best: 0.8605744 (1087)	total: 4

1165:	learn: 0.8545239	test: 0.8603667	best: 0.8603667 (1165)	total: 4m 54s	remaining: 7m 43s
1166:	learn: 0.8545170	test: 0.8603610	best: 0.8603610 (1166)	total: 4m 55s	remaining: 7m 43s
1167:	learn: 0.8545114	test: 0.8603581	best: 0.8603581 (1167)	total: 4m 55s	remaining: 7m 43s
1168:	learn: 0.8545037	test: 0.8603561	best: 0.8603561 (1168)	total: 4m 55s	remaining: 7m 42s
1169:	learn: 0.8544942	test: 0.8603518	best: 0.8603518 (1169)	total: 4m 55s	remaining: 7m 42s
1170:	learn: 0.8544869	test: 0.8603487	best: 0.8603487 (1170)	total: 4m 56s	remaining: 7m 42s
1171:	learn: 0.8544796	test: 0.8603465	best: 0.8603465 (1171)	total: 4m 56s	remaining: 7m 42s
1172:	learn: 0.8544750	test: 0.8603454	best: 0.8603454 (1172)	total: 4m 56s	remaining: 7m 42s
1173:	learn: 0.8544680	test: 0.8603417	best: 0.8603417 (1173)	total: 4m 56s	remaining: 7m 41s
1174:	learn: 0.8544592	test: 0.8603397	best: 0.8603397 (1174)	total: 4m 57s	remaining: 7m 41s
1175:	learn: 0.8544498	test: 0.8603366	best: 0.8603366 (1175

1253:	learn: 0.8539801	test: 0.8601421	best: 0.8601421 (1253)	total: 5m 16s	remaining: 7m 20s
1254:	learn: 0.8539767	test: 0.8601393	best: 0.8601393 (1254)	total: 5m 16s	remaining: 7m 20s
1255:	learn: 0.8539684	test: 0.8601348	best: 0.8601348 (1255)	total: 5m 17s	remaining: 7m 20s
1256:	learn: 0.8539626	test: 0.8601309	best: 0.8601309 (1256)	total: 5m 17s	remaining: 7m 20s
1257:	learn: 0.8539575	test: 0.8601290	best: 0.8601290 (1257)	total: 5m 17s	remaining: 7m 19s
1258:	learn: 0.8539470	test: 0.8601240	best: 0.8601240 (1258)	total: 5m 17s	remaining: 7m 19s
1259:	learn: 0.8539452	test: 0.8601229	best: 0.8601229 (1259)	total: 5m 17s	remaining: 7m 19s
1260:	learn: 0.8539372	test: 0.8601193	best: 0.8601193 (1260)	total: 5m 18s	remaining: 7m 18s
1261:	learn: 0.8539292	test: 0.8601174	best: 0.8601174 (1261)	total: 5m 18s	remaining: 7m 18s
1262:	learn: 0.8539219	test: 0.8601153	best: 0.8601153 (1262)	total: 5m 18s	remaining: 7m 18s
1263:	learn: 0.8539166	test: 0.8601131	best: 0.8601131 (1263

1341:	learn: 0.8534770	test: 0.8599406	best: 0.8599406 (1341)	total: 5m 37s	remaining: 6m 57s
1342:	learn: 0.8534692	test: 0.8599386	best: 0.8599386 (1342)	total: 5m 37s	remaining: 6m 56s
1343:	learn: 0.8534626	test: 0.8599372	best: 0.8599372 (1343)	total: 5m 38s	remaining: 6m 56s
1344:	learn: 0.8534554	test: 0.8599330	best: 0.8599330 (1344)	total: 5m 38s	remaining: 6m 56s
1345:	learn: 0.8534524	test: 0.8599303	best: 0.8599303 (1345)	total: 5m 38s	remaining: 6m 56s
1346:	learn: 0.8534459	test: 0.8599269	best: 0.8599269 (1346)	total: 5m 38s	remaining: 6m 55s
1347:	learn: 0.8534387	test: 0.8599233	best: 0.8599233 (1347)	total: 5m 39s	remaining: 6m 55s
1348:	learn: 0.8534324	test: 0.8599199	best: 0.8599199 (1348)	total: 5m 39s	remaining: 6m 55s
1349:	learn: 0.8534308	test: 0.8599184	best: 0.8599184 (1349)	total: 5m 39s	remaining: 6m 54s
1350:	learn: 0.8534298	test: 0.8599179	best: 0.8599179 (1350)	total: 5m 39s	remaining: 6m 54s
1351:	learn: 0.8534256	test: 0.8599171	best: 0.8599171 (1351

1429:	learn: 0.8530141	test: 0.8597697	best: 0.8597697 (1429)	total: 5m 58s	remaining: 6m 33s
1430:	learn: 0.8530099	test: 0.8597667	best: 0.8597667 (1430)	total: 5m 58s	remaining: 6m 33s
1431:	learn: 0.8530027	test: 0.8597640	best: 0.8597640 (1431)	total: 5m 59s	remaining: 6m 33s
1432:	learn: 0.8530025	test: 0.8597638	best: 0.8597638 (1432)	total: 5m 59s	remaining: 6m 32s
1433:	learn: 0.8529983	test: 0.8597614	best: 0.8597614 (1433)	total: 5m 59s	remaining: 6m 32s
1434:	learn: 0.8529925	test: 0.8597589	best: 0.8597589 (1434)	total: 5m 59s	remaining: 6m 32s
1435:	learn: 0.8529863	test: 0.8597569	best: 0.8597569 (1435)	total: 5m 59s	remaining: 6m 31s
1436:	learn: 0.8529767	test: 0.8597551	best: 0.8597551 (1436)	total: 6m	remaining: 6m 31s
1437:	learn: 0.8529728	test: 0.8597540	best: 0.8597540 (1437)	total: 6m	remaining: 6m 31s
1438:	learn: 0.8529708	test: 0.8597519	best: 0.8597519 (1438)	total: 6m	remaining: 6m 31s
1439:	learn: 0.8529669	test: 0.8597501	best: 0.8597501 (1439)	total: 6m	

1517:	learn: 0.8525641	test: 0.8596113	best: 0.8596113 (1517)	total: 6m 18s	remaining: 6m 9s
1518:	learn: 0.8525632	test: 0.8596104	best: 0.8596104 (1518)	total: 6m 18s	remaining: 6m 9s
1519:	learn: 0.8525582	test: 0.8596068	best: 0.8596068 (1519)	total: 6m 19s	remaining: 6m 9s
1520:	learn: 0.8525579	test: 0.8596063	best: 0.8596063 (1520)	total: 6m 19s	remaining: 6m 8s
1521:	learn: 0.8525550	test: 0.8596058	best: 0.8596058 (1521)	total: 6m 19s	remaining: 6m 8s
1522:	learn: 0.8525499	test: 0.8596048	best: 0.8596048 (1522)	total: 6m 19s	remaining: 6m 8s
1523:	learn: 0.8525430	test: 0.8596019	best: 0.8596019 (1523)	total: 6m 19s	remaining: 6m 7s
1524:	learn: 0.8525429	test: 0.8596017	best: 0.8596017 (1524)	total: 6m 19s	remaining: 6m 7s
1525:	learn: 0.8525393	test: 0.8596002	best: 0.8596002 (1525)	total: 6m 20s	remaining: 6m 7s
1526:	learn: 0.8525361	test: 0.8595989	best: 0.8595989 (1526)	total: 6m 20s	remaining: 6m 6s
1527:	learn: 0.8525305	test: 0.8595947	best: 0.8595947 (1527)	total: 6

1605:	learn: 0.8521185	test: 0.8594949	best: 0.8594949 (1605)	total: 6m 39s	remaining: 5m 46s
1606:	learn: 0.8521156	test: 0.8594935	best: 0.8594935 (1606)	total: 6m 39s	remaining: 5m 46s
1607:	learn: 0.8521108	test: 0.8594902	best: 0.8594902 (1607)	total: 6m 39s	remaining: 5m 45s
1608:	learn: 0.8521052	test: 0.8594884	best: 0.8594884 (1608)	total: 6m 39s	remaining: 5m 45s
1609:	learn: 0.8521017	test: 0.8594874	best: 0.8594874 (1609)	total: 6m 40s	remaining: 5m 45s
1610:	learn: 0.8520934	test: 0.8594859	best: 0.8594859 (1610)	total: 6m 40s	remaining: 5m 45s
1611:	learn: 0.8520878	test: 0.8594836	best: 0.8594836 (1611)	total: 6m 40s	remaining: 5m 44s
1612:	learn: 0.8520765	test: 0.8594826	best: 0.8594826 (1612)	total: 6m 40s	remaining: 5m 44s
1613:	learn: 0.8520715	test: 0.8594816	best: 0.8594816 (1613)	total: 6m 41s	remaining: 5m 44s
1614:	learn: 0.8520620	test: 0.8594811	best: 0.8594811 (1614)	total: 6m 41s	remaining: 5m 44s
1615:	learn: 0.8520547	test: 0.8594804	best: 0.8594804 (1615

KeyboardInterrupt: 

## cat = CatBoostRegressor(cat_features=feature_cat, iterations=3000, use_best_model=True, depth=10, eval_metric='RMSE', l2_leaf_reg=1, learning_rate=0.005, early_stopping_rounds=3)

### Featuresを前処理などせずそのままCatBoostにぶち込む
1821:	learn: 0.8512851	test: 0.8582574	best: 0.8582561 (1820)	total: 7m 32s	remaining: 4m 52s  
1822:	learn: 0.8512811	test: 0.8582564	best: 0.8582561 (1820)	total: 7m 32s	remaining: 4m 52s  
1823:	learn: 0.8512797	test: 0.8582563	best: 0.8582561 (1820)	total: 7m 32s	remaining: 4m 51s  
Stopped by overfitting detector  (3 iterations wait)  
bestTest = 0.8582561211  
bestIteration = 1820  
Shrink model to first 1821 iterations.  
RMSE: 0.7366035702105443  

### Featuresの量的変数にRobustScalerを適用する
2516:	learn: 0.8475800	test: 0.8584843	best: 0.8584836 (2515)	total: 10m 23s	remaining: 1m 59s  
2517:	learn: 0.8475709	test: 0.8584850	best: 0.8584836 (2515)	total: 10m 24s	remaining: 1m 59s  
2518:	learn: 0.8475628	test: 0.8584842	best: 0.8584836 (2515)	total: 10m 24s	remaining: 1m 59s  
Stopped by overfitting detector  (3 iterations wait)  

bestTest = 0.8584835866  
bestIteration = 2515  

Shrink model to first 2516 iterations.  
RMSE: 0.7369940700404239  


In [8]:
y_pred=cat.predict(X_prediction)
output = pd.DataFrame({'id': prediction_id, 'target': y_pred})
output.to_csv('my_submission.csv', index=False)
print("Your submission was successfully saved!")

Your submission was successfully saved!
