# Catboost ensemble_starter

In [1]:
#load the package
import numpy as np 
import pandas as pd 
from catboost import CatBoostRegressor
from tqdm import tqdm
import datetime as dt

# Load the data, formatting data

In [2]:
#load the properties data
properties2016 = pd.read_csv('properties_2016.csv', low_memory = False)
properties2017 = pd.read_csv('properties_2017.csv', low_memory = False)
#load the train data
train2016 = pd.read_csv('train_2016_v2.csv', parse_dates=['transactiondate'], low_memory=False)
train2017 = pd.read_csv('train_2017.csv', parse_dates=['transactiondate'], low_memory=False)
#load sample data
sample_submission = pd.read_csv('sample_submission.csv', low_memory = False)

In [3]:
def trans_datetime_features(df):
    df["year"] = df["transactiondate"].dt.year
    df["quarter"] = (df["transactiondate"].dt.year - 2016)*4 +df["transactiondate"].dt.quarter
    df["month"] = (df["transactiondate"].dt.year - 2016)*12 + df["transactiondate"].dt.month
    df["day"] = df["transactiondate"].dt.day
    df.drop(["transactiondate"], inplace=True, axis=1)
    return df

In [4]:
# transform datatime feature
train2016 = trans_datetime_features(train2016)
train2017 = trans_datetime_features(train2017)
#merge the properties and train data 
train2016 = pd.merge(train2016, properties2016, how = 'left', on = 'parcelid')
train2017 = pd.merge(train2017, properties2017, how = 'left', on = 'parcelid')

In [5]:
#tax feature in 2017, np.nan
train2017.iloc[:, train2017.columns.str.startswith('tax')] = np.nan
#concat Train 2016, 2017
train_df = pd.concat([train2016, train2017], axis = 0)
#merge submission, properties2016
test_df = pd.merge(sample_submission[['ParcelId']], properties2016.rename(columns = {'parcelid': 'ParcelId'}), how = 'left', on = 'ParcelId')

In [6]:
train_df.shape, test_df.shape

((167888, 63), (2985217, 58))

# Exclue Missing data > 98%

In [7]:
# find the columns with the missing values, and set the threshhold 98
missing = pd.concat([train_df.isnull().sum(), 100 * train_df.isnull().mean()], axis=1)
missing.columns=['count', '%']
remove_missing_cols = missing[missing.iloc[:,1] > 98].index.tolist()
print("Remove containing missing values 98 percent of cols: %s" % len(remove_missing_cols))
print('Remove_missing_cols are: \n %s' % remove_missing_cols)

Remove containing missing values 98 percent of cols: 15
Remove_missing_cols are: 
 ['architecturalstyletypeid', 'basementsqft', 'buildingclasstypeid', 'decktypeid', 'finishedsquarefeet13', 'finishedsquarefeet6', 'poolsizesum', 'pooltypeid10', 'pooltypeid2', 'storytypeid', 'typeconstructiontypeid', 'yardbuildingsqft26', 'fireplaceflag', 'taxdelinquencyflag', 'taxdelinquencyyear']


# Exclue features that only have one values

In [8]:
# check the cols that with one unique values, and remove them

remove_unique_col = []
for col in train_df.columns:
    num_of_uniques = len(train_df[col].unique())
    if train_df[col].isnull().sum() != 0:
        num_of_uniques -= 1
    if num_of_uniques == 1:
        remove_unique_col.append(col)

print("Remove the cols with unique value: %s" % len(remove_unique_col))
print('Removed unique value cols are: \n %s' % remove_unique_col)

Remove the cols with unique value: 9
Removed unique value cols are: 
 ['decktypeid', 'hashottuborspa', 'poolcnt', 'pooltypeid10', 'pooltypeid2', 'pooltypeid7', 'storytypeid', 'fireplaceflag', 'taxdelinquencyflag']


# Prepare training features

In [11]:
exclude_list = ['parcelid', 'propertyzoningdesc', 'logerror', 'propertycountylandusecode']
exclude_list = exclude_list + remove_missing_cols + remove_unique_col
train_features = [e for e in train_df.columns if e not in exclude_list]
print("Training features are: %s" % len(train_features))
print('Training features are: \n %s ' % train_features)

Training features are: 41
Training features are: 
 ['year', 'quarter', 'month', 'day', 'airconditioningtypeid', 'bathroomcnt', 'bedroomcnt', 'buildingqualitytypeid', 'calculatedbathnbr', 'finishedfloor1squarefeet', 'calculatedfinishedsquarefeet', 'finishedsquarefeet12', 'finishedsquarefeet15', 'finishedsquarefeet50', 'fips', 'fireplacecnt', 'fullbathcnt', 'garagecarcnt', 'garagetotalsqft', 'heatingorsystemtypeid', 'latitude', 'longitude', 'lotsizesquarefeet', 'propertylandusetypeid', 'rawcensustractandblock', 'regionidcity', 'regionidcounty', 'regionidneighborhood', 'regionidzip', 'roomcnt', 'threequarterbathnbr', 'unitcnt', 'yardbuildingsqft17', 'yearbuilt', 'numberofstories', 'structuretaxvaluedollarcnt', 'taxvaluedollarcnt', 'assessmentyear', 'landtaxvaluedollarcnt', 'taxamount', 'censustractandblock'] 


# Define categorical features among training features

In [12]:
feature_nunique = pd.DataFrame(train_df[train_features].nunique(), columns=['count'])
cat_possible_list = feature_nunique[feature_nunique['count'] < 1000].index.tolist()
cat_feature_list = [f for f in cat_possible_list if 'sqft' not in f and 'cnt' not in f and 'nbr' not in f and 'number' not in f]
cat_features = [train_df[train_features].columns.get_loc(col) for col in cat_feature_list]
print ("Number of categorical features are defined while they have less than 1000 unique values and \n have no numeric types of features in the list: %s" % len(cat_feature_list))
print('\n', '************************************************************************************************************')
print ("Categorical features are defined while they have less than 1000 unique values and \n have no numeric types of features in the list: \n %s" % cat_feature_list)


Number of categorical features are defined while they have less than 1000 unique values and 
 have no numeric types of features in the list: 15

 ************************************************************************************************************
Categorical features are defined while they have less than 1000 unique values and 
 have no numeric types of features in the list: 
 ['year', 'quarter', 'month', 'day', 'airconditioningtypeid', 'buildingqualitytypeid', 'fips', 'heatingorsystemtypeid', 'propertylandusetypeid', 'regionidcity', 'regionidcounty', 'regionidneighborhood', 'regionidzip', 'yearbuilt', 'assessmentyear']


# Fill NA, and TRAINING

In [17]:
# fillna with 'None' string before calculating hashes.
# helpfullink:https://github.com/catboost/catboost/blob/master/open_problems/open_problems.md

train_df.replace(np.nan, 'None', inplace=True)
test_df.replace(np.nan, 'None', inplace=True)


In [19]:

# separate predictor variables and target variable 
X_train = train_df[train_features]
y_train = train_df.logerror
print("X_train.shape, y_train.shape are: ", X_train.shape, y_train.shape)

# creat transactiondate column and assign timestamp
test_df['transactiondate'] = pd.Timestamp('2016-12-01') 
test_df = trans_datetime_features(test_df)
X_test = test_df[train_features]
print("X_test.shape is: ", X_test.shape)

X_train.shape, y_train.shape are:  (167888, 41) (167888,)
X_test.shape is:  (2985217, 41)


In [24]:
X_train[cat_feature_list] = X_train[cat_feature_list].astype(str)
X_test[cat_feature_list] = X_test[cat_feature_list].astype(str)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [25]:
num_ensembles = 5
y_pred = 0.0
for i in tqdm(range(num_ensembles)):
    model = CatBoostRegressor(
        iterations=630, learning_rate=0.03,
        depth=6, l2_leaf_reg=3,
        loss_function='MAE',
        eval_metric='MAE',
        random_seed=i)
    model.fit(
        X_train, y_train,
        cat_features=cat_features)
    y_pred += model.predict(X_test)
y_pred /= num_ensembles


  0%|          | 0/5 [00:00<?, ?it/s]

0:	learn: 0.0688436	total: 341ms	remaining: 3m 34s
1:	learn: 0.0688280	total: 513ms	remaining: 2m 40s
2:	learn: 0.0688137	total: 680ms	remaining: 2m 22s
3:	learn: 0.0687987	total: 832ms	remaining: 2m 10s
4:	learn: 0.0687850	total: 997ms	remaining: 2m 4s
5:	learn: 0.0687704	total: 1.22s	remaining: 2m 6s
6:	learn: 0.0687583	total: 1.4s	remaining: 2m 4s
7:	learn: 0.0687450	total: 1.58s	remaining: 2m 3s
8:	learn: 0.0687324	total: 1.76s	remaining: 2m 1s
9:	learn: 0.0687211	total: 1.91s	remaining: 1m 58s
10:	learn: 0.0687105	total: 2.08s	remaining: 1m 56s
11:	learn: 0.0687001	total: 2.29s	remaining: 1m 57s
12:	learn: 0.0686910	total: 2.42s	remaining: 1m 55s
13:	learn: 0.0686813	total: 2.59s	remaining: 1m 53s
14:	learn: 0.0686723	total: 2.74s	remaining: 1m 52s
15:	learn: 0.0686631	total: 2.92s	remaining: 1m 51s
16:	learn: 0.0686533	total: 3.09s	remaining: 1m 51s
17:	learn: 0.0686447	total: 3.31s	remaining: 1m 52s
18:	learn: 0.0686371	total: 3.46s	remaining: 1m 51s
19:	learn: 0.0686304	total: 

159:	learn: 0.0681933	total: 29.3s	remaining: 1m 26s
160:	learn: 0.0681919	total: 29.6s	remaining: 1m 26s
161:	learn: 0.0681910	total: 29.8s	remaining: 1m 25s
162:	learn: 0.0681891	total: 29.9s	remaining: 1m 25s
163:	learn: 0.0681878	total: 30.1s	remaining: 1m 25s
164:	learn: 0.0681867	total: 30.2s	remaining: 1m 25s
165:	learn: 0.0681860	total: 30.3s	remaining: 1m 24s
166:	learn: 0.0681849	total: 30.5s	remaining: 1m 24s
167:	learn: 0.0681822	total: 30.7s	remaining: 1m 24s
168:	learn: 0.0681815	total: 30.8s	remaining: 1m 24s
169:	learn: 0.0681794	total: 31s	remaining: 1m 23s
170:	learn: 0.0681780	total: 31.1s	remaining: 1m 23s
171:	learn: 0.0681760	total: 31.3s	remaining: 1m 23s
172:	learn: 0.0681753	total: 31.4s	remaining: 1m 23s
173:	learn: 0.0681736	total: 31.6s	remaining: 1m 22s
174:	learn: 0.0681716	total: 31.7s	remaining: 1m 22s
175:	learn: 0.0681701	total: 31.9s	remaining: 1m 22s
176:	learn: 0.0681686	total: 32s	remaining: 1m 21s
177:	learn: 0.0681675	total: 32.2s	remaining: 1m 2

317:	learn: 0.0680195	total: 57s	remaining: 55.9s
318:	learn: 0.0680180	total: 57.1s	remaining: 55.7s
319:	learn: 0.0680170	total: 57.3s	remaining: 55.5s
320:	learn: 0.0680152	total: 57.5s	remaining: 55.3s
321:	learn: 0.0680147	total: 57.6s	remaining: 55.1s
322:	learn: 0.0680134	total: 57.8s	remaining: 54.9s
323:	learn: 0.0680117	total: 57.9s	remaining: 54.7s
324:	learn: 0.0680112	total: 58.1s	remaining: 54.6s
325:	learn: 0.0680100	total: 58.3s	remaining: 54.4s
326:	learn: 0.0680089	total: 58.4s	remaining: 54.1s
327:	learn: 0.0680081	total: 58.6s	remaining: 54s
328:	learn: 0.0680074	total: 58.8s	remaining: 53.8s
329:	learn: 0.0680066	total: 59s	remaining: 53.6s
330:	learn: 0.0680050	total: 59.1s	remaining: 53.4s
331:	learn: 0.0680042	total: 59.3s	remaining: 53.2s
332:	learn: 0.0680038	total: 59.5s	remaining: 53.1s
333:	learn: 0.0680034	total: 59.7s	remaining: 52.9s
334:	learn: 0.0680025	total: 59.9s	remaining: 52.7s
335:	learn: 0.0680006	total: 1m	remaining: 52.5s
336:	learn: 0.0680005

475:	learn: 0.0678576	total: 1m 25s	remaining: 27.6s
476:	learn: 0.0678572	total: 1m 25s	remaining: 27.5s
477:	learn: 0.0678562	total: 1m 25s	remaining: 27.3s
478:	learn: 0.0678542	total: 1m 25s	remaining: 27.1s
479:	learn: 0.0678534	total: 1m 26s	remaining: 26.9s
480:	learn: 0.0678518	total: 1m 26s	remaining: 26.7s
481:	learn: 0.0678513	total: 1m 26s	remaining: 26.6s
482:	learn: 0.0678506	total: 1m 26s	remaining: 26.4s
483:	learn: 0.0678482	total: 1m 26s	remaining: 26.2s
484:	learn: 0.0678457	total: 1m 27s	remaining: 26s
485:	learn: 0.0678437	total: 1m 27s	remaining: 25.8s
486:	learn: 0.0678427	total: 1m 27s	remaining: 25.7s
487:	learn: 0.0678412	total: 1m 27s	remaining: 25.5s
488:	learn: 0.0678401	total: 1m 27s	remaining: 25.3s
489:	learn: 0.0678390	total: 1m 28s	remaining: 25.1s
490:	learn: 0.0678387	total: 1m 28s	remaining: 25s
491:	learn: 0.0678377	total: 1m 28s	remaining: 24.8s
492:	learn: 0.0678366	total: 1m 28s	remaining: 24.6s
493:	learn: 0.0678347	total: 1m 28s	remaining: 24.

 20%|██        | 1/5 [02:12<08:48, 132.18s/it]

0:	learn: 0.0688425	total: 166ms	remaining: 1m 44s
1:	learn: 0.0688269	total: 317ms	remaining: 1m 39s
2:	learn: 0.0688099	total: 479ms	remaining: 1m 40s
3:	learn: 0.0687954	total: 665ms	remaining: 1m 44s
4:	learn: 0.0687791	total: 844ms	remaining: 1m 45s
5:	learn: 0.0687668	total: 1.01s	remaining: 1m 45s
6:	learn: 0.0687526	total: 1.22s	remaining: 1m 48s
7:	learn: 0.0687410	total: 1.39s	remaining: 1m 48s
8:	learn: 0.0687292	total: 1.59s	remaining: 1m 49s
9:	learn: 0.0687171	total: 1.77s	remaining: 1m 49s
10:	learn: 0.0687056	total: 1.96s	remaining: 1m 50s
11:	learn: 0.0686933	total: 2.14s	remaining: 1m 50s
12:	learn: 0.0686842	total: 2.31s	remaining: 1m 49s
13:	learn: 0.0686749	total: 2.45s	remaining: 1m 47s
14:	learn: 0.0686646	total: 2.67s	remaining: 1m 49s
15:	learn: 0.0686553	total: 2.82s	remaining: 1m 48s
16:	learn: 0.0686462	total: 3s	remaining: 1m 48s
17:	learn: 0.0686379	total: 3.15s	remaining: 1m 47s
18:	learn: 0.0686272	total: 3.33s	remaining: 1m 46s
19:	learn: 0.0686184	tota

158:	learn: 0.0681813	total: 29.9s	remaining: 1m 28s
159:	learn: 0.0681804	total: 30s	remaining: 1m 28s
160:	learn: 0.0681795	total: 30.2s	remaining: 1m 27s
161:	learn: 0.0681785	total: 30.4s	remaining: 1m 27s
162:	learn: 0.0681762	total: 30.6s	remaining: 1m 27s
163:	learn: 0.0681746	total: 30.8s	remaining: 1m 27s
164:	learn: 0.0681734	total: 31s	remaining: 1m 27s
165:	learn: 0.0681713	total: 31.2s	remaining: 1m 27s
166:	learn: 0.0681701	total: 31.3s	remaining: 1m 26s
167:	learn: 0.0681680	total: 31.5s	remaining: 1m 26s
168:	learn: 0.0681658	total: 31.6s	remaining: 1m 26s
169:	learn: 0.0681638	total: 31.8s	remaining: 1m 26s
170:	learn: 0.0681622	total: 32s	remaining: 1m 25s
171:	learn: 0.0681612	total: 32.2s	remaining: 1m 25s
172:	learn: 0.0681599	total: 32.4s	remaining: 1m 25s
173:	learn: 0.0681577	total: 32.6s	remaining: 1m 25s
174:	learn: 0.0681569	total: 32.9s	remaining: 1m 25s
175:	learn: 0.0681559	total: 33.2s	remaining: 1m 25s
176:	learn: 0.0681546	total: 33.4s	remaining: 1m 25s

315:	learn: 0.0680094	total: 58.1s	remaining: 57.7s
316:	learn: 0.0680086	total: 58.2s	remaining: 57.5s
317:	learn: 0.0680077	total: 58.4s	remaining: 57.3s
318:	learn: 0.0680066	total: 58.6s	remaining: 57.2s
319:	learn: 0.0680060	total: 58.8s	remaining: 57s
320:	learn: 0.0680055	total: 58.9s	remaining: 56.7s
321:	learn: 0.0680051	total: 59.2s	remaining: 56.6s
322:	learn: 0.0680047	total: 59.4s	remaining: 56.4s
323:	learn: 0.0680044	total: 59.5s	remaining: 56.2s
324:	learn: 0.0680030	total: 59.7s	remaining: 56.1s
325:	learn: 0.0680022	total: 59.9s	remaining: 55.9s
326:	learn: 0.0680015	total: 1m	remaining: 55.9s
327:	learn: 0.0680013	total: 1m	remaining: 55.7s
328:	learn: 0.0680003	total: 1m	remaining: 55.5s
329:	learn: 0.0679994	total: 1m	remaining: 55.3s
330:	learn: 0.0679990	total: 1m	remaining: 55s
331:	learn: 0.0679981	total: 1m 1s	remaining: 54.8s
332:	learn: 0.0679974	total: 1m 1s	remaining: 54.7s
333:	learn: 0.0679964	total: 1m 1s	remaining: 54.5s
334:	learn: 0.0679957	total: 1m

472:	learn: 0.0678491	total: 1m 25s	remaining: 28.5s
473:	learn: 0.0678470	total: 1m 26s	remaining: 28.3s
474:	learn: 0.0678452	total: 1m 26s	remaining: 28.2s
475:	learn: 0.0678436	total: 1m 26s	remaining: 28s
476:	learn: 0.0678424	total: 1m 26s	remaining: 27.8s
477:	learn: 0.0678416	total: 1m 26s	remaining: 27.6s
478:	learn: 0.0678406	total: 1m 26s	remaining: 27.4s
479:	learn: 0.0678390	total: 1m 27s	remaining: 27.2s
480:	learn: 0.0678383	total: 1m 27s	remaining: 27s
481:	learn: 0.0678376	total: 1m 27s	remaining: 26.9s
482:	learn: 0.0678367	total: 1m 27s	remaining: 26.7s
483:	learn: 0.0678354	total: 1m 27s	remaining: 26.5s
484:	learn: 0.0678343	total: 1m 27s	remaining: 26.3s
485:	learn: 0.0678336	total: 1m 28s	remaining: 26.1s
486:	learn: 0.0678323	total: 1m 28s	remaining: 25.9s
487:	learn: 0.0678293	total: 1m 28s	remaining: 25.8s
488:	learn: 0.0678272	total: 1m 28s	remaining: 25.6s
489:	learn: 0.0678256	total: 1m 28s	remaining: 25.4s
490:	learn: 0.0678247	total: 1m 28s	remaining: 25.

628:	learn: 0.0676791	total: 1m 53s	remaining: 180ms
629:	learn: 0.0676779	total: 1m 53s	remaining: 0us


 40%|████      | 2/5 [04:22<06:34, 131.37s/it]

0:	learn: 0.0688431	total: 190ms	remaining: 1m 59s
1:	learn: 0.0688283	total: 372ms	remaining: 1m 56s
2:	learn: 0.0688116	total: 550ms	remaining: 1m 54s
3:	learn: 0.0687980	total: 726ms	remaining: 1m 53s
4:	learn: 0.0687846	total: 891ms	remaining: 1m 51s
5:	learn: 0.0687709	total: 1.03s	remaining: 1m 47s
6:	learn: 0.0687571	total: 1.21s	remaining: 1m 47s
7:	learn: 0.0687459	total: 1.36s	remaining: 1m 45s
8:	learn: 0.0687330	total: 1.52s	remaining: 1m 44s
9:	learn: 0.0687234	total: 1.71s	remaining: 1m 45s
10:	learn: 0.0687134	total: 1.88s	remaining: 1m 45s
11:	learn: 0.0687031	total: 2.06s	remaining: 1m 46s
12:	learn: 0.0686918	total: 2.22s	remaining: 1m 45s
13:	learn: 0.0686821	total: 2.37s	remaining: 1m 44s
14:	learn: 0.0686720	total: 2.52s	remaining: 1m 43s
15:	learn: 0.0686626	total: 2.73s	remaining: 1m 44s
16:	learn: 0.0686504	total: 2.91s	remaining: 1m 44s
17:	learn: 0.0686423	total: 3.06s	remaining: 1m 44s
18:	learn: 0.0686326	total: 3.2s	remaining: 1m 42s
19:	learn: 0.0686251	to

158:	learn: 0.0681860	total: 28.9s	remaining: 1m 25s
159:	learn: 0.0681843	total: 29.1s	remaining: 1m 25s
160:	learn: 0.0681832	total: 29.3s	remaining: 1m 25s
161:	learn: 0.0681812	total: 29.5s	remaining: 1m 25s
162:	learn: 0.0681798	total: 29.7s	remaining: 1m 25s
163:	learn: 0.0681787	total: 29.9s	remaining: 1m 24s
164:	learn: 0.0681764	total: 30.1s	remaining: 1m 24s
165:	learn: 0.0681752	total: 30.2s	remaining: 1m 24s
166:	learn: 0.0681742	total: 30.4s	remaining: 1m 24s
167:	learn: 0.0681731	total: 30.6s	remaining: 1m 24s
168:	learn: 0.0681715	total: 30.8s	remaining: 1m 23s
169:	learn: 0.0681699	total: 31s	remaining: 1m 23s
170:	learn: 0.0681670	total: 31.1s	remaining: 1m 23s
171:	learn: 0.0681658	total: 31.4s	remaining: 1m 23s
172:	learn: 0.0681646	total: 31.6s	remaining: 1m 23s
173:	learn: 0.0681631	total: 31.7s	remaining: 1m 23s
174:	learn: 0.0681621	total: 32s	remaining: 1m 23s
175:	learn: 0.0681605	total: 32.1s	remaining: 1m 22s
176:	learn: 0.0681594	total: 32.3s	remaining: 1m 2

315:	learn: 0.0680136	total: 57.3s	remaining: 56.9s
316:	learn: 0.0680125	total: 57.4s	remaining: 56.7s
317:	learn: 0.0680113	total: 57.6s	remaining: 56.5s
318:	learn: 0.0680106	total: 57.8s	remaining: 56.4s
319:	learn: 0.0680094	total: 58s	remaining: 56.2s
320:	learn: 0.0680087	total: 58.2s	remaining: 56s
321:	learn: 0.0680086	total: 58.4s	remaining: 55.9s
322:	learn: 0.0680083	total: 58.7s	remaining: 55.8s
323:	learn: 0.0680077	total: 58.9s	remaining: 55.6s
324:	learn: 0.0680072	total: 59.1s	remaining: 55.4s
325:	learn: 0.0680071	total: 59.2s	remaining: 55.2s
326:	learn: 0.0680066	total: 59.4s	remaining: 55s
327:	learn: 0.0680051	total: 59.6s	remaining: 54.9s
328:	learn: 0.0680038	total: 59.8s	remaining: 54.7s
329:	learn: 0.0680027	total: 60s	remaining: 54.5s
330:	learn: 0.0680019	total: 1m	remaining: 54.4s
331:	learn: 0.0680004	total: 1m	remaining: 54.2s
332:	learn: 0.0679987	total: 1m	remaining: 54s
333:	learn: 0.0679981	total: 1m	remaining: 53.8s
334:	learn: 0.0679970	total: 1m	re

472:	learn: 0.0678400	total: 1m 26s	remaining: 28.7s
473:	learn: 0.0678379	total: 1m 26s	remaining: 28.5s
474:	learn: 0.0678374	total: 1m 26s	remaining: 28.3s
475:	learn: 0.0678353	total: 1m 27s	remaining: 28.2s
476:	learn: 0.0678339	total: 1m 27s	remaining: 28s
477:	learn: 0.0678322	total: 1m 27s	remaining: 27.8s
478:	learn: 0.0678303	total: 1m 27s	remaining: 27.6s
479:	learn: 0.0678292	total: 1m 27s	remaining: 27.4s
480:	learn: 0.0678282	total: 1m 27s	remaining: 27.2s
481:	learn: 0.0678278	total: 1m 28s	remaining: 27.1s
482:	learn: 0.0678262	total: 1m 28s	remaining: 26.9s
483:	learn: 0.0678257	total: 1m 28s	remaining: 26.7s
484:	learn: 0.0678249	total: 1m 28s	remaining: 26.5s
485:	learn: 0.0678242	total: 1m 28s	remaining: 26.4s
486:	learn: 0.0678231	total: 1m 29s	remaining: 26.2s
487:	learn: 0.0678222	total: 1m 29s	remaining: 26s
488:	learn: 0.0678212	total: 1m 29s	remaining: 25.8s
489:	learn: 0.0678207	total: 1m 29s	remaining: 25.7s
490:	learn: 0.0678194	total: 1m 29s	remaining: 25.

628:	learn: 0.0676921	total: 1m 56s	remaining: 185ms
629:	learn: 0.0676913	total: 1m 56s	remaining: 0us


 60%|██████    | 3/5 [06:37<04:25, 132.67s/it]

0:	learn: 0.0688421	total: 175ms	remaining: 1m 50s
1:	learn: 0.0688268	total: 321ms	remaining: 1m 40s
2:	learn: 0.0688119	total: 500ms	remaining: 1m 44s
3:	learn: 0.0687964	total: 724ms	remaining: 1m 53s
4:	learn: 0.0687842	total: 930ms	remaining: 1m 56s
5:	learn: 0.0687722	total: 1.13s	remaining: 1m 57s
6:	learn: 0.0687590	total: 1.35s	remaining: 2m
7:	learn: 0.0687465	total: 1.54s	remaining: 2m
8:	learn: 0.0687342	total: 1.72s	remaining: 1m 58s
9:	learn: 0.0687214	total: 1.9s	remaining: 1m 57s
10:	learn: 0.0687103	total: 2.09s	remaining: 1m 57s
11:	learn: 0.0686998	total: 2.29s	remaining: 1m 57s
12:	learn: 0.0686887	total: 2.43s	remaining: 1m 55s
13:	learn: 0.0686783	total: 2.62s	remaining: 1m 55s
14:	learn: 0.0686690	total: 2.8s	remaining: 1m 54s
15:	learn: 0.0686595	total: 2.96s	remaining: 1m 53s
16:	learn: 0.0686495	total: 3.16s	remaining: 1m 54s
17:	learn: 0.0686408	total: 3.34s	remaining: 1m 53s
18:	learn: 0.0686322	total: 3.5s	remaining: 1m 52s
19:	learn: 0.0686242	total: 3.69s

158:	learn: 0.0681983	total: 28.8s	remaining: 1m 25s
159:	learn: 0.0681963	total: 29s	remaining: 1m 25s
160:	learn: 0.0681948	total: 29.2s	remaining: 1m 25s
161:	learn: 0.0681930	total: 29.3s	remaining: 1m 24s
162:	learn: 0.0681912	total: 29.5s	remaining: 1m 24s
163:	learn: 0.0681901	total: 29.7s	remaining: 1m 24s
164:	learn: 0.0681873	total: 29.8s	remaining: 1m 23s
165:	learn: 0.0681862	total: 30s	remaining: 1m 23s
166:	learn: 0.0681853	total: 30.2s	remaining: 1m 23s
167:	learn: 0.0681843	total: 30.3s	remaining: 1m 23s
168:	learn: 0.0681835	total: 30.5s	remaining: 1m 23s
169:	learn: 0.0681818	total: 30.7s	remaining: 1m 23s
170:	learn: 0.0681806	total: 30.9s	remaining: 1m 23s
171:	learn: 0.0681783	total: 31.1s	remaining: 1m 22s
172:	learn: 0.0681777	total: 31.3s	remaining: 1m 22s
173:	learn: 0.0681756	total: 31.4s	remaining: 1m 22s
174:	learn: 0.0681743	total: 31.7s	remaining: 1m 22s
175:	learn: 0.0681728	total: 31.8s	remaining: 1m 22s
176:	learn: 0.0681721	total: 32s	remaining: 1m 21s

316:	learn: 0.0680204	total: 56.5s	remaining: 55.8s
317:	learn: 0.0680196	total: 56.7s	remaining: 55.6s
318:	learn: 0.0680181	total: 57s	remaining: 55.5s
319:	learn: 0.0680176	total: 57.2s	remaining: 55.4s
320:	learn: 0.0680172	total: 57.4s	remaining: 55.2s
321:	learn: 0.0680165	total: 57.6s	remaining: 55.1s
322:	learn: 0.0680160	total: 57.7s	remaining: 54.9s
323:	learn: 0.0680142	total: 57.9s	remaining: 54.6s
324:	learn: 0.0680139	total: 58s	remaining: 54.5s
325:	learn: 0.0680130	total: 58.2s	remaining: 54.3s
326:	learn: 0.0680125	total: 58.3s	remaining: 54.1s
327:	learn: 0.0680115	total: 58.5s	remaining: 53.8s
328:	learn: 0.0680106	total: 58.7s	remaining: 53.7s
329:	learn: 0.0680099	total: 58.9s	remaining: 53.6s
330:	learn: 0.0680091	total: 59.1s	remaining: 53.4s
331:	learn: 0.0680086	total: 59.2s	remaining: 53.1s
332:	learn: 0.0680080	total: 59.4s	remaining: 53s
333:	learn: 0.0680065	total: 59.6s	remaining: 52.9s
334:	learn: 0.0680054	total: 59.8s	remaining: 52.7s
335:	learn: 0.0680

473:	learn: 0.0678538	total: 1m 24s	remaining: 27.8s
474:	learn: 0.0678522	total: 1m 24s	remaining: 27.6s
475:	learn: 0.0678506	total: 1m 24s	remaining: 27.4s
476:	learn: 0.0678493	total: 1m 24s	remaining: 27.2s
477:	learn: 0.0678479	total: 1m 25s	remaining: 27s
478:	learn: 0.0678471	total: 1m 25s	remaining: 26.9s
479:	learn: 0.0678455	total: 1m 25s	remaining: 26.7s
480:	learn: 0.0678441	total: 1m 25s	remaining: 26.5s
481:	learn: 0.0678432	total: 1m 25s	remaining: 26.3s
482:	learn: 0.0678415	total: 1m 25s	remaining: 26.1s
483:	learn: 0.0678396	total: 1m 26s	remaining: 25.9s
484:	learn: 0.0678384	total: 1m 26s	remaining: 25.8s
485:	learn: 0.0678372	total: 1m 26s	remaining: 25.6s
486:	learn: 0.0678356	total: 1m 26s	remaining: 25.4s
487:	learn: 0.0678341	total: 1m 26s	remaining: 25.2s
488:	learn: 0.0678321	total: 1m 26s	remaining: 25s
489:	learn: 0.0678309	total: 1m 27s	remaining: 24.9s
490:	learn: 0.0678303	total: 1m 27s	remaining: 24.7s
491:	learn: 0.0678289	total: 1m 27s	remaining: 24.

 80%|████████  | 4/5 [08:45<02:11, 131.09s/it]

0:	learn: 0.0688428	total: 169ms	remaining: 1m 46s
1:	learn: 0.0688272	total: 315ms	remaining: 1m 38s
2:	learn: 0.0688098	total: 494ms	remaining: 1m 43s
3:	learn: 0.0687958	total: 631ms	remaining: 1m 38s
4:	learn: 0.0687836	total: 825ms	remaining: 1m 43s
5:	learn: 0.0687695	total: 1.03s	remaining: 1m 47s
6:	learn: 0.0687557	total: 1.24s	remaining: 1m 50s
7:	learn: 0.0687437	total: 1.41s	remaining: 1m 49s
8:	learn: 0.0687331	total: 1.61s	remaining: 1m 50s
9:	learn: 0.0687227	total: 1.77s	remaining: 1m 49s
10:	learn: 0.0687090	total: 1.95s	remaining: 1m 49s
11:	learn: 0.0686974	total: 2.09s	remaining: 1m 47s
12:	learn: 0.0686881	total: 2.25s	remaining: 1m 46s
13:	learn: 0.0686790	total: 2.42s	remaining: 1m 46s
14:	learn: 0.0686712	total: 2.6s	remaining: 1m 46s
15:	learn: 0.0686610	total: 2.78s	remaining: 1m 46s
16:	learn: 0.0686519	total: 2.97s	remaining: 1m 47s
17:	learn: 0.0686434	total: 3.13s	remaining: 1m 46s
18:	learn: 0.0686330	total: 3.31s	remaining: 1m 46s
19:	learn: 0.0686248	to

158:	learn: 0.0681945	total: 28.9s	remaining: 1m 25s
159:	learn: 0.0681933	total: 29.2s	remaining: 1m 25s
160:	learn: 0.0681926	total: 29.3s	remaining: 1m 25s
161:	learn: 0.0681907	total: 29.6s	remaining: 1m 25s
162:	learn: 0.0681889	total: 29.8s	remaining: 1m 25s
163:	learn: 0.0681878	total: 29.9s	remaining: 1m 24s
164:	learn: 0.0681856	total: 30.2s	remaining: 1m 24s
165:	learn: 0.0681842	total: 30.4s	remaining: 1m 24s
166:	learn: 0.0681822	total: 30.6s	remaining: 1m 24s
167:	learn: 0.0681810	total: 30.8s	remaining: 1m 24s
168:	learn: 0.0681795	total: 31s	remaining: 1m 24s
169:	learn: 0.0681790	total: 31.2s	remaining: 1m 24s
170:	learn: 0.0681767	total: 31.3s	remaining: 1m 24s
171:	learn: 0.0681756	total: 31.6s	remaining: 1m 24s
172:	learn: 0.0681736	total: 31.8s	remaining: 1m 23s
173:	learn: 0.0681728	total: 32s	remaining: 1m 23s
174:	learn: 0.0681719	total: 32.2s	remaining: 1m 23s
175:	learn: 0.0681706	total: 32.4s	remaining: 1m 23s
176:	learn: 0.0681696	total: 32.6s	remaining: 1m 2

315:	learn: 0.0680267	total: 59.2s	remaining: 58.8s
316:	learn: 0.0680258	total: 59.3s	remaining: 58.6s
317:	learn: 0.0680249	total: 59.5s	remaining: 58.4s
318:	learn: 0.0680238	total: 59.7s	remaining: 58.2s
319:	learn: 0.0680225	total: 59.9s	remaining: 58s
320:	learn: 0.0680211	total: 1m	remaining: 57.8s
321:	learn: 0.0680208	total: 1m	remaining: 57.6s
322:	learn: 0.0680195	total: 1m	remaining: 57.4s
323:	learn: 0.0680187	total: 1m	remaining: 57.2s
324:	learn: 0.0680180	total: 1m	remaining: 57s
325:	learn: 0.0680172	total: 1m	remaining: 56.8s
326:	learn: 0.0680158	total: 1m 1s	remaining: 56.6s
327:	learn: 0.0680149	total: 1m 1s	remaining: 56.4s
328:	learn: 0.0680144	total: 1m 1s	remaining: 56.2s
329:	learn: 0.0680135	total: 1m 1s	remaining: 56s
330:	learn: 0.0680127	total: 1m 1s	remaining: 55.8s
331:	learn: 0.0680121	total: 1m 1s	remaining: 55.6s
332:	learn: 0.0680117	total: 1m 2s	remaining: 55.4s
333:	learn: 0.0680101	total: 1m 2s	remaining: 55.3s
334:	learn: 0.0680093	total: 1m 2s	r

472:	learn: 0.0678615	total: 1m 27s	remaining: 28.9s
473:	learn: 0.0678593	total: 1m 27s	remaining: 28.7s
474:	learn: 0.0678568	total: 1m 27s	remaining: 28.5s
475:	learn: 0.0678551	total: 1m 27s	remaining: 28.3s
476:	learn: 0.0678543	total: 1m 27s	remaining: 28.1s
477:	learn: 0.0678531	total: 1m 27s	remaining: 28s
478:	learn: 0.0678520	total: 1m 28s	remaining: 27.8s
479:	learn: 0.0678507	total: 1m 28s	remaining: 27.6s
480:	learn: 0.0678498	total: 1m 28s	remaining: 27.4s
481:	learn: 0.0678484	total: 1m 28s	remaining: 27.2s
482:	learn: 0.0678476	total: 1m 28s	remaining: 27s
483:	learn: 0.0678461	total: 1m 29s	remaining: 26.9s
484:	learn: 0.0678455	total: 1m 29s	remaining: 26.7s
485:	learn: 0.0678441	total: 1m 29s	remaining: 26.5s
486:	learn: 0.0678427	total: 1m 29s	remaining: 26.3s
487:	learn: 0.0678418	total: 1m 29s	remaining: 26.2s
488:	learn: 0.0678397	total: 1m 30s	remaining: 26s
489:	learn: 0.0678380	total: 1m 30s	remaining: 25.8s
490:	learn: 0.0678363	total: 1m 30s	remaining: 25.6s

628:	learn: 0.0676943	total: 1m 54s	remaining: 182ms
629:	learn: 0.0676927	total: 1m 54s	remaining: 0us


100%|██████████| 5/5 [10:58<00:00, 131.60s/it]


In [26]:
submission = pd.DataFrame({
    'ParcelId': test_df['ParcelId'],
})

test_dates = {
    '201610': pd.Timestamp('2016-09-30'),
    '201611': pd.Timestamp('2016-10-31'),
    '201612': pd.Timestamp('2016-11-30'),
    '201710': pd.Timestamp('2017-09-30'),
    '201711': pd.Timestamp('2017-10-31'),
    '201712': pd.Timestamp('2017-11-30')
}

for label, test_date in test_dates.items():
    print("Predicting for: %s . " % (label))
    submission[label] = y_pred
    
submission.to_csv('CatBoost_prediction.csv', float_format='%.6f',index=False)

Predicting for: 201610 . 
Predicting for: 201611 . 
Predicting for: 201612 . 
Predicting for: 201710 . 
Predicting for: 201711 . 
Predicting for: 201712 . 


In [27]:
submission

Unnamed: 0,ParcelId,201610,201611,201612,201710,201711,201712
0,10754147,0.054393,0.054393,0.054393,0.054393,0.054393,0.054393
1,10759547,0.050820,0.050820,0.050820,0.050820,0.050820,0.050820
2,10843547,-0.025382,-0.025382,-0.025382,-0.025382,-0.025382,-0.025382
3,10859147,0.027895,0.027895,0.027895,0.027895,0.027895,0.027895
4,10879947,0.007612,0.007612,0.007612,0.007612,0.007612,0.007612
...,...,...,...,...,...,...,...
2985212,168176230,0.010926,0.010926,0.010926,0.010926,0.010926,0.010926
2985213,14273630,0.010926,0.010926,0.010926,0.010926,0.010926,0.010926
2985214,168040630,0.010926,0.010926,0.010926,0.010926,0.010926,0.010926
2985215,168040830,0.010926,0.010926,0.010926,0.010926,0.010926,0.010926
