In [1]:
import numpy as np
from catboost import Pool, CatBoostRegressor
from sklearn.metrics import make_scorer, accuracy_score, mean_absolute_error

In [2]:
import pandas as pd
import numpy as np
pd.set_option('display.max_columns', 500)
import json
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
import sklearn
import xgboost as xgb
from xgboost import XGBClassifier 
from xgboost import XGBRegressor 
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import mean_squared_error
from math import sqrt
from scipy.stats import uniform, randint
from sklearn.model_selection import TimeSeriesSplit, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import OneHotEncoder 
import pickle
import time

In [4]:
df_train = pd.read_csv('train.csv')

In [5]:
categorical_features = ['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 
        'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2',
        'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st',
        'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation',
        'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
        'Heating', 'HeatingQC', 'Electrical', 'KitchenQual',
        'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual',
        'GarageCond', 'PoolQC', 'Fence', 'MiscFeature',
        'SaleType', 'SaleCondition','Utilities','PavedDrive']

In [6]:
numerical_columns = [ 'MSSubClass', 'LotArea', 'OverallQual', 'OverallCond',
        'YearBuilt', 'YearRemodAdd', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF',
        'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea',
        'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr',
        'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageCars',
        'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch',
        'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold','LotFrontage', 'MasVnrArea',
                     'GarageYrBlt']

## Checking for missing data

In [7]:
total = df_train.isnull().sum().sort_values(ascending=False)
percent = (df_train.isnull().sum()/df_train.isnull().count()).sort_values(ascending=False)
missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
missing_data.head(20)

Unnamed: 0,Total,Percent
PoolQC,1453,0.995205
MiscFeature,1406,0.963014
Alley,1369,0.937671
Fence,1179,0.807534
FireplaceQu,690,0.472603
LotFrontage,259,0.177397
GarageCond,81,0.055479
GarageType,81,0.055479
GarageYrBlt,81,0.055479
GarageFinish,81,0.055479


In [8]:
df_train.columns.to_series().groupby(df_train.dtypes).groups

{int64: ['Id', 'MSSubClass', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold', 'SalePrice'], float64: ['LotFrontage', 'MasVnrArea', 'GarageYrBlt'], object: ['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2', 'Heating', 'HeatingQC', 'CentralAir', 'Electrical', 'KitchenQual', 'Functional', 'FireplaceQu', 'Garage

In [9]:
# Drop Ids

In [10]:
df_train.drop('Id', axis=1,inplace=True) 

In [11]:
target = ['SalePrice']

In [12]:
numerical_columns = [ 'MSSubClass', 'LotArea', 'OverallQual', 'OverallCond',
        'YearBuilt', 'YearRemodAdd', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF',
        'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea',
        'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr',
        'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageCars',
        'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch',
        'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold','LotFrontage', 'MasVnrArea',
                     'GarageYrBlt']

In [13]:
target_encode_columns = ['MSZoning', 'Street', 'Alley', 'LotShape', 'LandContour', 
        'LotConfig', 'LandSlope', 'Neighborhood', 'Condition1', 'Condition2',
        'BldgType', 'HouseStyle', 'RoofStyle', 'RoofMatl', 'Exterior1st',
        'Exterior2nd', 'MasVnrType', 'ExterQual', 'ExterCond', 'Foundation',
        'BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2',
        'Heating', 'HeatingQC', 'Electrical', 'KitchenQual',
        'Functional', 'FireplaceQu', 'GarageType', 'GarageFinish', 'GarageQual',
        'GarageCond', 'PoolQC', 'Fence', 'MiscFeature',
        'SaleType', 'SaleCondition','Utilities','PavedDrive']


target_encode_df = df_train[target_encode_columns + target].reset_index().drop(columns = 'index', axis = 1)


In [14]:
df_train['HasCentralAir'] = np.where(df_train['CentralAir']=='Y',1,0)
df_train.drop('CentralAir',axis=1,inplace=True)

In [15]:
target_name = target[0]
target_df = pd.DataFrame()
encoding_dict = {}
for embed_col in target_encode_columns:
    val_map = target_encode_df.groupby(embed_col)[target].mean().to_dict()[target_name]
    encoding_dict[embed_col] = val_map
    target_df[embed_col] = target_encode_df[embed_col].map(val_map).values

# # Save encoding dict

with open('encoding_dict.txt', 'w') as file:
    json.dump(encoding_dict, file)

In [16]:
target_df.head()

Unnamed: 0,MSZoning,Street,Alley,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PoolQC,Fence,MiscFeature,SaleType,SaleCondition,Utilities,PavedDrive
0,191004.994787,181130.538514,,164754.818378,180183.746758,176938.047529,179956.799566,197965.773333,184495.492063,181169.405536,185763.807377,210051.764045,171483.956179,179803.679219,213732.900971,214432.460317,204691.87191,231633.510246,184034.896256,225230.44204,202688.478964,183632.6209,165652.295908,235413.720096,184694.690287,182021.195378,214914.42915,186825.113193,212116.023891,183429.147059,,202892.656322,202068.869668,187489.836003,187885.735294,,,,173401.836622,175202.219533,180950.95682,186433.973881
1,191004.994787,181130.538514,,164754.818378,180183.746758,177934.574468,179956.799566,238772.727273,142475.481481,181169.405536,185763.807377,175985.477961,171483.956179,179803.679219,149422.177273,149803.172897,156221.891204,144341.313466,184034.896256,149805.714511,202688.478964,183632.6209,257689.80597,161573.068182,184694.690287,182021.195378,214914.42915,186825.113193,139962.511565,183429.147059,205723.488818,202892.656322,202068.869668,187489.836003,187885.735294,,,,173401.836622,175202.219533,180950.95682,186433.973881
2,191004.994787,181130.538514,,206101.665289,180183.746758,176938.047529,179956.799566,197965.773333,184495.492063,181169.405536,185763.807377,210051.764045,171483.956179,179803.679219,213732.900971,214432.460317,204691.87191,231633.510246,184034.896256,225230.44204,202688.478964,183632.6209,192789.657895,235413.720096,184694.690287,182021.195378,214914.42915,186825.113193,212116.023891,183429.147059,205723.488818,202892.656322,202068.869668,187489.836003,187885.735294,,,,173401.836622,175202.219533,180950.95682,186433.973881
3,191004.994787,181130.538514,,206101.665289,180183.746758,181623.425856,179956.799566,210624.72549,184495.492063,181169.405536,185763.807377,210051.764045,171483.956179,179803.679219,149841.645631,161328.947368,156221.891204,144341.313466,184034.896256,132291.075342,140759.818182,213599.907692,165652.295908,161573.068182,184694.690287,182021.195378,156858.871369,186825.113193,212116.023891,183429.147059,226351.415789,134091.162791,142156.42314,187489.836003,187885.735294,,,,173401.836622,146526.623762,180950.95682,186433.973881
4,191004.994787,181130.538514,,206101.665289,180183.746758,177934.574468,179956.799566,335295.317073,184495.492063,181169.405536,185763.807377,210051.764045,171483.956179,179803.679219,213732.900971,214432.460317,204691.87191,231633.510246,184034.896256,225230.44204,202688.478964,183632.6209,206643.420814,235413.720096,184694.690287,182021.195378,214914.42915,186825.113193,212116.023891,183429.147059,205723.488818,202892.656322,202068.869668,187489.836003,187885.735294,,,,173401.836622,175202.219533,180950.95682,186433.973881


In [17]:
df_train_encoding_target_drop = df_train.drop(target_encode_columns, axis = 1).reset_index().drop(columns = 'index', axis = 1)
df_target = pd.concat([df_train_encoding_target_drop, target_df], axis = 1)

In [18]:
df_target.head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice,HasCentralAir,MSZoning,Street,Alley,LotShape,LandContour,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2,Heating,HeatingQC,Electrical,KitchenQual,Functional,FireplaceQu,GarageType,GarageFinish,GarageQual,GarageCond,PoolQC,Fence,MiscFeature,SaleType,SaleCondition,Utilities,PavedDrive
0,60,65.0,8450,7,5,2003,2003,196.0,706,0,150,856,856,854,0,1710,1,0,2,1,3,1,8,0,2003.0,2,548,0,61,0,0,0,0,0,2,2008,208500,1,191004.994787,181130.538514,,164754.818378,180183.746758,176938.047529,179956.799566,197965.773333,184495.492063,181169.405536,185763.807377,210051.764045,171483.956179,179803.679219,213732.900971,214432.460317,204691.87191,231633.510246,184034.896256,225230.44204,202688.478964,183632.6209,165652.295908,235413.720096,184694.690287,182021.195378,214914.42915,186825.113193,212116.023891,183429.147059,,202892.656322,202068.869668,187489.836003,187885.735294,,,,173401.836622,175202.219533,180950.95682,186433.973881
1,20,80.0,9600,6,8,1976,1976,0.0,978,0,284,1262,1262,0,0,1262,0,1,2,0,3,1,6,1,1976.0,2,460,298,0,0,0,0,0,0,5,2007,181500,1,191004.994787,181130.538514,,164754.818378,180183.746758,177934.574468,179956.799566,238772.727273,142475.481481,181169.405536,185763.807377,175985.477961,171483.956179,179803.679219,149422.177273,149803.172897,156221.891204,144341.313466,184034.896256,149805.714511,202688.478964,183632.6209,257689.80597,161573.068182,184694.690287,182021.195378,214914.42915,186825.113193,139962.511565,183429.147059,205723.488818,202892.656322,202068.869668,187489.836003,187885.735294,,,,173401.836622,175202.219533,180950.95682,186433.973881
2,60,68.0,11250,7,5,2001,2002,162.0,486,0,434,920,920,866,0,1786,1,0,2,1,3,1,6,1,2001.0,2,608,0,42,0,0,0,0,0,9,2008,223500,1,191004.994787,181130.538514,,206101.665289,180183.746758,176938.047529,179956.799566,197965.773333,184495.492063,181169.405536,185763.807377,210051.764045,171483.956179,179803.679219,213732.900971,214432.460317,204691.87191,231633.510246,184034.896256,225230.44204,202688.478964,183632.6209,192789.657895,235413.720096,184694.690287,182021.195378,214914.42915,186825.113193,212116.023891,183429.147059,205723.488818,202892.656322,202068.869668,187489.836003,187885.735294,,,,173401.836622,175202.219533,180950.95682,186433.973881
3,70,60.0,9550,7,5,1915,1970,0.0,216,0,540,756,961,756,0,1717,1,0,1,0,3,1,7,1,1998.0,3,642,0,35,272,0,0,0,0,2,2006,140000,1,191004.994787,181130.538514,,206101.665289,180183.746758,181623.425856,179956.799566,210624.72549,184495.492063,181169.405536,185763.807377,210051.764045,171483.956179,179803.679219,149841.645631,161328.947368,156221.891204,144341.313466,184034.896256,132291.075342,140759.818182,213599.907692,165652.295908,161573.068182,184694.690287,182021.195378,156858.871369,186825.113193,212116.023891,183429.147059,226351.415789,134091.162791,142156.42314,187489.836003,187885.735294,,,,173401.836622,146526.623762,180950.95682,186433.973881
4,60,84.0,14260,8,5,2000,2000,350.0,655,0,490,1145,1145,1053,0,2198,1,0,2,1,4,1,9,1,2000.0,3,836,192,84,0,0,0,0,0,12,2008,250000,1,191004.994787,181130.538514,,206101.665289,180183.746758,177934.574468,179956.799566,335295.317073,184495.492063,181169.405536,185763.807377,210051.764045,171483.956179,179803.679219,213732.900971,214432.460317,204691.87191,231633.510246,184034.896256,225230.44204,202688.478964,183632.6209,206643.420814,235413.720096,184694.690287,182021.195378,214914.42915,186825.113193,212116.023891,183429.147059,205723.488818,202892.656322,202068.869668,187489.836003,187885.735294,,,,173401.836622,175202.219533,180950.95682,186433.973881


In [19]:
Y= df_target['SalePrice']
df_target.drop('SalePrice',axis=1,inplace=True)
X= df_target

In [20]:
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=seed)


In [28]:
clf = CatBoostRegressor()
params = {'iterations': [700],
          'depth': [2,4,6,10,20],
          'eval_metric': ['MAE'],
           'bagging_temperature': [0.2],
          'border_count':[10,50,100,500],
          'loss_function': ['RMSE'],
          #'learning_rate': [0.01, 0.03, 0.05],
          #'loss_function': ['RMSE', 'MultiRMSE'],          
          'l2_leaf_reg': np.logspace(-20, -19, 3),
          'leaf_estimation_iterations': [10],
          'metric_period': [100],
            'od_wait':[100],

#           'use_best_model': ['True'],
          #'logging_level':['Silent'],
          'random_seed': [42]
         }
scorer = make_scorer(mean_absolute_error)
clf_grid = GridSearchCV(estimator=clf, param_grid=params, scoring=scorer, cv=5)

In [None]:
clf_grid.fit(X_train, y_train)
best_param = clf_grid.best_params_
best_param

0:	learn: 55858.1832904	total: 1.21ms	remaining: 846ms
75:	learn: 23440.7883042	total: 62.7ms	remaining: 515ms
150:	learn: 18506.5362178	total: 108ms	remaining: 392ms
225:	learn: 16543.8221593	total: 153ms	remaining: 320ms
300:	learn: 15210.0031696	total: 200ms	remaining: 265ms
375:	learn: 14351.2747302	total: 258ms	remaining: 222ms
450:	learn: 13800.7298906	total: 310ms	remaining: 171ms
525:	learn: 13341.1922898	total: 363ms	remaining: 120ms
600:	learn: 12941.7340897	total: 423ms	remaining: 69.6ms
675:	learn: 12588.9971968	total: 476ms	remaining: 16.9ms
699:	learn: 12465.2299394	total: 491ms	remaining: 0us
0:	learn: 53995.1387035	total: 1.12ms	remaining: 785ms
75:	learn: 22125.1772560	total: 48.7ms	remaining: 400ms
150:	learn: 17165.4371095	total: 96.9ms	remaining: 352ms
225:	learn: 15579.0047789	total: 156ms	remaining: 327ms
300:	learn: 14582.2494399	total: 204ms	remaining: 270ms
375:	learn: 13951.8589356	total: 251ms	remaining: 216ms
450:	learn: 13410.6386992	total: 299ms	remaining:

525:	learn: 13601.1961670	total: 366ms	remaining: 121ms
600:	learn: 13236.0083682	total: 416ms	remaining: 68.6ms
675:	learn: 12863.8469560	total: 469ms	remaining: 16.6ms
699:	learn: 12755.8737292	total: 485ms	remaining: 0us
0:	learn: 52182.9407514	total: 876us	remaining: 613ms
75:	learn: 22478.7216130	total: 54.6ms	remaining: 448ms
150:	learn: 17807.3390403	total: 113ms	remaining: 411ms
225:	learn: 16023.1679101	total: 165ms	remaining: 347ms
300:	learn: 14867.3351351	total: 214ms	remaining: 284ms
375:	learn: 14115.9573570	total: 267ms	remaining: 230ms
450:	learn: 13633.1194605	total: 317ms	remaining: 175ms
525:	learn: 13207.5473789	total: 366ms	remaining: 121ms
600:	learn: 12819.2982256	total: 415ms	remaining: 68.3ms
675:	learn: 12440.6181291	total: 466ms	remaining: 16.6ms
699:	learn: 12306.8930744	total: 485ms	remaining: 0us
0:	learn: 55803.6206638	total: 1.07ms	remaining: 749ms
75:	learn: 19033.6207717	total: 79ms	remaining: 649ms
150:	learn: 14457.0258418	total: 157ms	remaining: 572

225:	learn: 12945.0914356	total: 249ms	remaining: 523ms
300:	learn: 11539.2520075	total: 329ms	remaining: 435ms
375:	learn: 10435.4805854	total: 408ms	remaining: 351ms
450:	learn: 9535.7466481	total: 492ms	remaining: 272ms
525:	learn: 8759.4852059	total: 571ms	remaining: 189ms
600:	learn: 8034.5389246	total: 653ms	remaining: 107ms
675:	learn: 7440.2712430	total: 738ms	remaining: 26.2ms
699:	learn: 7293.1254702	total: 764ms	remaining: 0us
0:	learn: 55756.7175158	total: 1.6ms	remaining: 1.12s
75:	learn: 19588.3130026	total: 83.6ms	remaining: 687ms
150:	learn: 14805.6132072	total: 176ms	remaining: 641ms
225:	learn: 12936.4218162	total: 251ms	remaining: 527ms
300:	learn: 11640.7597252	total: 327ms	remaining: 434ms
375:	learn: 10589.4550747	total: 407ms	remaining: 351ms
450:	learn: 9641.9804083	total: 484ms	remaining: 267ms
525:	learn: 8890.4562521	total: 564ms	remaining: 186ms
600:	learn: 8259.3331546	total: 647ms	remaining: 107ms
675:	learn: 7703.1559989	total: 726ms	remaining: 25.8ms
699

675:	learn: 3492.8445118	total: 1.14s	remaining: 40.4ms
699:	learn: 3333.7043614	total: 1.19s	remaining: 0us
0:	learn: 53874.6320522	total: 5.39ms	remaining: 3.77s
75:	learn: 15925.4728227	total: 136ms	remaining: 1.11s
150:	learn: 11248.4866456	total: 245ms	remaining: 890ms
225:	learn: 9225.7794298	total: 360ms	remaining: 755ms
300:	learn: 7642.4980637	total: 477ms	remaining: 633ms
375:	learn: 6304.4457035	total: 601ms	remaining: 517ms
450:	learn: 5367.2241265	total: 720ms	remaining: 397ms
525:	learn: 4626.9694049	total: 844ms	remaining: 279ms
600:	learn: 4051.5362747	total: 969ms	remaining: 160ms
675:	learn: 3550.0156894	total: 1.09s	remaining: 38.8ms
699:	learn: 3396.8539699	total: 1.13s	remaining: 0us
0:	learn: 55836.9593928	total: 1.54ms	remaining: 1.08s
75:	learn: 17074.7297602	total: 124ms	remaining: 1.01s
150:	learn: 11942.4018806	total: 260ms	remaining: 945ms
225:	learn: 9795.8842226	total: 409ms	remaining: 859ms
300:	learn: 8027.3457557	total: 558ms	remaining: 739ms
375:	learn

450:	learn: 768.2080945	total: 2.15s	remaining: 1.19s
525:	learn: 481.0590630	total: 2.51s	remaining: 830ms
600:	learn: 307.0626042	total: 2.9s	remaining: 478ms
675:	learn: 196.5645753	total: 3.27s	remaining: 116ms
699:	learn: 171.7638912	total: 3.4s	remaining: 0us
0:	learn: 55566.0932933	total: 10ms	remaining: 7s
75:	learn: 12453.9299661	total: 417ms	remaining: 3.42s
150:	learn: 6328.5359931	total: 782ms	remaining: 2.84s
225:	learn: 3805.3131978	total: 1.16s	remaining: 2.44s
300:	learn: 2290.8684860	total: 1.53s	remaining: 2.03s
375:	learn: 1311.3722704	total: 1.93s	remaining: 1.66s
450:	learn: 769.9700282	total: 2.33s	remaining: 1.29s
525:	learn: 465.1889985	total: 2.73s	remaining: 905ms
600:	learn: 280.0506164	total: 3.13s	remaining: 515ms
675:	learn: 172.8700569	total: 3.5s	remaining: 124ms
699:	learn: 149.6686440	total: 3.62s	remaining: 0us
0:	learn: 53682.5984352	total: 6.5ms	remaining: 4.54s
75:	learn: 11722.7317811	total: 397ms	remaining: 3.26s
150:	learn: 5924.9239509	total: 7

Traceback (most recent call last):
  File "/Users/facu/Downloads/test_virtualenv/house_prices/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/facu/Downloads/test_virtualenv/house_prices/lib/python3.7/site-packages/catboost/core.py", line 4850, in fit
    save_snapshot, snapshot_file, snapshot_interval, init_model)
  File "/Users/facu/Downloads/test_virtualenv/house_prices/lib/python3.7/site-packages/catboost/core.py", line 1797, in _fit
    save_snapshot, snapshot_file, snapshot_interval, init_model
  File "/Users/facu/Downloads/test_virtualenv/house_prices/lib/python3.7/site-packages/catboost/core.py", line 1724, in _prepare_train_params
    _check_train_params(params)
  File "_catboost.pyx", line 5521, in _catboost._check_train_params
  File "_catboost.pyx", line 5540, in _catboost._check_train_params
_catboost.CatBoostError: catboost/private/libs/options/oblivious_tree_o

0:	learn: 55858.1832904	total: 886us	remaining: 620ms
75:	learn: 23171.8422003	total: 59.8ms	remaining: 491ms
150:	learn: 17974.8003271	total: 122ms	remaining: 442ms
225:	learn: 15883.4689011	total: 178ms	remaining: 372ms
300:	learn: 14511.3803274	total: 252ms	remaining: 333ms
375:	learn: 13660.3487579	total: 306ms	remaining: 264ms
450:	learn: 13054.5418090	total: 361ms	remaining: 200ms
525:	learn: 12581.5548589	total: 415ms	remaining: 137ms
600:	learn: 12176.3264308	total: 475ms	remaining: 78.3ms
675:	learn: 11840.0333110	total: 530ms	remaining: 18.8ms
699:	learn: 11709.3831666	total: 548ms	remaining: 0us
0:	learn: 54017.4873184	total: 820us	remaining: 574ms
75:	learn: 21976.5660990	total: 57.7ms	remaining: 474ms
150:	learn: 16887.5834778	total: 118ms	remaining: 430ms
225:	learn: 14846.1516553	total: 181ms	remaining: 380ms
300:	learn: 13745.3053790	total: 243ms	remaining: 322ms
375:	learn: 13016.6273898	total: 306ms	remaining: 263ms
450:	learn: 12493.0771329	total: 369ms	remaining: 20

525:	learn: 12591.4809664	total: 445ms	remaining: 147ms
600:	learn: 12150.4281580	total: 502ms	remaining: 82.7ms
675:	learn: 11734.8993932	total: 554ms	remaining: 19.7ms
699:	learn: 11622.1799488	total: 572ms	remaining: 0us
0:	learn: 52182.9407514	total: 1.1ms	remaining: 771ms
75:	learn: 22336.3163568	total: 59.2ms	remaining: 486ms
150:	learn: 17533.6197286	total: 117ms	remaining: 424ms
225:	learn: 15517.8475010	total: 198ms	remaining: 415ms
300:	learn: 14214.0875330	total: 320ms	remaining: 424ms
375:	learn: 13441.2520453	total: 380ms	remaining: 328ms
450:	learn: 12813.0003210	total: 438ms	remaining: 242ms
525:	learn: 12337.4707180	total: 495ms	remaining: 164ms
600:	learn: 11880.2159720	total: 556ms	remaining: 91.6ms
675:	learn: 11467.5575556	total: 611ms	remaining: 21.7ms
699:	learn: 11339.7409514	total: 628ms	remaining: 0us
0:	learn: 55803.6206638	total: 1.23ms	remaining: 860ms
75:	learn: 19309.1432091	total: 84.1ms	remaining: 691ms
150:	learn: 14324.8555434	total: 180ms	remaining: 6

150:	learn: 14480.1735349	total: 172ms	remaining: 624ms
225:	learn: 12365.8975073	total: 256ms	remaining: 538ms
300:	learn: 10990.5537084	total: 341ms	remaining: 452ms
375:	learn: 9897.8684714	total: 429ms	remaining: 370ms
450:	learn: 8935.8250708	total: 513ms	remaining: 283ms
525:	learn: 8168.2298899	total: 598ms	remaining: 198ms
600:	learn: 7546.1500214	total: 690ms	remaining: 114ms
675:	learn: 6992.8948637	total: 775ms	remaining: 27.5ms
699:	learn: 6820.6192030	total: 802ms	remaining: 0us
0:	learn: 55756.7175158	total: 1.11ms	remaining: 778ms
75:	learn: 19185.2020568	total: 90.2ms	remaining: 740ms
150:	learn: 14245.5174235	total: 176ms	remaining: 638ms
225:	learn: 12305.3299638	total: 259ms	remaining: 544ms
300:	learn: 10951.1846289	total: 348ms	remaining: 461ms
375:	learn: 9880.8329934	total: 432ms	remaining: 372ms
450:	learn: 8922.5539730	total: 517ms	remaining: 285ms
525:	learn: 8173.5056130	total: 642ms	remaining: 212ms
600:	learn: 7554.2301894	total: 736ms	remaining: 121ms
675:

675:	learn: 3476.6633225	total: 1.31s	remaining: 46.7ms
699:	learn: 3342.6044883	total: 1.36s	remaining: 0us
0:	learn: 53803.3037970	total: 1.83ms	remaining: 1.28s
75:	learn: 15865.1076896	total: 137ms	remaining: 1.12s
150:	learn: 11024.9827287	total: 272ms	remaining: 988ms
225:	learn: 9011.1056976	total: 422ms	remaining: 885ms
300:	learn: 7455.9044143	total: 561ms	remaining: 744ms
375:	learn: 6162.5793330	total: 687ms	remaining: 592ms
450:	learn: 5239.6805142	total: 808ms	remaining: 446ms
525:	learn: 4488.7957433	total: 942ms	remaining: 311ms
600:	learn: 3889.3461005	total: 1.07s	remaining: 177ms
675:	learn: 3405.1600693	total: 1.21s	remaining: 42.9ms
699:	learn: 3262.0176042	total: 1.25s	remaining: 0us
0:	learn: 55874.6024703	total: 2.65ms	remaining: 1.85s
75:	learn: 17086.6915574	total: 138ms	remaining: 1.13s
150:	learn: 11713.5951232	total: 267ms	remaining: 972ms
225:	learn: 9426.6516928	total: 400ms	remaining: 838ms
300:	learn: 7796.2937140	total: 529ms	remaining: 701ms
375:	learn

450:	learn: 804.1339795	total: 4.12s	remaining: 2.27s
525:	learn: 497.7571235	total: 4.83s	remaining: 1.6s
600:	learn: 308.9766923	total: 5.54s	remaining: 912ms
675:	learn: 206.5145660	total: 6.33s	remaining: 225ms
699:	learn: 181.5084426	total: 6.57s	remaining: 0us
0:	learn: 55571.2093599	total: 13.2ms	remaining: 9.25s
75:	learn: 12496.3284947	total: 788ms	remaining: 6.47s
150:	learn: 6368.9580134	total: 1.55s	remaining: 5.64s
225:	learn: 4029.4047885	total: 2.3s	remaining: 4.83s
300:	learn: 2404.6077485	total: 3.1s	remaining: 4.1s
375:	learn: 1471.6175766	total: 3.89s	remaining: 3.35s
450:	learn: 946.3070521	total: 4.9s	remaining: 2.71s
525:	learn: 581.8022285	total: 5.7s	remaining: 1.89s
600:	learn: 377.6591020	total: 6.42s	remaining: 1.06s
675:	learn: 247.9932906	total: 7.17s	remaining: 255ms
699:	learn: 212.6597984	total: 7.41s	remaining: 0us
0:	learn: 53699.5499369	total: 12.4ms	remaining: 8.65s
75:	learn: 12085.3828191	total: 780ms	remaining: 6.4s
150:	learn: 6177.7529546	total:

Traceback (most recent call last):
  File "/Users/facu/Downloads/test_virtualenv/house_prices/lib/python3.7/site-packages/sklearn/model_selection/_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Users/facu/Downloads/test_virtualenv/house_prices/lib/python3.7/site-packages/catboost/core.py", line 4850, in fit
    save_snapshot, snapshot_file, snapshot_interval, init_model)
  File "/Users/facu/Downloads/test_virtualenv/house_prices/lib/python3.7/site-packages/catboost/core.py", line 1797, in _fit
    save_snapshot, snapshot_file, snapshot_interval, init_model
  File "/Users/facu/Downloads/test_virtualenv/house_prices/lib/python3.7/site-packages/catboost/core.py", line 1724, in _prepare_train_params
    _check_train_params(params)
  File "_catboost.pyx", line 5521, in _catboost._check_train_params
  File "_catboost.pyx", line 5540, in _catboost._check_train_params
_catboost.CatBoostError: catboost/private/libs/options/oblivious_tree_o

0:	learn: 55858.1832904	total: 1.07ms	remaining: 748ms
75:	learn: 23228.7144605	total: 67ms	remaining: 550ms
150:	learn: 17943.2400748	total: 135ms	remaining: 492ms
225:	learn: 15883.0363841	total: 202ms	remaining: 423ms
300:	learn: 14507.2380986	total: 285ms	remaining: 377ms
375:	learn: 13599.1324116	total: 367ms	remaining: 316ms
450:	learn: 13000.9209676	total: 447ms	remaining: 247ms
525:	learn: 12458.6312473	total: 512ms	remaining: 169ms
600:	learn: 11981.7550868	total: 576ms	remaining: 94.9ms
675:	learn: 11614.9456075	total: 642ms	remaining: 22.8ms
699:	learn: 11507.8845495	total: 665ms	remaining: 0us
0:	learn: 54017.4873184	total: 918us	remaining: 642ms
75:	learn: 22069.1766688	total: 68.2ms	remaining: 560ms
150:	learn: 17073.2935575	total: 134ms	remaining: 487ms
225:	learn: 14956.1783451	total: 203ms	remaining: 425ms
300:	learn: 13809.0521398	total: 268ms	remaining: 356ms
375:	learn: 13020.7127950	total: 334ms	remaining: 288ms
450:	learn: 12326.5999136	total: 402ms	remaining: 222

300:	learn: 14698.3018031	total: 276ms	remaining: 366ms
375:	learn: 13871.1375924	total: 337ms	remaining: 290ms
450:	learn: 13291.7447882	total: 395ms	remaining: 218ms
525:	learn: 12773.9974462	total: 453ms	remaining: 150ms
600:	learn: 12312.4187213	total: 513ms	remaining: 84.6ms
675:	learn: 11920.6379629	total: 574ms	remaining: 20.4ms
699:	learn: 11792.6188009	total: 594ms	remaining: 0us
0:	learn: 52182.9407514	total: 1.32ms	remaining: 927ms
75:	learn: 22403.5336733	total: 63.2ms	remaining: 519ms
150:	learn: 17470.3850925	total: 124ms	remaining: 451ms
225:	learn: 15593.6638944	total: 181ms	remaining: 379ms
300:	learn: 14271.8893056	total: 242ms	remaining: 321ms
375:	learn: 13433.0799923	total: 300ms	remaining: 259ms
450:	learn: 12844.0309332	total: 363ms	remaining: 200ms
525:	learn: 12283.3465539	total: 421ms	remaining: 139ms
600:	learn: 11817.9508799	total: 475ms	remaining: 78.2ms
675:	learn: 11418.7169456	total: 533ms	remaining: 18.9ms
699:	learn: 11283.8581200	total: 552ms	remainin

75:	learn: 19278.9326183	total: 95.8ms	remaining: 786ms
150:	learn: 14490.1349510	total: 188ms	remaining: 683ms
225:	learn: 12340.4136598	total: 286ms	remaining: 599ms
300:	learn: 10847.4495203	total: 377ms	remaining: 500ms
375:	learn: 9652.1318568	total: 481ms	remaining: 414ms
450:	learn: 8768.4032127	total: 575ms	remaining: 318ms
525:	learn: 8047.5278110	total: 664ms	remaining: 220ms
600:	learn: 7422.6136495	total: 758ms	remaining: 125ms
675:	learn: 6887.6998076	total: 849ms	remaining: 30.1ms
699:	learn: 6714.1111016	total: 878ms	remaining: 0us
0:	learn: 55756.7175158	total: 2.09ms	remaining: 1.46s
75:	learn: 19079.0343901	total: 101ms	remaining: 830ms
150:	learn: 14200.9608231	total: 194ms	remaining: 706ms
225:	learn: 12267.7714565	total: 297ms	remaining: 623ms
300:	learn: 10937.6761266	total: 391ms	remaining: 518ms
375:	learn: 9850.8839258	total: 484ms	remaining: 417ms
450:	learn: 8921.7945053	total: 579ms	remaining: 320ms
525:	learn: 8159.8756800	total: 675ms	remaining: 223ms
600:

675:	learn: 3592.6420720	total: 1.37s	remaining: 48.6ms
699:	learn: 3420.0643033	total: 1.41s	remaining: 0us
0:	learn: 53757.6006898	total: 2.27ms	remaining: 1.58s
75:	learn: 15940.1749953	total: 152ms	remaining: 1.25s
150:	learn: 10891.4085869	total: 295ms	remaining: 1.07s
225:	learn: 9015.2465771	total: 433ms	remaining: 909ms
300:	learn: 7496.7908420	total: 569ms	remaining: 755ms
375:	learn: 6260.3134624	total: 708ms	remaining: 610ms
450:	learn: 5306.8581078	total: 851ms	remaining: 470ms
525:	learn: 4559.4731381	total: 992ms	remaining: 328ms
600:	learn: 3915.0188301	total: 1.13s	remaining: 187ms
675:	learn: 3403.9259521	total: 1.27s	remaining: 45.2ms
699:	learn: 3254.5324687	total: 1.32s	remaining: 0us
0:	learn: 55874.6024703	total: 1.86ms	remaining: 1.3s
75:	learn: 16772.3850734	total: 136ms	remaining: 1.12s
150:	learn: 11545.2591517	total: 280ms	remaining: 1.02s
225:	learn: 9412.3623556	total: 420ms	remaining: 880ms
300:	learn: 7783.4666068	total: 552ms	remaining: 732ms
375:	learn:

450:	learn: 844.7983146	total: 6.98s	remaining: 3.85s
525:	learn: 529.9110712	total: 8.03s	remaining: 2.65s
600:	learn: 330.8713525	total: 9.07s	remaining: 1.49s
675:	learn: 216.2826759	total: 10.2s	remaining: 362ms
699:	learn: 188.0018214	total: 10.6s	remaining: 0us
0:	learn: 55571.2093599	total: 19ms	remaining: 13.3s
75:	learn: 12584.2284952	total: 1.14s	remaining: 9.38s
150:	learn: 6452.9906173	total: 2.27s	remaining: 8.27s
225:	learn: 4057.2843779	total: 3.41s	remaining: 7.16s
300:	learn: 2518.6626701	total: 4.53s	remaining: 6.01s
375:	learn: 1537.2146986	total: 5.67s	remaining: 4.89s
450:	learn: 945.3814331	total: 6.88s	remaining: 3.8s
525:	learn: 596.1568522	total: 8.05s	remaining: 2.66s
600:	learn: 382.7129845	total: 9.18s	remaining: 1.51s
675:	learn: 245.5182455	total: 10.3s	remaining: 365ms
699:	learn: 216.0612044	total: 10.6s	remaining: 0us
0:	learn: 53683.1529772	total: 19.8ms	remaining: 13.8s
75:	learn: 12179.5752429	total: 1.1s	remaining: 9.03s
150:	learn: 6276.5225285	tot

In [None]:
STOP

In [None]:
# Train Model
print("Train CatBoost Decision Tree")
modelstart= time.time()
cb_model = CatBoostRegressor(**best_params)
cb_model.fit(X_train, y_train,
             eval_set=(X_test,y_test),
             #use_best_model=True,
             verbose=True)

# # Feature Importance
# fea_imp = pd.DataFrame({'imp': cb_model.feature_importances_, 'col': X.columns})
# fea_imp = fea_imp.sort_values(['imp', 'col'], ascending=[True, False]).iloc[-30:]
# _ = fea_imp.plot(kind='barh', x='col', y='imp', figsize=(20, 10))
# plt.savefig('catboost_feature_importance.png')   

print("Model Evaluation Stage")
print(cb_model.get_params())


In [None]:
# Train Model
print("Train CatBoost Decision Tree")
modelstart= time.time()
cb_model = CatBoostRegressor(iterations=700,
                             learning_rate=0.02,
                             depth=12,
                             eval_metric='MAE',
                             random_seed = 23,
                             bagging_temperature = 0.2,
                             od_type='Iter',
                             metric_period = 75,
                             od_wait=100)
cb_model.fit(X_train, y_train,
             eval_set=(X_test,y_test),
             use_best_model=True,
             verbose=True)

# # Feature Importance
# fea_imp = pd.DataFrame({'imp': cb_model.feature_importances_, 'col': X.columns})
# fea_imp = fea_imp.sort_values(['imp', 'col'], ascending=[True, False]).iloc[-30:]
# _ = fea_imp.plot(kind='barh', x='col', y='imp', figsize=(20, 10))
# plt.savefig('catboost_feature_importance.png')   

print("Model Evaluation Stage")
print(cb_model.get_params())


In [None]:
sklearn.metrics.mean_absolute_error(best_model.predict(X_test), y_test)

In [None]:
print('RMSE:', np.sqrt(mean_squared_error(y_test, cb_model.predict(X_test))))
catpred = cb_model.predict(X_test)
catsub = pd.DataFrame(catpred,columns=["deal_probability"],index=testdex)
catsub['deal_probability'].clip(0.0, 1.0, inplace=True)
catsub.to_csv("catsub.csv",index=True,header=True) # Between 0 and 1
print("Model Runtime: %0.2f Minutes"%((time.time() - modelstart)/60))
print("Notebook Runtime: %0.2f Minutes"%((time.time() - notebookstart)/60))