## Trained on Google Colab using Google GPU

In [1]:
import tensorflow as tf
tf.test.gpu_device_name()

'/device:GPU:0'

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
!pip install xgboost



In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
import xgboost
import pickle

In [6]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [7]:
preprocessed_train_path = '/content/drive/My Drive/Colab Notebooks/AHPP/preprocessed_train.csv'
preprocessed_test_path = '/content/drive/My Drive/Colab Notebooks/AHPP/preprocessed_test.csv'

In [8]:
df_train = pd.read_csv(preprocessed_train_path)
df_test = pd.read_csv(preprocessed_test_path)

In [9]:
print(df_train.shape)
print(df_test.shape)

(1456, 80)
(1459, 79)


In [10]:
df_train.head(10)

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,MSZoning_me,Street_me,Alley_me,LotShape_me,LandContour_me,Utilities_me,LotConfig_me,LandSlope_me,Neighborhood_me,Condition1_me,Condition2_me,BldgType_me,HouseStyle_me,RoofStyle_me,RoofMatl_me,Exterior1st_me,Exterior2nd_me,MasVnrType_me,ExterQual_me,ExterCond_me,Foundation_me,BsmtQual_me,BsmtCond_me,BsmtExposure_me,BsmtFinType1_me,BsmtFinType2_me,Heating_me,HeatingQC_me,CentralAir_me,Electrical_me,KitchenQual_me,Functional_me,FireplaceQu_me,GarageType_me,GarageFinish_me,GarageQual_me,GarageCond_me,PavedDrive_me,PoolQC_me,Fence_me,MiscFeature_me,SaleType_me,SaleCondition_me,SalePrice
0,60,65.0,8450,7,5,2003,2003,196.0,706.0,0.0,150.0,856.0,856,854,0,1710,1.0,0.0,2,1,3,1,8,0,2003.0,2.0,548.0,0,61,0,0,0,0,0,2,2008,12.083,12.023,12.035,11.936,12.02,12.022,12.002,12.018,12.164,12.041,12.024,12.045,12.177,11.983,12.018,12.206,12.21,12.161,12.309,12.041,12.258,12.18,12.04,11.956,12.297,12.041,12.03,12.204,12.059,12.059,12.222,12.036,11.809,12.158,12.173,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004,12.247699
1,20,80.0,9600,6,8,1976,1976,0.0,978.0,0.0,284.0,1262.0,1262,0,0,1262,0.0,1.0,2,0,3,1,6,1,1976.0,2.0,460.0,298,0,0,0,0,0,0,5,2007,12.083,12.023,12.035,11.936,12.02,12.022,12.035,12.018,12.344,11.816,12.024,12.045,11.994,11.983,12.018,11.86,11.862,11.895,11.838,12.041,11.87,12.18,12.04,12.36,11.953,12.041,12.03,12.204,12.059,12.059,11.811,12.036,12.182,12.158,12.173,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004,12.109016
2,60,68.0,11250,7,5,2001,2002,162.0,486.0,0.0,434.0,920.0,920,866,0,1786,1.0,0.0,2,1,3,1,6,1,2001.0,2.0,608.0,0,42,0,0,0,0,0,9,2008,12.083,12.023,12.035,12.158,12.02,12.022,12.002,12.018,12.164,12.041,12.024,12.045,12.177,11.983,12.018,12.206,12.21,12.161,12.309,12.041,12.258,12.18,12.04,12.095,12.297,12.041,12.03,12.204,12.059,12.059,12.222,12.036,12.182,12.158,12.173,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004,12.317171
3,70,60.0,9550,7,5,1915,1970,0.0,216.0,0.0,540.0,756.0,961,756,0,1717,1.0,0.0,1,0,3,1,7,1,1998.0,3.0,642.0,0,35,272,0,0,0,0,2,2006,12.083,12.023,12.035,12.158,12.02,12.022,12.016,12.018,12.207,12.041,12.024,12.045,12.177,11.983,12.018,11.83,11.892,11.895,11.838,12.041,11.723,11.811,12.22,11.956,11.953,12.041,12.03,11.91,12.059,12.059,12.222,12.036,12.258,11.766,11.819,12.066,12.068,12.058,12.021,12.057,12.028,11.989,11.771,11.849405
4,60,84.0,14260,8,5,2000,2000,350.0,655.0,0.0,490.0,1145.0,1145,1053,0,2198,1.0,0.0,2,1,4,1,9,1,2000.0,3.0,836.0,192,84,0,0,0,0,0,12,2008,12.083,12.023,12.035,12.158,12.02,12.022,12.035,12.018,12.632,12.041,12.024,12.045,12.177,11.983,12.018,12.206,12.21,12.161,12.309,12.041,12.258,12.18,12.04,12.152,12.297,12.041,12.03,12.204,12.059,12.059,12.222,12.036,12.182,12.158,12.173,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004,12.42922
5,50,85.0,14115,5,5,1993,1995,0.0,732.0,0.0,64.0,796.0,796,566,0,1362,1.0,0.0,1,1,1,1,5,0,1993.0,2.0,480.0,40,30,0,320,0,0,700,10,2009,12.083,12.023,12.035,12.158,12.02,12.022,12.002,12.018,11.934,12.041,12.024,12.045,11.814,11.983,12.018,12.206,12.21,11.895,11.838,12.041,12.102,12.18,12.04,11.956,12.297,12.041,12.03,12.204,12.059,12.059,11.811,12.036,11.809,12.158,11.819,12.066,12.068,12.058,12.021,11.84,11.868,11.989,12.004,11.870607
6,20,75.0,10084,8,5,2004,2005,186.0,1369.0,0.0,317.0,1686.0,1694,0,0,1694,1.0,0.0,2,0,3,1,7,1,2004.0,2.0,636.0,255,57,0,0,0,0,0,8,2007,12.083,12.023,12.035,11.936,12.02,12.022,12.002,12.018,12.297,12.041,12.024,12.045,11.994,11.983,12.018,12.206,12.21,12.431,12.309,12.041,12.258,12.635,12.04,12.152,12.297,12.041,12.03,12.204,12.059,12.059,12.222,12.036,12.258,12.158,12.173,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004,12.634606
7,60,69.0,10382,7,6,1973,1973,240.0,859.0,32.0,216.0,1107.0,1107,983,0,2090,1.0,0.0,2,1,3,1,7,2,1973.0,2.0,484.0,235,204,228,0,0,0,350,11,2009,12.083,12.023,12.035,12.158,12.02,12.022,12.016,12.018,12.131,12.244,12.024,12.045,12.177,11.983,12.018,11.945,11.967,12.431,11.838,12.041,11.87,12.18,12.04,12.095,11.953,11.888,12.03,12.204,12.059,12.059,11.811,12.036,12.182,12.158,12.173,12.066,12.068,12.058,12.021,12.057,11.868,11.989,12.004,12.206078
8,50,51.0,6120,7,5,1931,1950,0.0,0.0,0.0,952.0,952.0,1022,752,0,1774,0.0,0.0,2,0,2,2,8,2,1931.0,2.0,468.0,90,0,205,0,0,0,0,4,2008,11.693,12.023,12.035,11.936,12.02,12.022,12.002,12.018,11.704,11.742,12.024,12.045,11.814,11.983,12.018,12.088,11.892,11.895,11.838,12.041,11.723,11.811,12.04,11.956,11.964,12.041,12.03,11.91,12.059,11.54,11.811,11.862,12.182,11.766,11.819,11.675,12.068,12.058,12.021,12.057,12.028,11.989,11.771,11.774528
9,190,50.0,7420,5,6,1939,1950,0.0,851.0,0.0,140.0,991.0,1077,0,0,1077,1.0,0.0,1,0,2,2,5,2,1939.0,1.0,205.0,0,4,0,0,0,0,0,1,2008,12.083,12.023,12.035,11.936,12.02,12.022,12.016,12.018,11.68,11.742,11.57,11.725,11.595,11.983,12.018,11.86,11.862,11.895,11.838,12.041,11.723,11.811,12.04,11.956,12.297,12.041,12.03,12.204,12.059,12.059,11.811,12.036,12.182,12.158,12.173,12.22,12.068,12.058,12.021,12.057,12.028,11.989,12.004,11.678448


In [11]:
df_test.head(10)

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,MSZoning_me,Street_me,Alley_me,LotShape_me,LandContour_me,Utilities_me,LotConfig_me,LandSlope_me,Neighborhood_me,Condition1_me,Condition2_me,BldgType_me,HouseStyle_me,RoofStyle_me,RoofMatl_me,Exterior1st_me,Exterior2nd_me,MasVnrType_me,ExterQual_me,ExterCond_me,Foundation_me,BsmtQual_me,BsmtCond_me,BsmtExposure_me,BsmtFinType1_me,BsmtFinType2_me,Heating_me,HeatingQC_me,CentralAir_me,Electrical_me,KitchenQual_me,Functional_me,FireplaceQu_me,GarageType_me,GarageFinish_me,GarageQual_me,GarageCond_me,PavedDrive_me,PoolQC_me,Fence_me,MiscFeature_me,SaleType_me,SaleCondition_me
0,20,80.0,11622,5,6,1961,1961,0.0,468.0,144.0,270.0,882.0,896,0,0,896,0.0,0.0,1,0,2,1,5,0,1961.0,1.0,730.0,140,0,0,0,120,0,0,6,2010,11.75,12.023,12.035,11.936,12.02,12.022,12.002,12.018,11.868,11.816,12.024,12.045,11.994,11.983,12.018,12.206,12.21,11.895,11.838,12.041,11.87,11.811,12.04,11.956,11.853,11.978,12.03,11.816,12.059,12.059,11.811,12.036,11.809,12.158,11.819,12.066,12.068,12.058,12.021,11.84,12.028,11.989,12.004
1,20,81.0,14267,6,6,1958,1958,108.0,923.0,0.0,406.0,1329.0,1329,0,0,1329,0.0,0.0,1,1,3,1,6,0,1958.0,1.0,312.0,393,36,0,0,0,0,12500,6,2010,12.083,12.023,12.035,12.158,12.02,12.022,12.016,12.018,11.868,12.041,12.024,12.045,11.994,12.181,12.018,11.83,11.838,12.161,11.838,12.041,11.87,11.811,12.04,11.956,11.953,12.041,12.03,11.816,12.059,12.059,12.222,12.036,11.809,12.158,11.819,12.066,12.068,12.058,12.021,12.057,12.042,11.989,12.004
2,60,74.0,13830,5,5,1997,1998,0.0,791.0,0.0,137.0,928.0,928,701,0,1629,0.0,0.0,2,1,3,1,6,1,1997.0,2.0,482.0,212,34,0,0,0,0,0,3,2010,12.083,12.023,12.035,12.158,12.02,12.022,12.002,12.018,12.156,12.041,12.024,12.045,12.177,11.983,12.018,12.206,12.21,11.895,11.838,12.041,12.258,12.18,12.04,11.956,12.297,12.041,12.03,11.91,12.059,12.059,11.811,12.036,12.182,12.158,12.315,12.066,12.068,12.058,12.021,11.84,12.028,11.989,12.004
3,60,78.0,9978,6,6,1998,1998,20.0,602.0,0.0,324.0,926.0,926,678,0,1604,0.0,0.0,2,1,3,1,7,1,1998.0,2.0,470.0,360,36,0,0,0,0,0,6,2010,12.083,12.023,12.035,12.158,12.02,12.022,12.002,12.018,12.156,12.041,12.024,12.045,12.177,11.983,12.018,12.206,12.21,12.161,11.838,12.041,12.258,11.811,12.04,11.956,12.297,12.041,12.03,12.204,12.059,12.059,12.222,12.036,12.258,12.158,12.315,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004
4,120,43.0,5005,8,5,1992,1992,0.0,263.0,0.0,1017.0,1280.0,1280,0,0,1280,0.0,0.0,2,0,2,1,5,0,1992.0,2.0,506.0,0,82,0,0,144,0,0,1,2010,12.083,12.023,12.035,12.158,12.259,12.022,12.002,12.018,12.585,12.041,12.024,12.059,11.994,11.983,12.018,11.945,11.967,11.895,12.309,12.041,12.258,12.18,12.04,11.956,11.953,12.041,12.03,12.204,12.059,12.059,12.222,12.036,11.809,12.158,12.173,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004
5,60,75.0,10000,6,5,1993,1994,0.0,0.0,0.0,763.0,763.0,763,892,0,1655,0.0,0.0,2,1,3,1,7,1,1993.0,2.0,440.0,157,84,0,0,0,0,0,4,2010,12.083,12.023,12.035,12.158,12.02,12.022,12.016,12.018,12.156,12.041,12.024,12.045,12.177,11.983,12.018,11.945,11.967,11.895,11.838,12.041,12.258,12.18,12.04,11.956,11.964,12.041,12.03,11.91,12.059,12.059,11.811,12.036,12.182,12.158,12.315,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004
6,20,67.0,7980,6,7,1992,2007,0.0,935.0,0.0,233.0,1168.0,1187,0,0,1187,1.0,0.0,2,0,3,1,6,0,1992.0,2.0,420.0,483,21,0,0,0,0,500,3,2010,12.083,12.023,12.035,12.158,12.02,12.022,12.002,12.018,12.156,12.041,12.024,12.045,11.994,11.983,12.018,11.945,11.967,11.895,11.838,11.969,12.258,12.18,12.04,11.956,11.953,12.041,12.03,12.204,12.059,12.059,11.811,12.036,11.809,12.158,12.315,12.066,12.068,12.058,12.021,12.057,11.868,11.989,12.004
7,60,63.0,8402,6,5,1998,1998,0.0,0.0,0.0,789.0,789.0,789,676,0,1465,0.0,0.0,2,1,3,1,7,1,1998.0,2.0,393.0,0,75,0,0,0,0,0,5,2010,12.083,12.023,12.035,12.158,12.02,12.022,12.002,12.018,12.156,12.041,12.024,12.045,12.177,11.983,12.018,12.206,12.21,11.895,11.838,12.041,12.258,12.18,12.04,11.956,11.964,12.041,12.03,11.91,12.059,12.059,11.811,12.036,12.258,12.158,12.315,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004
8,20,85.0,10176,7,5,1990,1990,0.0,637.0,0.0,663.0,1300.0,1341,0,0,1341,1.0,0.0,1,1,2,1,5,1,1990.0,2.0,506.0,192,0,0,0,0,0,0,2,2010,12.083,12.023,12.035,11.936,12.02,12.022,12.002,12.018,12.156,12.041,12.024,12.045,11.994,11.983,12.018,11.945,11.967,11.895,11.838,12.041,12.258,12.18,12.04,12.36,12.297,12.041,12.03,11.91,12.059,12.059,12.222,12.036,11.739,12.158,11.819,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004
9,20,70.0,8400,4,5,1970,1970,0.0,804.0,78.0,0.0,882.0,882,0,0,882,1.0,0.0,1,0,2,1,4,0,1970.0,2.0,525.0,240,0,0,0,0,0,0,4,2010,12.083,12.023,12.035,11.936,12.02,12.022,12.016,12.018,11.868,12.041,12.024,12.045,11.994,11.983,12.018,12.041,11.995,11.895,11.838,12.041,11.87,11.811,12.04,11.956,11.953,11.965,12.03,11.816,12.059,12.059,11.811,12.036,11.809,12.158,12.315,12.066,12.068,12.058,12.021,11.84,12.028,11.989,12.004


In [12]:
X = df_train.iloc[:, :-1]
y = df_train.iloc[:, -1]

In [13]:
X.head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,MSZoning_me,Street_me,Alley_me,LotShape_me,LandContour_me,Utilities_me,LotConfig_me,LandSlope_me,Neighborhood_me,Condition1_me,Condition2_me,BldgType_me,HouseStyle_me,RoofStyle_me,RoofMatl_me,Exterior1st_me,Exterior2nd_me,MasVnrType_me,ExterQual_me,ExterCond_me,Foundation_me,BsmtQual_me,BsmtCond_me,BsmtExposure_me,BsmtFinType1_me,BsmtFinType2_me,Heating_me,HeatingQC_me,CentralAir_me,Electrical_me,KitchenQual_me,Functional_me,FireplaceQu_me,GarageType_me,GarageFinish_me,GarageQual_me,GarageCond_me,PavedDrive_me,PoolQC_me,Fence_me,MiscFeature_me,SaleType_me,SaleCondition_me
0,60,65.0,8450,7,5,2003,2003,196.0,706.0,0.0,150.0,856.0,856,854,0,1710,1.0,0.0,2,1,3,1,8,0,2003.0,2.0,548.0,0,61,0,0,0,0,0,2,2008,12.083,12.023,12.035,11.936,12.02,12.022,12.002,12.018,12.164,12.041,12.024,12.045,12.177,11.983,12.018,12.206,12.21,12.161,12.309,12.041,12.258,12.18,12.04,11.956,12.297,12.041,12.03,12.204,12.059,12.059,12.222,12.036,11.809,12.158,12.173,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004
1,20,80.0,9600,6,8,1976,1976,0.0,978.0,0.0,284.0,1262.0,1262,0,0,1262,0.0,1.0,2,0,3,1,6,1,1976.0,2.0,460.0,298,0,0,0,0,0,0,5,2007,12.083,12.023,12.035,11.936,12.02,12.022,12.035,12.018,12.344,11.816,12.024,12.045,11.994,11.983,12.018,11.86,11.862,11.895,11.838,12.041,11.87,12.18,12.04,12.36,11.953,12.041,12.03,12.204,12.059,12.059,11.811,12.036,12.182,12.158,12.173,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004
2,60,68.0,11250,7,5,2001,2002,162.0,486.0,0.0,434.0,920.0,920,866,0,1786,1.0,0.0,2,1,3,1,6,1,2001.0,2.0,608.0,0,42,0,0,0,0,0,9,2008,12.083,12.023,12.035,12.158,12.02,12.022,12.002,12.018,12.164,12.041,12.024,12.045,12.177,11.983,12.018,12.206,12.21,12.161,12.309,12.041,12.258,12.18,12.04,12.095,12.297,12.041,12.03,12.204,12.059,12.059,12.222,12.036,12.182,12.158,12.173,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004
3,70,60.0,9550,7,5,1915,1970,0.0,216.0,0.0,540.0,756.0,961,756,0,1717,1.0,0.0,1,0,3,1,7,1,1998.0,3.0,642.0,0,35,272,0,0,0,0,2,2006,12.083,12.023,12.035,12.158,12.02,12.022,12.016,12.018,12.207,12.041,12.024,12.045,12.177,11.983,12.018,11.83,11.892,11.895,11.838,12.041,11.723,11.811,12.22,11.956,11.953,12.041,12.03,11.91,12.059,12.059,12.222,12.036,12.258,11.766,11.819,12.066,12.068,12.058,12.021,12.057,12.028,11.989,11.771
4,60,84.0,14260,8,5,2000,2000,350.0,655.0,0.0,490.0,1145.0,1145,1053,0,2198,1.0,0.0,2,1,4,1,9,1,2000.0,3.0,836.0,192,84,0,0,0,0,0,12,2008,12.083,12.023,12.035,12.158,12.02,12.022,12.035,12.018,12.632,12.041,12.024,12.045,12.177,11.983,12.018,12.206,12.21,12.161,12.309,12.041,12.258,12.18,12.04,12.152,12.297,12.041,12.03,12.204,12.059,12.059,12.222,12.036,12.182,12.158,12.173,12.066,12.068,12.058,12.021,12.057,12.028,11.989,12.004


In [14]:
y.head()

0    12.247699
1    12.109016
2    12.317171
3    11.849405
4    12.429220
Name: SalePrice, dtype: float64

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1
                                                    , random_state=72)

In [16]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(1310, 79)
(1310,)
(146, 79)
(146,)


In [17]:
xgb =xgboost.XGBRegressor(tree_method = "gpu_hist", )

## Model Random Search

In [18]:
## Hyper Parameter Optimization
# Type of learner
booster=['gbtree','gblinear']
# Learning rate
learning_rate=[0.01, 0.05, 0.1, 0.15, 0.2]
# Minimum sum of weights of all observations required in a child
min_child_weight=[1,2,3,4]
# Maximum depth of a tree
max_depth = [3, 5, 7, 9, 11, 13, 15]
# Fraction of observations to be randomly samples for each tree.
subsample = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
# Fraction of columns to be randomly samples for each tree.
colsample_bytree = [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
# Number of trees in XGB
n_estimators = [int(x) for x in np.linspace(start = 1200, stop = 4000, num = 15)]

In [19]:
# Create the random grid
random_grid = {
                'booster':booster,
                'learning_rate':learning_rate,
                'min_child_weight':min_child_weight,
                'max_depth':max_depth,
                'subsample': subsample,
                'colsample_bytree':colsample_bytree,
                'n_estimators': n_estimators 
                }
print(random_grid)

{'booster': ['gbtree', 'gblinear'], 'learning_rate': [0.01, 0.05, 0.1, 0.15, 0.2], 'min_child_weight': [1, 2, 3, 4], 'max_depth': [3, 5, 7, 9, 11, 13, 15], 'subsample': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1], 'colsample_bytree': [0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1], 'n_estimators': [1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600, 2800, 3000, 3200, 3400, 3600, 3800, 4000]}


In [20]:
# Set up the random search with 5-fold cross validation
xgb_random = RandomizedSearchCV(estimator=xgb,
            param_distributions=random_grid,
            cv=5, n_iter=20,
            scoring = 'neg_mean_squared_log_error',
            verbose = 10,
            random_state=72,
            n_jobs = -1)

In [21]:
xgb_random.fit(X_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  2.0min
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  3.9min
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  4.3min
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:  8.1min
[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed:  9.4min
[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed: 11.8min
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed: 14.8min
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed: 17.1min
[Parallel(n_jobs=-1)]: Done  57 tasks      | elapsed: 19.5min
[Parallel(n_jobs=-1)]: Done  68 tasks      | elapsed: 21.3min
[Parallel(n_jobs=-1)]: Done  81 tasks      | elapsed: 22.6min
[Parallel(n_jobs=-1)]: Done  94 tasks      | elapsed: 25.9min
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed: 29.6min finished




RandomizedSearchCV(cv=5, error_score=nan,
                   estimator=XGBRegressor(base_score=0.5, booster='gbtree',
                                          colsample_bylevel=1,
                                          colsample_bynode=1,
                                          colsample_bytree=1, gamma=0,
                                          importance_type='gain',
                                          learning_rate=0.1, max_delta_step=0,
                                          max_depth=3, min_child_weight=1,
                                          missing=None, n_estimators=100,
                                          n_jobs=1, nthread=None,
                                          objective='reg:linear',
                                          random_state=0, reg_alpha=...
                                        'learning_rate': [0.01, 0.05, 0.1, 0.15,
                                                          0.2],
                                        'ma

In [23]:
xgb_random.best_estimator_

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.5, gamma=0,
             importance_type='gain', learning_rate=0.01, max_delta_step=0,
             max_depth=3, min_child_weight=3, missing=None, n_estimators=3400,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
             silent=None, subsample=0.75, tree_method='gpu_hist', verbosity=1)

In [22]:
#Randomized search best score
np.sqrt((-1*xgb_random.best_score_))

0.00875396815386545

In [24]:
#Train best score
np.sqrt((-1*xgb_random.score(X_train, y_train)))

0.003978707139608652

In [25]:
#Test best score
np.sqrt((-1*xgb_random.score(X_test, y_test)))

0.009381349730020245

In [26]:
with open('/content/drive/My Drive/Colab Notebooks/AHPP/xgb', "wb") as f:
    pickle.dump(xgb_random, f)

## Predictions with Model 

In [27]:
with open('/content/drive/My Drive/Colab Notebooks/AHPP/xgb', "rb") as f:
    xgb = pickle.load(f)



In [28]:
Id = np.arange(start=1461, stop=2920, step=1)
xgb_submission = pd.DataFrame(data = Id, columns = ['Id'])

In [29]:
xgb_submission.head()

Unnamed: 0,Id
0,1461
1,1462
2,1463
3,1464
4,1465


In [30]:
xgb_prediction = xgb.predict(df_test)

In [31]:
xgb_submission['SalePrice'] = np.expm1(xgb_prediction)

In [32]:
xgb_submission.head()

Unnamed: 0,Id,SalePrice
0,1461,127731.109375
1,1462,157358.84375
2,1463,185077.3125
3,1464,191816.921875
4,1465,186313.78125


In [33]:
xgb_submission.to_csv('/content/drive/My Drive/Colab Notebooks/AHPP/xgb_submission.csv', index = False)