In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import itertools

In [2]:
dataset = pd.read_csv('data/train.csv')
dataset_test = pd.read_csv('data/test.csv')

In [3]:
type(dataset)

pandas.core.frame.DataFrame

In [4]:
dataset.describe()

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice
count,1460.0,1460.0,1201.0,1460.0,1460.0,1460.0,1460.0,1460.0,1452.0,1460.0,...,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0
mean,730.5,56.89726,70.049958,10516.828082,6.099315,5.575342,1971.267808,1984.865753,103.685262,443.639726,...,94.244521,46.660274,21.95411,3.409589,15.060959,2.758904,43.489041,6.321918,2007.815753,180921.19589
std,421.610009,42.300571,24.284752,9981.264932,1.382997,1.112799,30.202904,20.645407,181.066207,456.098091,...,125.338794,66.256028,61.119149,29.317331,55.757415,40.177307,496.123024,2.703626,1.328095,79442.502883
min,1.0,20.0,21.0,1300.0,1.0,1.0,1872.0,1950.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2006.0,34900.0
25%,365.75,20.0,59.0,7553.5,5.0,5.0,1954.0,1967.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2007.0,129975.0
50%,730.5,50.0,69.0,9478.5,6.0,5.0,1973.0,1994.0,0.0,383.5,...,0.0,25.0,0.0,0.0,0.0,0.0,0.0,6.0,2008.0,163000.0
75%,1095.25,70.0,80.0,11601.5,7.0,6.0,2000.0,2004.0,166.0,712.25,...,168.0,68.0,0.0,0.0,0.0,0.0,0.0,8.0,2009.0,214000.0
max,1460.0,190.0,313.0,215245.0,10.0,9.0,2010.0,2010.0,1600.0,5644.0,...,857.0,547.0,552.0,508.0,480.0,738.0,15500.0,12.0,2010.0,755000.0


In [9]:
dataset.isna().sum().loc[dataset.isna().sum() > 0]

LotFrontage      259
Alley           1369
MasVnrType         8
MasVnrArea         8
BsmtQual          37
BsmtCond          37
BsmtExposure      38
BsmtFinType1      37
BsmtFinType2      38
Electrical         1
FireplaceQu      690
GarageType        81
GarageYrBlt       81
GarageFinish      81
GarageQual        81
GarageCond        81
PoolQC          1453
Fence           1179
MiscFeature     1406
dtype: int64

In [10]:
dataset_test.isna().sum().loc[dataset_test.isna().sum() > 0]

MSZoning           4
LotFrontage      227
Alley           1352
Utilities          2
Exterior1st        1
Exterior2nd        1
MasVnrType        16
MasVnrArea        15
BsmtQual          44
BsmtCond          45
BsmtExposure      44
BsmtFinType1      42
BsmtFinSF1         1
BsmtFinType2      42
BsmtFinSF2         1
BsmtUnfSF          1
TotalBsmtSF        1
BsmtFullBath       2
BsmtHalfBath       2
KitchenQual        1
Functional         2
FireplaceQu      730
GarageType        76
GarageYrBlt       78
GarageFinish      78
GarageCars         1
GarageArea         1
GarageQual        78
GarageCond        78
PoolQC          1456
Fence           1169
MiscFeature     1408
SaleType           1
dtype: int64

## Missing Data

In [18]:
dataset[['LotFrontage']].tail()
dataset['LotFrontage'].mean()

70.04995836802665

In [21]:
dataset['LotFrontage'].fillna(dataset['LotFrontage'].mean(), inplace=True)
dataset_test['LotFrontage'].fillna(dataset_test['LotFrontage'].mean(), inplace=True)

In [26]:
dataset['Alley'].fillna('None', inplace=True)
dataset_test['Alley'].fillna('None', inplace=True)
dataset.groupby('Alley').count()

Unnamed: 0_level_0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
Alley,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Grvl,50,50,50,50,50,50,50,50,50,50,...,50,0,14,4,50,50,50,50,50,50
,1369,1369,1369,1369,1369,1369,1369,1369,1369,1369,...,1369,7,263,50,1369,1369,1369,1369,1369,1369
Pave,41,41,41,41,41,41,41,41,41,41,...,41,0,4,0,41,41,41,41,41,41


In [27]:
print(dataset['MasVnrType'].head())
dataset.groupby('MasVnrType').count()

0    BrkFace
1       None
2    BrkFace
3       None
4    BrkFace
Name: MasVnrType, dtype: object


Unnamed: 0_level_0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
MasVnrType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BrkCmn,15,15,15,15,15,15,15,15,15,15,...,15,0,7,2,15,15,15,15,15,15
BrkFace,445,445,445,445,445,445,445,445,445,445,...,445,2,80,11,445,445,445,445,445,445
,864,864,864,864,864,864,864,864,864,864,...,864,4,188,40,864,864,864,864,864,864
Stone,128,128,128,128,128,128,128,128,128,128,...,128,1,6,1,128,128,128,128,128,128


In [28]:
dataset['MasVnrType'] = dataset['MasVnrType'].fillna('None')
dataset_test['MasVnrType'] = dataset_test['MasVnrType'].fillna('None')

In [29]:
dataset.groupby('MasVnrType').count()

Unnamed: 0_level_0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
MasVnrType,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
BrkCmn,15,15,15,15,15,15,15,15,15,15,...,15,0,7,2,15,15,15,15,15,15
BrkFace,445,445,445,445,445,445,445,445,445,445,...,445,2,80,11,445,445,445,445,445,445
,872,872,872,872,872,872,872,872,872,872,...,872,4,188,40,872,872,872,872,872,872
Stone,128,128,128,128,128,128,128,128,128,128,...,128,1,6,1,128,128,128,128,128,128


In [30]:
dataset['MasVnrArea'].head()

0    196.0
1      0.0
2    162.0
3      0.0
4    350.0
Name: MasVnrArea, dtype: float64

In [31]:
dataset['MasVnrArea'].fillna(0, inplace=True)
dataset_test['MasVnrArea'].fillna(0, inplace=True)

In [32]:
dataset.loc[:, ['BsmtQual', 'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinType2']].head(20)


Unnamed: 0,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinType2
0,Gd,TA,No,GLQ,Unf
1,Gd,TA,Gd,ALQ,Unf
2,Gd,TA,Mn,GLQ,Unf
3,TA,Gd,No,ALQ,Unf
4,Gd,TA,Av,GLQ,Unf
5,Gd,TA,No,GLQ,Unf
6,Ex,TA,Av,GLQ,Unf
7,Gd,TA,Mn,ALQ,BLQ
8,TA,TA,No,Unf,Unf
9,TA,TA,No,GLQ,Unf


In [33]:
dataset['BsmtQual'].fillna('None', inplace=True)
dataset['BsmtCond'].fillna('None', inplace=True)
dataset['BsmtExposure'].fillna('None', inplace=True)
dataset['BsmtFinType1'].fillna('None', inplace=True)
dataset['BsmtFinType2'].fillna('None', inplace=True)

dataset_test['BsmtQual'].fillna('None', inplace=True)
dataset_test['BsmtCond'].fillna('None', inplace=True)
dataset_test['BsmtExposure'].fillna('None', inplace=True)
dataset_test['BsmtFinType1'].fillna('None', inplace=True)
dataset_test['BsmtFinType2'].fillna('None', inplace=True)

In [43]:
dataset = dataset.loc[dataset['Electrical'].isna() == False]
# dataset_test = dataset_test.loc[dataset_test['Electrical'].isna() == False]
print(dataset.shape)
print(dataset_test.shape)

(1459, 81)
(1459, 80)


In [34]:
dataset['FireplaceQu'].head(10)

0    NaN
1     TA
2     TA
3     Gd
4     TA
5    NaN
6     Gd
7     TA
8     TA
9     TA
Name: FireplaceQu, dtype: object

In [35]:
dataset['FireplaceQu'].fillna('None', inplace=True)
dataset_test['FireplaceQu'].fillna('None', inplace=True)

In [36]:
dataset.loc[:, ['GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageQual', 'GarageCond']].head() # 40

Unnamed: 0,GarageType,GarageYrBlt,GarageFinish,GarageQual,GarageCond
0,Attchd,2003.0,RFn,TA,TA
1,Attchd,1976.0,RFn,TA,TA
2,Attchd,2001.0,RFn,TA,TA
3,Detchd,1998.0,Unf,TA,TA
4,Attchd,2000.0,RFn,TA,TA


In [39]:
dataset['GarageType'].fillna('None', inplace=True)
dataset['GarageYrBlt'].fillna(dataset_test['GarageYrBlt'].min(), inplace=True)
dataset['GarageFinish'].fillna('None', inplace=True)
dataset['GarageQual'].fillna('None', inplace=True)
dataset['GarageCond'].fillna('None', inplace=True)

dataset_test['GarageType'].fillna('None', inplace=True)
dataset_test['GarageYrBlt'].fillna(dataset_test['GarageYrBlt'].min(), inplace=True)
dataset_test['GarageFinish'].fillna('None', inplace=True)
dataset_test['GarageQual'].fillna('None', inplace=True)
dataset_test['GarageCond'].fillna('None', inplace=True)

In [40]:
print(dataset['PoolQC'].unique())
print(dataset['Fence'].unique())
print(dataset['MiscFeature'].unique())

[nan 'Ex' 'Fa' 'Gd']
[nan 'MnPrv' 'GdWo' 'GdPrv' 'MnWw']
[nan 'Shed' 'Gar2' 'Othr' 'TenC']


In [41]:
dataset['PoolQC'].fillna('None', inplace=True)
dataset['Fence'].fillna('None', inplace=True)
dataset['MiscFeature'].fillna('None', inplace=True)

dataset_test['PoolQC'].fillna('None', inplace=True)
dataset_test['Fence'].fillna('None', inplace=True)
dataset_test['MiscFeature'].fillna('None', inplace=True)

In [44]:
print(dataset.isna().sum().loc[dataset.isna().sum() > 0])
print(dataset_test.isna().sum().loc[dataset_test.isna().sum() > 0])

Series([], dtype: int64)
MSZoning        4
Utilities       2
Exterior1st     1
Exterior2nd     1
BsmtFinSF1      1
BsmtFinSF2      1
BsmtUnfSF       1
TotalBsmtSF     1
BsmtFullBath    2
BsmtHalfBath    2
KitchenQual     1
Functional      2
GarageCars      1
GarageArea      1
SaleType        1
dtype: int64


In [49]:
dataset_test.isna().sum().loc[dataset_test.isna().sum() > 0].index

Index(['MSZoning', 'Utilities', 'Exterior1st', 'Exterior2nd', 'BsmtFinSF1',
       'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'BsmtFullBath',
       'BsmtHalfBath', 'KitchenQual', 'Functional', 'GarageCars', 'GarageArea',
       'SaleType'],
      dtype='object')

In [50]:
num_na_cols = list(dataset_test.describe().columns)
for col in num_na_cols:
    dataset_test[col].fillna(0, inplace=True)
cat_na_cols = list(dataset_test.isna().sum().loc[dataset_test.isna().sum() > 0].index)
for col in cat_na_cols:
    dataset_test[col].fillna('None', inplace=True)

In [51]:
print(dataset_test.isna().sum().loc[dataset_test.isna().sum() > 0])

Series([], dtype: int64)


### Outliers!!!


### Categorical data & Feature scaling.

In [55]:
X = dataset.iloc[:, 1:-1]
X_test = dataset_test.iloc[:, 1:]
print(X_test.shape)
print(X.shape)
X.head()

(1459, 79)
(1459, 79)


Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,0,,,,0,2,2008,WD,Normal
1,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,0,,,,0,5,2007,WD,Normal
2,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,0,,,,0,9,2008,WD,Normal
3,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,0,,,,0,2,2006,WD,Abnorml
4,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,0,,,,0,12,2008,WD,Normal


In [58]:
y = dataset.iloc[:, [-1]]
print(y.shape)
y.head()

(1459, 1)


Unnamed: 0,SalePrice
0,208500
1,181500
2,223500
3,140000
4,250000


In [59]:
y_sc = MinMaxScaler()  #  y[i] - min(y) / max(y) - min(y)
y = y_sc.fit_transform(y)
y

array([[0.24107763],
       [0.20358284],
       [0.26190807],
       ...,
       [0.321622  ],
       [0.14890293],
       [0.15636717]])

In [60]:
print(dataset.iloc[0, -1])
print(y_sc.inverse_transform([[0.24107763]]))

208500
[[208500.001363]]


In [61]:
X_sc = MinMaxScaler()
transformed_npa = X_sc.fit_transform(X[list(X.describe().columns)])
transformed_npa_test = X_sc.transform(X_test[list(X_test.describe().columns)])

In [62]:
X[list(X.describe().columns)].head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold
0,60,65.0,8450,7,5,2003,2003,196.0,706,0,...,548,0,61,0,0,0,0,0,2,2008
1,20,80.0,9600,6,8,1976,1976,0.0,978,0,...,460,298,0,0,0,0,0,0,5,2007
2,60,68.0,11250,7,5,2001,2002,162.0,486,0,...,608,0,42,0,0,0,0,0,9,2008
3,70,60.0,9550,7,5,1915,1970,0.0,216,0,...,642,0,35,272,0,0,0,0,2,2006
4,60,84.0,14260,8,5,2000,2000,350.0,655,0,...,836,192,84,0,0,0,0,0,12,2008


In [63]:
transformed_df = pd.DataFrame(transformed_npa)
transformed_df.columns = X.describe().columns
X[list(X.describe().columns)] = transformed_df
X[list(X.describe().columns)].head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold
0,0.235294,0.150685,0.03342,0.666667,0.5,0.949275,0.883333,0.1225,0.125089,0.0,...,0.38646,0.0,0.111517,0.0,0.0,0.0,0.0,0.0,0.090909,0.5
1,0.0,0.202055,0.038795,0.555556,0.875,0.753623,0.433333,0.0,0.173281,0.0,...,0.324401,0.347725,0.0,0.0,0.0,0.0,0.0,0.0,0.363636,0.25
2,0.235294,0.160959,0.046507,0.666667,0.5,0.934783,0.866667,0.10125,0.086109,0.0,...,0.428773,0.0,0.076782,0.0,0.0,0.0,0.0,0.0,0.727273,0.5
3,0.294118,0.133562,0.038561,0.666667,0.5,0.311594,0.333333,0.0,0.038271,0.0,...,0.45275,0.0,0.063985,0.492754,0.0,0.0,0.0,0.0,0.090909,0.0
4,0.235294,0.215753,0.060576,0.777778,0.5,0.927536,0.833333,0.21875,0.116052,0.0,...,0.589563,0.224037,0.153565,0.0,0.0,0.0,0.0,0.0,1.0,0.5


In [64]:
transformed_df = pd.DataFrame(transformed_npa_test)
transformed_df.columns = X_test.describe().columns
X_test[list(X_test.describe().columns)] = transformed_df
X_test[list(X_test.describe().columns)].head()

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,...,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold
0,0.0,0.202055,0.048246,0.444444,0.625,0.644928,0.183333,0.0,0.08292,0.097693,...,0.51481,0.163361,0.0,0.0,0.0,0.25,0.0,0.0,0.454545,1.0
1,0.0,0.205479,0.060609,0.555556,0.625,0.623188,0.133333,0.0675,0.163536,0.0,...,0.220028,0.458576,0.065814,0.0,0.0,0.0,0.0,0.806452,0.454545,1.0
2,0.235294,0.181507,0.058566,0.444444,0.5,0.905797,0.8,0.0,0.140149,0.0,...,0.339915,0.247375,0.062157,0.0,0.0,0.0,0.0,0.0,0.181818,1.0
3,0.235294,0.195205,0.040562,0.555556,0.625,0.913043,0.8,0.0125,0.106662,0.0,...,0.331453,0.42007,0.065814,0.0,0.0,0.0,0.0,0.0,0.454545,1.0
4,0.588235,0.075342,0.017318,0.777778,0.5,0.869565,0.7,0.0,0.046598,0.0,...,0.356841,0.0,0.149909,0.0,0.0,0.3,0.0,0.0,0.0,1.0


In [67]:
print(X.isna().sum().sum())
print(X_test.isna().sum().sum())

0
0


In [66]:
X = X.iloc[:-1, :]
X.tail()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
1454,0.235294,FV,0.140411,0.030929,Pave,Pave,Reg,Lvl,AllPub,Inside,...,0.0,0.0,,,,0.0,0.636364,0.25,WD,Normal
1455,0.0,RL,0.219178,0.055505,Pave,,Reg,Lvl,AllPub,Inside,...,0.0,0.0,,,,0.0,0.090909,1.0,WD,Normal
1456,0.294118,RL,0.15411,0.036187,Pave,,Reg,Lvl,AllPub,Inside,...,0.0,0.0,,MnPrv,,0.16129,0.363636,1.0,WD,Normal
1457,0.0,RL,0.160959,0.039342,Pave,,Reg,Lvl,AllPub,Inside,...,0.0,0.0,,GdPrv,Shed,0.0,0.272727,1.0,WD,Normal
1458,0.0,RL,0.184932,0.04037,Pave,,Reg,Lvl,AllPub,Inside,...,0.0,0.0,,,,0.0,0.454545,0.5,WD,Normal


In [68]:
y = y[:-1, :]

In [69]:
print(X.shape)
print(X_test.shape)
print(y.shape)

(1458, 79)
(1459, 79)
(1458, 1)


In [70]:
X = pd.get_dummies(X)
X_test = pd.get_dummies(X_test)

In [72]:
print(X.shape)
print(X_test.shape)
print(y.shape)
y

(1458, 302)
(1459, 291)
(1458, 1)


array([[0.24107763],
       [0.20358284],
       [0.26190807],
       ...,
       [0.24316067],
       [0.321622  ],
       [0.14890293]])

### Our first model

In [74]:
X_train, X_validation, y_train, y_validation = train_test_split(X.values, y, test_size=0.25)

In [75]:
print(X_train.shape)
print(X_validation.shape)
print(y_train.shape)
print(y_validation.shape)

(1093, 302)
(365, 302)
(1093, 1)
(365, 1)


In [76]:
# fit model no training data
model = XGBRegressor()
history = model.fit(X_train, y_train)

In [77]:
y_predicted = model.predict(X_validation)

In [78]:
score = mean_squared_error(y_validation, y_predicted)
score**0.5

0.04585649213356644

### Feature Engineering!!

### Retraining and hyperparameter tuning

In [79]:
scores = []
combinations = list(itertools.product([.03, .05, .07], [5, 6, 7], [500, 1000]))
combinations

[(0.03, 5, 500),
 (0.03, 5, 1000),
 (0.03, 6, 500),
 (0.03, 6, 1000),
 (0.03, 7, 500),
 (0.03, 7, 1000),
 (0.05, 5, 500),
 (0.05, 5, 1000),
 (0.05, 6, 500),
 (0.05, 6, 1000),
 (0.05, 7, 500),
 (0.05, 7, 1000),
 (0.07, 5, 500),
 (0.07, 5, 1000),
 (0.07, 6, 500),
 (0.07, 6, 1000),
 (0.07, 7, 500),
 (0.07, 7, 1000)]

In [80]:
for combo in combinations:
    model = XGBRegressor(nthread=4, objective='reg:linear', learning_rate=combo[0], max_depth=combo[1], min_child_weight=4,
                        silent=1, subsample=0.7, colsample_bytree=0.7, n_estimators=combo[2])
    model.fit(X_train, y_train)
    y_predicted = model.predict(X_validation)
    score = mean_squared_error(y_validation, y_predicted)**0.5
    print('lr: {}, max_depth: {} => {}'.format(combo[0], combo[1], score))


lr: 0.03, max_depth: 5 => 0.04388290597050245
lr: 0.03, max_depth: 5 => 0.04330511419522443
lr: 0.03, max_depth: 6 => 0.04452851301596842
lr: 0.03, max_depth: 6 => 0.043938805861566975
lr: 0.03, max_depth: 7 => 0.04475767846715142
lr: 0.03, max_depth: 7 => 0.0446855266064909
lr: 0.05, max_depth: 5 => 0.04391425661655937
lr: 0.05, max_depth: 5 => 0.043727834372924254
lr: 0.05, max_depth: 6 => 0.0456397227241475
lr: 0.05, max_depth: 6 => 0.045470373035649304
lr: 0.05, max_depth: 7 => 0.04542765386488276
lr: 0.05, max_depth: 7 => 0.0454246610643912
lr: 0.07, max_depth: 5 => 0.04372739768575186
lr: 0.07, max_depth: 5 => 0.04376801778122923
lr: 0.07, max_depth: 6 => 0.04438600674155813
lr: 0.07, max_depth: 6 => 0.044369578470409544
lr: 0.07, max_depth: 7 => 0.04388799590549415
lr: 0.07, max_depth: 7 => 0.04390704431966475
