In [8]:
import pandas as pd
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import calinski_harabaz_score

from sklearn.ensemble import BaggingRegressor, RandomForestRegressor, GradientBoostingRegressor
from sklearn.neighbors import KNeighborsRegressor

from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score, mean_squared_error, mean_squared_log_error, make_scorer

pd.set_option('display.max_columns', None)

In [4]:
# Import dataset
df = pd.read_csv("train.csv")

In [5]:
# Make a pipeline to prepare data 
from sklearn.base import BaseEstimator, TransformerMixin

class FillNanValues(BaseEstimator, TransformerMixin):
    def __init__(self): pass
    def fit(self, X, y=None):
        self.LotFrontageMedian = X.LotFrontage.median()
        self.MasVnrAreaMedian = X.MasVnrArea.median()
        return self
    def transform(self, X):
        X.LotFrontage.fillna(self.LotFrontageMedian, inplace=True)
        X.MasVnrArea.fillna(self.MasVnrAreaMedian, inplace=True)
        X.MiscFeature.fillna("NoShed", inplace=True)
        X.Fence.fillna('NoFence', inplace=True)
        X.BsmtQual.fillna("NoBasement", inplace=True)
        X.BsmtCond.fillna("NoBasement", inplace=True)
        X.BsmtExposure.fillna("NoBasement", inplace=True)
        X.BsmtFinType1.fillna("NoBasement", inplace=True)
        X.BsmtFinType2.fillna("NoBasement", inplace=True)
        X.BsmtFinType2.fillna("NoBasement", inplace=True)
        X.MasVnrType.fillna("None", inplace=True)
        X.Electrical.fillna("SBrkr", inplace=True)
        X.FireplaceQu.fillna('NoFP', inplace=True)
        X.GarageType.fillna('Attchd', inplace=True)
        X.GarageFinish.fillna('Unf', inplace=True)
        X.GarageQual.fillna('TA', inplace=True)
        X.GarageCond.fillna('TA', inplace=True)
        X.PoolQC.fillna("NoPool", inplace=True)
        return X

class RemoveColumns(BaseEstimator, TransformerMixin):
    def __init__(self): pass
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        return X.drop(
        ['GarageArea', 'BsmtHalfBath', 'MiscVal', 'MoSold',
        'YrSold', 'BsmtFinSF2', 'LowQualFinSF', '3SsnPorch', 
        'Utilities', 'Exterior2nd', 'Alley', 'Id'],
        axis = 1)

class AlterAttributes(BaseEstimator, TransformerMixin):
    def __init__(self): pass
    def fit(self, X, y=None):
        return self
    def transform(self, X):
        #Combine GarageCars == 3 and GarageCars == 4
        X.GarageCars = X.GarageCars.map(lambda x: 3 if (x == 4) else x)

        #Fill Nans in GarageYrBlt with median year
        X.GarageYrBlt = X.GarageYrBlt.fillna(X.GarageYrBlt.median())

        #Combine KitchenAbvGr 1 and 0 AND 2 and 3 such that it is binary
        X.KitchenAbvGr = X.KitchenAbvGr.map(lambda x: 0 if (x == 1) else x)
        X.KitchenAbvGr = X.KitchenAbvGr.map(lambda x: 1 if (x == 2 or x == 3) else x)

        #Make Pool Area binary (x>0 or x==0)--> 1 if they have pool else 0
        X.PoolArea = X.PoolArea.map(lambda x: 1 if (x>0) else x)
        return X

class GetDummies(BaseEstimator, TransformerMixin):
    def __init__(self): pass
    def fit(self, X, y=None):
        self.PriorData = pd.get_dummies(pd.DataFrame(X))
        self.columns = self.PriorData.columns
        return self
    def transform(self, X):
        temp = pd.get_dummies(X)
        for col in self.columns:
            if col not in temp.columns:
                temp_ar = np.zeros(temp.shape[0])
                for i in range(len(temp_ar)):
                    temp_ar[i]=self.PriorData[col].median()
                temp[col] = temp_ar
                
        for i,j in zip(temp.isnull().sum(), temp.columns):
            if i>0:
                temp[j].fillna(temp[j].median(), inplace=True)
        return pd.get_dummies(temp)

class ScaleData(BaseEstimator, TransformerMixin):
    def __init__(self): pass
    def fit(self, X, y=None):
        self.PriorData = X
        return self
    def transform(self, X):
        return (X-self.PriorData.mean())/self.PriorData.std()    

class GetDataFrame(BaseEstimator, TransformerMixin):
    def __init__(self): pass
    def fit(self, X, y=None):
        self.columns = pd.DataFrame(X).columns
        return self
    def transform(self, X):
        return pd.DataFrame(X, columns=self.columns).fillna(0)

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

Prep = Pipeline([
    ('fillnanvalues', FillNanValues()),
    ('removecolumns', RemoveColumns()),
    ('alterattributes', AlterAttributes()),
    ('getdummies', GetDummies()),
    ('scaledata', ScaleData()),
    ('getdataframe', GetDataFrame())
])

In [6]:
# Test pipeline
Prep.fit_transform(df).head(5)

Unnamed: 0,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,GrLivArea,BsmtFullBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,WoodDeckSF,OpenPorchSF,EnclosedPorch,ScreenPorch,PoolArea,SalePrice,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,Street_Grvl,Street_Pave,LotShape_IR1,LotShape_IR2,LotShape_IR3,LotShape_Reg,LandContour_Bnk,LandContour_HLS,LandContour_Low,LandContour_Lvl,LotConfig_Corner,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,LandSlope_Gtl,LandSlope_Mod,LandSlope_Sev,Neighborhood_Blmngtn,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,Condition1_RRNn,Condition2_Artery,Condition2_Feedr,Condition2_Norm,Condition2_PosA,Condition2_PosN,Condition2_RRAe,Condition2_RRAn,Condition2_RRNn,BldgType_1Fam,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,HouseStyle_1.5Fin,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Fin,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,RoofStyle_Flat,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofStyle_Shed,RoofMatl_ClyTile,RoofMatl_CompShg,RoofMatl_Membran,RoofMatl_Metal,RoofMatl_Roll,RoofMatl_Tar&Grv,RoofMatl_WdShake,RoofMatl_WdShngl,Exterior1st_AsbShng,Exterior1st_AsphShn,Exterior1st_BrkComm,Exterior1st_BrkFace,Exterior1st_CBlock,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_ImStucc,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stone,Exterior1st_Stucco,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,MasVnrType_BrkCmn,MasVnrType_BrkFace,MasVnrType_None,MasVnrType_Stone,ExterQual_Ex,ExterQual_Fa,ExterQual_Gd,ExterQual_TA,ExterCond_Ex,ExterCond_Fa,ExterCond_Gd,ExterCond_Po,ExterCond_TA,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_Wood,BsmtQual_Ex,BsmtQual_Fa,BsmtQual_Gd,BsmtQual_NoBasement,BsmtQual_TA,BsmtCond_Fa,BsmtCond_Gd,BsmtCond_NoBasement,BsmtCond_Po,BsmtCond_TA,BsmtExposure_Av,BsmtExposure_Gd,BsmtExposure_Mn,BsmtExposure_No,BsmtExposure_NoBasement,BsmtFinType1_ALQ,BsmtFinType1_BLQ,BsmtFinType1_GLQ,BsmtFinType1_LwQ,BsmtFinType1_NoBasement,BsmtFinType1_Rec,BsmtFinType1_Unf,BsmtFinType2_ALQ,BsmtFinType2_BLQ,BsmtFinType2_GLQ,BsmtFinType2_LwQ,BsmtFinType2_NoBasement,BsmtFinType2_Rec,BsmtFinType2_Unf,Heating_Floor,Heating_GasA,Heating_GasW,Heating_Grav,Heating_OthW,Heating_Wall,HeatingQC_Ex,HeatingQC_Fa,HeatingQC_Gd,HeatingQC_Po,HeatingQC_TA,CentralAir_N,CentralAir_Y,Electrical_FuseA,Electrical_FuseF,Electrical_FuseP,Electrical_Mix,Electrical_SBrkr,KitchenQual_Ex,KitchenQual_Fa,KitchenQual_Gd,KitchenQual_TA,Functional_Maj1,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Sev,Functional_Typ,FireplaceQu_Ex,FireplaceQu_Fa,FireplaceQu_Gd,FireplaceQu_NoFP,FireplaceQu_Po,FireplaceQu_TA,GarageType_2Types,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageFinish_Fin,GarageFinish_RFn,GarageFinish_Unf,GarageQual_Ex,GarageQual_Fa,GarageQual_Gd,GarageQual_Po,GarageQual_TA,GarageCond_Ex,GarageCond_Fa,GarageCond_Gd,GarageCond_Po,GarageCond_TA,PavedDrive_N,PavedDrive_P,PavedDrive_Y,PoolQC_Ex,PoolQC_Fa,PoolQC_Gd,PoolQC_NoPool,Fence_GdPrv,Fence_GdWo,Fence_MnPrv,Fence_MnWw,Fence_NoFence,MiscFeature_Gar2,MiscFeature_NoShed,MiscFeature_Othr,MiscFeature_Shed,MiscFeature_TenC,SaleType_COD,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,0.07335,-0.220799,-0.207071,0.651256,-0.517023,1.050634,0.878367,0.513928,0.575228,-0.944267,-0.459145,-0.793162,1.161454,0.370207,1.107431,0.78947,1.227165,0.163723,-0.219237,0.911897,-0.950901,1.01725,0.319621,-0.751918,0.216429,-0.359202,-0.270116,-0.069385,0.347154,-0.083017,-0.215785,-0.105227,0.517956,-0.418812,-0.064216,0.064216,-0.703962,-0.169923,-0.083017,0.760251,-0.212287,-0.188246,-0.158945,0.33701,-0.468578,-0.262234,-0.182318,-0.052396,0.622549,0.23749,-0.215785,-0.094752,-0.108503,-0.037024,-0.105227,-0.203325,-0.139784,2.954209,-0.190187,-0.27107,-0.239094,-0.161194,-0.108503,-0.186288,-0.426687,-0.07873,-0.229337,-0.169923,-0.235877,-0.289539,-0.131946,-0.230986,-0.205144,-0.250096,-0.131946,-0.163415,-0.087099,-0.184312,-0.242277,0.398273,-0.074202,-0.114788,-0.087099,-0.134606,-0.037024,-0.058601,-0.037024,-0.064216,0.10185,-0.026171,-0.037024,-0.026171,-0.026171,-0.037024,0.443381,-0.147237,-0.192111,-0.174141,-0.290925,-0.343273,-0.098363,-0.994195,-0.074202,-0.087099,1.509747,-0.161194,-0.215785,-0.094752,0.528571,-0.087099,-0.493401,-0.069385,-0.037024,-0.026171,0.134606,-0.026171,-0.026171,-0.026171,-0.087099,-0.058601,-0.064216,-0.117811,-0.026171,-0.037024,-0.188246,-0.026171,-0.208741,-0.423319,-0.026171,-0.421067,-0.282537,-0.037024,-0.131946,1.35414,-0.405169,-0.134606,-0.10185,1.509747,-1.217365,-0.309888,-0.192111,-0.098363,1.410829,-1.278381,-0.045361,-0.139784,-0.333219,-0.026171,0.372492,-0.333219,-0.875802,1.120584,-0.129235,-0.064216,-0.045361,-0.300506,-0.156667,1.166845,-0.161194,-0.894259,-0.17827,-0.215785,-0.161194,-0.037024,0.33701,-0.422194,-0.317784,-0.290925,0.729136,-0.163415,-0.421067,-0.335749,1.578327,-0.230986,-0.161194,-0.316477,-0.645902,-0.114788,-0.152018,-0.098363,-0.180304,-0.163415,-0.195909,0.402876,-0.026171,0.149645,-0.111688,-0.069385,-0.037024,-0.052396,0.984706,-0.186288,-0.444486,-0.026171,-0.643774,-0.263722,0.263722,-0.262234,-0.137218,-0.045361,-0.026171,0.30589,-0.27107,-0.16561,1.220838,-1.006528,-0.098363,-0.058601,-0.147237,-0.154359,-0.10185,-0.026171,0.27107,-0.129235,-0.152018,-0.592968,1.05602,-0.117811,-0.522206,-0.064216,0.731341,-0.114788,-0.253172,-0.07873,-0.600353,-0.563446,1.567811,-0.941115,-0.045361,-0.184312,-0.098363,-0.045361,0.220946,-0.037024,-0.156667,-0.07873,-0.069385,0.194018,-0.25622,-0.144792,0.29915,-0.037024,-0.037024,-0.045361,0.069385,-0.205144,-0.195909,-0.346999,-0.087099,0.488031,-0.037024,0.195909,-0.037024,-0.186288,-0.026171,-0.174141,-0.052396,-0.037024,-0.07873,-0.058601,-0.058601,-0.301858,-0.045361,0.390159,-0.272522,-0.052396,-0.091003,-0.117811,0.467491,-0.30589
1,-0.872264,0.460162,-0.091855,-0.071812,2.178881,0.15668,-0.42943,-0.570555,1.171591,-0.641008,0.466305,0.257052,-0.794891,-0.482347,-0.819684,0.78947,-0.76136,0.163723,-0.219237,-0.318574,0.600289,-0.10789,0.319621,1.625638,-0.704242,-0.359202,-0.270116,-0.069385,0.007286,-0.083017,-0.215785,-0.105227,0.517956,-0.418812,-0.064216,0.064216,-0.703962,-0.169923,-0.083017,0.760251,-0.212287,-0.188246,-0.158945,0.33701,-0.468578,-0.262234,5.481171,-0.052396,-1.605199,0.23749,-0.215785,-0.094752,-0.108503,-0.037024,-0.105227,-0.203325,-0.139784,-0.338268,-0.190187,-0.27107,-0.239094,-0.161194,-0.108503,-0.186288,-0.426687,-0.07873,-0.229337,-0.169923,-0.235877,-0.289539,-0.131946,-0.230986,-0.205144,-0.250096,-0.131946,-0.163415,11.473319,-0.184312,4.124686,-2.50912,-0.074202,-0.114788,-0.087099,-0.134606,-0.037024,-0.058601,-0.037024,-0.064216,0.10185,-0.026171,-0.037024,-0.026171,-0.026171,-0.037024,0.443381,-0.147237,-0.192111,-0.174141,-0.290925,-0.343273,-0.098363,1.00515,-0.074202,-0.087099,-0.661909,-0.161194,-0.215785,-0.094752,0.528571,-0.087099,-0.493401,-0.069385,-0.037024,-0.026171,0.134606,-0.026171,-0.026171,-0.026171,-0.087099,-0.058601,-0.064216,-0.117811,-0.026171,-0.037024,-0.188246,-0.026171,-0.208741,-0.423319,-0.026171,2.37329,-0.282537,-0.037024,-0.131946,-0.73797,-0.405169,-0.134606,-0.10185,-0.661909,0.820884,-0.309888,-0.192111,-0.098363,-0.708318,0.781703,-0.045361,-0.139784,-0.333219,-0.026171,0.372492,-0.333219,1.141029,-0.891781,-0.129235,-0.064216,-0.045361,-0.300506,-0.156667,1.166845,-0.161194,-0.894259,-0.17827,-0.215785,-0.161194,-0.037024,0.33701,-0.422194,3.144637,-0.290925,-1.370546,-0.163415,2.37329,-0.335749,-0.633148,-0.230986,-0.161194,-0.316477,-0.645902,-0.114788,-0.152018,-0.098363,-0.180304,-0.163415,-0.195909,0.402876,-0.026171,0.149645,-0.111688,-0.069385,-0.037024,-0.052396,0.984706,-0.186288,-0.444486,-0.026171,-0.643774,-0.263722,0.263722,-0.262234,-0.137218,-0.045361,-0.026171,0.30589,-0.27107,-0.16561,-0.818548,0.992834,-0.098363,-0.058601,-0.147237,-0.154359,-0.10185,-0.026171,0.27107,-0.129235,-0.152018,-0.592968,-0.946303,-0.117811,1.913642,-0.064216,0.731341,-0.114788,-0.253172,-0.07873,-0.600353,-0.563446,1.567811,-0.941115,-0.045361,-0.184312,-0.098363,-0.045361,0.220946,-0.037024,-0.156667,-0.07873,-0.069385,0.194018,-0.25622,-0.144792,0.29915,-0.037024,-0.037024,-0.045361,0.069385,-0.205144,-0.195909,-0.346999,-0.087099,0.488031,-0.037024,0.195909,-0.037024,-0.186288,-0.026171,-0.174141,-0.052396,-0.037024,-0.07873,-0.058601,-0.058601,-0.301858,-0.045361,0.390159,-0.272522,-0.052396,-0.091003,-0.117811,0.467491,-0.30589
2,0.07335,-0.084607,0.073455,0.651256,-0.517023,0.984415,0.82993,0.325803,0.092875,-0.30154,-0.313261,-0.627611,1.188943,0.514836,1.107431,0.78947,1.227165,0.163723,-0.219237,-0.318574,0.600289,0.933906,0.319621,-0.751918,-0.070337,-0.359202,-0.270116,-0.069385,0.53597,-0.083017,-0.215785,-0.105227,0.517956,-0.418812,-0.064216,0.064216,1.419559,-0.169923,-0.083017,-1.314453,-0.212287,-0.188246,-0.158945,0.33701,-0.468578,-0.262234,-0.182318,-0.052396,0.622549,0.23749,-0.215785,-0.094752,-0.108503,-0.037024,-0.105227,-0.203325,-0.139784,2.954209,-0.190187,-0.27107,-0.239094,-0.161194,-0.108503,-0.186288,-0.426687,-0.07873,-0.229337,-0.169923,-0.235877,-0.289539,-0.131946,-0.230986,-0.205144,-0.250096,-0.131946,-0.163415,-0.087099,-0.184312,-0.242277,0.398273,-0.074202,-0.114788,-0.087099,-0.134606,-0.037024,-0.058601,-0.037024,-0.064216,0.10185,-0.026171,-0.037024,-0.026171,-0.026171,-0.037024,0.443381,-0.147237,-0.192111,-0.174141,-0.290925,-0.343273,-0.098363,-0.994195,-0.074202,-0.087099,1.509747,-0.161194,-0.215785,-0.094752,0.528571,-0.087099,-0.493401,-0.069385,-0.037024,-0.026171,0.134606,-0.026171,-0.026171,-0.026171,-0.087099,-0.058601,-0.064216,-0.117811,-0.026171,-0.037024,-0.188246,-0.026171,-0.208741,-0.423319,-0.026171,-0.421067,-0.282537,-0.037024,-0.131946,1.35414,-0.405169,-0.134606,-0.10185,1.509747,-1.217365,-0.309888,-0.192111,-0.098363,1.410829,-1.278381,-0.045361,-0.139784,-0.333219,-0.026171,0.372492,-0.333219,-0.875802,1.120584,-0.129235,-0.064216,-0.045361,-0.300506,-0.156667,1.166845,-0.161194,-0.894259,-0.17827,-0.215785,-0.161194,-0.037024,0.33701,-0.422194,-0.317784,3.434957,-1.370546,-0.163415,-0.421067,-0.335749,1.578327,-0.230986,-0.161194,-0.316477,-0.645902,-0.114788,-0.152018,-0.098363,-0.180304,-0.163415,-0.195909,0.402876,-0.026171,0.149645,-0.111688,-0.069385,-0.037024,-0.052396,0.984706,-0.186288,-0.444486,-0.026171,-0.643774,-0.263722,0.263722,-0.262234,-0.137218,-0.045361,-0.026171,0.30589,-0.27107,-0.16561,1.220838,-1.006528,-0.098363,-0.058601,-0.147237,-0.154359,-0.10185,-0.026171,0.27107,-0.129235,-0.152018,-0.592968,-0.946303,-0.117811,1.913642,-0.064216,0.731341,-0.114788,-0.253172,-0.07873,-0.600353,-0.563446,1.567811,-0.941115,-0.045361,-0.184312,-0.098363,-0.045361,0.220946,-0.037024,-0.156667,-0.07873,-0.069385,0.194018,-0.25622,-0.144792,0.29915,-0.037024,-0.037024,-0.045361,0.069385,-0.205144,-0.195909,-0.346999,-0.087099,0.488031,-0.037024,0.195909,-0.037024,-0.186288,-0.026171,-0.174141,-0.052396,-0.037024,-0.07873,-0.058601,-0.058601,-0.301858,-0.045361,0.390159,-0.272522,-0.052396,-0.091003,-0.117811,0.467491,-0.30589
3,0.309753,-0.447787,-0.096864,0.651256,-0.517023,-1.862993,-0.720051,-0.570555,-0.499103,-0.061648,-0.687089,-0.521555,0.936955,0.383528,1.107431,-1.025689,-0.76136,0.163723,-0.219237,0.296662,0.600289,0.80889,1.672219,-0.751918,-0.175988,4.091122,-0.270116,-0.069385,-0.515105,-0.083017,-0.215785,-0.105227,0.517956,-0.418812,-0.064216,0.064216,1.419559,-0.169923,-0.083017,-1.314453,-0.212287,-0.188246,-0.158945,0.33701,2.132654,-0.262234,-0.182318,-0.052396,-1.605199,0.23749,-0.215785,-0.094752,-0.108503,-0.037024,-0.105227,-0.203325,-0.139784,-0.338268,5.254382,-0.27107,-0.239094,-0.161194,-0.108503,-0.186288,-0.426687,-0.07873,-0.229337,-0.169923,-0.235877,-0.289539,-0.131946,-0.230986,-0.205144,-0.250096,-0.131946,-0.163415,-0.087099,-0.184312,-0.242277,0.398273,-0.074202,-0.114788,-0.087099,-0.134606,-0.037024,-0.058601,-0.037024,-0.064216,0.10185,-0.026171,-0.037024,-0.026171,-0.026171,-0.037024,0.443381,-0.147237,-0.192111,-0.174141,-0.290925,-0.343273,-0.098363,-0.994195,-0.074202,-0.087099,1.509747,-0.161194,-0.215785,-0.094752,0.528571,-0.087099,-0.493401,-0.069385,-0.037024,-0.026171,0.134606,-0.026171,-0.026171,-0.026171,-0.087099,-0.058601,-0.064216,-0.117811,-0.026171,-0.037024,-0.188246,-0.026171,-0.208741,-0.423319,-0.026171,-0.421067,-0.282537,-0.037024,-0.131946,-0.73797,2.466416,-0.134606,-0.10185,-0.661909,0.820884,-0.309888,-0.192111,-0.098363,-0.708318,0.781703,-0.045361,-0.139784,-0.333219,-0.026171,0.372492,2.998972,-0.875802,-0.891781,-0.129235,-0.064216,-0.045361,-0.300506,-0.156667,-0.856425,-0.161194,1.117479,-0.17827,4.631073,-0.161194,-0.037024,-2.965237,-0.422194,-0.317784,-0.290925,0.729136,-0.163415,2.37329,-0.335749,-0.633148,-0.230986,-0.161194,-0.316477,-0.645902,-0.114788,-0.152018,-0.098363,-0.180304,-0.163415,-0.195909,0.402876,-0.026171,0.149645,-0.111688,-0.069385,-0.037024,-0.052396,-1.014836,-0.186288,2.24825,-0.026171,-0.643774,-0.263722,0.263722,-0.262234,-0.137218,-0.045361,-0.026171,0.30589,-0.27107,-0.16561,1.220838,-1.006528,-0.098363,-0.058601,-0.147237,-0.154359,-0.10185,-0.026171,0.27107,-0.129235,-0.152018,1.685277,-0.946303,-0.117811,-0.522206,-0.064216,-1.366415,-0.114788,-0.253172,-0.07873,1.664545,-0.563446,-0.637395,1.061841,-0.045361,-0.184312,-0.098363,-0.045361,0.220946,-0.037024,-0.156667,-0.07873,-0.069385,0.194018,-0.25622,-0.144792,0.29915,-0.037024,-0.037024,-0.045361,0.069385,-0.205144,-0.195909,-0.346999,-0.087099,0.488031,-0.037024,0.195909,-0.037024,-0.186288,-0.026171,-0.174141,-0.052396,-0.037024,-0.07873,-0.058601,-0.058601,-0.301858,-0.045361,0.390159,3.66691,-0.052396,-0.091003,-0.117811,-2.137613,-0.30589
4,0.07335,0.641752,0.37502,1.374324,-0.517023,0.951306,0.733056,1.366021,0.46341,-0.174805,0.199611,-0.045596,1.617323,1.298881,1.107431,0.78947,1.227165,1.389547,-0.219237,1.527133,0.600289,0.892234,1.672219,0.77993,0.563567,-0.359202,-0.270116,-0.069385,0.869545,-0.083017,-0.215785,-0.105227,0.517956,-0.418812,-0.064216,0.064216,1.419559,-0.169923,-0.083017,-1.314453,-0.212287,-0.188246,-0.158945,0.33701,-0.468578,-0.262234,5.481171,-0.052396,-1.605199,0.23749,-0.215785,-0.094752,-0.108503,-0.037024,-0.105227,-0.203325,-0.139784,-0.338268,-0.190187,-0.27107,-0.239094,-0.161194,-0.108503,-0.186288,-0.426687,-0.07873,-0.229337,5.880991,-0.235877,-0.289539,-0.131946,-0.230986,-0.205144,-0.250096,-0.131946,-0.163415,-0.087099,-0.184312,-0.242277,0.398273,-0.074202,-0.114788,-0.087099,-0.134606,-0.037024,-0.058601,-0.037024,-0.064216,0.10185,-0.026171,-0.037024,-0.026171,-0.026171,-0.037024,0.443381,-0.147237,-0.192111,-0.174141,-0.290925,-0.343273,-0.098363,-0.994195,-0.074202,-0.087099,1.509747,-0.161194,-0.215785,-0.094752,0.528571,-0.087099,-0.493401,-0.069385,-0.037024,-0.026171,0.134606,-0.026171,-0.026171,-0.026171,-0.087099,-0.058601,-0.064216,-0.117811,-0.026171,-0.037024,-0.188246,-0.026171,-0.208741,-0.423319,-0.026171,-0.421067,-0.282537,-0.037024,-0.131946,1.35414,-0.405169,-0.134606,-0.10185,1.509747,-1.217365,-0.309888,-0.192111,-0.098363,1.410829,-1.278381,-0.045361,-0.139784,-0.333219,-0.026171,0.372492,-0.333219,-0.875802,1.120584,-0.129235,-0.064216,-0.045361,-0.300506,-0.156667,1.166845,-0.161194,-0.894259,-0.17827,-0.215785,-0.161194,-0.037024,0.33701,2.366959,-0.317784,-0.290925,-1.370546,-0.163415,-0.421067,-0.335749,1.578327,-0.230986,-0.161194,-0.316477,-0.645902,-0.114788,-0.152018,-0.098363,-0.180304,-0.163415,-0.195909,0.402876,-0.026171,0.149645,-0.111688,-0.069385,-0.037024,-0.052396,0.984706,-0.186288,-0.444486,-0.026171,-0.643774,-0.263722,0.263722,-0.262234,-0.137218,-0.045361,-0.026171,0.30589,-0.27107,-0.16561,1.220838,-1.006528,-0.098363,-0.058601,-0.147237,-0.154359,-0.10185,-0.026171,0.27107,-0.129235,-0.152018,-0.592968,-0.946303,-0.117811,1.913642,-0.064216,0.731341,-0.114788,-0.253172,-0.07873,-0.600353,-0.563446,1.567811,-0.941115,-0.045361,-0.184312,-0.098363,-0.045361,0.220946,-0.037024,-0.156667,-0.07873,-0.069385,0.194018,-0.25622,-0.144792,0.29915,-0.037024,-0.037024,-0.045361,0.069385,-0.205144,-0.195909,-0.346999,-0.087099,0.488031,-0.037024,0.195909,-0.037024,-0.186288,-0.026171,-0.174141,-0.052396,-0.037024,-0.07873,-0.058601,-0.058601,-0.301858,-0.045361,0.390159,-0.272522,-0.052396,-0.091003,-0.117811,0.467491,-0.30589


In [54]:
GradB = Pipeline([
    ('prep', Prep), 
    ('bag', GradientBoostingRegressor(n_estimators=100))
])

param_grid = [
    {'bag__n_estimators' : [200, 500, 800]},
    {'bag__loss' : ['ls', 'lad', 'huber']},
    {'bag__learning_rate' : [0.01, 0.05, 0.1, 0.2]},
    {'bag__max_depth' : [2,3,5,7]},
    {'bag__subsample' : [0.7, 1.0]},
    {'bag__max_features' : [None, 'sqrt']}
]

grid_search2 = GridSearchCV(GradB, param_grid, cv=3, scoring=make_scorer(mean_squared_log_error))
grid_search2.fit(df.drop(["SalePrice"], axis=1), df.SalePrice)
print grid_search2.grid_scores_
print grid_search2.best_params_

[mean: 0.01712, std: 0.00282, params: {'bag__n_estimators': 200}, mean: 0.01727, std: 0.00332, params: {'bag__n_estimators': 500}, mean: 0.01704, std: 0.00315, params: {'bag__n_estimators': 800}, mean: 0.01810, std: 0.00300, params: {'bag__loss': 'ls'}, mean: 0.01870, std: 0.00223, params: {'bag__loss': 'lad'}, mean: 0.01798, std: 0.00209, params: {'bag__loss': 'huber'}, mean: 0.06133, std: 0.00399, params: {'bag__learning_rate': 0.01}, mean: 0.02071, std: 0.00328, params: {'bag__learning_rate': 0.05}, mean: 0.01805, std: 0.00301, params: {'bag__learning_rate': 0.1}, mean: 0.01853, std: 0.00274, params: {'bag__learning_rate': 0.2}, mean: 0.01987, std: 0.00342, params: {'bag__max_depth': 2}, mean: 0.01819, std: 0.00298, params: {'bag__max_depth': 3}, mean: 0.01834, std: 0.00189, params: {'bag__max_depth': 5}, mean: 0.02011, std: 0.00160, params: {'bag__max_depth': 7}, mean: 0.01761, std: 0.00290, params: {'bag__subsample': 0.7}, mean: 0.01808, std: 0.00295, params: {'bag__subsample': 1.



In [57]:
zip(grid_search2.cv_results_["mean_test_score"],grid_search2.cv_results_["params"])

[(0.017120352732740023, {'bag__n_estimators': 200}),
 (0.017266216989404216, {'bag__n_estimators': 500}),
 (0.01703890340278582, {'bag__n_estimators': 800}),
 (0.01810272661845206, {'bag__loss': 'ls'}),
 (0.018702586059728035, {'bag__loss': 'lad'}),
 (0.017979771608930148, {'bag__loss': 'huber'}),
 (0.061331067303454216, {'bag__learning_rate': 0.01}),
 (0.020714888749423, {'bag__learning_rate': 0.05}),
 (0.018049871701812069, {'bag__learning_rate': 0.1}),
 (0.018525068439091592, {'bag__learning_rate': 0.2}),
 (0.0198690825992314, {'bag__max_depth': 2}),
 (0.018186019860501508, {'bag__max_depth': 3}),
 (0.018340559952578914, {'bag__max_depth': 5}),
 (0.020107143212031268, {'bag__max_depth': 7}),
 (0.017613709245566749, {'bag__subsample': 0.7}),
 (0.018083595022720115, {'bag__subsample': 1.0}),
 (0.018013301315519568, {'bag__max_features': None}),
 (0.019458297071736564, {'bag__max_features': 'sqrt'})]

In [14]:
from sklearn.model_selection import RandomizedSearchCV
model = Pipeline([
    ('prep', Prep), 
    ('bag', GradientBoostingRegressor(n_estimators=800, loss='huber', subsample=0.7))
])

param_grid = {'bag__n_estimators' : [700, 800, 900], 'bag__max_depth' : [3,4],
    'bag__subsample' : [0.7, 0.8, 0.9], 'bag__loss': ['huber']}

grid_search3 = RandomizedSearchCV(model, param_grid, cv=3, scoring=make_scorer(mean_squared_log_error))
grid_search3.fit(df.drop(["SalePrice"], axis=1), df.SalePrice)
zip(grid_search3.cv_results_["mean_test_score"],grid_search3.cv_results_["params"])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._update_inplace(new_data)


[(0.016881147365104109,
  {'bag__loss': 'huber',
   'bag__max_depth': 4,
   'bag__n_estimators': 800,
   'bag__subsample': 0.8}),
 (0.016464509744430044,
  {'bag__loss': 'huber',
   'bag__max_depth': 3,
   'bag__n_estimators': 900,
   'bag__subsample': 0.7}),
 (0.017918144250430219,
  {'bag__loss': 'huber',
   'bag__max_depth': 3,
   'bag__n_estimators': 900,
   'bag__subsample': 0.9}),
 (0.017144690120745203,
  {'bag__loss': 'huber',
   'bag__max_depth': 4,
   'bag__n_estimators': 700,
   'bag__subsample': 0.8}),
 (0.016854693023648438,
  {'bag__loss': 'huber',
   'bag__max_depth': 4,
   'bag__n_estimators': 900,
   'bag__subsample': 0.8}),
 (0.017417189058639999,
  {'bag__loss': 'huber',
   'bag__max_depth': 4,
   'bag__n_estimators': 900,
   'bag__subsample': 0.7}),
 (0.016536558218087784,
  {'bag__loss': 'huber',
   'bag__max_depth': 3,
   'bag__n_estimators': 900,
   'bag__subsample': 0.8}),
 (0.016811441158136389,
  {'bag__loss': 'huber',
   'bag__max_depth': 3,
   'bag__n_estima

In [46]:
model = Pipeline([
    ('prep', Prep), 
    ('bag', GradientBoostingRegressor(n_estimators=15000, loss='huber', subsample=0.5, learning_rate=0.005))
])

res = cross_val_score(model, df.drop("SalePrice", axis=1), df.SalePrice, cv=3, scoring=make_scorer(mean_squared_log_error))
res, res.mean()

(array([ 0.01230486,  0.01765054,  0.01450896]), 0.014821455197180403)

In [45]:
model = Pipeline([
    ('prep', Prep), 
    ('bag', GradientBoostingRegressor(n_estimators=30000, loss='huber', subsample=0.4, learning_rate=0.003))
])

res = cross_val_score(model, df.drop("SalePrice", axis=1), df.SalePrice, cv=3, scoring=make_scorer(mean_squared_log_error))
res, res.mean()

(array([ 0.01241914,  0.01806286,  0.01441576]), 0.01496591939471281)

# Try it on the test data and make a submission to Kaggle

In [47]:
# Train on entire training dataset 
model = Pipeline([
    ('prep', Prep), 
    ('bag', GradientBoostingRegressor(n_estimators=15000, loss='huber', subsample=0.5, learning_rate=0.005))
])

model.fit(df.drop(['SalePrice'],axis=1), df.SalePrice)

df_test = pd.read_csv("test.csv")
test_predictions = model.predict(df_test)

pd.DataFrame(test_predictions).describe()

Unnamed: 0,0
count,1459.0
mean,179000.727532
std,77200.849721
min,25181.610278
25%,128692.974084
50%,158678.076182
75%,208891.284239
max,562712.156114


In [50]:
df_test_predictions = pd.DataFrame([df_test.Id.astype(int) , test_predictions]).T
df_test_predictions.columns = ['Id', 'SalePrice']
df_test_predictions['Id'] =df_test_predictions['Id'].astype(int)
df_test_predictions.to_csv("Submission_3.csv", index=False)

In [51]:
df_test_predictions

Unnamed: 0,Id,SalePrice
0,1461,123762.045516
1,1462,161510.617074
2,1463,191487.406375
3,1464,195024.450031
4,1465,183478.063506
5,1466,175835.892124
6,1467,171006.865419
7,1468,168753.309245
8,1469,182913.360386
9,1470,130609.699091
