In [1]:
import os
import sys
import logging
import re
import time

import pandas as pd
import numpy as np
import sklearn.metrics
import sklearn.preprocessing
import sklearn.model_selection
import sklearn.preprocessing
import sklearn.pipeline

import sklearn.random_projection
import sklearn.decomposition 
import sklearn.cluster
import sklearn.feature_selection

import xgboost
import sklearn.svm
import sklearn.linear_model
import sklearn.ensemble
import sklearn.gaussian_process
import sklearn.kernel_ridge
import sklearn.tree
import tensorflow as tf

import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go
import plotly.offline

In [2]:
plotly.offline.init_notebook_mode(connected=True)
pd.options.mode.chained_assignment = None
pd.options.display.max_rows = 500
pd.options.display.max_columns = None
pd.options.display.max_colwidth = 160

log = logging.getLogger(name=__name__)
log.setLevel(logging.INFO)
logging.captureWarnings(True)
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.INFO)

stream_handler.setFormatter(formatter)
log.addHandler(stream_handler)

log.info(f"Python version: {sys.version}")
log.info(f"Numpy version: {np.__version__}")
log.info(f"Pandas version: {pd.__version__}")
log.info(f"Scikit-learn version: {sklearn.__version__}")
log.info(f"Plotly version: {plotly.__version__}")

2020-12-01 09:25:58,188 - __main__ - INFO - Python version: 3.8.5 (default, Jul 28 2020, 12:59:40) 
[GCC 9.3.0]
2020-12-01 09:25:58,189 - __main__ - INFO - Numpy version: 1.19.4
2020-12-01 09:25:58,189 - __main__ - INFO - Pandas version: 1.1.4
2020-12-01 09:25:58,189 - __main__ - INFO - Scikit-learn version: 0.23.2
2020-12-01 09:25:58,220 - __main__ - INFO - Plotly version: 4.13.0


In [3]:
data_bn = "data"
data_dir = os.path.abspath(
    os.path.join(__name__, os.pardir, os.pardir, data_bn)
)

log.info(f"Data directory: {data_dir}")

train_bn = "train.csv"
test_bn = "test.csv"
train_fn = os.path.join(data_dir, train_bn)
test_fn = os.path.join(data_dir, test_bn)

df_train = pd.read_csv(train_fn)
df_test = pd.read_csv(test_fn)

log.info(f"Training data shape: {df_train.shape}")
log.info(f"Test data shape: {df_test.shape}")

train_pts = df_train.shape[0]

2020-12-01 09:25:58,281 - __main__ - INFO - Data directory: /home/jamescorbin/GIT/kaggle/house_prices_regression/data
2020-12-01 09:25:58,318 - __main__ - INFO - Training data shape: (1460, 81)
2020-12-01 09:25:58,319 - __main__ - INFO - Test data shape: (1459, 80)


In [4]:
df_train.head(5)

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2003,2003,Gable,CompShg,VinylSd,VinylSd,BrkFace,196.0,Gd,TA,PConc,Gd,TA,No,GLQ,706,Unf,0,150,856,GasA,Ex,Y,SBrkr,856,854,0,1710,1,0,2,1,3,1,Gd,8,Typ,0,,Attchd,2003.0,RFn,2,548,TA,TA,Y,0,61,0,0,0,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,Gtl,Veenker,Feedr,Norm,1Fam,1Story,6,8,1976,1976,Gable,CompShg,MetalSd,MetalSd,,0.0,TA,TA,CBlock,Gd,TA,Gd,ALQ,978,Unf,0,284,1262,GasA,Ex,Y,SBrkr,1262,0,0,1262,0,1,2,0,3,1,TA,6,Typ,1,TA,Attchd,1976.0,RFn,2,460,TA,TA,Y,298,0,0,0,0,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,Gtl,CollgCr,Norm,Norm,1Fam,2Story,7,5,2001,2002,Gable,CompShg,VinylSd,VinylSd,BrkFace,162.0,Gd,TA,PConc,Gd,TA,Mn,GLQ,486,Unf,0,434,920,GasA,Ex,Y,SBrkr,920,866,0,1786,1,0,2,1,3,1,Gd,6,Typ,1,TA,Attchd,2001.0,RFn,2,608,TA,TA,Y,0,42,0,0,0,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,Gtl,Crawfor,Norm,Norm,1Fam,2Story,7,5,1915,1970,Gable,CompShg,Wd Sdng,Wd Shng,,0.0,TA,TA,BrkTil,TA,Gd,No,ALQ,216,Unf,0,540,756,GasA,Gd,Y,SBrkr,961,756,0,1717,1,0,1,0,3,1,Gd,7,Typ,1,Gd,Detchd,1998.0,Unf,3,642,TA,TA,Y,0,35,272,0,0,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,Gtl,NoRidge,Norm,Norm,1Fam,2Story,8,5,2000,2000,Gable,CompShg,VinylSd,VinylSd,BrkFace,350.0,Gd,TA,PConc,Gd,TA,Av,GLQ,655,Unf,0,490,1145,GasA,Ex,Y,SBrkr,1145,1053,0,2198,1,0,2,1,4,1,Gd,9,Typ,1,TA,Attchd,2000.0,RFn,3,836,TA,TA,Y,192,84,0,0,0,0,,,,0,12,2008,WD,Normal,250000


In [5]:
df_test.head(5)

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,RH,80.0,11622,Pave,,Reg,Lvl,AllPub,Inside,Gtl,NAmes,Feedr,Norm,1Fam,1Story,5,6,1961,1961,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,CBlock,TA,TA,No,Rec,468.0,LwQ,144.0,270.0,882.0,GasA,TA,Y,SBrkr,896,0,0,896,0.0,0.0,1,0,2,1,TA,5,Typ,0,,Attchd,1961.0,Unf,1.0,730.0,TA,TA,Y,140,0,0,0,120,0,,MnPrv,,0,6,2010,WD,Normal
1,1462,20,RL,81.0,14267,Pave,,IR1,Lvl,AllPub,Corner,Gtl,NAmes,Norm,Norm,1Fam,1Story,6,6,1958,1958,Hip,CompShg,Wd Sdng,Wd Sdng,BrkFace,108.0,TA,TA,CBlock,TA,TA,No,ALQ,923.0,Unf,0.0,406.0,1329.0,GasA,TA,Y,SBrkr,1329,0,0,1329,0.0,0.0,1,1,3,1,Gd,6,Typ,0,,Attchd,1958.0,Unf,1.0,312.0,TA,TA,Y,393,36,0,0,0,0,,,Gar2,12500,6,2010,WD,Normal
2,1463,60,RL,74.0,13830,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,5,5,1997,1998,Gable,CompShg,VinylSd,VinylSd,,0.0,TA,TA,PConc,Gd,TA,No,GLQ,791.0,Unf,0.0,137.0,928.0,GasA,Gd,Y,SBrkr,928,701,0,1629,0.0,0.0,2,1,3,1,TA,6,Typ,1,TA,Attchd,1997.0,Fin,2.0,482.0,TA,TA,Y,212,34,0,0,0,0,,MnPrv,,0,3,2010,WD,Normal
3,1464,60,RL,78.0,9978,Pave,,IR1,Lvl,AllPub,Inside,Gtl,Gilbert,Norm,Norm,1Fam,2Story,6,6,1998,1998,Gable,CompShg,VinylSd,VinylSd,BrkFace,20.0,TA,TA,PConc,TA,TA,No,GLQ,602.0,Unf,0.0,324.0,926.0,GasA,Ex,Y,SBrkr,926,678,0,1604,0.0,0.0,2,1,3,1,Gd,7,Typ,1,Gd,Attchd,1998.0,Fin,2.0,470.0,TA,TA,Y,360,36,0,0,0,0,,,,0,6,2010,WD,Normal
4,1465,120,RL,43.0,5005,Pave,,IR1,HLS,AllPub,Inside,Gtl,StoneBr,Norm,Norm,TwnhsE,1Story,8,5,1992,1992,Gable,CompShg,HdBoard,HdBoard,,0.0,Gd,TA,PConc,Gd,TA,No,ALQ,263.0,Unf,0.0,1017.0,1280.0,GasA,Ex,Y,SBrkr,1280,0,0,1280,0.0,0.0,2,0,2,1,Gd,5,Typ,0,,Attchd,1992.0,RFn,2.0,506.0,TA,TA,Y,0,82,0,0,144,0,,,,0,1,2010,WD,Normal


In [6]:
y_col = "SalePrice"

In [7]:
log.info(f"Number of training dataset columns: {len(df_train.columns)}.")
df_train.columns[:10]

2020-12-01 09:25:58,519 - __main__ - INFO - Number of training dataset columns: 81.


Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities'],
      dtype='object')

In [8]:
description_fn = os.path.join(data_dir, "data_description.txt")

with open(description_fn, 'r') as f:
    desc = [x for x in f]

In [9]:
feat_re = re.compile("\w+(?:)")

feat_search = []

for i, line in enumerate(desc):
    a = feat_re.match(line)
    if a:
        feat_search.append((i, a.group()))

In [10]:
desc[:10]

['MSSubClass: Identifies the type of dwelling involved in the sale.\t\n',
 '\n',
 '        20\t1-STORY 1946 & NEWER ALL STYLES\n',
 '        30\t1-STORY 1945 & OLDER\n',
 '        40\t1-STORY W/FINISHED ATTIC ALL AGES\n',
 '        45\t1-1/2 STORY - UNFINISHED ALL AGES\n',
 '        50\t1-1/2 STORY FINISHED ALL AGES\n',
 '        60\t2-STORY 1946 & NEWER\n',
 '        70\t2-STORY 1945 & OLDER\n',
 '        75\t2-1/2 STORY ALL AGES\n']

In [11]:
feat_search[:10]

[(0, 'MSSubClass'),
 (19, 'MSZoning'),
 (30, 'LotFrontage'),
 (32, 'LotArea'),
 (34, 'Street'),
 (39, 'Alley'),
 (45, 'LotShape'),
 (52, 'LandContour'),
 (59, 'Utilities'),
 (66, 'LotConfig')]

In [12]:
cat_feats = []
cont_feats = []

STEP = 2

for i, couple in enumerate(feat_search[:-1]):
    if feat_search[i+1][0] - couple[0] > STEP:
        cat_feats.append(couple[1])
    else:
        cont_feats.append(couple[1])
        
if len(desc) - feat_search[-1][0] > STEP:
    cat_feats.append(feat_search[-1][1])
else:
    cont_feats.append(feat_search[-1][1])

In [13]:
cat_feats[:10]

['MSSubClass',
 'MSZoning',
 'Street',
 'Alley',
 'LotShape',
 'LandContour',
 'Utilities',
 'LotConfig',
 'LandSlope',
 'Neighborhood']

In [14]:
cont_feats[:10]

['LotFrontage',
 'LotArea',
 'YearBuilt',
 'YearRemodAdd',
 'MasVnrArea',
 'BsmtFinSF1',
 'BsmtFinSF2',
 'BsmtUnfSF',
 'TotalBsmtSF',
 '1stFlrSF']

In [15]:
try:
    cont_feats.remove("Kitchen")
    cont_feats.append("KitchenAbvGr")
except ValueError as e:
    log.error(e)
try:
    cont_feats.remove("Bedroom")
    cont_feats.append("BedroomAbvGr")
except ValueError as e:
    log.error(e)

#cat_feats.remove("Neighborhood")

In [16]:
UNK = "UNK"
RANK = "rank"
NUMBER = "number"
FREQUENCY = "frequency"


class OrdinalEncoderExt(sklearn.preprocessing.OrdinalEncoder):
    """
    """

    def __init__(self,
                 top_n=None, 
                 count_thresh=None, 
                 freq_thresh=None, 
                 categories="auto", 
                 **kwargs,
    ):
        """
        """
        super(OrdinalEncoderExt, self).__init__(
            categories=categories,
            **kwargs
        )
        if top_n is not None:
            self.criterion = RANK
            try:
                self.criterion_val = int(top_n)
            except ValueError as e:
                log.error(e)
        elif count_thresh is not None:
            self.criterion = NUMBER
            try:
                self.criterion_val = int(count_thresh)
            except ValueError as e:
                log.error(e)
        elif freq_thresh is not None:
            self.criterion = FREQUENCY
            try:
                self.criterion_val = float(freq_thresh)
            except ValueError as e:
                log.error(e)
        else:
            self.criterion = ""
            self.criterion_val = None


    def fit(self, X):
        """
        """
        try:
            X = np.array(X)
        except ValueError as e:
            log.error(e)
        assert (len(X.shape)==2), "Require 2D array"
        
        X = X.astype(str)
        
        Y = np.full(X.shape, "", dtype="U20")
        for j in range(X.shape[1]):
            unique_elem, elem_locs, elem_counts = (
                np.unique(
                    X[:, j],
                    return_inverse=True,
                    return_counts=True,
                )
            )

            if self.criterion == RANK:
                a = np.argpartition(elem_counts, self.criterion_val)
                for t in a:
                    Y[elem_locs[t], j] = unique_elem[t]
            elif self.criterion == NUMBER:
                for i, t in np.ndenumerate(elem_counts):
                    if t >= self.criterion_val:
                        Y[elem_locs[i], j] = unique_elem[i]
            elif self.criterion == FREQUENCY:
                for i, t in np.ndenumerate(elem_counts):
                    if t/X.shape[0] >= self.criterion_val:
                        Y[elem_locs[i], j] = unique_elem[i]
            else:
                Y[:, j] = X[:, j]
            Y[np.where(Y[:, j]==''), j] = UNK

        tmp = np.full(X.shape[1], UNK).reshape((1, -1))
        Y = np.append(Y, tmp, axis=0)

        super(OrdinalEncoderExt, self).fit(Y)
        
        return 0


    def transform(self, X):
        """
        """
        X = X.astype(str)
        for i in range(X.shape[1]):
            X[~np.isin(X[:, i], self.categories_[i]), i] = UNK
            
        return super(OrdinalEncoderExt, self).transform(X).astype(int)


    def fit_transform(self, X):
        """
        """
        self.fit(X)

        return self.transform(X)

train_cat_data = df_train[cat_feats]
train_cat_data.fillna("", inplace=True)

test_cat_data = df_test[cat_feats]
test_cat_data.fillna("", inplace=True)

enc = OrdinalEncoderExt()

train_cat_data.head(5)

test_cat_data.head(5)

train_cat_vals = (
    pd.DataFrame(
        enc.fit_transform(train_cat_data.values), 
        columns=train_cat_data.columns,
    )
)

test_cat_vals = pd.DataFrame(enc.transform(test_cat_data.values), columns=test_cat_data.columns)

train_cat_vals.head(5)

test_cat_vals.head(5)

cols = [
    f"{train_cat_data.columns[i]}_{x}" 
        for i, col in enumerate(enc.categories_) for x in col
]
train_bin_enc = pd.DataFrame()
test_bin_enc = pd.DataFrame()


for j, cat in enumerate(train_cat_vals.columns):
    for i, col in enumerate(enc.categories_[j]):
        train_bin_enc[f"{cat}_{col}"] = train_cat_vals[cat].apply(lambda x: 1 if x==i else 0)
        test_bin_enc[f"{cat}_{col}"] = test_cat_vals[cat].apply(lambda x: 1 if x==i else 0)

cat_scl = sklearn.preprocessing.StandardScaler()

train_bin_enc = pd.DataFrame(
    cat_scl.fit_transform(train_bin_enc.values),
    columns=train_bin_enc.columns,
)

test_bin_enc = pd.DataFrame(
    cat_scl.transform(test_bin_enc.values),
    columns=test_bin_enc.columns,
)

train_bin_enc.head(5)

test_bin_enc.head(5)

train_cont_data = df_train[cont_feats]
test_cont_data = df_test[cont_feats]

scl = sklearn.preprocessing.StandardScaler()

train_cont_vals = pd.DataFrame(
    scl.fit_transform(train_cont_data.values),
    columns=train_cont_data.columns,
)
test_cont_vals = pd.DataFrame(
    scl.transform(test_cont_data.values),
    columns=train_cont_data.columns,
)

train_cont_data.head(5)

test_cont_data.head(5)

train_cont_vals.head(5)

test_cont_vals.head(5)

X = train_cont_vals.join(train_bin_enc)
X.fillna(0, inplace=True)

log.info(f"Total number of independent features before projection: {X.shape[1]}")

X_test = test_cont_vals.join(test_bin_enc)
X_test.fillna(0, inplace=True)

X.head(5)

X_test.head(5)

Y_train = df_train[[y_col]]

log_y_col = "log_y"

Y_train[log_y_col] = np.log(Y_train[y_col])

y_scl = sklearn.preprocessing.StandardScaler()

Y = pd.DataFrame(y_scl.fit_transform(Y_train[[log_y_col]].values), columns=Y_train[[log_y_col]].columns)

Y.head(5)

In [17]:
class MixedDataProcessor(sklearn.base.TransformerMixin):
    """
    """
    
    def __init__(self, categorical_columns, continuous_columns, **kwargs):
        """
        """
        self.categorical_columns = categorical_columns
        self.continuous_columns = continuous_columns
        self.enc = OrdinalEncoderExt(
            #freq_thresh=0.025
        )
        self.scl = sklearn.preprocessing.StandardScaler()
        self.y_scl = sklearn.preprocessing.StandardScaler()
        
        
    def fit(self, X, y=None):
        """
        """
        cat_feats = self.categorical_columns
        cont_feats = self.continuous_columns
        
        cont_data = X[cont_feats]

        scl = self.scl

        scl.fit(cont_data.values)
        
        cat_data = X[cat_feats].fillna("")

        enc = self.enc

        enc.fit(cat_data.values)
        
        if y is not None:
            y_scl = self.y_scl
            y = np.log(y)
            y_scl.fit(y)

        return 0 
    
    
    def transform(self, X, y=None):
        """
        """
        cat_feats = self.categorical_columns
        cont_feats = self.continuous_columns
        
        cont_data = X[cont_feats]

        scl = self.scl

        cont_vals = pd.DataFrame(
            scl.transform(cont_data.values),
            columns=cont_data.columns,
        )
        
        cat_data = X[cat_feats].fillna("")

        enc = self.enc

        cat_vals = (
            pd.DataFrame(
                enc.transform(cat_data.values), 
                columns=cat_data.columns,
            )
        )
        
        cols = [
            f"{cat_data.columns[i]}_{x}" 
                for i, col in enumerate(enc.categories_) for x in col
        ]
        
        bin_enc = pd.DataFrame()
        
        for j, cat in enumerate(cat_vals.columns):
            for i, col in enumerate(enc.categories_[j]):
                bin_enc[f"{cat}_{col}"] = cat_vals[cat].apply(lambda x: 1 if x==i else 0)
                
        X_p = cont_vals.join(bin_enc).fillna(0)
        
        if y is not None:
            y = np.log(y)
            y_p = self.y_scl.transform(y)
            ret_val = (X_p, y_p)
        else:
            ret_val = X_p

        return ret_val
    
    
    def fit_transform(self, X, y=None):
        """
        """
        self.fit(X, y=y)
        return self.transform(X, y=y)

In [18]:
preprocessor = MixedDataProcessor(cat_feats, cont_feats)
X, Y = preprocessor.fit_transform(X=df_train, y=df_train[[y_col]])

Y = pd.DataFrame(Y, columns=[y_col])

log.info(f"Number of columns in dataset after preprocessing: {len(X.columns)}")

X_test = preprocessor.transform(df_test.drop(["Id"], axis=1))

2020-12-01 09:25:59,217 - __main__ - INFO - Number of columns in dataset after preprocessing: 365


In [19]:
true_labels = pd.read_csv(os.path.join(data_dir, "true_submission.csv"))

true_labels[y_col] = (true_labels["SalePrice"])

true_labels[y_col] = preprocessor.y_scl.transform(np.log(true_labels[[y_col]]))

X = pd.concat([X, X_test], ignore_index=True)
Y = pd.concat([Y, true_labels[[y_col]]], ignore_index=True)

In [20]:
X.head(5)

Unnamed: 0,LotFrontage,LotArea,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,KitchenAbvGr,BedroomAbvGr,MSSubClass_120,MSSubClass_160,MSSubClass_180,MSSubClass_190,MSSubClass_20,MSSubClass_30,MSSubClass_40,MSSubClass_45,MSSubClass_50,MSSubClass_60,MSSubClass_70,MSSubClass_75,MSSubClass_80,MSSubClass_85,MSSubClass_90,MSSubClass_UNK,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,MSZoning_UNK,Street_Grvl,Street_Pave,Street_UNK,Alley_Grvl,Alley_Pave,Alley_UNK,LotShape_IR1,LotShape_IR2,LotShape_IR3,LotShape_Reg,LotShape_UNK,LandContour_Bnk,LandContour_HLS,LandContour_Low,LandContour_Lvl,LandContour_UNK,Utilities_AllPub,Utilities_NoSeWa,Utilities_UNK,LotConfig_Corner,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,LotConfig_UNK,LandSlope_Gtl,LandSlope_Mod,LandSlope_Sev,LandSlope_UNK,Neighborhood_Blmngtn,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_UNK,Neighborhood_Veenker,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,Condition1_RRNn,Condition1_UNK,Condition2_Artery,Condition2_Feedr,Condition2_Norm,Condition2_PosA,Condition2_PosN,Condition2_RRAe,Condition2_RRAn,Condition2_RRNn,Condition2_UNK,BldgType_1Fam,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,BldgType_UNK,HouseStyle_1.5Fin,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Fin,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,HouseStyle_UNK,OverallQual_1,OverallQual_10,OverallQual_2,OverallQual_3,OverallQual_4,OverallQual_5,OverallQual_6,OverallQual_7,OverallQual_8,OverallQual_9,OverallQual_UNK,OverallCond_1,OverallCond_2,OverallCond_3,OverallCond_4,OverallCond_5,OverallCond_6,OverallCond_7,OverallCond_8,OverallCond_9,OverallCond_UNK,RoofStyle_Flat,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofStyle_Shed,RoofStyle_UNK,RoofMatl_ClyTile,RoofMatl_CompShg,RoofMatl_Membran,RoofMatl_Metal,RoofMatl_Roll,RoofMatl_Tar&Grv,RoofMatl_UNK,RoofMatl_WdShake,RoofMatl_WdShngl,Exterior1st_AsbShng,Exterior1st_AsphShn,Exterior1st_BrkComm,Exterior1st_BrkFace,Exterior1st_CBlock,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_ImStucc,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stone,Exterior1st_Stucco,Exterior1st_UNK,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior2nd_AsbShng,Exterior2nd_AsphShn,Exterior2nd_Brk Cmn,Exterior2nd_BrkFace,Exterior2nd_CBlock,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Other,Exterior2nd_Plywood,Exterior2nd_Stone,Exterior2nd_Stucco,Exterior2nd_UNK,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,MasVnrType_BrkCmn,MasVnrType_BrkFace,MasVnrType_None,MasVnrType_Stone,MasVnrType_UNK,ExterQual_Ex,ExterQual_Fa,ExterQual_Gd,ExterQual_TA,ExterQual_UNK,ExterCond_Ex,ExterCond_Fa,ExterCond_Gd,ExterCond_Po,ExterCond_TA,ExterCond_UNK,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_UNK,Foundation_Wood,BsmtQual_Ex,BsmtQual_Fa,BsmtQual_Gd,BsmtQual_TA,BsmtQual_UNK,BsmtCond_Fa,BsmtCond_Gd,BsmtCond_Po,BsmtCond_TA,BsmtCond_UNK,BsmtExposure_Av,BsmtExposure_Gd,BsmtExposure_Mn,BsmtExposure_No,BsmtExposure_UNK,BsmtFinType1_ALQ,BsmtFinType1_BLQ,BsmtFinType1_GLQ,BsmtFinType1_LwQ,BsmtFinType1_Rec,BsmtFinType1_UNK,BsmtFinType1_Unf,BsmtFinType2_ALQ,BsmtFinType2_BLQ,BsmtFinType2_GLQ,BsmtFinType2_LwQ,BsmtFinType2_Rec,BsmtFinType2_UNK,BsmtFinType2_Unf,Heating_Floor,Heating_GasA,Heating_GasW,Heating_Grav,Heating_OthW,Heating_UNK,Heating_Wall,HeatingQC_Ex,HeatingQC_Fa,HeatingQC_Gd,HeatingQC_Po,HeatingQC_TA,HeatingQC_UNK,CentralAir_N,CentralAir_UNK,CentralAir_Y,Electrical_FuseA,Electrical_FuseF,Electrical_FuseP,Electrical_Mix,Electrical_SBrkr,Electrical_UNK,KitchenQual_Ex,KitchenQual_Fa,KitchenQual_Gd,KitchenQual_TA,KitchenQual_UNK,Functional_Maj1,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Sev,Functional_Typ,Functional_UNK,FireplaceQu_Ex,FireplaceQu_Fa,FireplaceQu_Gd,FireplaceQu_Po,FireplaceQu_TA,FireplaceQu_UNK,GarageType_2Types,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageType_UNK,GarageFinish_Fin,GarageFinish_RFn,GarageFinish_UNK,GarageFinish_Unf,GarageQual_Ex,GarageQual_Fa,GarageQual_Gd,GarageQual_Po,GarageQual_TA,GarageQual_UNK,GarageCond_Ex,GarageCond_Fa,GarageCond_Gd,GarageCond_Po,GarageCond_TA,GarageCond_UNK,PavedDrive_N,PavedDrive_P,PavedDrive_UNK,PavedDrive_Y,PoolQC_Ex,PoolQC_Fa,PoolQC_Gd,PoolQC_UNK,Fence_GdPrv,Fence_GdWo,Fence_MnPrv,Fence_MnWw,Fence_UNK,MiscFeature_Gar2,MiscFeature_Othr,MiscFeature_Shed,MiscFeature_TenC,MiscFeature_UNK,SaleType_COD,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_UNK,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleCondition_UNK
0,-0.208034,-0.207142,1.050994,0.878668,0.510015,0.575425,-0.288653,-0.944591,-0.459303,-0.793434,1.161852,-0.120242,0.370333,1.10781,-0.241061,0.789741,1.227585,0.91221,-0.951226,0.992426,0.311725,0.351,-0.752176,0.216503,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,-1.599111,0.138777,-0.211454,0.163779,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0
1,0.409895,-0.091886,0.156734,-0.429577,-0.572835,1.171992,-0.288653,-0.641228,0.466465,0.25714,-0.795163,-0.120242,-0.482512,-0.819964,3.948809,0.789741,-0.761621,-0.318683,0.600495,-0.101543,0.311725,-0.060731,1.626195,-0.704483,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,-0.48911,-0.614439,-0.211454,0.163779,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0
2,-0.084449,0.07348,0.984752,0.830215,0.322174,0.092907,-0.288653,-0.301643,-0.313369,-0.627826,1.189351,-0.120242,0.515013,1.10781,-0.241061,0.789741,1.227585,-0.318683,0.600495,0.911391,0.311725,0.631726,-0.752176,-0.070361,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,0.990891,0.138777,-0.211454,0.163779,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0
3,-0.414011,-0.096897,-1.863632,-0.720298,-0.572835,-0.499274,-0.288653,-0.06167,-0.687324,-0.521734,0.937276,-0.120242,0.383659,1.10781,-0.241061,-1.026041,-0.761621,0.296763,0.600495,0.789839,1.650307,0.790804,-0.752176,-0.176048,4.092524,-0.116339,-0.270208,-0.068692,-0.087688,-1.599111,-1.367655,-0.211454,0.163779,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0
4,0.574676,0.375148,0.951632,0.733308,1.360826,0.463568,-0.288653,-0.174865,0.19968,-0.045611,1.617877,-0.120242,1.299326,1.10781,-0.241061,0.789741,1.227585,1.527656,0.600495,0.870874,1.650307,1.698485,0.780197,0.56376,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,2.100892,0.138777,-0.211454,1.390023,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0


In [21]:
X_test.head(5)

Unnamed: 0,LotFrontage,LotArea,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,TotRmsAbvGrd,Fireplaces,GarageYrBlt,GarageCars,GarageArea,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,KitchenAbvGr,BedroomAbvGr,MSSubClass_120,MSSubClass_160,MSSubClass_180,MSSubClass_190,MSSubClass_20,MSSubClass_30,MSSubClass_40,MSSubClass_45,MSSubClass_50,MSSubClass_60,MSSubClass_70,MSSubClass_75,MSSubClass_80,MSSubClass_85,MSSubClass_90,MSSubClass_UNK,MSZoning_C (all),MSZoning_FV,MSZoning_RH,MSZoning_RL,MSZoning_RM,MSZoning_UNK,Street_Grvl,Street_Pave,Street_UNK,Alley_Grvl,Alley_Pave,Alley_UNK,LotShape_IR1,LotShape_IR2,LotShape_IR3,LotShape_Reg,LotShape_UNK,LandContour_Bnk,LandContour_HLS,LandContour_Low,LandContour_Lvl,LandContour_UNK,Utilities_AllPub,Utilities_NoSeWa,Utilities_UNK,LotConfig_Corner,LotConfig_CulDSac,LotConfig_FR2,LotConfig_FR3,LotConfig_Inside,LotConfig_UNK,LandSlope_Gtl,LandSlope_Mod,LandSlope_Sev,LandSlope_UNK,Neighborhood_Blmngtn,Neighborhood_Blueste,Neighborhood_BrDale,Neighborhood_BrkSide,Neighborhood_ClearCr,Neighborhood_CollgCr,Neighborhood_Crawfor,Neighborhood_Edwards,Neighborhood_Gilbert,Neighborhood_IDOTRR,Neighborhood_MeadowV,Neighborhood_Mitchel,Neighborhood_NAmes,Neighborhood_NPkVill,Neighborhood_NWAmes,Neighborhood_NoRidge,Neighborhood_NridgHt,Neighborhood_OldTown,Neighborhood_SWISU,Neighborhood_Sawyer,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_UNK,Neighborhood_Veenker,Condition1_Artery,Condition1_Feedr,Condition1_Norm,Condition1_PosA,Condition1_PosN,Condition1_RRAe,Condition1_RRAn,Condition1_RRNe,Condition1_RRNn,Condition1_UNK,Condition2_Artery,Condition2_Feedr,Condition2_Norm,Condition2_PosA,Condition2_PosN,Condition2_RRAe,Condition2_RRAn,Condition2_RRNn,Condition2_UNK,BldgType_1Fam,BldgType_2fmCon,BldgType_Duplex,BldgType_Twnhs,BldgType_TwnhsE,BldgType_UNK,HouseStyle_1.5Fin,HouseStyle_1.5Unf,HouseStyle_1Story,HouseStyle_2.5Fin,HouseStyle_2.5Unf,HouseStyle_2Story,HouseStyle_SFoyer,HouseStyle_SLvl,HouseStyle_UNK,OverallQual_1,OverallQual_10,OverallQual_2,OverallQual_3,OverallQual_4,OverallQual_5,OverallQual_6,OverallQual_7,OverallQual_8,OverallQual_9,OverallQual_UNK,OverallCond_1,OverallCond_2,OverallCond_3,OverallCond_4,OverallCond_5,OverallCond_6,OverallCond_7,OverallCond_8,OverallCond_9,OverallCond_UNK,RoofStyle_Flat,RoofStyle_Gable,RoofStyle_Gambrel,RoofStyle_Hip,RoofStyle_Mansard,RoofStyle_Shed,RoofStyle_UNK,RoofMatl_ClyTile,RoofMatl_CompShg,RoofMatl_Membran,RoofMatl_Metal,RoofMatl_Roll,RoofMatl_Tar&Grv,RoofMatl_UNK,RoofMatl_WdShake,RoofMatl_WdShngl,Exterior1st_AsbShng,Exterior1st_AsphShn,Exterior1st_BrkComm,Exterior1st_BrkFace,Exterior1st_CBlock,Exterior1st_CemntBd,Exterior1st_HdBoard,Exterior1st_ImStucc,Exterior1st_MetalSd,Exterior1st_Plywood,Exterior1st_Stone,Exterior1st_Stucco,Exterior1st_UNK,Exterior1st_VinylSd,Exterior1st_Wd Sdng,Exterior1st_WdShing,Exterior2nd_AsbShng,Exterior2nd_AsphShn,Exterior2nd_Brk Cmn,Exterior2nd_BrkFace,Exterior2nd_CBlock,Exterior2nd_CmentBd,Exterior2nd_HdBoard,Exterior2nd_ImStucc,Exterior2nd_MetalSd,Exterior2nd_Other,Exterior2nd_Plywood,Exterior2nd_Stone,Exterior2nd_Stucco,Exterior2nd_UNK,Exterior2nd_VinylSd,Exterior2nd_Wd Sdng,Exterior2nd_Wd Shng,MasVnrType_BrkCmn,MasVnrType_BrkFace,MasVnrType_None,MasVnrType_Stone,MasVnrType_UNK,ExterQual_Ex,ExterQual_Fa,ExterQual_Gd,ExterQual_TA,ExterQual_UNK,ExterCond_Ex,ExterCond_Fa,ExterCond_Gd,ExterCond_Po,ExterCond_TA,ExterCond_UNK,Foundation_BrkTil,Foundation_CBlock,Foundation_PConc,Foundation_Slab,Foundation_Stone,Foundation_UNK,Foundation_Wood,BsmtQual_Ex,BsmtQual_Fa,BsmtQual_Gd,BsmtQual_TA,BsmtQual_UNK,BsmtCond_Fa,BsmtCond_Gd,BsmtCond_Po,BsmtCond_TA,BsmtCond_UNK,BsmtExposure_Av,BsmtExposure_Gd,BsmtExposure_Mn,BsmtExposure_No,BsmtExposure_UNK,BsmtFinType1_ALQ,BsmtFinType1_BLQ,BsmtFinType1_GLQ,BsmtFinType1_LwQ,BsmtFinType1_Rec,BsmtFinType1_UNK,BsmtFinType1_Unf,BsmtFinType2_ALQ,BsmtFinType2_BLQ,BsmtFinType2_GLQ,BsmtFinType2_LwQ,BsmtFinType2_Rec,BsmtFinType2_UNK,BsmtFinType2_Unf,Heating_Floor,Heating_GasA,Heating_GasW,Heating_Grav,Heating_OthW,Heating_UNK,Heating_Wall,HeatingQC_Ex,HeatingQC_Fa,HeatingQC_Gd,HeatingQC_Po,HeatingQC_TA,HeatingQC_UNK,CentralAir_N,CentralAir_UNK,CentralAir_Y,Electrical_FuseA,Electrical_FuseF,Electrical_FuseP,Electrical_Mix,Electrical_SBrkr,Electrical_UNK,KitchenQual_Ex,KitchenQual_Fa,KitchenQual_Gd,KitchenQual_TA,KitchenQual_UNK,Functional_Maj1,Functional_Maj2,Functional_Min1,Functional_Min2,Functional_Mod,Functional_Sev,Functional_Typ,Functional_UNK,FireplaceQu_Ex,FireplaceQu_Fa,FireplaceQu_Gd,FireplaceQu_Po,FireplaceQu_TA,FireplaceQu_UNK,GarageType_2Types,GarageType_Attchd,GarageType_Basment,GarageType_BuiltIn,GarageType_CarPort,GarageType_Detchd,GarageType_UNK,GarageFinish_Fin,GarageFinish_RFn,GarageFinish_UNK,GarageFinish_Unf,GarageQual_Ex,GarageQual_Fa,GarageQual_Gd,GarageQual_Po,GarageQual_TA,GarageQual_UNK,GarageCond_Ex,GarageCond_Fa,GarageCond_Gd,GarageCond_Po,GarageCond_TA,GarageCond_UNK,PavedDrive_N,PavedDrive_P,PavedDrive_UNK,PavedDrive_Y,PoolQC_Ex,PoolQC_Fa,PoolQC_Gd,PoolQC_UNK,Fence_GdPrv,Fence_GdWo,Fence_MnPrv,Fence_MnWw,Fence_UNK,MiscFeature_Gar2,MiscFeature_Othr,MiscFeature_Shed,MiscFeature_TenC,MiscFeature_UNK,SaleType_COD,SaleType_CWD,SaleType_Con,SaleType_ConLD,SaleType_ConLI,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_UNK,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial,SaleCondition_UNK
0,0.409895,0.110763,-0.340077,-1.15638,-0.572835,0.053428,0.604293,-0.672923,-0.400017,-0.689929,-0.795163,-0.120242,-1.179256,-0.819964,-0.241061,-1.026041,-0.761621,-0.93413,-0.951226,-0.709304,-1.026858,1.202536,0.365179,-0.704483,-0.359325,-0.116339,1.882709,-0.068692,-0.087688,-0.11911,1.64521,-0.211454,-1.062465,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0
1,0.45109,0.37585,-0.43944,-1.30174,0.023838,1.051363,-0.288653,-0.365032,0.619239,0.430511,-0.795163,-0.120242,-0.354966,-0.819964,-0.241061,-1.026041,1.227585,-0.318683,-0.951226,-0.830856,-1.026858,-0.753188,2.3844,-0.16095,-0.359325,-0.116339,-0.270208,-0.068692,25.116309,-0.11911,1.64521,-0.211454,0.163779,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0
2,0.162723,0.332053,0.852269,0.6364,-0.572835,0.761852,-0.288653,-0.974021,-0.295127,-0.607125,0.811239,-0.120242,0.216136,-0.819964,-0.241061,0.789741,1.227585,-0.318683,0.600495,0.749322,0.311725,0.042202,0.939819,-0.191147,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,-1.229111,1.64521,-0.211454,0.163779,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0
3,0.327504,-0.054002,0.88539,0.6364,-0.46234,0.347326,-0.288653,-0.550672,-0.299687,-0.6123,0.758532,-0.120242,0.168544,-0.819964,-0.241061,0.789741,1.227585,0.296763,0.600495,0.789839,0.311725,-0.013943,2.121024,-0.16095,-0.359325,-0.116339,-0.270208,-0.068692,-0.087688,-0.11911,1.64521,-0.211454,0.163779,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0
4,-1.11433,-0.552407,0.686666,0.345679,-0.572835,-0.39619,-0.288653,1.018211,0.507509,0.303718,-0.795163,-0.120242,-0.448246,-0.819964,-0.241061,0.789741,-0.761621,-0.93413,-0.951226,0.546735,0.311725,0.154492,-0.752176,0.533564,-0.359325,-0.116339,2.313293,-0.068692,-0.087688,-1.969111,1.64521,-0.211454,-1.062465,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,1,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0


In [22]:
n_components = [150]
percentiles = [50, 80, 99, 100]
f_regression = sklearn.feature_selection.f_regression

projs = [
    sklearn.decomposition.PCA(
        n_components=n_comp,
        svd_solver='randomized',
        whiten=True,
    )
    for n_comp in n_components
]

projs.extend(
    [
        sklearn.cluster.FeatureAgglomeration(
            n_clusters=n_comp,
        ) 
        for n_comp in n_components
    ]
)

projs.extend(
    [
        sklearn.feature_selection.SelectPercentile(
            f_regression, 
            percentile=per,
        )
        for per in percentiles
    ]
)

fig = go.Figure()

hist = go.Histogram(
    x=Y_train[y_col].values, 
    xbins=dict(
        start=0,
        end=Y_train[y_col].max(),
        size=10000,
    ),
)
fig.add_trace(hist)

fig.update_layout(
    go.Layout(
        xaxis = dict(
            rangeslider = {'visible': False},
        ),
    )
)
plotly.offline.iplot(fig)

fig = go.Figure()

hist = go.Histogram(
    x=Y_train[log_y_col].values, 
    xbins=dict(
        start=0,
        end=Y_train[log_y_col].max(),
        size=0.10,
    ),
)
fig.add_trace(hist)

fig.update_layout(
    go.Layout(
        xaxis = dict(
            rangeslider = {'visible': False},
        ),
    )
)
plotly.offline.iplot(fig)

In [23]:
Z = X.join(Y)

Z_corr = Z.corr()

In [24]:
#Z_corr

for col in Z.columns[:2]:
    if col not in (y_col, log_y_col):
        
        cor_x = np.linspace(Z[col].min(), Z[col].max(), 3)
        s = Z_corr.at[log_y_col, col]
        
        fig = go.Figure()
        
        trace = go.Scatter(x=Z[col], y=Z[log_y_col], mode="markers", text=Z.index)
        trace_cor = go.Scatter(x=cor_x, y=s*cor_x, mode="lines")
        
        fig.add_trace(trace)
        fig.add_trace(trace_cor)

        fig.update_layout(
            width=1400,
            height=1200,
            title=dict(text=f"{col} vs. {y_col}"),
            xaxis_title=dict(text=col),
            yaxis_title=dict(text=y_col),
        )
        plotly.offline.iplot(fig)

In [25]:
tf.keras.backend.set_floatx('float64')

class TFModel(tf.keras.Model):
    """
    """
    def __init__(self,
            batch_size=None,
            epochs=3,
            units0=40,
            units1=5,
    ):
        """
        """
        super(TFModel, self).__init__()

        self.batch_size = batch_size
        self.epochs = epochs
        out_dim = 1
              
        self._dense0 = tf.keras.layers.Dense(
            units0,
            activation=tf.nn.relu,
            name="dense_0",
        )
        
        self._dense1 = tf.keras.layers.Dense(
            units1,
            activation=tf.nn.relu,
            name="dense_0",
        )
        
        self._densef = tf.keras.layers.Dense(
            out_dim,
            #activation=tf.keras.activations.linear,
            activation=tf.nn.relu,
            name="dense_f",
        )
        
        self._optimizer = tf.keras.optimizers.Adam(
                            learning_rate=1e-4
        )
        self._metrics = [tf.keras.metrics.MeanAbsoluteError()]
        self._loss = tf.keras.losses.MeanSquaredError()
        
        self.compile(
            optimizer=self._optimizer,
            loss=self._loss,
            metrics=self._metrics,
        )
        

    def call(self, inputs):       
        """
        """
        x = self._dense0(inputs)
        x = self._dense1(x)
        out = self._densef(x)
        return out
    
    
    def fit(self, X, Y, validation_data=None):
        """
        """
        callbacks=[
            tf.keras.callbacks.EarlyStopping(
                monitor="loss",
                min_delta=1e-5,
                patience=20,
                #baseline=1,
                restore_best_weights=False,
            )
        ]
        return super().fit(
            x=X,
            y=Y,
            epochs=self.epochs,
            callbacks=callbacks,
            validation_data=validation_data,
            verbose=0,
        )
    
    
    def get_params(self, **kwargs):
        """
        """
        return self.__dict__

In [26]:
models = []

models.extend(
    [
        sklearn.pipeline.Pipeline([
            ("projector", proj), 
            ("model", sklearn.linear_model.LinearRegression()),
        ])
        for proj in projs
    ]
)

tf_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        epochs=[80, 180],
        units0=[50, 500, 750],
        units1=[10, 100, 200],
    )
)
models.extend(
    [
        sklearn.pipeline.Pipeline([
            ("projector", proj), 
            ("model", TFModel(**params))
        ])
        for params in tf_param_grid for proj in projs
    ]
)

xgb_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        #n_estimators=[100, 110, 150],
        max_depth=[5, 6, 8],
        learning_rate=[None, 1e-4, 1e-2],
        booster=["gbtree", "gblinear", "dart"],
        reg_alpha=[None, 1e-5, 1e-3],
        reg_lambda=[None, 1e-5, 1e-3],
    )
)
models.extend(
    [xgboost.XGBRegressor(**params) for params in xgb_param_grid]
)

lsvr_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        C=[1, 2, 0.5],
    )
)
models.extend(
    [sklearn.svm.LinearSVR(**params) for params in lsvr_param_grid]
)

svr_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        C=[1, 2, 0.5],
        kernel=["linear", "poly", "rbf", "sigmoid"],
        gamma=["scale", 0.01],
    )
)
models.extend(
    [sklearn.svm.SVR(**params) for params in svr_param_grid]
)
        
kernel_ridge_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        alpha=[1, 2, 1e-1],
        gamma=[None, 1, 0.1],
    )
)
models.extend(
    [sklearn.kernel_ridge.KernelRidge(**params) 
        for params in kernel_ridge_param_grid
    ]
)
        
elastic_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        alpha=[2, 1, 0.5],
        l1_ratio=[0.5, 1, 0.1],
    )
)
models.extend(
    [sklearn.linear_model.ElasticNet(**params) 
        for params in elastic_param_grid
    ]
)

gauss_process_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        kernel=[
            None, 
            sklearn.gaussian_process.kernels.Matern(),
            sklearn.gaussian_process.kernels.Matern(nu=0.5),
            sklearn.gaussian_process.kernels.Matern(nu=2.5),
            sklearn.gaussian_process.kernels.Matern(nu=np.inf),
            sklearn.gaussian_process.kernels.DotProduct(),
            sklearn.gaussian_process.kernels.RationalQuadratic(),
        ],
    )
)
models.extend(
    [
        sklearn.pipeline.Pipeline([
            ("projector", proj),
            ("model", 
                sklearn.gaussian_process.GaussianProcessRegressor(
                    **params
                )
            ),
        ])
        for params in gauss_process_param_grid for proj in projs
    ]
)

elastic_cv_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        l1_ratio=[0.5, 0.1, 0.7, 0.9, 0.95, 1],
    )
)
models.extend(
    [sklearn.linear_model.ElasticNetCV(**params) 
        for params in elastic_cv_param_grid
    ]
)

gradient_boosting_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        n_estimators=[100],
        criterion=["friedman_mse", "mse", "mae"],
        max_depth=[3, 5],
        max_features=[
            "auto",
            "sqrt", "log2"],
    )
)
models.extend(
    [sklearn.ensemble.GradientBoostingRegressor(**params)
        for params in gradient_boosting_param_grid
    ]
)

random_forest_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        n_estimators=[100],
        criterion=["mse", "mae"],
        max_depth=[None, 5],
        max_features=[
            "auto",
            "sqrt", "log2"],
    )
)
models.extend(
    [sklearn.ensemble.RandomForestRegressor(**params)
        for params in random_forest_param_grid
    ]
)
    
ridge_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        alpha=[1, 2, 0.5],
    )
)
models.extend(
    [sklearn.linear_model.Ridge(**params)
         for params in ridge_param_grid
    ]
)

bayes_adr_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        alpha_1=[1e-6, 1e-5],
        alpha_2=[1e-6, 1e-5],
        lambda_1=[1e-6, 1e-5],
        lambda_2=[1e-6, 1e-5],
    )
)
models.extend(
    [sklearn.linear_model.ARDRegression(**params)
        for params in bayes_adr_param_grid
    ]
)

sgd_linear_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        eta0=[0.01, 0.005],
        power_t=[0.25, 0.2],
    )
)
models.extend(
    [sklearn.linear_model.SGDRegressor(**params)
        for params in sgd_linear_param_grid
    ]
)

ada_boost_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        base_estimator=[
            None,
            sklearn.tree.DecisionTreeRegressor(max_depth=4),
        ],
        loss=["linear", "square", "exponential"],
    )
)
models.extend(
    [sklearn.ensemble.AdaBoostRegressor(**params)
        for params in ada_boost_param_grid
    ]
)

bagging_param_grid = sklearn.model_selection.ParameterGrid(
    dict(
        n_estimators=[10, 20],
        max_features=[1.0, 0.2],
        bootstrap=[True, False],
    )
)
models.extend(
    [sklearn.ensemble.BaggingRegressor(**params)
        for params in bagging_param_grid
    ]
)

In [27]:
import warnings
warnings.filterwarnings("ignore")

pred_cols = [f"model_{i:03d}" for i in range(len(models))]
    
scores = []

kf_cols = ["r2", "mse", "test_mse"]

for model in models:
    t1 = time.perf_counter()
    row = []
    
    X_train_split, X_test_split, y_train_split, y_test_split = (
        sklearn.model_selection.train_test_split(
            X, 
            Y,
            test_size=0.2, 
            random_state=42,
        )
    )
    
    validation_data = (X_test_split, y_test_split)
    
    model.fit(X_train_split, y_train_split)       
    pred = model.predict(X)
    r2 = sklearn.metrics.r2_score(Y[y_col], pred)
    mse = sklearn.metrics.mean_squared_error(Y[y_col], pred)

    test_mse = sklearn.metrics.mean_squared_error(
        y_test_split, 
        model.predict(X_test_split),
    )
    
    row.append(r2)
    row.append(mse)
    row.append(test_mse)
    
    t2 = time.perf_counter()
    row.append(t2-t1)
    scores.append(row)
    log.info(f"{str(model)[:15]} -- time elapsed: {t2-t1:5.3f}")

2020-12-01 09:26:02,379 - __main__ - INFO - Pipeline(steps= -- time elapsed: 0.604
2020-12-01 09:26:02,545 - __main__ - INFO - Pipeline(steps= -- time elapsed: 0.161
2020-12-01 09:26:02,603 - __main__ - INFO - Pipeline(steps= -- time elapsed: 0.055
2020-12-01 09:26:02,682 - __main__ - INFO - Pipeline(steps= -- time elapsed: 0.077
2020-12-01 09:26:02,787 - __main__ - INFO - Pipeline(steps= -- time elapsed: 0.103
2020-12-01 09:26:02,889 - __main__ - INFO - Pipeline(steps= -- time elapsed: 0.099
2020-12-01 09:26:06,523 - __main__ - INFO - Pipeline(steps= -- time elapsed: 3.633
2020-12-01 09:26:10,257 - __main__ - INFO - Pipeline(steps= -- time elapsed: 3.732
2020-12-01 09:26:14,092 - __main__ - INFO - Pipeline(steps= -- time elapsed: 3.833
2020-12-01 09:26:18,148 - __main__ - INFO - Pipeline(steps= -- time elapsed: 4.053
2020-12-01 09:26:22,519 - __main__ - INFO - Pipeline(steps= -- time elapsed: 4.369
2020-12-01 09:26:26,859 - __main__ - INFO - Pipeline(steps= -- time elapsed: 4.338
2020

Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:50,208 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.238


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:50,452 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.243


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:50,689 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.236


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:50,946 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.255


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:51,226 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.278


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:51,487 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.260


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:51,748 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.259


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:52,001 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.251


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:52,249 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.247


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:52,527 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.276


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:52,774 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.245


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:53,049 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.274


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:53,301 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.250


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:53,570 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.267


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:53,823 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.251


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:54,083 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.259


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:54,330 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.245


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:54,588 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.256


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:54,847 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.258


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:55,104 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.255


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:55,394 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.289


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:55,657 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.261


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:55,896 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.238


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:56,126 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.229
2020-12-01 09:48:56,347 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.220


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:56,567 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.218


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:56,788 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.220


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:57,028 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.238
2020-12-01 09:48:57,255 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.225


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:57,484 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.227


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:57,713 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.228


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:57,943 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.228
2020-12-01 09:48:58,171 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.226


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:58,396 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.224


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:58,621 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.223


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:58,848 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.225


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:59,077 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.228


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:59,314 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.236


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:48:59,542 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.226
2020-12-01 09:48:59,769 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.225


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:00,004 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.234


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:00,241 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.236
2020-12-01 09:49:00,467 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.224


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:00,705 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.237


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:00,959 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.252


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:01,223 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.262


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:01,485 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.260


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:01,758 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.272


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:01,987 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.228


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:02,232 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.243


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:02,484 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.251


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:02,761 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.275


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:02,990 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.228


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:03,226 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.235


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:03,462 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.234


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:03,702 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.239
2020-12-01 09:49:03,929 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.226


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:04,159 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.228


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:04,388 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.227


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:04,623 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.234


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:04,856 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.231


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:05,118 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.261


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:05,397 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.277


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:05,683 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.283


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:05,964 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.279


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:06,226 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.261


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:06,484 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.256


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:06,718 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.233


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:06,968 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.248


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:07,217 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.247


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:07,476 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.258
2020-12-01 09:49:07,701 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.223


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:07,979 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.276


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:08,237 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.257


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:08,485 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.247
2020-12-01 09:49:08,713 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.225


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:08,964 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.250


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:09,204 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.238
2020-12-01 09:49:09,430 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.225


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:09,694 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.262


Parameters: { max_depth } might not be used.

  This may not be accurate due to some parameters are only used in language bindings but
  passed down to XGBoost core.  Or some parameters are not used but slip through this
  verification. Please open an issue if you find above cases.




2020-12-01 09:49:09,942 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.247
2020-12-01 09:49:10,650 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.705
2020-12-01 09:49:11,383 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.731
2020-12-01 09:49:12,138 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.753
2020-12-01 09:49:12,854 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.714
2020-12-01 09:49:13,587 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.731
2020-12-01 09:49:14,285 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.696
2020-12-01 09:49:14,986 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.699
2020-12-01 09:49:15,685 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.697
2020-12-01 09:49:16,379 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.692
2020-12-01 09:49:17,276 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.895
2020-12-01 09:49:18,242 - __main__ - INFO - XGBRegressor(ba -- time elapsed: 0.963
2020

In [28]:
cv_df = pd.DataFrame(scores, index=pred_cols, columns=kf_cols+["time"])

cv_df["name"] = [str(model) for model in models]
cv_df['params'] = [model.get_params() for model in models]

In [29]:
cv_df.head(5)

Unnamed: 0,r2,mse,test_mse,time,name,params
model_000,0.9050207,0.09911438,0.1041053,0.603967,"Pipeline(steps=[('projector',\n PCA(n_components=150, svd_solver='randomized', whiten=True)),\n ('model', LinearRegression())])","{'memory': None, 'steps': [('projector', PCA(n_components=150, svd_solver='randomized', whiten=True)), ('model', LinearRegression())], 'verbose': False, 'pr..."
model_001,0.9040964,0.1000789,0.1041714,0.161306,"Pipeline(steps=[('projector', FeatureAgglomeration(n_clusters=150)),\n ('model', LinearRegression())])","{'memory': None, 'steps': [('projector', FeatureAgglomeration(n_clusters=150)), ('model', LinearRegression())], 'verbose': False, 'projector': FeatureAgglom..."
model_002,-2.394983e+16,2.499252e+16,1.249198e+17,0.055441,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=50,\n score_func=<function f_regression at 0x7f...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=50,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model', ..."
model_003,-1.282089e+18,1.337906e+18,6.68724e+18,0.077126,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=80,\n score_func=<function f_regression at 0x7f...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=80,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model', ..."
model_004,-1.903006e+17,1.985856e+17,9.925879e+17,0.103241,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=99,\n score_func=<function f_regression at 0x7f...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=99,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model', ..."


In [30]:
cv_df.sort_values(by=['r2'], ascending=False).head(20)

Unnamed: 0,r2,mse,test_mse,time,name,params
model_417,0.982488,0.018275,0.091337,33.448176,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=80,\n score_func=<function f_regression at 0x7f...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=80,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model', ..."
model_418,0.98219,0.018586,0.092892,17.255182,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=99,\n score_func=<function f_regression at 0x7f...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=99,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model', ..."
model_419,0.982188,0.018587,0.092899,11.944749,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=100,\n score_func=<function f_regression at 0x7...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=100,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model',..."
model_411,0.981802,0.018991,0.094916,28.576184,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=80,\n score_func=<function f_regression at 0x7f...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=80,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model', ..."
model_416,0.98168,0.019117,0.095536,36.686795,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=50,\n score_func=<function f_regression at 0x7f...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=50,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model', ..."
model_412,0.981494,0.019312,0.09652,33.5371,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=99,\n score_func=<function f_regression at 0x7f...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=99,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model', ..."
model_441,0.981072,0.019752,0.098719,22.354817,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=80,\n score_func=<function f_regression at 0x7f...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=80,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model', ..."
model_442,0.980555,0.020291,0.101416,39.766891,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=99,\n score_func=<function f_regression at 0x7f...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=99,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model', ..."
model_443,0.980551,0.020295,0.101438,24.354049,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=100,\n score_func=<function f_regression at 0x7...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=100,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model',..."
model_423,0.980495,0.020354,0.101731,16.603895,"Pipeline(steps=[('projector',\n SelectPercentile(percentile=80,\n score_func=<function f_regression at 0x7f...","{'memory': None, 'steps': [('projector', SelectPercentile(percentile=80,  score_func=<function f_regression at 0x7fcfaac44a60>)), ('model', ..."


In [31]:
model_name = cv_df.sort_values(by=['r2'], ascending=False).index[0]
model = models[int(model_name[-3:])]

In [40]:
results = pd.DataFrame({"Id": df_test["Id"], "SalePrice": model.predict(X_test).reshape(-1)})

In [33]:
results_fn = os.path.join(data_dir, "results.csv")
#results.to_csv(results_fn, index=False)

In [41]:
try:
    val = np.sqrt(
        sklearn.metrics.mean_squared_error(
            true_labels[y_col],
            results[y_col],
        )
    )
    log.info(f"RMSE : {val}")
except ValueError as e:
    log.error(e)

2020-12-01 10:41:40,836 - __main__ - INFO - RMSE : 1586663070.1612337


In [42]:
for i in range(len(models)):
    model_name = cv_df.sort_values(by=['r2'], ascending=False).index[i]
    model = models[int(model_name[-3:])]
    results = pd.DataFrame({"Id": df_test["Id"], "SalePrice": model.predict(X_test).reshape((-1))})
    try:
        #val = np.sqrt(
        #    sklearn.metrics.mean_squared_error(
        #        np.log(true_labels[y_col]),
        #        preprocessor.y_scl.inverse_transform(results[y_col]),
        #    )
        #)
        val = np.sqrt(
            sklearn.metrics.mean_squared_error(
                true_labels[y_col],
                results[y_col],
            )
        )
        log.info(f"{i} RMSE of log: {val}")
    except ValueError as e:
        log.error(e)

2020-12-01 10:41:48,242 - __main__ - INFO - 0 RMSE of log: 0.11689443044113539
2020-12-01 10:41:48,829 - __main__ - INFO - 1 RMSE of log: 0.11894942358626381
2020-12-01 10:41:49,565 - __main__ - INFO - 2 RMSE of log: 0.11895881456535276
2020-12-01 10:41:50,051 - __main__ - INFO - 3 RMSE of log: 0.118205528235023
2020-12-01 10:41:50,339 - __main__ - INFO - 4 RMSE of log: 0.12084302183151668
2020-12-01 10:41:50,964 - __main__ - INFO - 5 RMSE of log: 0.12062346058280868
2020-12-01 10:41:51,433 - __main__ - INFO - 6 RMSE of log: 0.12138795979087635
2020-12-01 10:41:51,999 - __main__ - INFO - 7 RMSE of log: 0.12441062681036523
2020-12-01 10:41:52,613 - __main__ - INFO - 8 RMSE of log: 0.1244291528102259
2020-12-01 10:41:53,107 - __main__ - INFO - 9 RMSE of log: 0.12304104802578186
2020-12-01 10:41:53,356 - __main__ - INFO - 10 RMSE of log: 0.12663297790026323
2020-12-01 10:41:53,936 - __main__ - INFO - 11 RMSE of log: 0.12607632456370835
2020-12-01 10:41:54,658 - __main__ - INFO - 12 RMSE o

In [36]:
results

Unnamed: 0,Id,SalePrice
0,1461,-0.857330
1,1462,-0.074982
2,1463,0.152191
3,1464,0.407715
4,1465,0.548889
...,...,...
1454,2915,-1.662903
1455,2916,-1.854095
1456,2917,-0.197601
1457,2918,-0.817932
