# House Prices Prediction

In [1]:
import numpy as np
import pandas as pd
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
import seaborn as sns
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.ensemble import IsolationForest
from sklearn.ensemble import StackingRegressor

from scipy.stats import boxcox
from scipy.special import boxcox1p

import xgboost as xgb
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.cross_decomposition import PLSRegression
from catboost import CatBoostRegressor
from vecstack import stacking
from sklearn import linear_model


from sklearn.model_selection import RandomizedSearchCV

/kaggle/input/house-prices-advanced-regression-techniques/sample_submission.csv
/kaggle/input/house-prices-advanced-regression-techniques/data_description.txt
/kaggle/input/house-prices-advanced-regression-techniques/train.csv
/kaggle/input/house-prices-advanced-regression-techniques/test.csv


## Helper Functions

In [2]:
def train_regressor(model):
    reg = model
    reg.fit(X_train, Y_train)
    y_pred_reg= reg.predict(X_test)

    print(np.sqrt(metrics.mean_squared_error(np.log(y_test), np.log(y_pred_reg))))
    
def three_model(m1, m2, m3):
    model_1 = m1
    model_2 = m2
    model_3 = m3

    # training all the model on the training dataset
    model_1.fit(X_train, Y_train)
    model_2.fit(X_train, Y_train)
    model_3.fit(X_train, Y_train)

    # predicting the output on the validation dataset
    pred_1 = model_1.predict(X_test)
    pred_2 = model_2.predict(X_test)
    pred_3 = model_3.predict(X_test)
    
    # final prediction after averaging on the prediction of all 3 models
    pred_final = (pred_1+pred_2+pred_3)/3.0
    
    print(np.sqrt(metrics.mean_squared_error(np.log(y_test), np.log(pred_final))))
    
def stack_three_model(m1, m2, m3):
#     model_1 = m1
#     model_2 = m2
#     model_3 = m3
    
#     all_models = [model_1, model_2, model_3]
    
#     s_train, s_test = stacking(all_models, X_train, Y_train,
#                            X_test, regression=True, n_folds=4)
    
#     final_model = model_1
#     final_model.fit(s_train, y_train)

    estimators = [('model1', m1), ('model2', m2)]
    
    reg = StackingRegressor(estimators=estimators, final_estimator=m3)
    reg.fit(X_train, Y_train)
    pred_final = reg.predict(X_test)
    
    print(np.sqrt(metrics.mean_squared_error(np.log(y_test), np.log(pred_final))))
    
def get_output(model):
    reg = model
    reg.fit(x_train, y_train)
    y_pred_reg_final= reg.predict(test)
    
    output = pd.DataFrame({"Id": test['Id'],"SalePrice": y_pred_reg_final})
    
    output.to_csv("submission.csv", index=False)
    
def get_output_three_model(m1, m2, m3):
    model_1 = m1
    model_2 = m2
    model_3 = m3

    # training all the model on the training dataset
    model_1.fit(x_train, y_train)
    model_2.fit(x_train, y_train)
    model_3.fit(x_train, y_train)

    # predicting the output on the validation dataset
    pred_1 = model_1.predict(test)
    pred_2 = model_2.predict(test)
    pred_3 = model_3.predict(test)

    # final prediction after averaging on the prediction of all 3 models
    pred_final = (pred_1+pred_2+pred_3)/3.0
    
    output = pd.DataFrame({"Id": Id,"SalePrice": pred_final})
    output.to_csv("submission.csv", index=False)

In [3]:
le = LabelEncoder()
def labelencode(table):
    for col in table.columns:
        if table[col].dtypes == object:
            table[col]= le.fit_transform(table[col])
    return table

In [4]:
def missing (df1):
    missing_number = df1.isnull().sum().sort_values(ascending=False)
    missing_percent = ((df1.isnull().sum()/df1.isnull().count())*100).sort_values(ascending=False)
    missing_values = pd.concat([missing_number, missing_percent], axis=1, keys=['Missing_Number', 'Missing_Percent'])
    
    return missing_values
    
def dropcol(df1):
    for col in df1.columns:
        if df1[col].isnull().mean()*100>40:
            df1.drop(col,axis=1,inplace=True)
            test.drop(col,axis=1,inplace=True)

In [5]:
def impute_knn(df):
    ttn = df.select_dtypes(include=[np.number])
    ttc = df.select_dtypes(exclude=[np.number])

    cols_nan = ttn.columns[ttn.isna().any()].tolist()         # columns w/ nan 
    cols_no_nan = ttn.columns.difference(cols_nan).values     # columns w/n nan

    for col in cols_nan:
        imp_test = ttn[ttn[col].isna()]   # indicies which have missing data will become our test set
        imp_train = ttn.dropna()          # all indicies which which have no missing data 
        model = KNeighborsRegressor(n_neighbors=5)  # KNR Unsupervised Approach
        knr = model.fit(imp_train[cols_no_nan], imp_train[col])
        ttn.loc[ttn[col].isna(), (col)] = knr.predict(imp_test[cols_no_nan])
    
    for col in df.columns:
        if col in ttn.columns:
            df[col] = ttn[col]
        elif col in ttc.columns:
            df[col] = ttc[col]
    
    return df

## Dataset

In [6]:
train = pd.read_csv("../input/house-prices-advanced-regression-techniques/train.csv")

test = pd.read_csv("../input/house-prices-advanced-regression-techniques/test.csv")

In [7]:
train.columns

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2', 'BldgType',
       'HouseStyle', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd',
       'RoofStyle', 'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType',
       'MasVnrArea', 'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual',
       'BsmtCond', 'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1',
       'BsmtFinType2', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating',
       'HeatingQC', 'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF',
       'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageYrBlt', 'GarageFinish', 'GarageCars', 'GarageArea', 'GarageQual',
       'GarageCond', 'PavedDrive

In [8]:
missing(train)

Unnamed: 0,Missing_Number,Missing_Percent
PoolQC,1453,99.520548
MiscFeature,1406,96.301370
Alley,1369,93.767123
Fence,1179,80.753425
FireplaceQu,690,47.260274
...,...,...
ExterQual,0,0.000000
Exterior2nd,0,0.000000
Exterior1st,0,0.000000
RoofMatl,0,0.000000


In [9]:
pd.set_option('display.max_columns', None)
# train.head(10)

In [10]:
cols_outliers = ['LotArea', 'GrLivArea']

## Pre Processing

In [11]:
dropcol(train)

dropcol(test)

In [12]:
pd.options.mode.chained_assignment = None

train = impute_knn(train)

test = impute_knn(test)

In [13]:
f = lambda x: x.median() if np.issubdtype(x.dtype, np.number) else x.mode().iloc[0]
train = train.fillna(train.groupby('YrSold').transform(f))

test = test.fillna(test.groupby('YrSold').transform(f))

In [14]:
# for col in cols_outliers:
#     Q1 = np.percentile(train[col], 25,
#                        interpolation = 'midpoint')
 
#     Q3 = np.percentile(train[col], 75,
#                        interpolation = 'midpoint')
#     IQR = Q3 - Q1


#     upper = np.where(train[col] >= (Q3+1.5*IQR))
#     lower = np.where(train[col] <= (Q1-1.5*IQR))

#     print(col, upper, lower)
#     print(train.iloc[upper[0]])




# for col in cols_outliers:
#     Q1 = np.percentile(train[col], 25,
#                        interpolation = 'midpoint')
 
#     Q3 = np.percentile(train[col], 75,
#                        interpolation = 'midpoint')
#     IQR = Q3 - Q1


#     upper = np.where(train[col] >= (Q3+1.5*IQR))
#     lower = np.where(train[col] <= (Q1-1.5*IQR))

#     print(col, upper, lower)
#     print(train.iloc[upper[0]])
#     train.drop(upper[0], inplace = True, axis = 0)
#     train.drop(lower[0], inplace = True, axis = 0)







# Q1 = np.percentile(train['LotArea'], 25,
#                    interpolation = 'midpoint')

# Q3 = np.percentile(train['LotArea'], 75,
#                    interpolation = 'midpoint')
# IQR = Q3 - Q1


# upper = np.where(train["LotArea"] >= (Q3+1.5*IQR))
# lower = np.where(train["LotArea"] <= (Q1-1.5*IQR))


# train.drop(upper[0], inplace = True)
# train.drop(lower[0], inplace = True)




for col in cols_outliers:
    Q1 = np.percentile(train[col], 25,
                       interpolation = 'midpoint')

    Q3 = np.percentile(train[col], 75,
                       interpolation = 'midpoint')
    IQR = Q3 - Q1


    upper = np.where(train[col] >= (Q3+1.5*IQR))
    lower = np.where(train[col] <= (Q1-1.5*IQR))


    train.drop(upper[0], inplace = True)
    train.drop(lower[0], inplace = True)
    train.reset_index(drop = True, inplace = True)

In [15]:
# num_feats = train.dtypes[train.dtypes != 'object'].index
# skew_feats = train[num_feats].skew().sort_values(ascending = False)
# skewness = pd.DataFrame({'Skew':skew_feats})
# skewness.head(60)

In [16]:
# train.hist(grid=False,
#        figsize=(100, 60),
#        bins=30)

In [17]:
skewed_cols = ['LotFrontage', 'LotArea', 'YearBuilt', '1stFlrSF', 'GrLivArea', 'GarageYrBlt']

In [18]:
# for col in skewed_cols:
#     fitted_data, fitted_lambda = boxcox(train[col])
#     train[col] = fitted_data


for col in skewed_cols:
    train[col] = boxcox1p(train[col], 0.15)

In [19]:
train = labelencode(train)

test = labelencode(test)

In [20]:
x_train = (train.iloc[:,:-1]) # all pixel values
y_train = train.iloc[:,-1] # only labels i.e targets digits
x_test = test
Id = test['Id']

In [21]:
y_train

0       208500
1       181500
2       223500
3       140000
4       250000
         ...  
1359    175000
1360    210000
1361    266500
1362    142125
1363    147500
Name: SalePrice, Length: 1364, dtype: int64

In [22]:
train

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,3,5.831328,19.212182,1,3,3,0,4,0,5,2,2,0,5,7,5,14.187527,2003,1,0,12,13,1,196.0,2,4,2,2,3,3,2,706,5,0,150,856,1,0,1,4,11.692623,854,0,13.698888,1,0,2,1,3,1,2,8,5,0,1,14.187527,1,2,548,4,4,2,0,61,0,0,0,0,0,2,2008,8,4,208500
1,2,20,3,6.221214,19.712205,1,3,3,0,2,0,24,1,2,0,2,6,8,14.145138,1976,1,0,8,8,2,0.0,3,4,1,2,3,1,0,978,5,0,284,1262,1,0,1,4,12.792276,0,0,12.792276,0,1,2,0,3,1,3,6,5,1,1,14.145138,1,2,460,4,4,2,298,0,0,0,0,0,0,5,2007,8,4,181500
2,3,60,3,5.914940,20.347241,1,0,3,0,4,0,5,2,2,0,5,7,5,14.184404,2002,1,0,12,13,1,162.0,2,4,2,2,3,2,2,486,5,0,434,920,1,0,1,4,11.892039,866,0,13.832085,1,0,2,1,3,1,2,6,5,1,1,14.184404,1,2,608,4,4,2,0,42,0,0,0,0,0,9,2008,8,4,223500
3,4,70,3,5.684507,19.691553,1,0,3,0,0,0,6,2,2,0,5,7,5,14.047529,1970,1,0,13,15,2,0.0,3,4,0,3,1,3,0,216,5,0,540,756,1,2,1,4,12.013683,756,0,13.711364,1,0,1,0,3,1,2,7,5,1,5,14.179714,2,3,642,4,4,2,0,35,272,0,0,0,0,2,2006,8,0,140000
4,5,60,3,6.314735,21.325160,1,0,3,0,2,0,15,2,2,0,5,8,5,14.182841,2000,1,0,12,13,1,350.0,2,4,2,2,3,0,2,655,5,0,490,1145,1,0,1,4,12.510588,1053,0,14.480029,1,0,2,1,4,1,2,9,5,1,1,14.182841,1,3,836,4,4,2,192,84,0,0,0,0,0,12,2008,8,4,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1359,1456,60,3,5.744420,18.960528,1,3,3,0,4,0,8,2,2,0,5,6,5,14.181278,2000,1,0,12,13,2,0.0,3,4,2,2,3,3,5,0,5,0,953,953,1,0,1,4,11.990298,694,0,13.584606,0,0,2,1,3,1,3,7,5,1,1,14.181278,1,2,460,4,4,2,0,40,0,0,0,0,0,8,2007,8,4,175000
1360,1457,20,3,6.337529,20.994868,1,3,3,0,4,0,14,2,2,0,2,6,6,14.148295,1988,1,0,9,10,3,119.0,3,4,1,2,3,3,0,790,4,163,589,1542,1,4,1,4,14.295205,0,0,14.295205,1,0,2,0,3,1,3,7,2,2,1,14.148295,2,2,500,4,4,2,349,0,0,0,0,0,0,2,2010,8,4,210000
1361,1458,70,3,5.859551,19.476345,1,3,3,0,4,0,6,2,2,0,5,7,9,14.089451,2006,1,0,5,5,2,0.0,0,2,4,3,1,3,2,275,5,0,877,1152,1,0,1,4,12.616840,1152,0,14.679454,0,0,2,0,4,1,2,9,5,2,1,14.089451,1,1,252,4,4,2,0,60,0,0,0,0,2500,5,2010,8,4,266500
1362,1459,20,3,5.914940,19.760176,1,3,3,0,4,0,12,2,2,0,2,5,6,14.103852,1996,3,0,8,8,2,0.0,3,4,1,3,3,2,2,49,4,1029,0,1078,1,2,1,0,12.338074,0,0,12.338074,1,0,1,0,2,1,2,5,5,0,1,14.103852,2,1,240,4,4,2,366,0,112,0,0,0,0,4,2010,8,4,142125


In [23]:
test

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,2,80.0,11622,1,3,3,0,4,0,12,1,2,0,2,5,6,1961,1961,1,0,10,12,2,0.0,3,4,1,3,3,3,4,468.0,3,144.0,270.0,882.0,0,4,1,3,896,0,0,896,0.0,0.0,1,0,2,1,3,5,6,0,1,1961.0,2,1.0,730.0,3,4,2,140,0,0,0,120,0,0,6,2010,8,4
1,1462,20,3,81.0,14267,1,0,3,0,0,0,12,2,2,0,2,6,6,1958,1958,3,0,11,13,1,108.0,3,4,1,3,3,3,0,923.0,5,0.0,406.0,1329.0,0,4,1,3,1329,0,0,1329,0.0,0.0,1,1,3,1,2,6,6,0,1,1958.0,2,1.0,312.0,3,4,2,393,36,0,0,0,0,12500,6,2010,8,4
2,1463,60,3,74.0,13830,1,0,3,0,4,0,8,2,2,0,4,5,5,1997,1998,1,0,10,12,2,0.0,3,4,2,2,3,3,2,791.0,5,0.0,137.0,928.0,0,2,1,3,928,701,0,1629,0.0,0.0,2,1,3,1,3,6,6,1,1,1997.0,0,2.0,482.0,3,4,2,212,34,0,0,0,0,0,3,2010,8,4
3,1464,60,3,78.0,9978,1,0,3,0,4,0,8,2,2,0,4,6,6,1998,1998,1,0,10,12,1,20.0,3,4,2,3,3,3,2,602.0,5,0.0,324.0,926.0,0,0,1,3,926,678,0,1604,0.0,0.0,2,1,3,1,2,7,6,1,1,1998.0,0,2.0,470.0,3,4,2,360,36,0,0,0,0,0,6,2010,8,4
4,1465,120,3,43.0,5005,1,0,1,0,4,0,22,2,2,4,2,8,5,1992,1992,1,0,6,6,2,0.0,2,4,2,2,3,3,0,263.0,5,0.0,1017.0,1280.0,0,0,1,3,1280,0,0,1280,0.0,0.0,2,0,2,1,2,5,6,0,1,1992.0,1,2.0,506.0,3,4,2,0,82,0,0,144,0,0,1,2010,8,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1454,2915,160,4,21.0,1936,1,3,3,0,4,0,10,2,2,3,4,4,7,1970,1970,1,0,5,5,2,0.0,3,4,1,3,3,3,5,0.0,5,0.0,546.0,546.0,0,2,1,3,546,546,0,1092,0.0,0.0,1,1,3,1,3,5,6,0,1,1972.2,2,0.0,0.0,3,4,2,0,0,0,0,0,0,0,6,2006,8,4
1455,2916,160,4,21.0,1894,1,3,3,0,4,0,10,2,2,4,4,4,5,1970,1970,1,0,5,5,2,0.0,3,4,1,3,3,3,4,252.0,5,0.0,294.0,546.0,0,4,1,3,546,546,0,1092,0.0,0.0,1,1,3,1,3,6,6,0,4,1970.0,2,1.0,286.0,3,4,2,0,24,0,0,0,0,0,4,2006,8,0
1456,2917,20,3,160.0,20000,1,3,3,0,4,0,11,2,2,0,2,5,7,1960,1996,1,0,10,12,2,0.0,3,4,1,3,3,3,0,1224.0,5,0.0,0.0,1224.0,0,0,1,3,1224,0,0,1224,1.0,0.0,1,0,4,1,3,7,6,1,5,1960.0,2,2.0,576.0,3,4,2,474,0,0,0,0,0,0,9,2006,8,0
1457,2918,85,3,62.0,10441,1,3,3,0,4,0,11,2,2,0,5,5,5,1992,1992,1,0,6,14,2,0.0,3,4,2,2,3,0,2,337.0,5,0.0,575.0,912.0,0,4,1,3,970,0,0,970,0.0,1.0,1,0,3,1,3,6,6,0,1,1967.4,2,0.0,0.0,3,4,2,80,32,0,0,0,0,700,7,2006,8,4


In [24]:
x_train

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1,60,3,5.831328,19.212182,1,3,3,0,4,0,5,2,2,0,5,7,5,14.187527,2003,1,0,12,13,1,196.0,2,4,2,2,3,3,2,706,5,0,150,856,1,0,1,4,11.692623,854,0,13.698888,1,0,2,1,3,1,2,8,5,0,1,14.187527,1,2,548,4,4,2,0,61,0,0,0,0,0,2,2008,8,4
1,2,20,3,6.221214,19.712205,1,3,3,0,2,0,24,1,2,0,2,6,8,14.145138,1976,1,0,8,8,2,0.0,3,4,1,2,3,1,0,978,5,0,284,1262,1,0,1,4,12.792276,0,0,12.792276,0,1,2,0,3,1,3,6,5,1,1,14.145138,1,2,460,4,4,2,298,0,0,0,0,0,0,5,2007,8,4
2,3,60,3,5.914940,20.347241,1,0,3,0,4,0,5,2,2,0,5,7,5,14.184404,2002,1,0,12,13,1,162.0,2,4,2,2,3,2,2,486,5,0,434,920,1,0,1,4,11.892039,866,0,13.832085,1,0,2,1,3,1,2,6,5,1,1,14.184404,1,2,608,4,4,2,0,42,0,0,0,0,0,9,2008,8,4
3,4,70,3,5.684507,19.691553,1,0,3,0,0,0,6,2,2,0,5,7,5,14.047529,1970,1,0,13,15,2,0.0,3,4,0,3,1,3,0,216,5,0,540,756,1,2,1,4,12.013683,756,0,13.711364,1,0,1,0,3,1,2,7,5,1,5,14.179714,2,3,642,4,4,2,0,35,272,0,0,0,0,2,2006,8,0
4,5,60,3,6.314735,21.325160,1,0,3,0,2,0,15,2,2,0,5,8,5,14.182841,2000,1,0,12,13,1,350.0,2,4,2,2,3,0,2,655,5,0,490,1145,1,0,1,4,12.510588,1053,0,14.480029,1,0,2,1,4,1,2,9,5,1,1,14.182841,1,3,836,4,4,2,192,84,0,0,0,0,0,12,2008,8,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1359,1456,60,3,5.744420,18.960528,1,3,3,0,4,0,8,2,2,0,5,6,5,14.181278,2000,1,0,12,13,2,0.0,3,4,2,2,3,3,5,0,5,0,953,953,1,0,1,4,11.990298,694,0,13.584606,0,0,2,1,3,1,3,7,5,1,1,14.181278,1,2,460,4,4,2,0,40,0,0,0,0,0,8,2007,8,4
1360,1457,20,3,6.337529,20.994868,1,3,3,0,4,0,14,2,2,0,2,6,6,14.148295,1988,1,0,9,10,3,119.0,3,4,1,2,3,3,0,790,4,163,589,1542,1,4,1,4,14.295205,0,0,14.295205,1,0,2,0,3,1,3,7,2,2,1,14.148295,2,2,500,4,4,2,349,0,0,0,0,0,0,2,2010,8,4
1361,1458,70,3,5.859551,19.476345,1,3,3,0,4,0,6,2,2,0,5,7,9,14.089451,2006,1,0,5,5,2,0.0,0,2,4,3,1,3,2,275,5,0,877,1152,1,0,1,4,12.616840,1152,0,14.679454,0,0,2,0,4,1,2,9,5,2,1,14.089451,1,1,252,4,4,2,0,60,0,0,0,0,2500,5,2010,8,4
1362,1459,20,3,5.914940,19.760176,1,3,3,0,4,0,12,2,2,0,2,5,6,14.103852,1996,3,0,8,8,2,0.0,3,4,1,3,3,2,2,49,4,1029,0,1078,1,2,1,0,12.338074,0,0,12.338074,1,0,1,0,2,1,2,5,5,0,1,14.103852,2,1,240,4,4,2,366,0,112,0,0,0,0,4,2010,8,4


In [25]:
train.columns

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope',
       'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle',
       'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle',
       'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea',
       'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond',
       'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2',
       'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC',
       'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF',
       'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath',
       'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd',
       'Functional', 'Fireplaces', 'GarageType', 'GarageYrBlt', 'GarageFinish',
       'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond', 'PavedDrive',
       'WoodDeckSF', 'OpenPo

In [26]:
x_train.columns

Index(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'LotShape', 'LandContour', 'Utilities', 'LotConfig', 'LandSlope',
       'Neighborhood', 'Condition1', 'Condition2', 'BldgType', 'HouseStyle',
       'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'RoofStyle',
       'RoofMatl', 'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea',
       'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond',
       'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2',
       'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC',
       'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF',
       'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath',
       'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual', 'TotRmsAbvGrd',
       'Functional', 'Fireplaces', 'GarageType', 'GarageYrBlt', 'GarageFinish',
       'GarageCars', 'GarageArea', 'GarageQual', 'GarageCond', 'PavedDrive',
       'WoodDeckSF', 'OpenPo

In [27]:
X_train, X_test, Y_train, y_test = train_test_split(x_train, y_train, test_size=0.3)

In [28]:
# scaler = MinMaxScaler()

# X_train = pd.DataFrame(scaler.fit_transform(X_train))

# X_test = pd.DataFrame(scaler.transform(X_test))

# x_test = pd.DataFrame(scaler.transform(x_test))

In [29]:
X_train.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition
688,737,90,3,5.684507,19.255157,1,3,3,0,4,0,12,2,2,2,2,3,4,14.103852,1950,1,0,11,11,2,0.0,3,4,1,2,3,3,5,0,5,0,0,0,1,2,0,1,12.236143,0,0,12.236143,0,0,2,0,2,2,3,6,5,0,5,14.102254,2,2,400,4,4,2,0,0,0,0,0,0,0,7,2006,8,4
88,92,20,3,6.337529,19.235091,1,3,3,0,4,0,12,2,2,0,2,5,3,14.121376,1961,3,0,6,6,0,203.0,3,4,1,3,3,3,4,600,5,0,635,1235,1,4,1,4,12.729304,0,0,12.729304,0,0,1,0,2,1,3,6,5,0,1,14.121376,2,2,480,4,4,2,0,0,0,0,0,0,0,12,2006,8,0
925,988,20,3,6.291711,19.93708,1,0,3,0,4,0,16,2,2,0,2,9,5,14.196881,2010,3,0,12,13,3,450.0,0,4,2,0,3,0,2,1646,5,0,284,1930,1,0,1,4,14.087847,0,0,14.087847,1,0,2,1,3,1,0,8,5,1,1,14.198437,0,3,606,4,4,2,168,95,0,0,0,0,0,4,2010,6,5
747,798,20,3,5.591427,18.842482,1,3,3,0,4,0,12,2,2,0,2,5,5,14.108639,1953,1,0,12,13,2,0.0,3,4,1,3,3,3,1,570,5,0,203,773,1,2,1,4,11.414227,0,0,11.414227,0,0,1,0,2,1,3,4,5,0,1,14.108639,2,1,240,4,4,2,0,0,0,0,0,0,0,4,2008,8,0
166,176,20,3,6.314735,20.815248,1,3,3,0,0,0,7,2,2,0,2,6,7,14.103852,2001,1,0,14,15,2,0.0,3,4,1,3,1,0,0,477,5,0,725,1202,1,4,1,4,14.421879,0,0,14.421879,1,0,2,0,4,1,2,7,5,1,1,14.103852,2,2,576,4,4,2,0,29,39,0,0,0,0,6,2007,8,4


In [30]:
X_test.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition
855,913,30,4,5.392276,17.989871,1,3,3,0,4,0,3,2,2,0,2,5,7,14.06371,1950,1,0,8,8,2,0.0,3,4,0,3,3,3,4,489,5,0,279,768,1,4,0,4,12.167343,0,0,12.167343,0,0,1,0,3,1,3,6,2,0,5,14.06371,2,1,450,4,4,2,0,0,112,0,120,0,620,7,2006,8,0
151,159,60,1,6.65495,20.794619,1,3,3,0,0,0,21,2,2,0,5,7,5,14.189087,2005,1,0,12,13,2,0.0,2,4,2,2,3,3,2,222,5,0,769,991,1,0,1,4,12.099929,956,0,14.099058,0,0,2,1,3,1,2,8,5,1,1,14.189087,1,2,678,4,4,2,0,136,0,0,0,0,0,5,2010,8,4
86,90,20,3,5.684507,19.034204,1,3,3,0,4,0,5,2,2,0,2,4,5,14.173451,1995,1,0,12,13,2,0.0,3,4,2,2,3,3,2,588,5,0,402,990,1,0,1,4,12.09709,0,0,12.09709,1,0,1,0,3,1,3,5,5,0,1,14.144506,2,0,0,4,4,2,0,0,0,0,0,0,0,8,2007,8,4
581,621,30,3,5.172535,19.11844,1,3,3,0,4,0,7,2,2,0,2,3,3,14.045907,1950,1,0,11,12,2,0.0,3,4,0,3,3,3,1,41,5,0,823,864,1,4,0,1,11.718228,0,0,11.718228,1,0,1,0,2,1,3,5,5,0,1,14.084317,2,0,0,4,4,0,0,0,100,0,0,0,0,9,2008,8,4
1250,1338,30,4,7.525113,16.567522,1,0,0,0,0,1,17,1,2,0,2,4,4,14.089451,1950,1,0,13,14,2,0.0,3,4,1,3,3,3,5,0,5,0,693,693,3,1,0,0,11.120741,0,0,11.120741,0,0,1,0,2,1,1,4,5,0,1,14.101615,2,0,0,4,4,0,0,20,0,0,0,0,0,3,2006,8,4


In [31]:
x_test.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition
0,1461,20,2,80.0,11622,1,3,3,0,4,0,12,1,2,0,2,5,6,1961,1961,1,0,10,12,2,0.0,3,4,1,3,3,3,4,468.0,3,144.0,270.0,882.0,0,4,1,3,896,0,0,896,0.0,0.0,1,0,2,1,3,5,6,0,1,1961.0,2,1.0,730.0,3,4,2,140,0,0,0,120,0,0,6,2010,8,4
1,1462,20,3,81.0,14267,1,0,3,0,0,0,12,2,2,0,2,6,6,1958,1958,3,0,11,13,1,108.0,3,4,1,3,3,3,0,923.0,5,0.0,406.0,1329.0,0,4,1,3,1329,0,0,1329,0.0,0.0,1,1,3,1,2,6,6,0,1,1958.0,2,1.0,312.0,3,4,2,393,36,0,0,0,0,12500,6,2010,8,4
2,1463,60,3,74.0,13830,1,0,3,0,4,0,8,2,2,0,4,5,5,1997,1998,1,0,10,12,2,0.0,3,4,2,2,3,3,2,791.0,5,0.0,137.0,928.0,0,2,1,3,928,701,0,1629,0.0,0.0,2,1,3,1,3,6,6,1,1,1997.0,0,2.0,482.0,3,4,2,212,34,0,0,0,0,0,3,2010,8,4
3,1464,60,3,78.0,9978,1,0,3,0,4,0,8,2,2,0,4,6,6,1998,1998,1,0,10,12,1,20.0,3,4,2,3,3,3,2,602.0,5,0.0,324.0,926.0,0,0,1,3,926,678,0,1604,0.0,0.0,2,1,3,1,2,7,6,1,1,1998.0,0,2.0,470.0,3,4,2,360,36,0,0,0,0,0,6,2010,8,4
4,1465,120,3,43.0,5005,1,0,1,0,4,0,22,2,2,4,2,8,5,1992,1992,1,0,6,6,2,0.0,2,4,2,2,3,3,0,263.0,5,0.0,1017.0,1280.0,0,0,1,3,1280,0,0,1280,0.0,0.0,2,0,2,1,2,5,6,0,1,1992.0,1,2.0,506.0,3,4,2,0,82,0,0,144,0,0,1,2010,8,4


In [32]:
# scaler = StandardScaler()

# X_train = scaler.fit_transform(X_train)
# X_test = scaler.transform(X_test)
# x_test = scaler.transform(x_test)




scaler = RobustScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
x_test = scaler.transform(x_test)

In [33]:
X_train = pd.DataFrame(X_train)
X_test = pd.DataFrame(X_test)
x_test = pd.DataFrame(x_test)

## Models Training

In [34]:
train_regressor(xgb.XGBRegressor(n_estimators = 1000))

0.13761650201245348


In [35]:
train_regressor(CatBoostRegressor(verbose = 0))

0.10969737057993961


In [36]:
train_regressor(LogisticRegression())

0.2538454565670532


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [37]:
train_regressor(svm.SVR())

0.3808813718338147


In [38]:
train_regressor(RandomForestRegressor(n_estimators = 250))

0.12703567451263822


In [39]:
train_regressor(RandomForestRegressor())

0.12593227087332115


In [40]:
train_regressor(DecisionTreeRegressor())

0.20948040141658844


In [41]:
train_regressor(ExtraTreesRegressor(n_estimators = 200))

0.127691142732992


In [42]:
train_regressor(PLSRegression(n_components = 2))

0.15991096512505615


In [43]:
train_regressor(linear_model.ARDRegression())

0.13657196958810147


In [44]:
three_model(CatBoostRegressor(verbose = 0), xgb.XGBRegressor(), linear_model.ARDRegression())

0.11401011642442456


In [45]:
three_model(CatBoostRegressor(verbose = 0), xgb.XGBRegressor(n_estimators = 1000), linear_model.ARDRegression())

0.11404096212364254


In [46]:
stack_three_model(linear_model.ARDRegression(n_iter = 300), xgb.XGBRegressor(n_estimators = 1000), ExtraTreesRegressor(n_estimators = 200))

0.13056532377735555


In [47]:
stack_three_model(ExtraTreesRegressor(n_estimators = 200), xgb.XGBRegressor(n_estimators = 1000), RandomForestRegressor(n_estimators = 250))

0.14173840884730693


In [48]:
three_model(xgb.XGBRegressor(), linear_model.ARDRegression(), linear_model.BayesianRidge())

0.12391295089603042


In [49]:
three_model(xgb.XGBRegressor(n_estimators = 1000), RandomForestRegressor(n_estimators = 250), ExtraTreesRegressor(n_estimators = 200))

0.12132227876506195


In [50]:
three_model(RandomForestRegressor(n_estimators = 250), xgb.XGBRegressor(n_estimators = 1000), ExtraTreesRegressor(n_estimators = 200))

0.12114714723703027


In [51]:
get_output_three_model(xgb.XGBRegressor(n_estimators = 1000), linear_model.ARDRegression(), ExtraTreesRegressor(n_estimators = 200))

In [52]:
# stack_three_model(xgb.XGBRegressor(), RandomForestRegressor(), ExtraTreesRegressor(n_estimators = 200))

In [53]:
train_regressor(linear_model.Ridge())

0.14200800409168662


In [54]:
train_regressor(linear_model.Lasso())

0.14261505582310718


  coef_, l1_reg, l2_reg, X, y, max_iter, tol, rng, random, positive


In [55]:
train_regressor(linear_model.BayesianRidge())

0.13737350438702595


In [56]:
# train_regressor(linear_model.SGDRegressor())

In [57]:
train_regressor(CatBoostRegressor(verbose = 0))

0.10969737057993961
