# House Prices: Advanced Regression Techniques

In [292]:

import pandas as pd
import numpy as nop
import tensorflow as tf
from datetime import datetime

In [293]:
train = pd.read_csv('./data/train.csv')

In [294]:
test = pd.read_csv('./data/test.csv')

In [295]:
train.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [296]:
train.shape

(1460, 81)

In [297]:
train.dtypes

Id                 int64
MSSubClass         int64
MSZoning          object
LotFrontage      float64
LotArea            int64
                  ...   
MoSold             int64
YrSold             int64
SaleType          object
SaleCondition     object
SalePrice          int64
Length: 81, dtype: object

In [298]:
# Attributes which are mostly Null
mostly_nan = {x:y for x,y in dict(train.isnull().sum()).items() if y > 700}
mostly_nan

{'Alley': 1369, 'PoolQC': 1453, 'Fence': 1179, 'MiscFeature': 1406}

In [299]:
train = train.drop(mostly_nan.keys(), axis = 1)

In [300]:
have_nan = {x:y for x,y in dict(train.isnull().sum()).items() if y > 0}
have_nan

{'LotFrontage': 259,
 'MasVnrType': 8,
 'MasVnrArea': 8,
 'BsmtQual': 37,
 'BsmtCond': 37,
 'BsmtExposure': 38,
 'BsmtFinType1': 37,
 'BsmtFinType2': 38,
 'Electrical': 1,
 'FireplaceQu': 690,
 'GarageType': 81,
 'GarageYrBlt': 81,
 'GarageFinish': 81,
 'GarageQual': 81,
 'GarageCond': 81}

In [301]:
train['Electrical'].value_counts().index[0]

'SBrkr'

In [302]:
train[train['Electrical'].isnull()]

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,LotShape,LandContour,Utilities,LotConfig,...,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
1379,1380,80,RL,73.0,9735,Pave,Reg,Lvl,AllPub,Inside,...,0,0,0,0,0,5,2008,WD,Normal,167500


In [303]:
# Replace Nan with mode
for col,_ in have_nan.items():
    if train.dtypes[col] == 'object':
        mode = train[col].value_counts().index[0]
        train[col] = train[col].replace(np.nan, mode)
    else:
        median = train[col].median()
        train[col] = train[col].replace(np.nan, median)

In [304]:
{x:y for x,y in dict(train.isnull().sum()).items() if y > 0}

{}

In [305]:
categoricals = list(train.select_dtypes(include=['object']).columns)

In [306]:
from sklearn.preprocessing import LabelBinarizer
encoders = {}
to_drop = []
for col in categoricals:
    print(f'{col} = {train[col].unique()}')
    lb = LabelBinarizer()
    encoded = lb.fit_transform(train[col])
#     print(y.shape)
    if encoded.shape[1] > 1:
        ne = pd.DataFrame(encoded, columns=[f'{col}_{clas}' for clas in lb.classes_])
        to_drop.append(col)
    else:
        ne = pd.DataFrame(encoded, columns=[col])
    train = train.drop([col], axis = 1)
    train = pd.concat([train, ne], axis=1)
    encoders[col] = lb

MSZoning = ['RL' 'RM' 'C (all)' 'FV' 'RH']
Street = ['Pave' 'Grvl']
LotShape = ['Reg' 'IR1' 'IR2' 'IR3']
LandContour = ['Lvl' 'Bnk' 'Low' 'HLS']
Utilities = ['AllPub' 'NoSeWa']
LotConfig = ['Inside' 'FR2' 'Corner' 'CulDSac' 'FR3']
LandSlope = ['Gtl' 'Mod' 'Sev']
Neighborhood = ['CollgCr' 'Veenker' 'Crawfor' 'NoRidge' 'Mitchel' 'Somerst' 'NWAmes'
 'OldTown' 'BrkSide' 'Sawyer' 'NridgHt' 'NAmes' 'SawyerW' 'IDOTRR'
 'MeadowV' 'Edwards' 'Timber' 'Gilbert' 'StoneBr' 'ClearCr' 'NPkVill'
 'Blmngtn' 'BrDale' 'SWISU' 'Blueste']
Condition1 = ['Norm' 'Feedr' 'PosN' 'Artery' 'RRAe' 'RRNn' 'RRAn' 'PosA' 'RRNe']
Condition2 = ['Norm' 'Artery' 'RRNn' 'Feedr' 'PosN' 'PosA' 'RRAn' 'RRAe']
BldgType = ['1Fam' '2fmCon' 'Duplex' 'TwnhsE' 'Twnhs']
HouseStyle = ['2Story' '1Story' '1.5Fin' '1.5Unf' 'SFoyer' 'SLvl' '2.5Unf' '2.5Fin']
RoofStyle = ['Gable' 'Hip' 'Gambrel' 'Mansard' 'Flat' 'Shed']
RoofMatl = ['CompShg' 'WdShngl' 'Metal' 'WdShake' 'Membran' 'Tar&Grv' 'Roll'
 'ClyTile']
Exterior1st = ['VinylSd' 'Meta

In [307]:
train.head()

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,1,60,65.0,8450,7,5,2003,2003,196.0,706,...,0,0,0,1,0,0,0,0,1,0
1,2,20,80.0,9600,6,8,1976,1976,0.0,978,...,0,0,0,1,0,0,0,0,1,0
2,3,60,68.0,11250,7,5,2001,2002,162.0,486,...,0,0,0,1,0,0,0,0,1,0
3,4,70,60.0,9550,7,5,1915,1970,0.0,216,...,0,0,0,1,1,0,0,0,0,0
4,5,60,84.0,14260,8,5,2000,2000,350.0,655,...,0,0,0,1,0,0,0,0,1,0


In [308]:
train.head()

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,1,60,65.0,8450,7,5,2003,2003,196.0,706,...,0,0,0,1,0,0,0,0,1,0
1,2,20,80.0,9600,6,8,1976,1976,0.0,978,...,0,0,0,1,0,0,0,0,1,0
2,3,60,68.0,11250,7,5,2001,2002,162.0,486,...,0,0,0,1,0,0,0,0,1,0
3,4,70,60.0,9550,7,5,1915,1970,0.0,216,...,0,0,0,1,1,0,0,0,0,0
4,5,60,84.0,14260,8,5,2000,2000,350.0,655,...,0,0,0,1,0,0,0,0,1,0


In [309]:
train.dtypes.value_counts()

int32      236
int64       35
float64      3
dtype: int64

In [310]:
y = train['SalePrice']
X = train
X = train.drop(['SalePrice', 'Id'], axis = 1)

In [311]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
scalers = {}
for col in X:
    scale = StandardScaler()
    X[col] = scale.fit_transform(np.array(X[col]).reshape(-1, 1))
    scalers[col] = scale


In [147]:
# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [148]:
from sklearn.linear_model import LinearRegression

In [149]:
lr_model = LinearRegression()

In [150]:
lr_model.fit(X_train, y_train)

LinearRegression()

In [151]:
pred = lr_model.predict(X_test)

In [152]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [153]:
mean_squared_error(y_test, pred)

1.329961960191654e+29

In [154]:
mean_absolute_error(y_test, pred)

50317047556976.33

In [312]:
test = test.drop(mostly_nan, axis = 1)

In [313]:
test_have_nan = {x:y for x,y in dict(test.isnull().sum()).items() if y > 0}
test_have_nan

{'MSZoning': 4,
 'LotFrontage': 227,
 'Utilities': 2,
 'Exterior1st': 1,
 'Exterior2nd': 1,
 'MasVnrType': 16,
 'MasVnrArea': 15,
 'BsmtQual': 44,
 'BsmtCond': 45,
 'BsmtExposure': 44,
 'BsmtFinType1': 42,
 'BsmtFinSF1': 1,
 'BsmtFinType2': 42,
 'BsmtFinSF2': 1,
 'BsmtUnfSF': 1,
 'TotalBsmtSF': 1,
 'BsmtFullBath': 2,
 'BsmtHalfBath': 2,
 'KitchenQual': 1,
 'Functional': 2,
 'FireplaceQu': 730,
 'GarageType': 76,
 'GarageYrBlt': 78,
 'GarageFinish': 78,
 'GarageCars': 1,
 'GarageArea': 1,
 'GarageQual': 78,
 'GarageCond': 78,
 'SaleType': 1}

In [314]:
# Replace Nan with mode
for col,_ in test_have_nan.items():
    mode = test[col].value_counts().index[0]
    test[col] = test[col].replace(np.nan, mode)

In [315]:
for col in categoricals:
#     print(f'{col} = {test[col].unique()}')

    encoded = encoders[col].transform(test[col])
#     print(y.shape)
    if encoded.shape[1] > 1:
        ne = pd.DataFrame(encoded, columns=[f'{col}_{clas}' for clas in encoders[col].classes_])
        to_drop.append(col)
    else:
        ne = pd.DataFrame(encoded, columns=[col])
    test = test.drop([col], axis = 1)
    test = pd.concat([test, ne], axis=1)

In [316]:
test.head()

Unnamed: 0,Id,MSSubClass,LotFrontage,LotArea,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,...,SaleType_ConLw,SaleType_New,SaleType_Oth,SaleType_WD,SaleCondition_Abnorml,SaleCondition_AdjLand,SaleCondition_Alloca,SaleCondition_Family,SaleCondition_Normal,SaleCondition_Partial
0,1461,20,80.0,11622,5,6,1961,1961,0.0,468.0,...,0,0,0,1,0,0,0,0,1,0
1,1462,20,81.0,14267,6,6,1958,1958,108.0,923.0,...,0,0,0,1,0,0,0,0,1,0
2,1463,60,74.0,13830,5,5,1997,1998,0.0,791.0,...,0,0,0,1,0,0,0,0,1,0
3,1464,60,78.0,9978,6,6,1998,1998,20.0,602.0,...,0,0,0,1,0,0,0,0,1,0
4,1465,120,43.0,5005,8,5,1992,1992,0.0,263.0,...,0,0,0,1,0,0,0,0,1,0


In [317]:
Id_col = test['Id']
X_test = test.drop(['Id'], axis = 1)

In [318]:
scalers

{'MSSubClass': StandardScaler(),
 'LotFrontage': StandardScaler(),
 'LotArea': StandardScaler(),
 'OverallQual': StandardScaler(),
 'OverallCond': StandardScaler(),
 'YearBuilt': StandardScaler(),
 'YearRemodAdd': StandardScaler(),
 'MasVnrArea': StandardScaler(),
 'BsmtFinSF1': StandardScaler(),
 'BsmtFinSF2': StandardScaler(),
 'BsmtUnfSF': StandardScaler(),
 'TotalBsmtSF': StandardScaler(),
 '1stFlrSF': StandardScaler(),
 '2ndFlrSF': StandardScaler(),
 'LowQualFinSF': StandardScaler(),
 'GrLivArea': StandardScaler(),
 'BsmtFullBath': StandardScaler(),
 'BsmtHalfBath': StandardScaler(),
 'FullBath': StandardScaler(),
 'HalfBath': StandardScaler(),
 'BedroomAbvGr': StandardScaler(),
 'KitchenAbvGr': StandardScaler(),
 'TotRmsAbvGrd': StandardScaler(),
 'Fireplaces': StandardScaler(),
 'GarageYrBlt': StandardScaler(),
 'GarageCars': StandardScaler(),
 'GarageArea': StandardScaler(),
 'WoodDeckSF': StandardScaler(),
 'OpenPorchSF': StandardScaler(),
 'EnclosedPorch': StandardScaler(),
 

In [319]:
from sklearn.preprocessing import MinMaxScaler
for col in X_test:
    X_test[col] = scalers[col].transform(np.array(X_test[col]).reshape(-1, 1))
     


In [580]:
my_pred = lr_model.predict(X_test)

In [581]:
my_pred

array([109392.66174316, 152254.56469727, 186848.2947998 , ...,
       180758.60296631, 105995.24700928, 224207.48864746])

In [582]:
pred_series = pd.Series(my_pred, name='SalePrice')

In [583]:
output = pd.concat([Id_col, pred_series], axis=1)

In [584]:
output.head()

Unnamed: 0,Id,SalePrice
0,1461,109392.661743
1,1462,152254.564697
2,1463,186848.2948
3,1464,192210.252869
4,1465,205223.612793


In [592]:
output.to_csv('./submissions/lr_kaggle_submssion.csv', index = False)

In [593]:
# Simple Linear Regression got score 0.85796 and 4989 position

In [594]:
from sklearn.ensemble import RandomForestRegressor

In [595]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [596]:
rf_model = RandomForestRegressor()

In [597]:
rf_model.fit(X, y)

RandomForestRegressor()

In [598]:
Id_col = test['Id']
X_kaggle_test = test.drop(['Id'], axis = 1)

In [599]:
my_pred = rf_model.predict(X_kaggle_test)
pred_series = pd.Series(my_pred, name='SalePrice')
output = pd.concat([Id_col, pred_series], axis=1)
output.to_csv('rf_kaggle_submssion.csv', index = False)

In [600]:
# Random Forest Regressor got score 0.14668 and 2965 position

In [601]:
from sklearn.ensemble import GradientBoostingRegressor

In [602]:
gb_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,max_depth=1, random_state=0, loss='ls')

In [603]:
gb_model.fit(X, y)

GradientBoostingRegressor(max_depth=1, random_state=0)

In [604]:
my_pred = gb_model.predict(X_kaggle_test)
pred_series = pd.Series(my_pred, name='SalePrice')
output = pd.concat([Id_col, pred_series], axis=1)
output.to_csv('./submissions/gb_kaggle_submssion.csv', index = False)

In [605]:
 # GradientBoosting Regressor got score 0.17547

In [606]:
from sklearn.neural_network import MLPRegressor

In [607]:
mlp_model = MLPRegressor(random_state=1, max_iter=5000)

In [608]:
mlp_model.fit(X, y)

MLPRegressor(max_iter=5000, random_state=1)

In [609]:
my_pred = mlp_model.predict(X_kaggle_test)
pred_series = pd.Series(my_pred, name='SalePrice')
output = pd.concat([Id_col, pred_series], axis=1)
output.to_csv('./submissions/mlp_kaggle_submssion.csv', index = False)

In [610]:
#!pip3 install xgboost

In [226]:
import xgboost as xgb

In [227]:
xgb_model = xgb.XGBRegressor(objective="reg:squarederror", random_state=11196)

In [228]:
xgb_model.fit(X, y)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.300000012, max_delta_step=0, max_depth=6,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=100, n_jobs=0, num_parallel_tree=1,
             random_state=11196, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,
             subsample=1, tree_method='exact', validate_parameters=1,
             verbosity=None)

In [229]:
my_pred = xgb_model.predict(X_test)
pred_series = pd.Series(my_pred, name='SalePrice')
output = pd.concat([Id_col, pred_series], axis=1)
output.to_csv(f'./submissions/xgb_kaggle_submssion_{datetime.now().strftime("%d-%b-%Y_(%H-%M-%S)")}.csv', index = False)

In [399]:
# XGB Regressor got score 0.14274 and 2645 position

In [234]:
!pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.17.3-py2.py3-none-any.whl (1.3 MB)
Installing collected packages: mlxtend
Successfully installed mlxtend-0.17.3


In [235]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.svm import SVR
from sklearn.linear_model import Ridge
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import GradientBoostingRegressor
# from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from mlxtend.regressor import StackingCVRegressor
from sklearn.model_selection import KFold, cross_val_score


In [236]:
lasso = Lasso(alpha = 0.0005)
ridge = Ridge(alpha = 11.9)
elasticnet = ElasticNet(alpha = 0.001)
gbr = GradientBoostingRegressor(learning_rate = 0.1, loss = 'huber', max_depth = 1, max_features ='sqrt',
                                min_samples_leaf = 15, min_samples_split = 10, n_estimators =3000)
# lightgbm = LGBMRegressor(learning_rate = 0.01, n_estimators = 5000, num_leaves = 4)
xgboost = XGBRegressor(learning_rate = 0.01, max_depth = 3, n_estimators = 3500)

In [237]:
scvr = StackingCVRegressor(regressors=(ridge, lasso, elasticnet, gbr, xgboost),
                                meta_regressor=xgboost,
                                use_features_in_secondary=True)

In [278]:
scvr.fit(X, y)

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


StackingCVRegressor(meta_regressor=XGBRegressor(base_score=None, booster=None,
                                                colsample_bylevel=None,
                                                colsample_bynode=None,
                                                colsample_bytree=None,
                                                gamma=None, gpu_id=None,
                                                importance_type='gain',
                                                interaction_constraints=None,
                                                learning_rate=0.01,
                                                max_delta_step=None,
                                                max_depth=3,
                                                min_child_weight=None,
                                                missing=nan,
                                                monotone_constraints=None,
                                                n_estimators=3500, n_...
      

In [279]:
my_pred = scvr.predict(X_test)
pred_series = pd.Series(my_pred, name='SalePrice')
output = pd.concat([Id_col, pred_series], axis=1)
output.to_csv(f'./submissions/scvr_kaggle_submssion_{datetime.now().strftime("%d-%b-%Y_(%H-%M-%S)")}.csv', index = False)

ValueError: feature_names mismatch: ['f0', 'f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9', 'f10', 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f20', 'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27', 'f28', 'f29', 'f30', 'f31', 'f32', 'f33', 'f34', 'f35', 'f36', 'f37', 'f38', 'f39', 'f40', 'f41', 'f42', 'f43', 'f44', 'f45', 'f46', 'f47', 'f48', 'f49', 'f50', 'f51', 'f52', 'f53', 'f54', 'f55', 'f56', 'f57', 'f58', 'f59', 'f60', 'f61', 'f62', 'f63', 'f64', 'f65', 'f66', 'f67', 'f68', 'f69', 'f70', 'f71', 'f72', 'f73', 'f74', 'f75', 'f76', 'f77', 'f78', 'f79', 'f80', 'f81', 'f82', 'f83', 'f84', 'f85', 'f86', 'f87', 'f88', 'f89', 'f90', 'f91', 'f92', 'f93', 'f94', 'f95', 'f96', 'f97', 'f98', 'f99', 'f100', 'f101', 'f102', 'f103', 'f104', 'f105', 'f106', 'f107', 'f108', 'f109', 'f110', 'f111', 'f112', 'f113', 'f114', 'f115', 'f116', 'f117', 'f118', 'f119', 'f120', 'f121', 'f122', 'f123', 'f124', 'f125', 'f126', 'f127', 'f128', 'f129', 'f130', 'f131', 'f132', 'f133', 'f134', 'f135', 'f136', 'f137', 'f138', 'f139', 'f140', 'f141', 'f142', 'f143', 'f144', 'f145', 'f146', 'f147', 'f148', 'f149', 'f150', 'f151', 'f152', 'f153', 'f154', 'f155', 'f156', 'f157', 'f158', 'f159', 'f160', 'f161', 'f162', 'f163', 'f164', 'f165', 'f166', 'f167', 'f168', 'f169', 'f170', 'f171', 'f172', 'f173', 'f174', 'f175', 'f176', 'f177', 'f178', 'f179', 'f180', 'f181', 'f182', 'f183', 'f184', 'f185', 'f186', 'f187', 'f188', 'f189', 'f190', 'f191', 'f192', 'f193', 'f194', 'f195', 'f196', 'f197', 'f198', 'f199', 'f200', 'f201', 'f202', 'f203', 'f204', 'f205', 'f206', 'f207', 'f208', 'f209', 'f210', 'f211', 'f212', 'f213', 'f214', 'f215', 'f216', 'f217', 'f218', 'f219', 'f220', 'f221', 'f222', 'f223', 'f224', 'f225', 'f226', 'f227', 'f228', 'f229', 'f230', 'f231', 'f232', 'f233', 'f234', 'f235', 'f236', 'f237', 'f238', 'f239', 'f240', 'f241', 'f242', 'f243', 'f244', 'f245', 'f246', 'f247', 'f248', 'f249', 'f250', 'f251', 'f252', 'f253', 'f254', 'f255', 'f256', 'f257', 'f258', 'f259', 'f260', 'f261', 'f262', 'f263', 'f264', 'f265', 'f266', 'f267', 'f268', 'f269', 'f270', 'f271'] ['MSSubClass', 'LotFrontage', 'LotArea', 'OverallQual', 'OverallCond', 'YearBuilt', 'YearRemodAdd', 'MasVnrArea', 'BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF', 'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath', 'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'TotRmsAbvGrd', 'Fireplaces', 'GarageYrBlt', 'GarageCars', 'GarageArea', 'WoodDeckSF', 'OpenPorchSF', 'EnclosedPorch', '3SsnPorch', 'ScreenPorch', 'PoolArea', 'MiscVal', 'MoSold', 'YrSold', 'MSZoning_C (all)', 'MSZoning_FV', 'MSZoning_RH', 'MSZoning_RL', 'MSZoning_RM', 'Street', 'LotShape_IR1', 'LotShape_IR2', 'LotShape_IR3', 'LotShape_Reg', 'LandContour_Bnk', 'LandContour_HLS', 'LandContour_Low', 'LandContour_Lvl', 'Utilities', 'LotConfig_Corner', 'LotConfig_CulDSac', 'LotConfig_FR2', 'LotConfig_FR3', 'LotConfig_Inside', 'LandSlope_Gtl', 'LandSlope_Mod', 'LandSlope_Sev', 'Neighborhood_Blmngtn', 'Neighborhood_Blueste', 'Neighborhood_BrDale', 'Neighborhood_BrkSide', 'Neighborhood_ClearCr', 'Neighborhood_CollgCr', 'Neighborhood_Crawfor', 'Neighborhood_Edwards', 'Neighborhood_Gilbert', 'Neighborhood_IDOTRR', 'Neighborhood_MeadowV', 'Neighborhood_Mitchel', 'Neighborhood_NAmes', 'Neighborhood_NPkVill', 'Neighborhood_NWAmes', 'Neighborhood_NoRidge', 'Neighborhood_NridgHt', 'Neighborhood_OldTown', 'Neighborhood_SWISU', 'Neighborhood_Sawyer', 'Neighborhood_SawyerW', 'Neighborhood_Somerst', 'Neighborhood_StoneBr', 'Neighborhood_Timber', 'Neighborhood_Veenker', 'Condition1_Artery', 'Condition1_Feedr', 'Condition1_Norm', 'Condition1_PosA', 'Condition1_PosN', 'Condition1_RRAe', 'Condition1_RRAn', 'Condition1_RRNe', 'Condition1_RRNn', 'Condition2_Artery', 'Condition2_Feedr', 'Condition2_Norm', 'Condition2_PosA', 'Condition2_PosN', 'Condition2_RRAe', 'Condition2_RRAn', 'Condition2_RRNn', 'BldgType_1Fam', 'BldgType_2fmCon', 'BldgType_Duplex', 'BldgType_Twnhs', 'BldgType_TwnhsE', 'HouseStyle_1.5Fin', 'HouseStyle_1.5Unf', 'HouseStyle_1Story', 'HouseStyle_2.5Fin', 'HouseStyle_2.5Unf', 'HouseStyle_2Story', 'HouseStyle_SFoyer', 'HouseStyle_SLvl', 'RoofStyle_Flat', 'RoofStyle_Gable', 'RoofStyle_Gambrel', 'RoofStyle_Hip', 'RoofStyle_Mansard', 'RoofStyle_Shed', 'RoofMatl_ClyTile', 'RoofMatl_CompShg', 'RoofMatl_Membran', 'RoofMatl_Metal', 'RoofMatl_Roll', 'RoofMatl_Tar&Grv', 'RoofMatl_WdShake', 'RoofMatl_WdShngl', 'Exterior1st_AsbShng', 'Exterior1st_AsphShn', 'Exterior1st_BrkComm', 'Exterior1st_BrkFace', 'Exterior1st_CBlock', 'Exterior1st_CemntBd', 'Exterior1st_HdBoard', 'Exterior1st_ImStucc', 'Exterior1st_MetalSd', 'Exterior1st_Plywood', 'Exterior1st_Stone', 'Exterior1st_Stucco', 'Exterior1st_VinylSd', 'Exterior1st_Wd Sdng', 'Exterior1st_WdShing', 'Exterior2nd_AsbShng', 'Exterior2nd_AsphShn', 'Exterior2nd_Brk Cmn', 'Exterior2nd_BrkFace', 'Exterior2nd_CBlock', 'Exterior2nd_CmentBd', 'Exterior2nd_HdBoard', 'Exterior2nd_ImStucc', 'Exterior2nd_MetalSd', 'Exterior2nd_Other', 'Exterior2nd_Plywood', 'Exterior2nd_Stone', 'Exterior2nd_Stucco', 'Exterior2nd_VinylSd', 'Exterior2nd_Wd Sdng', 'Exterior2nd_Wd Shng', 'MasVnrType_BrkCmn', 'MasVnrType_BrkFace', 'MasVnrType_None', 'MasVnrType_Stone', 'ExterQual_Ex', 'ExterQual_Fa', 'ExterQual_Gd', 'ExterQual_TA', 'ExterCond_Ex', 'ExterCond_Fa', 'ExterCond_Gd', 'ExterCond_Po', 'ExterCond_TA', 'Foundation_BrkTil', 'Foundation_CBlock', 'Foundation_PConc', 'Foundation_Slab', 'Foundation_Stone', 'Foundation_Wood', 'BsmtQual_Ex', 'BsmtQual_Fa', 'BsmtQual_Gd', 'BsmtQual_TA', 'BsmtCond_Fa', 'BsmtCond_Gd', 'BsmtCond_Po', 'BsmtCond_TA', 'BsmtExposure_Av', 'BsmtExposure_Gd', 'BsmtExposure_Mn', 'BsmtExposure_No', 'BsmtFinType1_ALQ', 'BsmtFinType1_BLQ', 'BsmtFinType1_GLQ', 'BsmtFinType1_LwQ', 'BsmtFinType1_Rec', 'BsmtFinType1_Unf', 'BsmtFinType2_ALQ', 'BsmtFinType2_BLQ', 'BsmtFinType2_GLQ', 'BsmtFinType2_LwQ', 'BsmtFinType2_Rec', 'BsmtFinType2_Unf', 'Heating_Floor', 'Heating_GasA', 'Heating_GasW', 'Heating_Grav', 'Heating_OthW', 'Heating_Wall', 'HeatingQC_Ex', 'HeatingQC_Fa', 'HeatingQC_Gd', 'HeatingQC_Po', 'HeatingQC_TA', 'CentralAir', 'Electrical_FuseA', 'Electrical_FuseF', 'Electrical_FuseP', 'Electrical_Mix', 'Electrical_SBrkr', 'KitchenQual_Ex', 'KitchenQual_Fa', 'KitchenQual_Gd', 'KitchenQual_TA', 'Functional_Maj1', 'Functional_Maj2', 'Functional_Min1', 'Functional_Min2', 'Functional_Mod', 'Functional_Sev', 'Functional_Typ', 'FireplaceQu_Ex', 'FireplaceQu_Fa', 'FireplaceQu_Gd', 'FireplaceQu_Po', 'FireplaceQu_TA', 'GarageType_2Types', 'GarageType_Attchd', 'GarageType_Basment', 'GarageType_BuiltIn', 'GarageType_CarPort', 'GarageType_Detchd', 'GarageFinish_Fin', 'GarageFinish_RFn', 'GarageFinish_Unf', 'GarageQual_Ex', 'GarageQual_Fa', 'GarageQual_Gd', 'GarageQual_Po', 'GarageQual_TA', 'GarageCond_Ex', 'GarageCond_Fa', 'GarageCond_Gd', 'GarageCond_Po', 'GarageCond_TA', 'PavedDrive_N', 'PavedDrive_P', 'PavedDrive_Y', 'SaleType_COD', 'SaleType_CWD', 'SaleType_Con', 'SaleType_ConLD', 'SaleType_ConLI', 'SaleType_ConLw', 'SaleType_New', 'SaleType_Oth', 'SaleType_WD', 'SaleCondition_Abnorml', 'SaleCondition_AdjLand', 'SaleCondition_Alloca', 'SaleCondition_Family', 'SaleCondition_Normal', 'SaleCondition_Partial']
expected f123, f80, f115, f222, f58, f147, f103, f220, f244, f39, f143, f268, f10, f105, f251, f142, f226, f86, f132, f11, f175, f79, f27, f64, f154, f225, f241, f256, f126, f14, f130, f42, f20, f174, f179, f183, f178, f155, f16, f181, f47, f124, f163, f38, f167, f104, f83, f207, f201, f189, f200, f134, f107, f231, f269, f165, f169, f15, f239, f117, f49, f242, f233, f188, f92, f229, f195, f24, f259, f26, f13, f262, f60, f1, f101, f166, f40, f180, f216, f152, f265, f125, f62, f224, f5, f177, f227, f136, f230, f30, f29, f93, f18, f0, f33, f53, f76, f77, f50, f61, f87, f198, f212, f221, f234, f219, f232, f69, f194, f258, f254, f71, f112, f108, f6, f159, f148, f211, f98, f116, f95, f263, f91, f213, f110, f21, f135, f249, f144, f32, f140, f66, f7, f52, f48, f88, f99, f67, f247, f209, f36, f204, f63, f73, f75, f246, f46, f55, f146, f187, f151, f65, f133, f236, f4, f150, f57, f182, f2, f12, f45, f102, f173, f223, f170, f199, f245, f168, f54, f206, f3, f261, f192, f137, f23, f162, f202, f149, f59, f158, f74, f41, f160, f96, f252, f119, f266, f172, f81, f240, f97, f237, f9, f257, f8, f127, f37, f70, f68, f34, f56, f43, f250, f145, f141, f118, f153, f267, f109, f271, f157, f196, f84, f35, f120, f197, f208, f270, f205, f214, f19, f78, f94, f90, f122, f260, f106, f215, f51, f217, f253, f31, f85, f210, f185, f128, f156, f191, f17, f176, f186, f228, f255, f22, f89, f131, f28, f264, f139, f190, f111, f82, f121, f171, f44, f114, f235, f248, f184, f203, f113, f100, f238, f138, f129, f218, f243, f72, f25, f193, f161, f164 in input data
training data did not have the following fields: Heating_Grav, Condition1_RRAe, Exterior2nd_HdBoard, BedroomAbvGr, Neighborhood_SawyerW, Exterior2nd_CBlock, GarageCond_Fa, RoofStyle_Gambrel, Neighborhood_NoRidge, BsmtExposure_No, BsmtFullBath, BsmtFinType1_LwQ, Neighborhood_OldTown, RoofMatl_CompShg, BldgType_Twnhs, ExterQual_Ex, TotalBsmtSF, ExterCond_Fa, Functional_Sev, LotShape_IR1, FireplaceQu_Ex, Neighborhood_MeadowV, Condition1_RRNe, Exterior2nd_AsphShn, BsmtFinType1_GLQ, KitchenQual_TA, 3SsnPorch, BsmtFinType1_Unf, OpenPorchSF, MSSubClass, Condition2_PosN, Neighborhood_Sawyer, GarageArea, GarageQual_Ex, Neighborhood_BrkSide, Condition2_RRAe, Condition2_RRNn, Fireplaces, RoofMatl_Roll, Neighborhood_NPkVill, Exterior2nd_Other, LotShape_Reg, LotFrontage, GarageFinish_Unf, Neighborhood_Blueste, Exterior2nd_MetalSd, KitchenQual_Fa, Functional_Maj2, WoodDeckSF, LotConfig_Corner, Neighborhood_NWAmes, Condition2_RRAn, Exterior2nd_Wd Sdng, ExterCond_Gd, Foundation_Wood, BsmtQual_TA, BsmtCond_Fa, Condition1_RRNn, SaleCondition_AdjLand, KitchenAbvGr, RoofStyle_Flat, Exterior2nd_ImStucc, HouseStyle_1.5Fin, RoofStyle_Mansard, Foundation_PConc, MasVnrType_BrkCmn, GarageCars, LowQualFinSF, SaleType_New, GarageQual_Gd, BsmtCond_Gd, Exterior1st_WdShing, Exterior2nd_Stucco, BsmtFinSF1, Condition1_PosA, Exterior1st_AsphShn, ExterCond_TA, SaleType_CWD, LandSlope_Mod, BsmtExposure_Av, Utilities, MasVnrType_Stone, Neighborhood_StoneBr, Exterior2nd_Stone, Exterior2nd_VinylSd, GarageType_Attchd, EnclosedPorch, Exterior1st_Stucco, ExterQual_Fa, Exterior2nd_Wd Shng, Condition2_PosA, Condition2_Artery, MSZoning_RH, LandContour_Bnk, LotConfig_CulDSac, OverallCond, PavedDrive_Y, BsmtFinType2_LwQ, Exterior2nd_Plywood, ExterCond_Po, SaleType_ConLD, LotShape_IR2, LotShape_IR3, HouseStyle_2Story, Electrical_FuseA, FireplaceQu_TA, Exterior1st_ImStucc, RoofMatl_Metal, Heating_GasW, HouseStyle_SLvl, SaleType_COD, BsmtQual_Ex, SaleCondition_Partial, Exterior1st_Stone, HouseStyle_2.5Unf, PavedDrive_P, Neighborhood_Mitchel, BsmtQual_Fa, 1stFlrSF, BldgType_1Fam, LandContour_Lvl, RoofStyle_Gable, KitchenQual_Ex, Heating_Wall, Exterior1st_AsbShng, LandContour_HLS, Exterior1st_BrkFace, MasVnrArea, Neighborhood_Blmngtn, RoofStyle_Shed, GarageType_2Types, SaleCondition_Alloca, Neighborhood_NAmes, Condition1_Norm, Heating_GasA, GarageCond_Gd, Neighborhood_NridgHt, Exterior1st_VinylSd, SaleType_ConLw, LotConfig_FR2, Condition2_Feedr, ExterQual_Gd, BsmtExposure_Gd, BsmtFinType2_BLQ, SaleType_Oth, SaleType_WD, Neighborhood_IDOTRR, Exterior1st_MetalSd, BsmtExposure_Mn, RoofStyle_Hip, Condition1_Feedr, Exterior2nd_AsbShng, Exterior2nd_BrkFace, Neighborhood_Somerst, LandSlope_Sev, Exterior1st_HdBoard, Exterior2nd_CmentBd, GarageType_CarPort, GarageFinish_RFn, 2ndFlrSF, BsmtFinType1_BLQ, SaleType_Con, Exterior2nd_Brk Cmn, Exterior1st_Plywood, HeatingQC_Fa, TotRmsAbvGrd, RoofMatl_Tar&Grv, GarageYrBlt, Neighborhood_Edwards, CentralAir, BsmtQual_Gd, Exterior1st_Wd Sdng, Condition1_RRAn, FireplaceQu_Fa, HeatingQC_Ex, FireplaceQu_Gd, OverallQual, Foundation_Slab, MSZoning_C (all), BldgType_Duplex, Foundation_CBlock, BsmtFinType1_Rec, LotConfig_Inside, BsmtFinType2_Unf, ScreenPorch, Electrical_FuseP, Condition2_Norm, RoofMatl_ClyTile, Electrical_Mix, ExterQual_TA, YearRemodAdd, KitchenQual_Gd, Functional_Min2, GarageQual_Fa, MasVnrType_BrkFace, PavedDrive_N, Street, BsmtFinType2_Rec, HouseStyle_1.5Unf, FullBath, LotConfig_FR3, BsmtCond_Po, MSZoning_FV, LotArea, Functional_Min1, Functional_Mod, SaleCondition_Abnorml, MoSold, SaleCondition_Normal, Foundation_BrkTil, YearBuilt, HalfBath, Neighborhood_SWISU, BsmtCond_TA, SaleType_ConLI, HouseStyle_SFoyer, BsmtFinType2_ALQ, Heating_OthW, HeatingQC_TA, GarageCond_Po, MiscVal, ExterCond_Ex, MSZoning_RM, Neighborhood_ClearCr, Neighborhood_Gilbert, HouseStyle_1Story, GarageCond_Ex, HeatingQC_Po, PoolArea, BsmtHalfBath, Neighborhood_CollgCr, Neighborhood_Timber, Exterior1st_BrkComm, Condition1_PosN, Exterior1st_CemntBd, BsmtFinType1_ALQ, Electrical_SBrkr, LandContour_Low, Neighborhood_Veenker, Electrical_FuseF, GarageType_Basment, HeatingQC_Gd, Exterior1st_CBlock, GrLivArea, Neighborhood_BrDale, GarageCond_TA, YrSold, BsmtFinType2_GLQ, Foundation_Stone, HouseStyle_2.5Fin, RoofMatl_WdShake, GarageQual_Po, BsmtFinSF2, GarageType_Detchd, SaleCondition_Family, RoofMatl_Membran, MSZoning_RL, LandSlope_Gtl, GarageQual_TA, RoofMatl_WdShngl, MasVnrType_None, BsmtUnfSF, Heating_Floor, Condition1_Artery, GarageFinish_Fin, Functional_Maj1, Neighborhood_Crawfor, GarageType_BuiltIn, Functional_Typ, BldgType_2fmCon, FireplaceQu_Po, BldgType_TwnhsE

In [37]:
!pip install git+https://github.com/tensorflow/docs

Collecting git+https://github.com/tensorflow/docs
  Cloning https://github.com/tensorflow/docs to c:\users\hassa\appdata\local\temp\pip-req-build-f9yi3uxp
Collecting astor
  Downloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Building wheels for collected packages: tensorflow-docs
  Building wheel for tensorflow-docs (setup.py): started
  Building wheel for tensorflow-docs (setup.py): finished with status 'done'
  Created wheel for tensorflow-docs: filename=tensorflow_docs-0.0.0ddfe55f6903a44ad2e7a214f7578f6d349ef4a8a_-py3-none-any.whl size=111063 sha256=b03f318820f7cc78cec6ff8cab78c4d5beba0c0311ecc3b91693697fad7e372c
  Stored in directory: C:\Users\hassa\AppData\Local\Temp\pip-ephem-wheel-cache-83xa9v7f\wheels\cc\c4\d8\5341e93b6376c5c929c49469fce21155eb69cef1a4da4ce32c
Successfully built tensorflow-docs
Installing collected packages: astor, tensorflow-docs
Successfully installed astor-0.8.1 tensorflow-docs-0.0.0ddfe55f6903a44ad2e7a214f7578f6d349ef4a8a-


  Running command git clone -q https://github.com/tensorflow/docs 'C:\Users\hassa\AppData\Local\Temp\pip-req-build-f9yi3uxp'


In [91]:
%load_ext tensorboard

In [183]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
import tensorflow_docs as tfdocs
import tensorflow_docs.plots
import tensorflow_docs.modeling

In [192]:
model = Sequential()

In [None]:
dense_layer = Dense(5)

In [322]:
def build_model():
  model = Sequential([
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(1)
  ])

  optimizer = tf.keras.optimizers.Adam()

  model.compile(loss='msle',
                optimizer=optimizer,
                metrics=['mae', 'mse', 'msle'])
  return model

In [323]:
x_input = tf.constant([[1,2.]], shape=(1,2))

In [324]:
model = build_model()

In [325]:
EPOCHS = 1000
log_dir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
history = model.fit(
  X, y,
  epochs=EPOCHS, validation_split = 0.2, verbose=0,
  callbacks=[tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)])



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



In [None]:
# %tensorboard --logdir logs/fit

In [326]:
pred = model.predict(X_test).flatten()

In [327]:
pred

array([145048.55 , 166108.72 , 190419.4  , ..., 179819.02 , 110505.414,
       240974.47 ], dtype=float32)

In [328]:
pred_series = pd.Series(pred, name='SalePrice')
output = pd.concat([Id_col, pred_series], axis=1)
output.to_csv(f'./submissions/tf_kaggle_submssion_{datetime.now().strftime("%d-%b-%Y_(%H-%M-%S)")}.csv', index = False)