In [98]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.linear_model import LinearRegression, Lasso
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, MinMaxScaler

pd.pandas.set_option('display.max_columns', None)

In [82]:
df = pd.read_csv('../data/train_dummyfixed.csv')
final_df = pd.read_csv('../data/test_dummyfixed.csv')

In [83]:
print(df.shape)
print(final_df.shape)

(2051, 207)
(878, 206)


In [84]:
X = df.drop(columns = ['SalePrice'])
y = df['SalePrice']

In [85]:
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state= 13)

In [86]:
feature_scale=[feature for feature in df.columns if feature not in ['Id', 'SalePrice']]

scaler=StandardScaler()
scaler.fit(X_train[feature_scale])
scaler.transform(X_train[feature_scale])

array([[-0.97596858, -0.86699674,  0.65075791, ...,  3.43753788,
        -0.13617285, -2.61666558],
       [-0.93912449,  0.06352733,  0.11859579, ..., -0.29090589,
        -0.13617285,  0.38216576],
       [-0.9830266 ,  1.45931342, -1.57256107, ..., -0.29090589,
        -0.13617285,  0.38216576],
       ...,
       [-0.9762123 , -0.86699674,  0.53595394, ..., -0.29090589,
        -0.13617285,  0.38216576],
       [-0.97617009, -0.86699674,  0.33442701, ..., -0.29090589,
        -0.13617285,  0.38216576],
       [-0.9760687 ,  0.06352733,  1.63649906, ...,  3.43753788,
        -0.13617285, -2.61666558]])

In [87]:
#transform the train and test set, and add on the Id and SalePrice variables
new_X_train = pd.concat([X_train[['Id']].reset_index(drop=True),
                    pd.DataFrame(scaler.transform(X_train[feature_scale]), columns=feature_scale)],
                    axis=1)
new_X_train.head(2)

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Neighborhood,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,TotRms AbvGrd,Fireplaces,Garage Yr Blt,Garage Cars,Garage Area,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,Lot Frontage_nans,Mas Vnr Area_nans,BsmtFin SF 1_nans,BsmtFin SF 2_nans,Bsmt Unf SF_nans,Total Bsmt SF_nans,Bsmt Full Bath_nans,Bsmt Half Bath_nans,Garage Yr Blt_nans,Garage Cars_nans,Garage Area_nans,basement_livable,Functional_Num,ExterCond_Num,ExterQual_Num,KitchenQual_Num,neigh_score,MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,MS Zoning_Rare_var,Street_Rare_var,Alley_Missing,Alley_Pave,Lot Shape_IR2,Lot Shape_Rare_var,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_Rare_var,Lot Config_CulDSac,Lot Config_FR2,Lot Config_Inside,Lot Config_Rare_var,Land Slope_Mod,Land Slope_Rare_var,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_Rare_var,Condition 2_Rare_var,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1Story,House Style_2.5Unf,House Style_2Story,House Style_Rare_var,House Style_SFoyer,House Style_SLvl,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Rare_var,Roof Matl_Rare_var,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_Rare_var,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_BrkFace,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_MetalSd,Exterior 2nd_Plywood,Exterior 2nd_Rare_var,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Mas Vnr Type_BrkFace,Mas Vnr Type_Missing,Mas Vnr Type_None,Mas Vnr Type_Rare_var,Mas Vnr Type_Stone,Exter Qual_Fa,Exter Qual_Gd,Exter Qual_TA,Exter Cond_Gd,Exter Cond_Rare_var,Exter Cond_TA,Foundation_CBlock,Foundation_PConc,Foundation_Rare_var,Foundation_Slab,Bsmt Qual_Fa,Bsmt Qual_Gd,Bsmt Qual_Missing,Bsmt Qual_Rare_var,Bsmt Qual_TA,Bsmt Cond_Gd,Bsmt Cond_Missing,Bsmt Cond_Rare_var,Bsmt Cond_TA,Bsmt Exposure_Gd,Bsmt Exposure_Missing,Bsmt Exposure_Mn,Bsmt Exposure_No,BsmtFin Type 1_BLQ,BsmtFin Type 1_GLQ,BsmtFin Type 1_LwQ,BsmtFin Type 1_Missing,BsmtFin Type 1_Rec,BsmtFin Type 1_Unf,BsmtFin Type 2_BLQ,BsmtFin Type 2_GLQ,BsmtFin Type 2_LwQ,BsmtFin Type 2_Missing,BsmtFin Type 2_Rec,BsmtFin Type 2_Unf,Heating_Rare_var,Heating QC_Fa,Heating QC_Gd,Heating QC_Rare_var,Heating QC_TA,Central Air_Y,Electrical_FuseF,Electrical_Rare_var,Electrical_SBrkr,Kitchen Qual_Fa,Kitchen Qual_Gd,Kitchen Qual_Rare_var,Kitchen Qual_TA,Functional_Min2,Functional_Mod,Functional_Rare_var,Functional_Typ,Fireplace Qu_Fa,Fireplace Qu_Gd,Fireplace Qu_Missing,Fireplace Qu_Po,Fireplace Qu_TA,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_Detchd,Garage Type_Missing,Garage Type_Rare_var,Garage Finish_Missing,Garage Finish_RFn,Garage Finish_Unf,Garage Qual_Missing,Garage Qual_Rare_var,Garage Qual_TA,Garage Cond_Missing,Garage Cond_Rare_var,Garage Cond_TA,Paved Drive_P,Paved Drive_Y,Pool QC_Rare_var,Fence_GdWo,Fence_Missing,Fence_MnPrv,Fence_Rare_var,Misc Feature_Rare_var,Misc Feature_Shed,Sale Type_ConLD,Sale Type_New,Sale Type_Rare_var,Sale Type_WD
0,2463,-0.975969,-0.866997,0.650758,0.446504,1.610711,1.336728,-0.509137,-1.838635,-1.691424,-0.564662,-0.974398,-0.282909,1.629873,0.895136,0.919014,-0.774401,-0.113022,0.141369,-0.796882,-0.25419,0.77064,-0.73946,0.177827,-0.218701,0.370288,0.616281,-1.871514,0.297304,1.087226,-0.717376,1.350072,-0.377201,-0.106175,-0.292606,-0.058807,-0.092266,-0.06665,-1.336909,-0.43318,-0.105721,-0.025507,-0.025507,-0.025507,-0.025507,-0.036084,-0.036084,-0.252238,-0.025507,-0.025507,-1.0279,0.206909,-0.204156,1.021237,0.734201,0.802776,0.0,-1.898629,-0.420405,-0.136173,-0.05711,0.263706,-0.167556,-0.175588,-0.07231,0.765224,-0.211747,-0.154816,0.335976,-0.025507,-0.266516,-0.173611,0.61292,-0.05711,-0.215078,-0.076722,-0.224825,0.389949,-0.105721,-0.095845,-0.133675,-0.099242,-0.108821,-0.1526,-0.206668,-0.196182,-0.290906,0.960469,0.0,-0.649185,-0.128544,-0.150354,-0.216727,0.0,-0.486142,-0.131133,-0.114783,-0.177546,-0.224825,-0.406388,-0.44372,-0.290906,-0.067618,-0.117657,1.365306,-0.388841,-0.145768,-0.125905,-0.224825,-0.387733,-0.436351,-0.32017,-0.143425,-0.117657,1.371186,-0.381048,-0.175588,-0.662365,-0.105721,0.818489,-0.07231,-0.294832,-0.117657,1.3931,-1.246923,-0.352518,-0.067618,0.400942,-0.858984,1.110054,-0.05711,-0.138629,-0.173611,1.15492,-0.171613,0.0,-0.863564,4.879186,-0.171613,-0.05711,-2.965835,-0.337173,-0.173611,-0.289589,0.730342,-0.331162,-0.64413,-0.221615,-0.171613,-0.321403,1.526108,-0.157004,-0.10253,-0.177546,-0.171613,-0.19258,0.41396,-0.131133,-0.188919,-0.44372,-0.036084,-0.65121,0.274817,-0.136173,-0.067618,0.327523,-0.157004,1.246923,0.0,-1.02502,-0.138629,-0.120465,-0.108821,0.25945,-0.179485,1.712747,-0.983235,-0.125905,-0.491247,-0.114783,-0.255141,-0.582856,-0.250776,-0.125905,-0.252238,1.562279,-0.827379,-0.252238,-0.105721,0.352518,-0.252238,-0.108821,0.32017,-0.143425,0.325084,-0.062582,-0.196182,0.48512,-0.352518,-0.07231,-0.062582,-0.177546,0.0,3.437538,-0.136173,-2.616666
1,2618,-0.939124,0.063527,0.118596,1.403726,-0.347608,0.630965,2.222443,0.468815,-0.555344,-0.564662,0.447578,-0.282909,0.649282,0.810941,1.667683,0.515689,-0.113022,1.690296,1.112159,-0.25419,2.596902,-0.73946,0.177827,-0.218701,1.652954,2.145057,0.729023,0.297304,0.040859,3.309099,0.789167,-0.377201,-0.106175,-0.292606,-0.058807,-0.092266,1.01617,-1.336909,2.30851,-0.105721,-0.025507,-0.025507,-0.025507,-0.025507,-0.036084,-0.036084,-0.252238,-0.025507,-0.025507,0.327921,0.206909,-0.204156,-0.686005,-0.769396,0.97513,0.0,0.526696,-0.420405,-0.136173,-0.05711,0.263706,-0.167556,-0.175588,-0.07231,-1.306808,4.722608,-0.154816,-2.976403,-0.025507,-0.266516,-0.173611,0.61292,-0.05711,-0.215078,-0.076722,-0.224825,0.389949,-0.105721,-0.095845,-0.133675,-0.099242,-0.108821,-0.1526,-0.206668,-0.196182,-0.290906,-1.041158,0.0,1.540392,-0.128544,-0.150354,-0.216727,0.0,-0.486142,-0.131133,-0.114783,-0.177546,-0.224825,-0.406388,-0.44372,3.437538,-0.067618,-0.117657,-0.732437,-0.388841,-0.145768,-0.125905,-0.224825,-0.387733,-0.436351,3.123339,-0.143425,-0.117657,-0.729296,-0.381048,-0.175588,-0.662365,-0.105721,0.818489,-0.07231,-0.294832,-0.117657,-0.717824,0.801974,-0.352518,-0.067618,0.400942,1.164166,-0.900857,-0.05711,-0.138629,-0.173611,-0.865861,-0.171613,0.0,1.157991,-0.204952,-0.171613,-0.05711,0.337173,2.965835,-0.173611,-0.289589,-1.369221,-0.331162,-0.64413,-0.221615,-0.171613,-0.321403,-0.655262,-0.157004,-0.10253,-0.177546,-0.171613,-0.19258,0.41396,-0.131133,-0.188919,-0.44372,-0.036084,1.535604,0.274817,-0.136173,-0.067618,0.327523,-0.157004,-0.801974,0.0,0.97559,-0.138629,-0.120465,-0.108821,0.25945,-0.179485,1.712747,-0.983235,-0.125905,-0.491247,-0.114783,-0.255141,-0.582856,-0.250776,-0.125905,-0.252238,1.562279,-0.827379,-0.252238,-0.105721,0.352518,-0.252238,-0.108821,0.32017,-0.143425,0.325084,-0.062582,-0.196182,0.48512,-0.352518,-0.07231,-0.062582,-0.177546,0.0,-0.290906,-0.136173,0.382166


In [88]:
new_X_test = pd.concat([X_test[['Id']].reset_index(drop=True),
                    pd.DataFrame(scaler.transform(X_test[feature_scale]), columns=feature_scale)],
                    axis=1)
new_X_test.head(2)

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Neighborhood,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,TotRms AbvGrd,Fireplaces,Garage Yr Blt,Garage Cars,Garage Area,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,Lot Frontage_nans,Mas Vnr Area_nans,BsmtFin SF 1_nans,BsmtFin SF 2_nans,Bsmt Unf SF_nans,Total Bsmt SF_nans,Bsmt Full Bath_nans,Bsmt Half Bath_nans,Garage Yr Blt_nans,Garage Cars_nans,Garage Area_nans,basement_livable,Functional_Num,ExterCond_Num,ExterQual_Num,KitchenQual_Num,neigh_score,MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,MS Zoning_Rare_var,Street_Rare_var,Alley_Missing,Alley_Pave,Lot Shape_IR2,Lot Shape_Rare_var,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_Rare_var,Lot Config_CulDSac,Lot Config_FR2,Lot Config_Inside,Lot Config_Rare_var,Land Slope_Mod,Land Slope_Rare_var,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_Rare_var,Condition 2_Rare_var,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1Story,House Style_2.5Unf,House Style_2Story,House Style_Rare_var,House Style_SFoyer,House Style_SLvl,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Rare_var,Roof Matl_Rare_var,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_Rare_var,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_BrkFace,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_MetalSd,Exterior 2nd_Plywood,Exterior 2nd_Rare_var,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Mas Vnr Type_BrkFace,Mas Vnr Type_Missing,Mas Vnr Type_None,Mas Vnr Type_Rare_var,Mas Vnr Type_Stone,Exter Qual_Fa,Exter Qual_Gd,Exter Qual_TA,Exter Cond_Gd,Exter Cond_Rare_var,Exter Cond_TA,Foundation_CBlock,Foundation_PConc,Foundation_Rare_var,Foundation_Slab,Bsmt Qual_Fa,Bsmt Qual_Gd,Bsmt Qual_Missing,Bsmt Qual_Rare_var,Bsmt Qual_TA,Bsmt Cond_Gd,Bsmt Cond_Missing,Bsmt Cond_Rare_var,Bsmt Cond_TA,Bsmt Exposure_Gd,Bsmt Exposure_Missing,Bsmt Exposure_Mn,Bsmt Exposure_No,BsmtFin Type 1_BLQ,BsmtFin Type 1_GLQ,BsmtFin Type 1_LwQ,BsmtFin Type 1_Missing,BsmtFin Type 1_Rec,BsmtFin Type 1_Unf,BsmtFin Type 2_BLQ,BsmtFin Type 2_GLQ,BsmtFin Type 2_LwQ,BsmtFin Type 2_Missing,BsmtFin Type 2_Rec,BsmtFin Type 2_Unf,Heating_Rare_var,Heating QC_Fa,Heating QC_Gd,Heating QC_Rare_var,Heating QC_TA,Central Air_Y,Electrical_FuseF,Electrical_Rare_var,Electrical_SBrkr,Kitchen Qual_Fa,Kitchen Qual_Gd,Kitchen Qual_Rare_var,Kitchen Qual_TA,Functional_Min2,Functional_Mod,Functional_Rare_var,Functional_Typ,Fireplace Qu_Fa,Fireplace Qu_Gd,Fireplace Qu_Missing,Fireplace Qu_Po,Fireplace Qu_TA,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_Detchd,Garage Type_Missing,Garage Type_Rare_var,Garage Finish_Missing,Garage Finish_RFn,Garage Finish_Unf,Garage Qual_Missing,Garage Qual_Rare_var,Garage Qual_TA,Garage Cond_Missing,Garage Cond_Rare_var,Garage Cond_TA,Paved Drive_P,Paved Drive_Y,Pool QC_Rare_var,Fence_GdWo,Fence_Missing,Fence_MnPrv,Fence_Rare_var,Misc Feature_Rare_var,Misc Feature_Shed,Sale Type_ConLD,Sale Type_New,Sale Type_Rare_var,Sale Type_WD
0,1012,-0.982677,0.063527,0.496703,0.443354,1.610711,2.042491,-0.509137,-1.236122,-1.289759,0.821004,-0.974398,-0.282909,1.143654,0.252069,0.082487,2.450823,-0.113022,1.760763,-0.796882,-0.25419,0.77064,1.24188,1.382665,-0.218701,2.93562,0.616281,-1.192471,1.59632,0.744741,0.397648,-0.140754,-0.377201,-0.106175,-0.292606,-0.058807,-0.092266,-0.06665,0.188437,-0.43318,-0.105721,-0.025507,-0.025507,-0.025507,-0.025507,-0.036084,-0.036084,-0.252238,-0.025507,-0.025507,-1.0279,0.206909,-0.204156,1.021237,2.237798,1.145982,0.0,0.526696,-0.420405,-0.136173,-0.05711,0.263706,-0.167556,-0.175588,-0.07231,-1.306808,-0.211747,-0.154816,0.335976,-0.025507,-0.266516,-0.173611,0.61292,-0.05711,-0.215078,-0.076722,-0.224825,0.389949,-0.105721,-0.095845,-0.133675,-0.099242,-0.108821,-0.1526,-0.206668,-0.196182,-0.290906,-1.041158,0.0,1.540392,-0.128544,-0.150354,-0.216727,0.0,2.057011,-0.131133,-0.114783,-0.177546,-0.224825,-0.406388,-0.44372,-0.290906,-0.067618,-0.117657,1.365306,-0.388841,-0.145768,-0.125905,-0.224825,-0.387733,-0.436351,-0.32017,-0.143425,-0.117657,1.371186,-0.381048,-0.175588,1.509741,-0.105721,-1.221764,-0.07231,-0.294832,-0.117657,1.3931,-1.246923,-0.352518,-0.067618,0.400942,-0.858984,1.110054,-0.05711,-0.138629,-0.173611,1.15492,-0.171613,0.0,-0.863564,4.879186,-0.171613,-0.05711,-2.965835,-0.337173,-0.173611,-0.289589,0.730342,-0.331162,-0.64413,-0.221615,-0.171613,-0.321403,1.526108,-0.157004,-0.10253,-0.177546,-0.171613,-0.19258,0.41396,-0.131133,-0.188919,-0.44372,-0.036084,-0.65121,0.274817,-0.136173,-0.067618,0.327523,-0.157004,-0.801974,0.0,-1.02502,-0.138629,-0.120465,-0.108821,0.25945,-0.179485,1.712747,-0.983235,-0.125905,-0.491247,-0.114783,3.919401,-0.582856,-0.250776,-0.125905,-0.252238,-0.640091,-0.827379,-0.252238,-0.105721,0.352518,-0.252238,-0.108821,0.32017,-0.143425,0.325084,-0.062582,-0.196182,0.48512,-0.352518,-0.07231,-0.062582,-0.177546,0.0,-0.290906,-0.136173,0.382166
1,2704,1.016955,-0.634366,-0.816755,-1.053162,-1.326768,-0.780561,-2.33019,1.071781,1.314362,-0.564662,-0.974398,-0.282909,0.696088,-0.339876,-0.699884,-0.774401,-0.113022,-1.500739,-0.796882,-0.25419,-1.055622,-0.73946,-1.02701,-0.218701,-1.55371,-0.912494,1.408575,-1.001712,-1.116421,-0.717376,0.877731,-0.377201,-0.106175,-0.292606,-0.058807,-0.092266,-1.871349,-1.336909,-0.43318,-0.105721,-0.025507,-0.025507,-0.025507,-0.025507,-0.036084,-0.036084,-0.252238,-0.025507,-0.025507,-1.0279,0.206909,-0.204156,-0.686005,-0.769396,-1.596988,0.0,0.526696,-0.420405,-0.136173,-0.05711,0.263706,-0.167556,-0.175588,-0.07231,-1.306808,-0.211747,-0.154816,0.335976,-0.025507,-0.266516,-0.173611,0.61292,-0.05711,-0.215078,-0.076722,-0.224825,0.389949,-0.105721,-0.095845,-0.133675,-0.099242,-0.108821,-0.1526,-0.206668,-0.196182,-0.290906,0.960469,0.0,-0.649185,-0.128544,-0.150354,-0.216727,0.0,-0.486142,-0.131133,-0.114783,-0.177546,-0.224825,-0.406388,-0.44372,-0.290906,-0.067618,-0.117657,-0.732437,2.571743,-0.145768,-0.125905,-0.224825,-0.387733,-0.436351,-0.32017,-0.143425,-0.117657,-0.729296,-0.381048,5.695154,-0.662365,-0.105721,0.818489,-0.07231,-0.294832,-0.117657,-0.717824,0.801974,-0.352518,-0.067618,0.400942,-0.858984,-0.900857,-0.05711,-0.138629,-0.173611,-0.865861,-0.171613,0.0,1.157991,-0.204952,-0.171613,-0.05711,0.337173,-0.337173,-0.173611,-0.289589,0.730342,-0.331162,-0.64413,-0.221615,-0.171613,-0.321403,1.526108,-0.157004,-0.10253,-0.177546,-0.171613,-0.19258,0.41396,-0.131133,-0.188919,2.253675,-0.036084,-0.65121,0.274817,-0.136173,-0.067618,0.327523,-0.157004,-0.801974,0.0,0.97559,-0.138629,-0.120465,-0.108821,0.25945,-0.179485,-0.583857,1.01705,-0.125905,-0.491247,-0.114783,-0.255141,1.715689,-0.250776,-0.125905,-0.252238,-0.640091,1.208636,-0.252238,-0.105721,0.352518,-0.252238,-0.108821,0.32017,6.972295,-3.076132,-0.062582,-0.196182,0.48512,-0.352518,-0.07231,-0.062582,-0.177546,0.0,-0.290906,-0.136173,0.382166


In [89]:
final_feature_scale = [feature for feature in final_df.columns if feature not in ['Id']]

scaler=StandardScaler()
scaler.fit(final_df[final_feature_scale])
scaler.transform(final_df[final_feature_scale])

array([[ 0.98523208,  3.12075338,  0.14013017, ..., -0.3122499 ,
        -0.11771496,  0.40362595],
       [ 1.0000993 ,  0.7517222 ,  0.09650228, ..., -0.3122499 ,
        -0.11771496,  0.40362595],
       [-0.99611193,  0.04101284, -0.3788558 , ...,  3.20256308,
        -0.11771496, -2.4775414 ],
       ...,
       [ 0.99476013, -0.90659963, -0.53757226, ..., -0.3122499 ,
        -0.11771496,  0.40362595],
       [-1.00031203, -0.90659963, -0.27754245, ..., -0.3122499 ,
        -0.11771496,  0.40362595],
       [-0.95845869, -0.90659963,  0.1831303 , ..., -0.3122499 ,
        -0.11771496,  0.40362595]])

In [90]:
#transform the train and test set, and add on the Id and SalePrice variables
new_final_df = pd.concat([final_df[['Id']].reset_index(drop=True),
                    pd.DataFrame(scaler.transform(final_df[final_feature_scale]), columns=final_feature_scale)],
                    axis=1)
new_final_df.head(2)

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Neighborhood,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,TotRms AbvGrd,Fireplaces,Garage Yr Blt,Garage Cars,Garage Area,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,Lot Frontage_nans,Mas Vnr Area_nans,BsmtFin SF 1_nans,BsmtFin SF 2_nans,Bsmt Unf SF_nans,Total Bsmt SF_nans,Bsmt Full Bath_nans,Bsmt Half Bath_nans,Garage Yr Blt_nans,Garage Cars_nans,Garage Area_nans,basement_livable,Functional_Num,ExterCond_Num,ExterQual_Num,KitchenQual_Num,neigh_score,MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,MS Zoning_Rare_var,Street_Rare_var,Alley_Missing,Alley_Pave,Lot Shape_IR2,Lot Shape_Rare_var,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_Rare_var,Lot Config_CulDSac,Lot Config_FR2,Lot Config_Inside,Lot Config_Rare_var,Land Slope_Mod,Land Slope_Rare_var,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_Rare_var,Condition 2_Rare_var,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1Story,House Style_2.5Unf,House Style_2Story,House Style_Rare_var,House Style_SFoyer,House Style_SLvl,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Rare_var,Roof Matl_Rare_var,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_Rare_var,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_BrkFace,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_MetalSd,Exterior 2nd_Plywood,Exterior 2nd_Rare_var,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Mas Vnr Type_BrkFace,Mas Vnr Type_Missing,Mas Vnr Type_None,Mas Vnr Type_Rare_var,Mas Vnr Type_Stone,Exter Qual_Fa,Exter Qual_Gd,Exter Qual_TA,Exter Cond_Gd,Exter Cond_Rare_var,Exter Cond_TA,Foundation_CBlock,Foundation_PConc,Foundation_Rare_var,Foundation_Slab,Bsmt Qual_Fa,Bsmt Qual_Gd,Bsmt Qual_Missing,Bsmt Qual_Rare_var,Bsmt Qual_TA,Bsmt Cond_Gd,Bsmt Cond_Missing,Bsmt Cond_Rare_var,Bsmt Cond_TA,Bsmt Exposure_Gd,Bsmt Exposure_Missing,Bsmt Exposure_Mn,Bsmt Exposure_No,BsmtFin Type 1_BLQ,BsmtFin Type 1_GLQ,BsmtFin Type 1_LwQ,BsmtFin Type 1_Missing,BsmtFin Type 1_Rec,BsmtFin Type 1_Unf,BsmtFin Type 2_BLQ,BsmtFin Type 2_GLQ,BsmtFin Type 2_LwQ,BsmtFin Type 2_Missing,BsmtFin Type 2_Rec,BsmtFin Type 2_Unf,Heating_Rare_var,Heating QC_Fa,Heating QC_Gd,Heating QC_Rare_var,Heating QC_TA,Central Air_Y,Electrical_FuseF,Electrical_Rare_var,Electrical_SBrkr,Kitchen Qual_Fa,Kitchen Qual_Gd,Kitchen Qual_Rare_var,Kitchen Qual_TA,Functional_Min2,Functional_Mod,Functional_Rare_var,Functional_Typ,Fireplace Qu_Fa,Fireplace Qu_Gd,Fireplace Qu_Missing,Fireplace Qu_Po,Fireplace Qu_TA,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_Detchd,Garage Type_Missing,Garage Type_Rare_var,Garage Finish_Missing,Garage Finish_RFn,Garage Finish_Unf,Garage Qual_Missing,Garage Qual_Rare_var,Garage Qual_TA,Garage Cond_Missing,Garage Cond_Rare_var,Garage Cond_TA,Paved Drive_P,Paved Drive_Y,Pool QC_Rare_var,Fence_GdWo,Fence_Missing,Fence_MnPrv,Fence_Rare_var,Misc Feature_Rare_var,Misc Feature_Shed,Sale Type_ConLD,Sale Type_New,Sale Type_Rare_var,Sale Type_WD
0,2658,0.985232,3.120753,0.14013,0.058783,-0.385864,-0.036625,2.157251,1.53161,1.348135,-0.567521,-1.007121,-0.301889,1.078891,0.093629,-0.574551,1.551978,-0.083828,0.933602,-0.830056,-0.24312,0.823523,-0.788376,1.349887,4.253806,1.595239,-0.924179,1.880148,-0.990177,0.032794,-0.771435,0.189742,1.201172,-0.104133,-0.279956,-0.063043,-0.088203,-0.834055,-1.375733,-0.472061,-0.033768,0.0,0.0,0.0,0.0,0.0,0.0,-0.232425,0.0,0.0,-1.070181,0.0,0.0,0.0,0.0,0.0,-0.122592,-1.817669,2.239129,-0.089648,-0.075679,-3.760044,-0.164014,-0.156538,-0.089648,0.746149,-0.203761,-0.140515,0.333755,-0.033768,-0.24312,-0.171197,0.616849,-0.075679,-0.20975,-0.08295,-0.258512,0.403626,-0.112638,0.0,-0.127294,-0.156538,-0.058554,7.339959,-0.20071,-0.194487,-0.298881,-0.961997,-0.107335,1.484715,-0.101768,-0.197619,-0.20071,-0.107335,-0.459385,-0.122592,-0.140515,-0.16764,-0.203761,-0.439243,-0.395961,-0.292045,-0.075679,-0.136241,-0.724089,-0.442928,-0.112638,-0.122592,-0.203761,-0.41877,-0.403626,-0.335859,-0.122592,-0.140515,-0.709524,-0.426258,-0.144673,-0.630943,0.0,0.802617,-0.047782,-0.316624,-0.101768,-0.705899,0.768492,-0.325259,-0.08295,-2.670137,-0.875557,-0.879623,9.826269,-0.131838,5.509732,-0.823879,-0.171197,-0.033768,-0.906409,-0.197619,-0.171197,0.0,0.35242,-0.316624,-0.171197,-0.307836,0.740608,-0.292045,-0.618609,-0.250906,-0.171197,-0.368557,1.593839,-0.152676,-0.112638,-0.184818,-0.171197,-0.17469,0.415006,-0.117715,-0.171197,2.142978,0.0,-0.661051,-3.868286,-0.131838,20.92845,-3.536622,6.097041,-0.821932,-0.033768,-1.018392,-0.181497,0.0,-0.131838,0.285098,-0.136241,-0.578227,1.039504,-0.131838,-0.530803,-0.101768,-0.253461,1.602843,-0.229691,-0.095893,-0.232425,-0.601033,1.139486,-0.232425,9.826269,-2.854091,-0.232425,9.316652,-3.115657,-0.164014,0.333755,-0.067651,-0.194487,0.493585,-0.364559,-0.047782,-0.047782,-0.215601,-0.101768,-0.31225,-0.117715,0.403626
1,2718,1.000099,0.751722,0.096502,0.162851,-1.370713,-0.767467,-1.38803,-0.00162,0.495609,-0.567521,-1.007121,-0.301889,2.281042,1.692852,1.865758,-0.805101,-0.083828,0.994457,-0.830056,-0.24312,0.823523,-0.788376,3.758832,4.253806,2.221914,-0.924179,0.176106,0.343221,0.530945,0.632217,-0.690771,-0.328721,-0.104133,-0.279956,-0.063043,-0.088203,0.680527,-1.375733,2.118372,-0.033768,0.0,0.0,0.0,0.0,0.0,0.0,-0.232425,0.0,0.0,-1.070181,0.0,0.0,0.0,0.0,0.0,-0.122592,0.550155,-0.446602,-0.089648,-0.075679,0.265954,-0.164014,-0.156538,-0.089648,-1.340216,-0.203761,-0.140515,0.333755,-0.033768,-0.24312,-0.171197,0.616849,-0.075679,-0.20975,-0.08295,-0.258512,0.403626,-0.112638,0.0,-0.127294,-0.156538,-0.058554,-0.136241,4.982322,-0.194487,-0.298881,1.039504,-0.107335,-0.67353,-0.101768,-0.197619,-0.20071,-0.107335,-0.459385,-0.122592,-0.140515,-0.16764,-0.203761,-0.439243,-0.395961,3.424126,-0.075679,-0.136241,-0.724089,-0.442928,-0.112638,-0.122592,-0.203761,-0.41877,-0.403626,2.977443,-0.122592,-0.140515,-0.709524,-0.426258,-0.144673,-0.630943,0.0,0.802617,-0.047782,-0.316624,-0.101768,-0.705899,0.768492,-0.325259,-0.08295,0.374513,1.14213,-0.879623,-0.101768,-0.131838,-0.181497,1.213771,-0.171197,-0.033768,-0.906409,-0.197619,-0.171197,0.0,0.35242,-0.316624,-0.171197,-0.307836,0.740608,-0.292045,-0.618609,-0.250906,-0.171197,-0.368557,1.593839,-0.152676,-0.112638,-0.184818,-0.171197,-0.17469,0.415006,-0.117715,-0.171197,-0.46664,0.0,1.512742,0.258512,-0.131838,-0.047782,0.282756,-0.164014,-0.821932,-0.033768,0.98194,-0.181497,0.0,-0.131838,0.285098,-0.136241,-0.578227,1.039504,-0.131838,-0.530803,-0.101768,-0.253461,-0.623891,-0.229691,-0.095893,-0.232425,-0.601033,-0.877588,-0.232425,-0.101768,0.350374,-0.232425,-0.107335,0.32096,-0.164014,0.333755,-0.067651,-0.194487,0.493585,-0.364559,-0.047782,-0.047782,-0.215601,-0.101768,-0.31225,-0.117715,0.403626


In [91]:
feature_select_model = SelectFromModel(Lasso(alpha = 0.0065, random_state=13)) 
feature_select_model.fit(new_X_train, y_train)

len(feature_select_model.get_support())  #MinMax alpha=0.0008      Standard alpha = 0.006 or 0.007

206

In [92]:
selected_features = new_X_train.columns[feature_select_model.get_support()]
print(len(selected_features))
selected_features

46


Index(['Lot Frontage', 'Lot Area', 'Neighborhood', 'Overall Qual',
       'Overall Cond', 'Year Built', 'Year Remod/Add', 'BsmtFin SF 1',
       'Total Bsmt SF', '1st Flr SF', 'Gr Liv Area', 'Bsmt Full Bath',
       'Kitchen AbvGr', 'Fireplaces', 'Garage Yr Blt', 'Garage Cars',
       'Wood Deck SF', 'Screen Porch', 'Misc Val', 'basement_livable',
       'KitchenQual_Num', 'neigh_score', 'MS Zoning_RL', 'MS Zoning_RM',
       'MS Zoning_Rare_var', 'Street_Rare_var', 'Land Contour_HLS',
       'Utilities_Rare_var', 'Condition 1_Norm', 'Condition 1_PosN',
       'House Style_Rare_var', 'Roof Style_Rare_var', 'Exterior 1st_BrkFace',
       'Mas Vnr Type_Rare_var', 'Exter Cond_TA', 'Foundation_PConc',
       'Bsmt Qual_Fa', 'Bsmt Exposure_Gd', 'BsmtFin Type 1_Unf',
       'Heating_Rare_var', 'Heating QC_Rare_var', 'Heating QC_TA',
       'Central Air_Y', 'Functional_Rare_var', 'Garage Cond_TA',
       'Paved Drive_Y'],
      dtype='object')

In [93]:
new_X_train = new_X_train[selected_features]
new_X_test = new_X_test[selected_features]

In [None]:
# The keys MUST match the names of the arguments!
knn_params = {
    'n_neighbors': range(1, 51, 10),
    'metric': ['euclidean', 'manhattan']
}

In [None]:
# Instantiate our GridSearchCV object.
knn_gridsearch = GridSearchCV(KNeighborsClassifier(), # What is the model we want to fit?
                              knn_params, # What is the dictionary of hyperparameters?
                              cv=5, # What number of folds in CV will we use?
                              verbose=1)

In [None]:
# Fit the GridSearchCV object to the data
knn_gridsearch.fit(X_train_sc, y_train);

In [None]:
#This returns the actual best parameters it found
knn.gridsearch.best_params_

In [94]:
lr = LinearRegression()
lr.fit(new_X_train, y_train)

print('train score: ',lr.score(new_X_train,y_train))

print('test score: ',lr.score(new_X_test,y_test))

print('cross val score: ', cross_val_score(lr,new_X_train,y_train).mean())

train score:  0.9223532128936078
test score:  0.8684930112043439
cross val score:  0.8954339221406087


In [95]:
#train score:  0.9176032388861152     #minmaxscale at 0.0008
#test score:  0.8873418271757585
#cross val score:  0.9007027112310265

In [96]:
preds = lr.predict(new_final_df[selected_features])
preds.shape

(878,)

In [97]:
new_final_df['SalePrice'] = np.exp(preds)
submission = new_final_df[['Id', 'SalePrice']]
submission.set_index('Id', inplace=True)
submission.head()

Unnamed: 0_level_0,SalePrice
Id,Unnamed: 1_level_1
2658,130313.82732
2718,156496.133592
2414,221808.241689
1989,106267.133775
625,183235.420373


In [31]:
submission.to_csv('../data/submissions/22_submission.csv')

In [None]:
#MinMax Predictions 

2658	100281.250139   #MORE Features included -> HIGHER Predictions
2718	132639.254512
2414	155850.895756
1989	76974.361361
625	148724.967061