In [490]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.linear_model import LinearRegression, Lasso, LassoCV
from sklearn.feature_selection import SelectFromModel
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures, MinMaxScaler

pd.pandas.set_option('display.max_columns', None)

In [491]:
df = pd.read_csv('../data/train_dummyfixed.csv')
final_df = pd.read_csv('../data/test_dummyfixed.csv')

In [492]:
print(df.shape)
print(final_df.shape)

(2051, 208)
(876, 207)


In [493]:
df.isnull().sum().sum()

28

In [494]:
df.fillna(0, inplace = True)

In [495]:
feature_scale=[feature for feature in df.columns if feature not in ['Id', 'SalePrice']]

scaler=StandardScaler()
scaler.fit(df[feature_scale])
scaler.transform(df[feature_scale])

array([[-0.95542911,  0.06743263,  0.1129455 , ..., -0.28890379,
        -0.15314403,  0.38769636],
       [-0.96588874,  0.06743263, -1.31183171, ..., -0.28890379,
        -0.15314403,  0.38769636],
       [-0.94508138, -0.86469425,  0.1129455 , ..., -0.28890379,
        -0.15314403,  0.38769636],
       ...,
       [-0.9882429 ,  0.06743263, -0.2761591 , ..., -0.28890379,
        -0.15314403,  0.38769636],
       [ 1.00039249,  3.09684498,  0.15832995, ..., -0.28890379,
        -0.15314403,  0.38769636],
       [ 1.01527241,  0.76652779,  0.1129455 , ..., -0.28890379,
        -0.15314403,  0.38769636]])

In [496]:
#transform the train and test set, and add on the Id and SalePrice variables
new_df = pd.concat([df[['Id','SalePrice']].reset_index(drop=True),
                    pd.DataFrame(scaler.transform(df[feature_scale]), columns=feature_scale)],
                    axis=1)
new_df.head(2)

Unnamed: 0,Id,SalePrice,PID,MS SubClass,Lot Frontage,Lot Area,Neighborhood,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,TotRms AbvGrd,Fireplaces,Garage Yr Blt,Garage Cars,Garage Area,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,Lot Frontage_nans,Mas Vnr Area_nans,BsmtFin SF 1_nans,BsmtFin SF 2_nans,Bsmt Unf SF_nans,Total Bsmt SF_nans,Bsmt Full Bath_nans,Bsmt Half Bath_nans,Garage Yr Blt_nans,Garage Cars_nans,Garage Area_nans,basement_livable,Total_sf,Functional_Num,ExterCond_Num,ExterQual_Num,KitchenQual_Num,neigh_score,MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,MS Zoning_Rare_var,Street_Rare_var,Alley_Missing,Alley_Pave,Lot Shape_IR2,Lot Shape_Rare_var,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_Rare_var,Lot Config_CulDSac,Lot Config_FR2,Lot Config_Inside,Lot Config_Rare_var,Land Slope_Mod,Land Slope_Rare_var,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_Rare_var,Condition 2_Rare_var,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1Story,House Style_2.5Unf,House Style_2Story,House Style_Rare_var,House Style_SFoyer,House Style_SLvl,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Rare_var,Roof Matl_Rare_var,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_Rare_var,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_BrkFace,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_MetalSd,Exterior 2nd_Plywood,Exterior 2nd_Rare_var,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Mas Vnr Type_BrkFace,Mas Vnr Type_Missing,Mas Vnr Type_None,Mas Vnr Type_Rare_var,Mas Vnr Type_Stone,Exter Qual_Fa,Exter Qual_Gd,Exter Qual_TA,Exter Cond_Gd,Exter Cond_Rare_var,Exter Cond_TA,Foundation_CBlock,Foundation_PConc,Foundation_Rare_var,Foundation_Slab,Bsmt Qual_Fa,Bsmt Qual_Gd,Bsmt Qual_Missing,Bsmt Qual_Rare_var,Bsmt Qual_TA,Bsmt Cond_Gd,Bsmt Cond_Missing,Bsmt Cond_Rare_var,Bsmt Cond_TA,Bsmt Exposure_Gd,Bsmt Exposure_Missing,Bsmt Exposure_Mn,Bsmt Exposure_No,BsmtFin Type 1_BLQ,BsmtFin Type 1_GLQ,BsmtFin Type 1_LwQ,BsmtFin Type 1_Missing,BsmtFin Type 1_Rec,BsmtFin Type 1_Unf,BsmtFin Type 2_BLQ,BsmtFin Type 2_GLQ,BsmtFin Type 2_LwQ,BsmtFin Type 2_Missing,BsmtFin Type 2_Rec,BsmtFin Type 2_Unf,Heating_Rare_var,Heating QC_Fa,Heating QC_Gd,Heating QC_Rare_var,Heating QC_TA,Central Air_Y,Electrical_FuseF,Electrical_Rare_var,Electrical_SBrkr,Kitchen Qual_Fa,Kitchen Qual_Gd,Kitchen Qual_Rare_var,Kitchen Qual_TA,Functional_Min2,Functional_Mod,Functional_Rare_var,Functional_Typ,Fireplace Qu_Fa,Fireplace Qu_Gd,Fireplace Qu_Missing,Fireplace Qu_Po,Fireplace Qu_TA,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_Detchd,Garage Type_Missing,Garage Type_Rare_var,Garage Finish_Missing,Garage Finish_RFn,Garage Finish_Unf,Garage Qual_Missing,Garage Qual_Rare_var,Garage Qual_TA,Garage Cond_Missing,Garage Cond_Rare_var,Garage Cond_TA,Paved Drive_P,Paved Drive_Y,Pool QC_Rare_var,Fence_GdWo,Fence_Missing,Fence_MnPrv,Fence_Rare_var,Misc Feature_Rare_var,Misc Feature_Shed,Sale Type_ConLD,Sale Type_New,Sale Type_Rare_var,Sale Type_WD
0,109,11.779129,-0.955429,0.067433,0.112946,0.844603,-1.318334,-0.075831,2.203434,0.309033,-0.504945,1.113599,0.217098,-0.290653,-0.350154,-0.243363,-1.306558,0.99769,-0.107977,0.122893,-0.816476,-0.250902,0.769779,1.258191,0.185734,-0.207131,-0.279331,-0.925825,0.467645,0.293602,0.187304,-0.730089,-0.048736,-0.377984,-0.102738,-0.287856,-0.059658,-0.099699,-1.174852,1.69549,2.279556,-0.104129,-0.022086,-0.022086,-0.022086,-0.022086,-0.031242,-0.031242,-0.242598,-0.022086,-0.022086,0.102708,0.180712,0.211884,-0.178612,1.005822,0.724673,1.196298,0.0,0.533182,-0.427568,-0.133664,-0.058521,0.271702,-0.165997,-0.165997,-0.062576,-1.310174,-0.20793,-0.146336,0.334145,-0.031242,3.828379,-0.173596,-1.658175,-0.066389,-0.211729,-0.069997,-0.235763,-2.49947,-0.115499,9.831921,-0.133664,-0.10171,-0.113312,-0.153144,-0.196166,-0.186583,-0.291865,-1.033218,0.0,1.558771,-0.125895,-0.158074,-0.219164,0.0,-0.488392,-0.131762,-0.111084,-0.17947,-0.212983,2.415919,-0.438682,-0.283921,-0.066389,-0.113312,-0.738642,-0.394326,-0.149776,-0.129833,-0.212983,-0.3935,-0.433138,3.166525,-0.139225,-0.119759,-0.736278,-0.382689,-0.178017,1.501851,-0.104129,-1.211656,-0.079867,-0.296755,-0.113312,1.393776,-1.247941,-0.342202,-0.066389,0.392673,1.17211,-0.90547,-0.062576,-0.129833,-0.175081,-0.854015,-0.165997,-0.022086,1.145552,-0.212983,-0.165997,-0.062576,0.343978,-0.329619,-0.170593,-0.293828,0.727637,-0.328709,1.531614,-0.228767,-0.165997,-0.312995,-0.646835,-0.154803,-0.106495,-0.173596,-0.167542,-0.201466,0.415536,-0.127878,-0.183767,-0.429958,-0.038273,-0.641531,0.272735,-0.131762,-0.066389,0.313933,-0.154803,1.242846,0.0,-1.022187,-0.144589,-0.119759,-0.106495,0.266493,-0.1721,-0.583543,1.023184,-0.123881,-0.497561,-0.115499,-0.262271,-0.595558,-0.24147,-0.121837,-0.242598,1.594464,-0.841275,-0.242598,-0.108812,0.346631,-0.242598,-0.106495,0.313933,-0.139225,0.319524,-0.062576,-0.201466,0.492217,-0.352777,-0.069997,-0.062576,-0.167542,0.0,-0.288904,-0.153144,0.387696
1,544,12.301383,-0.965889,0.067433,-1.311832,0.518734,-1.318334,0.627922,-0.509197,-0.34409,-0.042146,0.199947,0.453661,-0.290653,-0.163826,-0.03402,-0.588743,2.066719,-0.107977,1.253907,1.101729,-0.250902,0.769779,1.258191,1.391245,-0.207131,1.013916,0.647387,-0.262319,0.293602,0.338745,-0.730089,0.407063,-0.377984,-0.102738,-0.287856,-0.059658,-0.099699,-0.810034,0.933726,-0.438682,-0.104129,-0.022086,-0.022086,-0.022086,-0.022086,-0.031242,-0.031242,-0.242598,-0.022086,-0.022086,0.327901,1.400852,0.211884,-0.178612,1.005822,0.724673,0.447437,0.0,0.533182,-0.427568,-0.133664,-0.058521,0.271702,-0.165997,-0.165997,-0.062576,-1.310174,-0.20793,-0.146336,0.334145,-0.031242,3.828379,-0.173596,-1.658175,-0.066389,-0.211729,-0.069997,-0.235763,0.400085,-0.115499,-0.10171,-0.133664,-0.10171,-0.113312,-0.153144,-0.196166,-0.186583,-0.291865,-1.033218,0.0,1.558771,-0.125895,-0.158074,-0.219164,0.0,-0.488392,-0.131762,-0.111084,-0.17947,-0.212983,-0.413921,-0.438682,-0.283921,-0.066389,-0.113312,1.353836,-0.394326,-0.149776,-0.129833,-0.212983,-0.3935,-0.433138,-0.315804,-0.139225,-0.119759,1.358183,-0.382689,-0.178017,1.501851,-0.104129,-1.211656,-0.079867,-0.296755,-0.113312,1.393776,-1.247941,-0.342202,-0.066389,0.392673,-0.853162,1.104399,-0.062576,-0.129833,-0.175081,1.170939,-0.165997,-0.022086,-0.872942,-0.212983,-0.165997,-0.062576,0.343978,-0.329619,-0.170593,-0.293828,0.727637,-0.328709,1.531614,-0.228767,-0.165997,-0.312995,-0.646835,-0.154803,-0.106495,-0.173596,-0.167542,-0.201466,0.415536,-0.127878,-0.183767,-0.429958,-0.038273,-0.641531,0.272735,-0.131762,-0.066389,0.313933,-0.154803,1.242846,0.0,-1.022187,-0.144589,-0.119759,-0.106495,0.266493,-0.1721,-0.583543,-0.977341,-0.123881,2.009804,-0.115499,-0.262271,-0.595558,-0.24147,-0.121837,-0.242598,1.594464,-0.841275,-0.242598,-0.108812,0.346631,-0.242598,-0.106495,0.313933,-0.139225,0.319524,-0.062576,-0.201466,0.492217,-0.352777,-0.069997,-0.062576,-0.167542,0.0,-0.288904,-0.153144,0.387696


In [497]:
final_df.isnull().sum().sum()

0

In [498]:
final_df.fillna(0, inplace = True)

In [499]:
final_feature_scale = [feature for feature in final_df.columns if feature not in ['Id']]

scaler=StandardScaler()
scaler.fit(final_df[final_feature_scale])
scaler.transform(final_df[final_feature_scale])

array([[-0.9938316 ,  0.04565082, -0.37816048, ...,  3.19855737,
        -0.11785113, -2.47425771],
       [ 0.98698686, -0.66845837, -0.27696092, ..., -0.31264095,
        -0.11785113,  0.40416162],
       [-0.95735503, -0.90649476,  0.09666373, ..., -0.31264095,
        -0.11785113,  0.40416162],
       ...,
       [ 0.99701248, -0.90649476, -0.53669869, ..., -0.31264095,
        -0.11785113,  0.40416162],
       [-0.99803164, -0.90649476, -0.27696092, ..., -0.31264095,
        -0.11785113,  0.40416162],
       [-0.9561789 , -0.90649476,  0.18319446, ..., -0.31264095,
        -0.11785113,  0.40416162]])

In [500]:
#transform the train and test set, and add on the Id and SalePrice variables
new_final_df = pd.concat([final_df[['Id']].reset_index(drop=True),
                    pd.DataFrame(scaler.transform(final_df[final_feature_scale]), columns=final_feature_scale)],
                    axis=1)
new_final_df.head(2)

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Neighborhood,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,TotRms AbvGrd,Fireplaces,Garage Yr Blt,Garage Cars,Garage Area,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,Lot Frontage_nans,Mas Vnr Area_nans,BsmtFin SF 1_nans,BsmtFin SF 2_nans,Bsmt Unf SF_nans,Total Bsmt SF_nans,Bsmt Full Bath_nans,Bsmt Half Bath_nans,Garage Yr Blt_nans,Garage Cars_nans,Garage Area_nans,basement_livable,Total_sf,Functional_Num,ExterCond_Num,ExterQual_Num,KitchenQual_Num,neigh_score,MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,MS Zoning_Rare_var,Street_Rare_var,Alley_Missing,Alley_Pave,Lot Shape_IR2,Lot Shape_Rare_var,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_Rare_var,Lot Config_CulDSac,Lot Config_FR2,Lot Config_Inside,Lot Config_Rare_var,Land Slope_Mod,Land Slope_Rare_var,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_Rare_var,Condition 2_Rare_var,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1Story,House Style_2.5Unf,House Style_2Story,House Style_Rare_var,House Style_SFoyer,House Style_SLvl,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Rare_var,Roof Matl_Rare_var,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_Rare_var,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_BrkFace,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_MetalSd,Exterior 2nd_Plywood,Exterior 2nd_Rare_var,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Mas Vnr Type_BrkFace,Mas Vnr Type_Missing,Mas Vnr Type_None,Mas Vnr Type_Rare_var,Mas Vnr Type_Stone,Exter Qual_Fa,Exter Qual_Gd,Exter Qual_TA,Exter Cond_Gd,Exter Cond_Rare_var,Exter Cond_TA,Foundation_CBlock,Foundation_PConc,Foundation_Rare_var,Foundation_Slab,Bsmt Qual_Fa,Bsmt Qual_Gd,Bsmt Qual_Missing,Bsmt Qual_Rare_var,Bsmt Qual_TA,Bsmt Cond_Gd,Bsmt Cond_Missing,Bsmt Cond_Rare_var,Bsmt Cond_TA,Bsmt Exposure_Gd,Bsmt Exposure_Missing,Bsmt Exposure_Mn,Bsmt Exposure_No,BsmtFin Type 1_BLQ,BsmtFin Type 1_GLQ,BsmtFin Type 1_LwQ,BsmtFin Type 1_Missing,BsmtFin Type 1_Rec,BsmtFin Type 1_Unf,BsmtFin Type 2_BLQ,BsmtFin Type 2_GLQ,BsmtFin Type 2_LwQ,BsmtFin Type 2_Missing,BsmtFin Type 2_Rec,BsmtFin Type 2_Unf,Heating_Rare_var,Heating QC_Fa,Heating QC_Gd,Heating QC_Rare_var,Heating QC_TA,Central Air_Y,Electrical_FuseF,Electrical_Rare_var,Electrical_SBrkr,Kitchen Qual_Fa,Kitchen Qual_Gd,Kitchen Qual_Rare_var,Kitchen Qual_TA,Functional_Min2,Functional_Mod,Functional_Rare_var,Functional_Typ,Fireplace Qu_Fa,Fireplace Qu_Gd,Fireplace Qu_Missing,Fireplace Qu_Po,Fireplace Qu_TA,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_Detchd,Garage Type_Missing,Garage Type_Rare_var,Garage Finish_Missing,Garage Finish_RFn,Garage Finish_Unf,Garage Qual_Missing,Garage Qual_Rare_var,Garage Qual_TA,Garage Cond_Missing,Garage Cond_Rare_var,Garage Cond_TA,Paved Drive_P,Paved Drive_Y,Pool QC_Rare_var,Fence_GdWo,Fence_Missing,Fence_MnPrv,Fence_Rare_var,Misc Feature_Rare_var,Misc Feature_Shed,Sale Type_ConLD,Sale Type_New,Sale Type_Rare_var,Sale Type_WD
0,2414,-0.993832,0.045651,-0.37816,1.236056,-0.387876,0.692742,-0.502146,-1.871326,-1.690464,-0.568377,0.254826,-0.302264,-1.04437,-0.724572,-1.56267,1.119064,-0.083924,0.1649,1.055731,-0.243414,0.825102,1.187011,0.152468,-0.208108,0.347337,0.567963,-1.902786,0.342306,-0.020479,0.054053,-0.338851,-0.327641,-0.104254,-0.280301,-0.063115,-0.088304,1.058489,-1.380288,-0.470911,-0.033806,0.0,0.0,0.0,0.0,0.0,0.0,-0.232705,0.0,0.0,0.124592,0.193977,0.0,0.0,0.0,0.0,0.0,-0.122734,0.549213,-0.445374,-0.089751,-0.075766,0.263813,-0.164206,-0.156721,-0.089751,-1.341164,-0.204003,-0.140679,0.334178,-0.033806,-0.243414,-0.171398,0.617822,-0.075766,-0.21,-0.083045,-0.258827,0.404162,-0.112769,0.0,-0.127441,-0.156721,-0.058621,-0.131991,-0.197853,-0.194717,-0.299253,-0.961912,-0.107459,1.4862,-0.101885,-0.197853,-0.200948,-0.107459,-0.46002,-0.122734,-0.140679,-0.167836,-0.204003,-0.439841,-0.396484,-0.290101,-0.075766,-0.136399,1.378645,-0.443533,-0.112769,-0.122734,-0.204003,-0.419332,-0.404162,-0.334178,-0.122734,-0.140679,1.40698,-0.426833,-0.144841,-0.63195,0.0,0.804125,-0.047836,-0.317021,-0.101885,1.414214,-1.298891,-0.325669,-0.083045,0.373017,-0.875288,1.134551,-0.096003,-0.131991,-0.178331,1.214321,-0.171398,-0.033806,-0.908295,5.054251,-0.171398,0.0,-2.833889,-0.317021,-0.171398,-0.308221,-1.347858,-0.292407,1.613982,-0.251211,-0.171398,-0.369035,-0.624881,-0.152854,-0.112769,-0.185036,-0.171398,-0.174895,0.415561,-0.117851,-0.171398,-0.465475,0.0,-0.660353,0.256307,-0.131991,-0.033806,0.280745,-0.160503,1.214321,-0.033806,-1.018435,-0.181711,0.0,-0.131991,0.28545,-0.136399,1.726794,-0.959715,-0.131991,-0.53158,-0.101885,-0.253769,-0.623115,-0.229967,-0.096003,-0.232705,1.661221,-0.877324,-0.232705,-0.096003,0.348768,-0.232705,-0.101885,0.319197,-0.164206,0.334178,-0.067729,-0.194717,0.494286,-0.36503,-0.047836,-0.047836,-0.215859,-0.101885,3.198557,-0.117851,-2.474258
1,1989,0.986987,-0.668458,-0.276961,-0.073432,-0.387876,-0.767768,0.386501,1.313748,-1.284126,-0.568377,-1.009439,-0.302264,1.005373,-0.010589,-0.371462,-0.804734,-0.083924,-1.157813,-0.831657,-0.243414,-0.96398,-0.789836,-1.061732,-0.208108,-0.909966,-0.926137,1.376828,0.342306,0.182804,-0.771151,-0.690758,2.185111,-0.104254,-0.280301,-0.063115,-0.088304,0.301561,-0.625994,-0.470911,-0.033806,0.0,0.0,0.0,0.0,0.0,0.0,-0.232705,0.0,0.0,-1.072805,-0.76993,0.0,0.0,0.0,0.0,0.0,-0.122734,-1.820789,2.245302,-0.089751,-0.075766,0.263813,-0.164206,-0.156721,-0.089751,0.745621,-0.204003,-0.140679,0.334178,-0.033806,-0.243414,-0.171398,0.617822,-0.075766,-0.21,-0.083045,-0.258827,0.404162,-0.112769,0.0,-0.127441,-0.156721,-0.058621,-0.131991,-0.197853,-0.194717,-0.299253,1.039596,-0.107459,-0.672857,-0.101885,-0.197853,-0.200948,-0.107459,-0.46002,-0.122734,-0.140679,-0.167836,-0.204003,-0.439841,-0.396484,-0.290101,-0.075766,-0.136399,-0.72535,2.254625,-0.112769,-0.122734,-0.204003,-0.419332,-0.404162,-0.334178,-0.122734,-0.140679,-0.710742,2.342838,-0.144841,-0.63195,0.0,0.804125,-0.047836,-0.317021,-0.101885,1.414214,-1.298891,-0.325669,-0.083045,0.373017,1.142481,-0.881406,-0.096003,-0.131991,-0.178331,-0.823505,-0.171398,-0.033806,1.100964,-0.197853,-0.171398,0.0,0.352872,-0.317021,-0.171398,-0.308221,0.741918,-0.292407,-0.619586,-0.251211,-0.171398,-0.369035,1.600305,-0.152854,-0.112769,-0.185036,-0.171398,-0.174895,0.415561,-0.117851,-0.171398,-0.465475,0.0,1.514342,0.256307,-0.131991,-0.033806,0.280745,-0.160503,-0.823505,-0.033806,0.981899,-0.181711,0.0,-0.131991,0.28545,-0.136399,-0.579108,1.041976,-0.131991,-0.53158,-0.101885,-0.253769,1.60484,-0.229967,-0.096003,-0.232705,-0.601967,1.13983,-0.232705,-0.096003,-2.867238,-0.232705,-0.101885,0.319197,-0.164206,-2.992415,-0.067729,-0.194717,0.494286,-0.36503,-0.047836,-0.047836,-0.215859,-0.101885,-0.312641,-0.117851,0.404162


In [501]:
X = new_df.drop(columns = ['SalePrice'])
y = new_df['SalePrice']

In [502]:
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state= 13)

In [503]:
feature_select_model = SelectFromModel(Lasso(alpha = 0.007, random_state=13)) 
feature_select_model.fit(X_train, y_train)

len(feature_select_model.get_support())  #MinMax alpha=0.0008      Standard alpha = 0.006 or 0.007

207

In [504]:
selected_features = X_train.columns[feature_select_model.get_support()]
print(len(selected_features))
selected_features

48


Index(['Lot Area', 'Neighborhood', 'Overall Qual', 'Year Built',
       'BsmtFin SF 1', 'Total Bsmt SF', '1st Flr SF', '2nd Flr SF',
       'Low Qual Fin SF', 'Bsmt Full Bath', 'Kitchen AbvGr', 'Fireplaces',
       'Garage Yr Blt', 'Garage Cars', 'Garage Area', 'Wood Deck SF',
       'Screen Porch', 'BsmtFin SF 1_nans', 'basement_livable', 'Total_sf',
       'KitchenQual_Num', 'neigh_score', 'MS Zoning_RM', 'MS Zoning_Rare_var',
       'Lot Shape_Reg', 'Land Contour_HLS', 'Condition 1_Norm',
       'Roof Style_Rare_var', 'Exterior 1st_BrkFace', 'Exter Qual_Fa',
       'Exter Cond_Gd', 'Foundation_Rare_var', 'Foundation_Slab',
       'Bsmt Qual_Fa', 'Bsmt Qual_Missing', 'Bsmt Cond_Missing',
       'Bsmt Exposure_Gd', 'BsmtFin Type 1_Missing', 'BsmtFin Type 2_Missing',
       'Heating_Rare_var', 'Heating QC_Rare_var', 'Heating QC_TA',
       'Central Air_Y', 'Electrical_Rare_var', 'Garage Qual_TA',
       'Garage Cond_TA', 'Paved Drive_Y', 'Sale Type_WD '],
      dtype='object')

In [508]:
X_train, X_test, y_train, y_test = train_test_split(X[selected_features],y, random_state= 13)

In [506]:
lr = LinearRegression()
lr.fit(X_train, y_train,)

print('train score: ',lr.score(X_train,y_train))

print('test score: ',lr.score(X_test,y_test))

print('cross val score: ', cross_val_score(lr,X_train,y_train))

train score:  0.9562369347319214
test score:  0.9126013603457314
cross val score:  [ 8.88888733e-01  9.03598875e-01  9.48010979e-01 -2.11196981e+23
  9.26073710e-01]


In [442]:
#train score:  0.9176032388861152     #minmaxscale at 0.0008
#test score:  0.8873418271757585
#cross val score:  0.9007027112310265

In [443]:
new_final_df.isnull().sum().sum()

0

In [444]:
new_final_df.head(2)

Unnamed: 0,Id,PID,MS SubClass,Lot Frontage,Lot Area,Neighborhood,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,Total Bsmt SF,1st Flr SF,2nd Flr SF,Low Qual Fin SF,Gr Liv Area,Bsmt Full Bath,Bsmt Half Bath,Full Bath,Half Bath,Bedroom AbvGr,Kitchen AbvGr,TotRms AbvGrd,Fireplaces,Garage Yr Blt,Garage Cars,Garage Area,Wood Deck SF,Open Porch SF,Enclosed Porch,3Ssn Porch,Screen Porch,Pool Area,Misc Val,Mo Sold,Yr Sold,Lot Frontage_nans,Mas Vnr Area_nans,BsmtFin SF 1_nans,BsmtFin SF 2_nans,Bsmt Unf SF_nans,Total Bsmt SF_nans,Bsmt Full Bath_nans,Bsmt Half Bath_nans,Garage Yr Blt_nans,Garage Cars_nans,Garage Area_nans,basement_livable,Functional_Num,ExterCond_Num,ExterQual_Num,KitchenQual_Num,neigh_score,MS Zoning_RH,MS Zoning_RL,MS Zoning_RM,MS Zoning_Rare_var,Street_Rare_var,Alley_Missing,Alley_Pave,Lot Shape_IR2,Lot Shape_Rare_var,Lot Shape_Reg,Land Contour_HLS,Land Contour_Low,Land Contour_Lvl,Utilities_Rare_var,Lot Config_CulDSac,Lot Config_FR2,Lot Config_Inside,Lot Config_Rare_var,Land Slope_Mod,Land Slope_Rare_var,Condition 1_Feedr,Condition 1_Norm,Condition 1_PosN,Condition 1_RRAe,Condition 1_RRAn,Condition 1_Rare_var,Condition 2_Rare_var,Bldg Type_2fmCon,Bldg Type_Duplex,Bldg Type_Twnhs,Bldg Type_TwnhsE,House Style_1Story,House Style_2.5Unf,House Style_2Story,House Style_Rare_var,House Style_SFoyer,House Style_SLvl,Roof Style_Gambrel,Roof Style_Hip,Roof Style_Rare_var,Roof Matl_Rare_var,Exterior 1st_BrkFace,Exterior 1st_CemntBd,Exterior 1st_HdBoard,Exterior 1st_MetalSd,Exterior 1st_Plywood,Exterior 1st_Rare_var,Exterior 1st_Stucco,Exterior 1st_VinylSd,Exterior 1st_Wd Sdng,Exterior 1st_WdShing,Exterior 2nd_BrkFace,Exterior 2nd_CmentBd,Exterior 2nd_HdBoard,Exterior 2nd_MetalSd,Exterior 2nd_Plywood,Exterior 2nd_Rare_var,Exterior 2nd_Stucco,Exterior 2nd_VinylSd,Exterior 2nd_Wd Sdng,Exterior 2nd_Wd Shng,Mas Vnr Type_BrkFace,Mas Vnr Type_Missing,Mas Vnr Type_None,Mas Vnr Type_Rare_var,Mas Vnr Type_Stone,Exter Qual_Fa,Exter Qual_Gd,Exter Qual_TA,Exter Cond_Gd,Exter Cond_Rare_var,Exter Cond_TA,Foundation_CBlock,Foundation_PConc,Foundation_Rare_var,Foundation_Slab,Bsmt Qual_Fa,Bsmt Qual_Gd,Bsmt Qual_Missing,Bsmt Qual_Rare_var,Bsmt Qual_TA,Bsmt Cond_Gd,Bsmt Cond_Missing,Bsmt Cond_Rare_var,Bsmt Cond_TA,Bsmt Exposure_Gd,Bsmt Exposure_Missing,Bsmt Exposure_Mn,Bsmt Exposure_No,BsmtFin Type 1_BLQ,BsmtFin Type 1_GLQ,BsmtFin Type 1_LwQ,BsmtFin Type 1_Missing,BsmtFin Type 1_Rec,BsmtFin Type 1_Unf,BsmtFin Type 2_BLQ,BsmtFin Type 2_GLQ,BsmtFin Type 2_LwQ,BsmtFin Type 2_Missing,BsmtFin Type 2_Rec,BsmtFin Type 2_Unf,Heating_Rare_var,Heating QC_Fa,Heating QC_Gd,Heating QC_Rare_var,Heating QC_TA,Central Air_Y,Electrical_FuseF,Electrical_Rare_var,Electrical_SBrkr,Kitchen Qual_Fa,Kitchen Qual_Gd,Kitchen Qual_Rare_var,Kitchen Qual_TA,Functional_Min2,Functional_Mod,Functional_Rare_var,Functional_Typ,Fireplace Qu_Fa,Fireplace Qu_Gd,Fireplace Qu_Missing,Fireplace Qu_Po,Fireplace Qu_TA,Garage Type_Basment,Garage Type_BuiltIn,Garage Type_Detchd,Garage Type_Missing,Garage Type_Rare_var,Garage Finish_Missing,Garage Finish_RFn,Garage Finish_Unf,Garage Qual_Missing,Garage Qual_Rare_var,Garage Qual_TA,Garage Cond_Missing,Garage Cond_Rare_var,Garage Cond_TA,Paved Drive_P,Paved Drive_Y,Pool QC_Rare_var,Fence_GdWo,Fence_Missing,Fence_MnPrv,Fence_Rare_var,Misc Feature_Rare_var,Misc Feature_Shed,Sale Type_ConLD,Sale Type_New,Sale Type_Rare_var,Sale Type_WD
0,2658,0.985232,3.120753,0.14013,0.058783,-0.385864,-0.036625,2.157251,1.53161,1.348135,-0.567521,-1.007121,-0.301889,1.078891,0.093629,-0.574551,1.551978,-0.083828,0.933602,-0.830056,-0.24312,0.823523,-0.788376,1.349887,4.253806,1.595239,-0.924179,1.880148,-0.990177,0.032794,-0.771435,0.189742,1.201172,-0.104133,-0.279956,-0.063043,-0.088203,-0.834055,-1.375733,-0.472061,-0.033768,0.0,0.0,0.0,0.0,0.0,0.0,-0.232425,0.0,0.0,-1.070181,0.0,0.0,0.0,0.0,0.0,-0.122592,-1.817669,2.239129,-0.089648,-0.075679,-3.760044,-0.164014,-0.156538,-0.089648,0.746149,-0.203761,-0.140515,0.333755,-0.033768,-0.24312,-0.171197,0.616849,-0.075679,-0.20975,-0.08295,-0.258512,0.403626,-0.112638,0.0,-0.127294,-0.156538,-0.058554,7.339959,-0.20071,-0.194487,-0.298881,-0.961997,-0.107335,1.484715,-0.101768,-0.197619,-0.20071,-0.107335,-0.459385,-0.122592,-0.140515,-0.16764,-0.203761,-0.439243,-0.395961,-0.292045,-0.075679,-0.136241,-0.724089,-0.442928,-0.112638,-0.122592,-0.203761,-0.41877,-0.403626,-0.335859,-0.122592,-0.140515,-0.709524,-0.426258,-0.144673,-0.630943,0.0,0.802617,-0.047782,-0.316624,-0.101768,-0.705899,0.768492,-0.325259,-0.08295,-2.670137,-0.875557,-0.879623,9.826269,-0.131838,5.509732,-0.823879,-0.171197,-0.033768,-0.906409,-0.197619,-0.171197,0.0,0.35242,-0.316624,-0.171197,-0.307836,0.740608,-0.292045,-0.618609,-0.250906,-0.171197,-0.368557,1.593839,-0.152676,-0.112638,-0.184818,-0.171197,-0.17469,0.415006,-0.117715,-0.171197,2.142978,0.0,-0.661051,-3.868286,-0.131838,20.92845,-3.536622,6.097041,-0.821932,-0.033768,-1.018392,-0.181497,0.0,-0.131838,0.285098,-0.136241,-0.578227,1.039504,-0.131838,-0.530803,-0.101768,-0.253461,1.602843,-0.229691,-0.095893,-0.232425,-0.601033,1.139486,-0.232425,9.826269,-2.854091,-0.232425,9.316652,-3.115657,-0.164014,0.333755,-0.067651,-0.194487,0.493585,-0.364559,-0.047782,-0.047782,-0.215601,-0.101768,-0.31225,-0.117715,0.403626
1,2718,1.000099,0.751722,0.096502,0.162851,-1.370713,-0.767467,-1.38803,-0.00162,0.495609,-0.567521,-1.007121,-0.301889,2.281042,1.692852,1.865758,-0.805101,-0.083828,0.994457,-0.830056,-0.24312,0.823523,-0.788376,3.758832,4.253806,2.221914,-0.924179,0.176106,0.343221,0.530945,0.632217,-0.690771,-0.328721,-0.104133,-0.279956,-0.063043,-0.088203,0.680527,-1.375733,2.118372,-0.033768,0.0,0.0,0.0,0.0,0.0,0.0,-0.232425,0.0,0.0,-1.070181,0.0,0.0,0.0,0.0,0.0,-0.122592,0.550155,-0.446602,-0.089648,-0.075679,0.265954,-0.164014,-0.156538,-0.089648,-1.340216,-0.203761,-0.140515,0.333755,-0.033768,-0.24312,-0.171197,0.616849,-0.075679,-0.20975,-0.08295,-0.258512,0.403626,-0.112638,0.0,-0.127294,-0.156538,-0.058554,-0.136241,4.982322,-0.194487,-0.298881,1.039504,-0.107335,-0.67353,-0.101768,-0.197619,-0.20071,-0.107335,-0.459385,-0.122592,-0.140515,-0.16764,-0.203761,-0.439243,-0.395961,3.424126,-0.075679,-0.136241,-0.724089,-0.442928,-0.112638,-0.122592,-0.203761,-0.41877,-0.403626,2.977443,-0.122592,-0.140515,-0.709524,-0.426258,-0.144673,-0.630943,0.0,0.802617,-0.047782,-0.316624,-0.101768,-0.705899,0.768492,-0.325259,-0.08295,0.374513,1.14213,-0.879623,-0.101768,-0.131838,-0.181497,1.213771,-0.171197,-0.033768,-0.906409,-0.197619,-0.171197,0.0,0.35242,-0.316624,-0.171197,-0.307836,0.740608,-0.292045,-0.618609,-0.250906,-0.171197,-0.368557,1.593839,-0.152676,-0.112638,-0.184818,-0.171197,-0.17469,0.415006,-0.117715,-0.171197,-0.46664,0.0,1.512742,0.258512,-0.131838,-0.047782,0.282756,-0.164014,-0.821932,-0.033768,0.98194,-0.181497,0.0,-0.131838,0.285098,-0.136241,-0.578227,1.039504,-0.131838,-0.530803,-0.101768,-0.253461,-0.623891,-0.229691,-0.095893,-0.232425,-0.601033,-0.877588,-0.232425,-0.101768,0.350374,-0.232425,-0.107335,0.32096,-0.164014,0.333755,-0.067651,-0.194487,0.493585,-0.364559,-0.047782,-0.047782,-0.215601,-0.101768,-0.31225,-0.117715,0.403626


In [445]:
preds = lr.predict(new_final_df[selected_features])
preds.shape

(878,)

In [446]:
new_final_df['SalePrice'] = np.exp(preds)
submission = new_final_df[['Id', 'SalePrice']]
submission.set_index('Id', inplace=True)
submission.head()

Unnamed: 0_level_0,SalePrice
Id,Unnamed: 1_level_1
2658,130228.10522
2718,157526.580984
2414,221704.799474
1989,106327.614006
625,182775.026835


In [325]:
#submission.to_csv('../data/submissions/21_submission.csv')

In [None]:

2658	130228.105220
2718	157526.580984
2414	221704.799474
1989	106327.614006
625	182775.026835

In [None]:
#MinMax Predictions 

2658	100281.250139   #MORE Features included -> HIGHER Predictions
2718	132639.254512
2414	155850.895756
1989	76974.361361
625	148724.967061