# Machine Learning Model Building Pipeline: Feature Selection

In [None]:
# For production: Fewer variables mean smaller client input requirements
# (e.g. customers filling out a form on a website or mobile app),
# and hence less code for error handling. This reduces the chances of bugs.
# For model performance: Fewer variables mean simpler, more interpretable, less over-fitted models

In [1]:
# to handle datasets
import pandas as pd
import numpy as np

# for plotting
import matplotlib.pyplot as plt
%matplotlib inline

# to build the models
from sklearn.linear_model import Lasso
from sklearn.feature_selection import SelectFromModel

# Show every column when a dataframe is displayed (None removes the column limit).
# Use the public `pd.set_option` entry point rather than `pd.pandas.set_option`,
# which only works through an incidental self-reference inside the pandas package.
pd.set_option('display.max_columns', None)

In [2]:
# read the pre-processed train and test sets from the working directory
X_train, X_test = (
    pd.read_csv(csv_path) for csv_path in ('xtrain.csv', 'xtest.csv')
)

# preview the first rows of the training set
X_train.head()

Unnamed: 0,Id,SalePrice,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,GarageYrBlt_na,LotFrontage_na,MasVnrArea_na
0,931,12.21106,0.0,0.75,0.461171,0.377048,1.0,1.0,0.333333,1.0,1.0,0.0,0.0,0.863636,0.4,1.0,0.75,0.6,0.777778,0.5,0.014706,0.04918,0.0,0.0,1.0,1.0,0.0,0.0,0.666667,1.0,1.0,0.75,0.75,0.75,1.0,0.002835,0.666667,0.0,0.673479,0.239935,1.0,1.0,1.0,1.0,0.55976,0.0,0.0,0.52325,0.0,0.0,0.666667,0.0,0.375,0.333333,0.666667,0.416667,1.0,0.0,0.2,0.8,0.018692,1.0,0.75,0.430183,0.666667,1.0,1.0,0.116686,0.032907,0.0,0.0,0.0,0.0,0.0,0.75,1.0,0.0,0.545455,0.75,0.666667,0.75,0.0,0.0,0.0
1,657,11.887931,0.0,0.75,0.456066,0.399443,1.0,1.0,0.333333,0.333333,1.0,0.0,0.0,0.363636,0.4,1.0,0.75,0.6,0.444444,0.75,0.360294,0.04918,0.0,0.0,0.6,0.6,0.666667,0.03375,0.666667,1.0,0.5,0.5,0.75,0.25,0.666667,0.142807,0.666667,0.0,0.114724,0.17234,1.0,1.0,1.0,1.0,0.434539,0.0,0.0,0.406196,0.333333,0.0,0.333333,0.5,0.375,0.333333,0.666667,0.25,1.0,0.0,0.2,0.8,0.457944,0.666667,0.25,0.220028,0.666667,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,1.0,0.0,0.636364,0.5,0.666667,0.75,0.0,0.0,0.0
2,46,12.675764,0.588235,0.75,0.394699,0.347082,1.0,1.0,0.0,0.333333,1.0,0.0,0.0,0.954545,0.4,1.0,1.0,0.6,0.888889,0.5,0.036765,0.098361,1.0,0.0,0.3,0.2,0.666667,0.2575,1.0,1.0,1.0,1.0,0.75,0.25,1.0,0.080794,0.666667,0.0,0.601951,0.286743,1.0,1.0,1.0,1.0,0.627205,0.0,0.0,0.586296,0.333333,0.0,0.666667,0.0,0.25,0.333333,1.0,0.333333,1.0,0.333333,0.8,0.8,0.046729,0.666667,0.5,0.406206,0.666667,1.0,1.0,0.228705,0.149909,0.0,0.0,0.0,0.0,0.0,0.75,1.0,0.0,0.090909,1.0,0.666667,0.75,0.0,0.0,0.0
3,1349,12.278393,0.0,0.75,0.388581,0.493677,1.0,1.0,0.666667,0.666667,1.0,0.0,0.0,0.454545,0.4,1.0,0.75,0.6,0.666667,0.5,0.066176,0.163934,0.0,0.0,1.0,1.0,0.0,0.0,0.666667,1.0,1.0,0.75,0.75,1.0,1.0,0.25567,0.666667,0.0,0.018114,0.242553,1.0,1.0,1.0,1.0,0.56692,0.0,0.0,0.529943,0.333333,0.0,0.666667,0.0,0.375,0.333333,0.666667,0.25,1.0,0.333333,0.4,0.8,0.084112,0.666667,0.5,0.362482,0.666667,1.0,1.0,0.469078,0.045704,0.0,0.0,0.0,0.0,0.0,0.75,1.0,0.0,0.636364,0.25,0.666667,0.75,0.0,0.0,0.0
4,56,12.103486,0.0,0.75,0.577658,0.402702,1.0,1.0,0.333333,0.333333,1.0,0.0,0.0,0.363636,0.4,1.0,0.75,0.6,0.555556,0.5,0.323529,0.737705,0.0,0.0,0.6,0.7,0.666667,0.17,0.333333,1.0,0.5,0.5,0.75,0.25,0.333333,0.086818,0.666667,0.0,0.434278,0.233224,1.0,0.75,1.0,1.0,0.549026,0.0,0.0,0.513216,0.0,0.0,0.666667,0.0,0.375,0.333333,0.333333,0.416667,1.0,0.333333,0.8,0.8,0.411215,0.666667,0.5,0.406206,0.666667,1.0,1.0,0.0,0.0,0.0,0.801181,0.0,0.0,0.0,0.75,1.0,0.0,0.545455,0.5,0.666667,0.75,0.0,0.0,0.0


In [3]:
# keep the target aside before it is removed from the feature frames
y_train, y_test = X_train['SalePrice'], X_test['SalePrice']

# remove the identifier and the target from both frames (in place),
# so that only predictor columns remain
cols_to_drop = ['Id', 'SalePrice']
for frame in (X_train, X_test):
    frame.drop(columns=cols_to_drop, inplace=True)

In [4]:
# inspect the training predictors now that 'Id' and 'SalePrice' have been dropped
X_train

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,GarageYrBlt_na,LotFrontage_na,MasVnrArea_na
0,0.000000,0.75,0.461171,0.377048,1.0,1.0,0.333333,1.000000,1.0,0.0,0.0,0.863636,0.4,1.0,0.75,0.6,0.777778,0.50,0.014706,0.049180,0.0,0.0,1.0,1.0,0.000000,0.000000,0.666667,1.0,1.0,0.75,0.75,0.75,1.000000,0.002835,0.666667,0.0,0.673479,0.239935,1.0,1.00,1.0,1.0,0.559760,0.000000,0.0,0.523250,0.000000,0.0,0.666667,0.0,0.375,0.333333,0.666667,0.416667,1.00,0.000000,0.2,0.8,0.018692,1.000000,0.75,0.430183,0.666667,1.0,1.0,0.116686,0.032907,0.0,0.000000,0.000,0.0,0.0,0.75,1.0,0.0,0.545455,0.75,0.666667,0.75,0.0,0.0,0.0
1,0.000000,0.75,0.456066,0.399443,1.0,1.0,0.333333,0.333333,1.0,0.0,0.0,0.363636,0.4,1.0,0.75,0.6,0.444444,0.75,0.360294,0.049180,0.0,0.0,0.6,0.6,0.666667,0.033750,0.666667,1.0,0.5,0.50,0.75,0.25,0.666667,0.142807,0.666667,0.0,0.114724,0.172340,1.0,1.00,1.0,1.0,0.434539,0.000000,0.0,0.406196,0.333333,0.0,0.333333,0.5,0.375,0.333333,0.666667,0.250000,1.00,0.000000,0.2,0.8,0.457944,0.666667,0.25,0.220028,0.666667,1.0,1.0,0.000000,0.000000,0.0,0.000000,0.000,0.0,0.0,0.50,1.0,0.0,0.636364,0.50,0.666667,0.75,0.0,0.0,0.0
2,0.588235,0.75,0.394699,0.347082,1.0,1.0,0.000000,0.333333,1.0,0.0,0.0,0.954545,0.4,1.0,1.00,0.6,0.888889,0.50,0.036765,0.098361,1.0,0.0,0.3,0.2,0.666667,0.257500,1.000000,1.0,1.0,1.00,0.75,0.25,1.000000,0.080794,0.666667,0.0,0.601951,0.286743,1.0,1.00,1.0,1.0,0.627205,0.000000,0.0,0.586296,0.333333,0.0,0.666667,0.0,0.250,0.333333,1.000000,0.333333,1.00,0.333333,0.8,0.8,0.046729,0.666667,0.50,0.406206,0.666667,1.0,1.0,0.228705,0.149909,0.0,0.000000,0.000,0.0,0.0,0.75,1.0,0.0,0.090909,1.00,0.666667,0.75,0.0,0.0,0.0
3,0.000000,0.75,0.388581,0.493677,1.0,1.0,0.666667,0.666667,1.0,0.0,0.0,0.454545,0.4,1.0,0.75,0.6,0.666667,0.50,0.066176,0.163934,0.0,0.0,1.0,1.0,0.000000,0.000000,0.666667,1.0,1.0,0.75,0.75,1.00,1.000000,0.255670,0.666667,0.0,0.018114,0.242553,1.0,1.00,1.0,1.0,0.566920,0.000000,0.0,0.529943,0.333333,0.0,0.666667,0.0,0.375,0.333333,0.666667,0.250000,1.00,0.333333,0.4,0.8,0.084112,0.666667,0.50,0.362482,0.666667,1.0,1.0,0.469078,0.045704,0.0,0.000000,0.000,0.0,0.0,0.75,1.0,0.0,0.636364,0.25,0.666667,0.75,0.0,0.0,0.0
4,0.000000,0.75,0.577658,0.402702,1.0,1.0,0.333333,0.333333,1.0,0.0,0.0,0.363636,0.4,1.0,0.75,0.6,0.555556,0.50,0.323529,0.737705,0.0,0.0,0.6,0.7,0.666667,0.170000,0.333333,1.0,0.5,0.50,0.75,0.25,0.333333,0.086818,0.666667,0.0,0.434278,0.233224,1.0,0.75,1.0,1.0,0.549026,0.000000,0.0,0.513216,0.000000,0.0,0.666667,0.0,0.375,0.333333,0.333333,0.416667,1.00,0.333333,0.8,0.8,0.411215,0.666667,0.50,0.406206,0.666667,1.0,1.0,0.000000,0.000000,0.0,0.801181,0.000,0.0,0.0,0.75,1.0,0.0,0.545455,0.50,0.666667,0.75,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1309,0.235294,0.75,0.504203,0.387820,1.0,1.0,0.000000,0.333333,1.0,0.0,0.0,1.000000,0.4,1.0,0.75,1.0,0.777778,0.50,0.073529,0.180328,0.0,0.0,1.0,1.0,0.666667,0.420625,0.666667,1.0,1.0,0.75,0.75,0.50,1.000000,0.206060,0.666667,0.0,0.041338,0.204910,1.0,1.00,1.0,1.0,0.504851,0.586004,0.0,0.692428,0.333333,0.0,0.666667,0.5,0.375,0.333333,0.666667,0.500000,1.00,0.333333,0.8,0.8,0.093458,0.666667,0.75,0.603667,0.666667,1.0,1.0,0.000000,0.234004,0.0,0.000000,0.375,0.0,0.0,0.75,1.0,0.0,0.545455,0.75,0.666667,0.75,0.0,0.0,0.0
1310,0.000000,0.75,0.388581,0.391317,1.0,1.0,0.000000,0.333333,1.0,0.0,0.0,0.272727,0.4,1.0,0.75,0.6,0.333333,0.75,0.441176,0.262295,0.0,0.0,1.0,0.6,0.000000,0.000000,0.333333,1.0,0.5,0.75,0.75,0.25,0.333333,0.078313,0.666667,0.0,0.290293,0.174632,1.0,0.50,1.0,1.0,0.439537,0.000000,0.0,0.410869,0.000000,0.0,0.666667,0.0,0.250,0.333333,0.666667,0.166667,0.25,0.000000,0.2,0.8,0.130841,0.333333,0.50,0.307475,0.666667,1.0,1.0,0.338390,0.000000,0.0,0.000000,0.000,0.0,0.0,0.75,1.0,0.0,0.090909,1.00,0.666667,0.75,0.0,0.0,0.0
1311,0.411765,0.25,0.434909,0.377157,1.0,1.0,0.000000,0.333333,1.0,0.0,0.0,0.272727,0.8,1.0,0.25,0.2,0.555556,0.50,0.235294,0.540984,0.0,0.0,1.0,1.0,0.000000,0.000000,0.333333,1.0,0.0,0.00,0.25,0.00,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,1.0,0.50,1.0,1.0,0.519487,0.311966,0.0,0.615356,0.000000,0.0,0.666667,0.0,0.500,0.666667,0.333333,0.500000,1.00,0.000000,0.2,0.8,0.299065,0.333333,0.50,0.380113,0.666667,1.0,1.0,0.000000,0.000000,0.0,0.000000,0.000,0.0,0.0,0.75,1.0,0.0,0.272727,1.00,0.666667,0.75,0.0,0.0,0.0
1312,0.588235,0.75,0.388581,0.176055,1.0,1.0,0.000000,0.333333,1.0,0.0,0.0,0.636364,0.4,1.0,1.00,0.6,0.666667,0.50,0.022059,0.049180,0.0,0.0,1.0,1.0,0.666667,0.011250,0.666667,1.0,1.0,0.75,0.75,1.00,0.833333,0.000000,0.666667,0.0,0.638179,0.224877,1.0,1.00,1.0,1.0,0.582551,0.000000,0.0,0.544554,0.000000,0.0,0.666667,0.0,0.250,0.333333,0.666667,0.416667,1.00,0.333333,0.6,0.8,0.028037,1.000000,0.50,0.296192,0.666667,1.0,1.0,0.166861,0.036563,0.0,0.000000,0.000,0.0,0.0,0.75,1.0,0.0,0.818182,0.00,0.666667,0.75,0.0,0.0,0.0


In [5]:
# inspect the test predictors now that 'Id' and 'SalePrice' have been dropped
X_test

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,LandSlope,Neighborhood,Condition1,Condition2,BldgType,HouseStyle,OverallQual,OverallCond,YearBuilt,YearRemodAdd,RoofStyle,RoofMatl,Exterior1st,Exterior2nd,MasVnrType,MasVnrArea,ExterQual,ExterCond,Foundation,BsmtQual,BsmtCond,BsmtExposure,BsmtFinType1,BsmtFinSF1,BsmtFinType2,BsmtFinSF2,BsmtUnfSF,TotalBsmtSF,Heating,HeatingQC,CentralAir,Electrical,1stFlrSF,2ndFlrSF,LowQualFinSF,GrLivArea,BsmtFullBath,BsmtHalfBath,FullBath,HalfBath,BedroomAbvGr,KitchenAbvGr,KitchenQual,TotRmsAbvGrd,Functional,Fireplaces,FireplaceQu,GarageType,GarageYrBlt,GarageFinish,GarageCars,GarageArea,GarageQual,GarageCond,PavedDrive,WoodDeckSF,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,GarageYrBlt_na,LotFrontage_na,MasVnrArea_na
0,0.000000,0.75,0.388581,0.630999,1.0,1.0,0.333333,0.333333,1.0,1.0,0.0,0.727273,0.4,1.0,0.75,0.6,0.555556,0.250,0.367647,0.540984,1.0,0.0,0.1,0.5,0.333333,0.000000,0.666667,1.000000,1.0,0.50,0.75,0.25,0.166667,0.215982,0.666667,0.000000,0.379006,0.333061,1.0,0.50,1.0,1.000000,0.764014,0.000000,0.0,0.714182,0.333333,0.0,1.000000,0.0,0.500,0.666667,0.333333,0.583333,0.0,0.666667,0.6,0.8,0.299065,0.666667,0.50,0.341326,0.666667,1.0,1.0,0.000000,0.000000,0.362319,0.0,0.0,0.0,0.0,0.75,1.0,0.0,0.181818,0.25,0.666667,0.00,0.0,0.0,0.0
1,0.176471,0.75,0.490408,0.389061,1.0,1.0,0.000000,0.333333,1.0,0.0,0.0,0.363636,0.0,1.0,0.75,0.2,0.555556,0.750,0.477941,0.934426,0.0,0.0,0.1,0.1,0.000000,0.000000,0.333333,1.000000,0.5,0.50,0.75,0.25,0.333333,0.071403,0.500000,0.111940,0.110543,0.131915,1.0,0.50,1.0,0.666667,0.398758,0.331197,0.0,0.549294,0.333333,0.0,0.333333,0.0,0.375,0.333333,0.000000,0.250000,1.0,0.666667,0.6,0.8,0.607477,0.333333,0.25,0.169252,0.666667,1.0,1.0,0.000000,0.000000,0.057971,0.0,0.0,0.0,0.0,0.50,1.0,0.0,0.636364,0.00,0.666667,0.75,0.0,0.0,0.0
2,0.176471,0.75,0.388581,0.329918,1.0,1.0,0.333333,0.000000,1.0,0.5,0.0,0.181818,0.4,1.0,0.75,0.2,0.444444,0.375,0.433824,0.983607,0.0,0.0,0.3,0.2,0.333333,0.100625,0.333333,1.000000,0.5,0.50,0.75,0.25,0.500000,0.032778,0.666667,0.000000,0.243381,0.116039,1.0,0.50,1.0,1.000000,0.406964,0.119658,0.0,0.453307,0.333333,0.0,0.333333,0.0,0.375,0.333333,0.666667,0.250000,1.0,0.333333,0.6,0.4,0.551402,0.333333,0.25,0.248237,0.666667,1.0,1.0,0.000000,0.000000,0.449275,0.0,0.0,0.0,0.0,0.75,1.0,0.0,0.545455,0.75,0.666667,0.75,0.0,0.0,0.0
3,0.235294,0.75,0.508690,0.399404,1.0,1.0,0.000000,0.333333,1.0,0.0,0.0,0.772727,0.4,1.0,0.75,1.0,0.666667,0.500,0.227941,0.524590,1.0,0.0,0.7,0.7,0.666667,0.186875,0.333333,1.000000,0.5,0.75,0.75,0.25,0.333333,0.069454,0.666667,0.000000,0.356712,0.189853,1.0,1.00,1.0,1.000000,0.469855,0.462607,0.0,0.636999,0.000000,0.0,0.666667,0.5,0.500,0.333333,0.333333,0.500000,1.0,0.333333,0.6,0.8,0.289720,1.000000,0.50,0.356135,0.666667,1.0,1.0,0.336056,0.213894,0.000000,0.0,0.0,0.0,0.0,0.75,1.0,0.0,0.181818,0.50,0.666667,0.75,0.0,0.0,0.0
4,0.823529,0.25,0.000000,0.050188,1.0,1.0,0.000000,0.333333,1.0,0.0,0.0,0.090909,0.4,1.0,0.50,1.0,0.555556,0.500,0.286765,0.655738,0.0,0.0,0.6,0.5,0.666667,0.238125,0.333333,1.000000,0.5,0.50,0.75,0.25,0.833333,0.000000,0.666667,0.000000,0.243846,0.085925,1.0,0.50,1.0,1.000000,0.171149,0.302885,0.0,0.419061,0.000000,0.0,0.333333,0.5,0.375,0.333333,0.333333,0.333333,1.0,0.000000,0.2,0.4,0.364486,0.333333,0.25,0.186178,0.666667,1.0,1.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.75,1.0,0.0,0.181818,1.00,0.666667,0.50,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141,0.941176,0.25,0.189077,0.203387,1.0,1.0,0.000000,0.333333,1.0,0.0,0.0,0.136364,0.4,1.0,1.00,0.8,0.444444,0.500,0.007353,0.032787,0.0,0.0,1.0,1.0,0.666667,0.050000,0.333333,1.000000,1.0,0.75,0.75,1.00,1.000000,0.096917,0.666667,0.000000,0.000000,0.089525,1.0,0.75,1.0,1.000000,0.441306,0.000000,0.0,0.412522,0.333333,0.0,0.333333,0.0,0.250,0.333333,0.333333,0.250000,1.0,0.000000,0.2,0.6,0.009346,1.000000,0.50,0.370240,0.666667,1.0,1.0,0.000000,0.051188,0.000000,0.0,0.0,0.0,0.0,0.75,1.0,0.0,0.363636,0.00,0.666667,0.75,0.0,0.0,0.0
142,0.000000,0.75,0.388581,0.544516,1.0,1.0,0.000000,0.000000,1.0,0.5,0.0,0.727273,0.4,1.0,0.75,0.6,0.555556,0.500,0.397059,0.901639,1.0,0.0,0.1,0.1,0.666667,0.115000,0.333333,0.666667,0.5,0.75,0.75,0.50,0.666667,0.006201,0.500000,0.589552,0.420344,0.296072,1.0,0.50,1.0,1.000000,0.723389,0.000000,0.0,0.676207,0.333333,0.0,0.666667,0.0,0.375,0.333333,0.666667,0.416667,1.0,0.666667,0.8,0.6,0.504673,0.333333,0.50,0.317348,0.666667,1.0,1.0,0.193699,0.219378,0.347826,0.0,0.0,0.0,0.0,0.50,1.0,0.0,0.818182,0.25,0.000000,0.25,0.0,0.0,0.0
143,0.000000,0.75,0.394699,0.374287,1.0,1.0,0.333333,0.333333,1.0,0.0,0.0,0.681818,0.4,1.0,0.75,0.6,0.444444,0.750,0.235294,0.032787,0.0,0.0,0.6,0.6,0.000000,0.000000,0.333333,1.000000,0.5,0.75,0.75,0.50,0.500000,0.094259,0.500000,0.097693,0.169066,0.170213,1.0,0.50,1.0,1.000000,0.429838,0.000000,0.0,0.401802,0.000000,0.0,0.666667,0.0,0.375,0.333333,0.666667,0.250000,1.0,0.000000,0.2,0.4,0.299065,0.333333,0.50,0.341326,0.666667,1.0,1.0,0.000000,0.000000,0.000000,0.0,0.6,0.0,0.0,0.75,1.0,0.0,0.727273,0.75,0.666667,0.75,0.0,0.0,0.0
144,0.411765,0.75,0.388581,0.449144,1.0,1.0,0.000000,0.333333,1.0,0.0,0.0,0.272727,0.2,1.0,0.25,0.0,0.333333,0.375,0.286765,0.655738,0.0,0.0,0.7,0.7,0.000000,0.000000,0.333333,1.000000,0.5,0.75,0.75,0.75,1.000000,0.212261,0.666667,0.000000,0.000000,0.196072,1.0,0.50,1.0,1.000000,0.501855,0.000000,0.0,0.469121,0.666667,0.0,0.000000,1.0,0.000,0.666667,0.333333,0.333333,1.0,0.000000,0.2,0.2,0.364486,0.333333,0.50,0.282087,0.333333,1.0,1.0,0.140023,0.000000,0.000000,0.0,0.0,0.0,0.0,0.75,1.0,0.0,0.000000,0.50,0.666667,0.00,0.0,0.0,0.0


In [10]:
# Fit the model and select the features in a single step.

# First, configure a Lasso regression with a suitable alpha (the strength of the penalty):
# the larger the alpha, the fewer features will be selected.
# Then wrap it in sklearn's SelectFromModel, which keeps the features
# whose coefficients are non-zero.

lasso = Lasso(alpha=0.005, random_state=0)  # random_state fixed for reproducibility
classifyFeatures = SelectFromModel(lasso)
classifyFeatures.fit(X_train, y_train)

SelectFromModel(estimator=Lasso(alpha=0.005, copy_X=True, fit_intercept=True,
                                max_iter=1000, normalize=False, positive=False,
                                precompute=False, random_state=0,
                                selection='cyclic', tol=0.0001,
                                warm_start=False),
                max_features=None, norm_order=1, prefit=False, threshold=None)

In [11]:
# get_support() lets us see which features were kept: it returns a boolean mask
# with one entry per column of X_train.
# True marks a feature that is kept; False marks a feature that is discarded
# because Lasso shrank its coefficient to zero.
classifyFeatures.get_support()

array([ True,  True, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False,  True,  True,
       False,  True,  True, False, False, False,  True, False, False,
       False, False,  True, False,  True, False, False, False, False,
       False, False, False,  True,  True, False,  True, False, False,
        True,  True, False, False, False, False, False,  True, False,
       False,  True,  True,  True, False,  True,  True, False, False,
       False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False])

In [15]:
# display the fitted selector together with the parameters of its Lasso estimator
classifyFeatures

SelectFromModel(estimator=Lasso(alpha=0.005, copy_X=True, fit_intercept=True,
                                max_iter=1000, normalize=False, positive=False,
                                precompute=False, random_state=0,
                                selection='cyclic', tol=0.0001,
                                warm_start=False),
                max_features=None, norm_order=1, prefit=False, threshold=None)

In [16]:
# Build the list of selected features and report how many were kept.

# boolean mask over the columns of X_train: True where the selector kept the feature
support_mask = classifyFeatures.get_support()
selected_features = X_train.columns[support_mask]

# summary counts
n_total = X_train.shape[1]
n_selected = len(selected_features)
n_zeroed = np.sum(classifyFeatures.estimator_.coef_ == 0)  # coefficients exactly equal to 0

print('total features: {}'.format(n_total))
print('selected features: {}'.format(n_selected))
print('features with coefficients shrank to zero: {}'.format(n_zeroed))

total features: 82
selected features: 22
features with coefficients shrank to zero: 60


In [17]:
# print the selected features
selected_features

Index(['MSSubClass', 'MSZoning', 'Neighborhood', 'OverallQual', 'OverallCond',
       'YearRemodAdd', 'RoofStyle', 'MasVnrType', 'BsmtQual', 'BsmtExposure',
       'HeatingQC', 'CentralAir', '1stFlrSF', 'GrLivArea', 'BsmtFullBath',
       'KitchenQual', 'Fireplaces', 'FireplaceQu', 'GarageType',
       'GarageFinish', 'GarageCars', 'PavedDrive'],
      dtype='object')

In [None]:
"""
# this is an alternative way of identifying the selected features 
# based on the non-zero regularisation coefficients:
selected_feats = X_train.columns[(sel_.estimator_.coef_ != 0).ravel().tolist()]
selected_feats
"""

In [18]:
# persist the names of the selected features, one per row and without the index column
features_to_save = pd.Series(selected_features)
features_to_save.to_csv('selected_features.csv', index=False)

  
