In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, Lasso, LinearRegression,RidgeCV
from sklearn.metrics import mean_squared_error

plt.style.use('ggplot')
%matplotlib inline

In [2]:
df=pd.read_csv('lasso.csv',index_col=0)

In [3]:
df.shape

(1458, 107)

In [5]:
SalePrice=df.SalePrice
ridge_df=df.drop('SalePrice',axis=1)

# Split the Ames_df into a 80% train-set and a 20% test set.
np.random.seed(19)
testIdxes = np.random.choice(range(1458), size= 292, replace=False)
trainIdxes = list(set(range(1458))-set(testIdxes))
trainX = ridge_df.iloc[trainIdxes]
testX  = ridge_df.iloc[testIdxes]
trainY = SalePrice.iloc[trainIdxes]
testY  = SalePrice.iloc[testIdxes]

In [6]:
alphaRange = np.linspace(1e-3,200,40) # Use np.linspace to generate 40 alpha
ridge_cv = RidgeCV(alphas = alphaRange,cv=10) #cross validation to get best lambda
ridge_cv.fit(trainX, trainY)
best_alpha=ridge_cv.alpha_
best_alpha=round(best_alpha,3)
best_alpha

5.129

In [16]:
ridge=Ridge()
ridge.set_params(alpha=best_alpha,normalize=True)
ridge.fit(trainX,trainY)
ridge_intercept = ridge.intercept_
ridge_coef = pd.Series(ridge.coef_,index=ridge_df.columns)
rtrain_score=ridge.score(trainX,trainY)
rtest_score= ridge.score(testX,testY)

In [17]:
ridge_intercept

-12392.297753578721

In [18]:
ridge_predict = ridge.predict(testX)
RMSE=np.sqrt(mean_squared_error(testY,ridge_predict))
RMSE

31021.84134312618

In [20]:
ridge.score(trainX,trainY)

0.7502133115255105

In [21]:
ridge.score(testX,testY)

0.7927125689097279

In [19]:
#Sorted the coefs, the larger the number is , the larger impact it has to the whole model
#showing the top 60 features
sorted_coef=abs(ridge_coef).sort_values(ascending=False)
sorted_coef[1:60]

Neighborhood_NoRidge     13946.264435
Neighborhood_StoneBr     13350.064627
Neighborhood_NridgHt     10053.263496
Exterior_ImStucc          7503.818948
SaleType_CWD              7060.041641
SaleType_New              6591.971147
SaleCondition_Partial     6486.255144
Street_Grvl               6093.238105
ExterQual                 6073.447817
PoolQC                    6054.619083
MasVnrType_Stone          5993.326132
KitchenQual               5247.243215
Exterior_CemntBd          4786.861877
GarageType_BuiltIn        4738.295875
Neighborhood_Crawfor      4690.768192
LotShape_IR2              4477.648978
MasVnrType_BrkCmn         4228.532787
RoofStyle_Hip             4191.673414
TotalBath                 4156.512755
MSSubClass_160            4073.039077
MSSubClass_30             4048.090655
BldgType_Twnhs            4001.291645
Fireplaces                3896.430267
GarageCars                3893.825159
Exterior_BrkFace          3755.263464
MSSubClass_60             3605.139105
HouseStyle_S

In [51]:
#showing the features names in a list
list(sorted_coef.index)

['Neighborhood_NridgHt',
 'PoolQC',
 'Neighborhood_StoneBr',
 'Neighborhood_Crawfor',
 'Neighborhood_NoRidge',
 'BldgType_Twnhs',
 'SaleType_New',
 'KitchenAbvGr',
 'LotConfig_CulDSac',
 'Neighborhood_BrkSide',
 'Exterior_ImStucc',
 'BldgType_TwnhsE',
 'Exterior_BrkFace',
 'Street_Grvl',
 'MasVnrType_BrkFace',
 'Exterior_CemntBd',
 'BsmtCond',
 'OverallQual',
 'LandContour_Low',
 'BedroomAbvGr',
 'ExterQual',
 'HouseStyle_SLvl',
 'Neighborhood_NWAmes',
 'Neighborhood_Mitchel',
 'HouseStyle_SFoyer',
 'GarageType_Detchd',
 'GarageType_BuiltIn',
 'Condition_Feedr',
 'Neighborhood_CollgCr',
 'SaleType_COD',
 'RoofStyle_Hip',
 'BsmtExposure',
 'Functional',
 'LandSlope_Mod',
 'OverallCond',
 'TotalBath',
 'SaleCondition_Abnorml',
 'MSZoning_RM',
 'MSSubClass_120',
 'BldgType_Duplex',
 'MSSubClass_90',
 'Neighborhood_Sawyer',
 'GarageQual',
 'GarageCars',
 'LotShape_IR2',
 'KitchenQual',
 'MSSubClass_30',
 'Exterior_Plywood',
 'BsmtScore',
 'SaleCondition_Partial',
 'ExterCond',
 'TotRmsAbvG