In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, Lasso, LinearRegression,RidgeCV
from sklearn.metrics import mean_squared_error

plt.style.use('ggplot')
%matplotlib inline

In [5]:
df=pd.read_csv('lasso82col.csv',index_col=0)

In [6]:
df.shape

(1460, 82)

In [41]:
SalePrice=df.SalePrice
ridge_df=df.drop('SalePrice',axis=1)

# Split the Ames_df into a 70% train-set and a 30% test set.
np.random.seed(2)
testIdxes = np.random.choice(range(1460), size= 438, replace=False)
trainIdxes = list(set(range(1460))-set(testIdxes))
trainX = ridge_df.iloc[trainIdxes]
testX  = ridge_df.iloc[testIdxes]
trainY = SalePrice.iloc[trainIdxes]
testY  = SalePrice.iloc[testIdxes]

In [42]:
alphaRange = np.linspace(1e-3,200,40) # Use np.linspace to generate 40 alpha
ridge_cv = RidgeCV(alphas = alphaRange,cv=10) #cross validation to get best lambda
ridge_cv.fit(trainX, trainY)
best_alpha=ridge_cv.alpha_
best_alpha=round(best_alpha,3)
best_alpha

5.129

In [43]:
#Use alpha=10.257 to build the Ridge regression model again
ridge=Ridge()
ridge.set_params(alpha=best_alpha)
ridge.fit(trainX,trainY)
ridge_intercept = ridge.intercept_
ridge_coef = pd.Series(ridge.coef_,index=ridge_df.columns)
rtrain_score=ridge.score(trainX,trainY)
rtest_score= ridge.score(testX,testY)

In [44]:
ridge_intercept

-136368.1489195908

In [45]:
ridge_predict = ridge.predict(testX)
RMSE=np.sqrt(mean_squared_error(testY,ridge_predict))
RMSE

49317.15479535841

In [49]:
#Sorted the coefs, the larger the number is , the larger impact it has to the whole model
#showing the top 60 features
sorted_coef=abs(ridge_coef).sort_values(ascending=False)
sorted_coef[1:60]

PoolQC                   20628.937810
Neighborhood_StoneBr     19702.648898
Neighborhood_Crawfor     17770.244889
Neighborhood_NoRidge     16575.998846
BldgType_Twnhs           14441.445269
SaleType_New             14163.061913
KitchenAbvGr             13761.291060
LotConfig_CulDSac        12713.022463
Neighborhood_BrkSide     11497.875138
Exterior_ImStucc         10625.271862
BldgType_TwnhsE          10326.403795
Exterior_BrkFace         10113.412198
Street_Grvl              10013.156272
MasVnrType_BrkFace        9814.270266
Exterior_CemntBd          8968.877163
BsmtCond                  8580.644083
OverallQual               8489.613829
LandContour_Low           8370.832018
BedroomAbvGr              8170.915868
ExterQual                 7896.520958
HouseStyle_SLvl           7741.335863
Neighborhood_NWAmes       7359.102855
Neighborhood_Mitchel      7022.424801
HouseStyle_SFoyer         6996.605848
GarageType_Detchd         6966.792241
GarageType_BuiltIn        6933.578860
Condition_Fe

In [51]:
#showing the features names in a list
list(sorted_coef.index)

['Neighborhood_NridgHt',
 'PoolQC',
 'Neighborhood_StoneBr',
 'Neighborhood_Crawfor',
 'Neighborhood_NoRidge',
 'BldgType_Twnhs',
 'SaleType_New',
 'KitchenAbvGr',
 'LotConfig_CulDSac',
 'Neighborhood_BrkSide',
 'Exterior_ImStucc',
 'BldgType_TwnhsE',
 'Exterior_BrkFace',
 'Street_Grvl',
 'MasVnrType_BrkFace',
 'Exterior_CemntBd',
 'BsmtCond',
 'OverallQual',
 'LandContour_Low',
 'BedroomAbvGr',
 'ExterQual',
 'HouseStyle_SLvl',
 'Neighborhood_NWAmes',
 'Neighborhood_Mitchel',
 'HouseStyle_SFoyer',
 'GarageType_Detchd',
 'GarageType_BuiltIn',
 'Condition_Feedr',
 'Neighborhood_CollgCr',
 'SaleType_COD',
 'RoofStyle_Hip',
 'BsmtExposure',
 'Functional',
 'LandSlope_Mod',
 'OverallCond',
 'TotalBath',
 'SaleCondition_Abnorml',
 'MSZoning_RM',
 'MSSubClass_120',
 'BldgType_Duplex',
 'MSSubClass_90',
 'Neighborhood_Sawyer',
 'GarageQual',
 'GarageCars',
 'LotShape_IR2',
 'KitchenQual',
 'MSSubClass_30',
 'Exterior_Plywood',
 'BsmtScore',
 'SaleCondition_Partial',
 'ExterCond',
 'TotRmsAbvG