## Importing Libraries

In [63]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Data

In [64]:
# Load the prepared Ames housing table; the path is resolved relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer='final_ames_data.csv')

In [65]:
# Preview the first rows of the freshly loaded frame.
df.head()

Unnamed: 0.1,Unnamed: 0,Lot Frontage,Lot Area,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,...,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Sale Condition_AdjLand,Sale Condition_Alloca,Sale Condition_Family,Sale Condition_Normal,Sale Condition_Partial
0,0,141.0,31770,6,5,1960,1960,112.0,639.0,0.0,...,0,0,0,0,1,0,0,0,1,0
1,1,80.0,11622,5,6,1961,1961,0.0,468.0,144.0,...,0,0,0,0,1,0,0,0,1,0
2,2,81.0,14267,6,6,1958,1958,108.0,923.0,0.0,...,0,0,0,0,1,0,0,0,1,0
3,3,93.0,11160,7,5,1968,1968,0.0,1065.0,0.0,...,0,0,0,0,1,0,0,0,1,0
4,4,74.0,13830,5,5,1997,1998,0.0,791.0,0.0,...,0,0,0,0,1,0,0,0,1,0


In [66]:
# Print the frame's structural summary: index range, column count, dtypes
# and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2925 entries, 0 to 2924
Columns: 275 entries, Unnamed: 0 to Sale Condition_Partial
dtypes: float64(11), int64(264)
memory usage: 6.1 MB


In [67]:
# Remove the leftover 'Unnamed: 0' column (presumably a row index that was
# written out when the CSV was saved -- TODO confirm).
# Rebinding the result instead of using inplace=True, together with
# errors='ignore', keeps this cell idempotent: re-running it no longer raises
# KeyError once the column is already gone.
df = df.drop(columns='Unnamed: 0', errors='ignore')

In [68]:
# Per-column summary statistics (count, mean, std, min, quartiles, max).
df.describe()

Unnamed: 0,Lot Frontage,Lot Area,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,...,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Sale Condition_AdjLand,Sale Condition_Alloca,Sale Condition_Family,Sale Condition_Normal,Sale Condition_Partial
count,2925.0,2925.0,2925.0,2925.0,2925.0,2925.0,2925.0,2925.0,2925.0,2925.0,...,2925.0,2925.0,2925.0,2925.0,2925.0,2925.0,2925.0,2925.0,2925.0,2925.0
mean,69.683099,10109.861538,6.091624,5.563761,1971.323761,1984.22906,100.318632,438.974359,49.790427,558.959316,...,0.002735,0.080684,0.002393,0.000342,0.866325,0.004103,0.007863,0.015726,0.824615,0.082735
std,21.344161,7785.509739,1.406386,1.112262,30.228826,20.857822,176.667803,439.426274,169.274143,439.729905,...,0.052235,0.272395,0.04887,0.01849,0.340361,0.063931,0.088341,0.124437,0.380361,0.275528
min,0.0,1300.0,1.0,1.0,1872.0,1950.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,60.0,7438.0,5.0,5.0,1954.0,1965.0,0.0,0.0,0.0,218.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
50%,70.0,9430.0,6.0,5.0,1973.0,1993.0,0.0,370.0,0.0,465.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
75%,80.0,11526.0,7.0,6.0,2000.0,2004.0,162.0,734.0,0.0,801.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
max,313.0,215245.0,10.0,9.0,2010.0,2010.0,1600.0,2288.0,1526.0,2336.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


## Train/Test Split and Data Scaling

In [69]:
# The target is the sale price; every remaining column is used as a feature.
y = df['SalePrice']
X = df.drop(columns='SalePrice')

In [70]:
from sklearn.model_selection import train_test_split

In [71]:
# Hold out 10% of the rows as a test split; the fixed random_state makes the
# shuffle repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.10,
    random_state=101,
)

In [72]:
from sklearn.preprocessing import StandardScaler

In [73]:
# Unfitted feature scaler with default settings; it is fitted on the training
# split in the next cell.
scaler = StandardScaler()

In [74]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so the test rows never influence the fit.
# NOTE(review): the scaler is fitted on the whole training split before the
# cross-validated search below, so the CV folds share scaling statistics;
# wrapping scaler and model in a Pipeline would avoid that.
scaled_X_train = scaler.fit(X_train).transform(X_train)
scaled_X_test = scaler.transform(X_test)

## Implementing Elastic Net Model using GridSearchCV

In [75]:
from sklearn.linear_model import ElasticNet

In [76]:
# Unfitted ElasticNet used as the template estimator for the grid search;
# the solver's iteration cap is set to one million.
base_elastic_model = ElasticNet(max_iter=1_000_000)

In [77]:
# Candidate ElasticNet hyperparameters: 6 alpha values x 7 l1_ratio values,
# i.e. 42 combinations.
# NOTE(review): the best parameters recorded in this notebook
# (alpha=100, l1_ratio=1) sit on the upper edge of both lists, so a wider
# alpha range may be worth trying.
param_grid = dict(
    alpha=[0.1, 1, 5, 10, 50, 100],
    l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1],
)

In [78]:
from sklearn.model_selection import GridSearchCV

In [79]:
# Exhaustive search over param_grid with 5-fold cross-validation, ranking
# candidates by negated mean squared error. verbose=1 only controls how much
# progress is printed and can be set to taste.
grid_model = GridSearchCV(
    base_elastic_model,
    param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
    verbose=1,
)

In [80]:
# Run the cross-validated search on the scaled training features.
grid_model.fit(X=scaled_X_train, y=y_train)

Fitting 5 folds for each of 42 candidates, totalling 210 fits


GridSearchCV(cv=5, estimator=ElasticNet(max_iter=1000000),
             param_grid={'alpha': [0.1, 1, 5, 10, 50, 100],
                         'l1_ratio': [0.1, 0.5, 0.7, 0.9, 0.95, 0.99, 1]},
             scoring='neg_mean_squared_error', verbose=1)

In [81]:
# Hyperparameter combination selected by the grid search.
grid_model.best_params_

{'alpha': 100, 'l1_ratio': 1}

In [82]:
# Predict sale prices for the held-out rows with the fitted grid search.
y_pred = grid_model.predict(X=scaled_X_test)

In [83]:
from sklearn.metrics import mean_absolute_error,mean_squared_error

In [84]:
# Mean absolute error of the grid-searched ElasticNet on the test split.
mean_absolute_error(y_true=y_test, y_pred=y_pred)

14195.35490056217

In [85]:
# Root mean squared error of the grid-searched ElasticNet on the test split.
np.sqrt(
    mean_squared_error(y_true=y_test, y_pred=y_pred)
)

20558.508566893175

In [87]:
# Average sale price over the whole dataset, as a reference scale for the
# error metrics above.
df['SalePrice'].mean()

180815.53743589742

## Lasso Model

In [88]:
from sklearn.linear_model import LassoCV

In [107]:
# Lasso with built-in cross-validated alpha selection, left at its library
# defaults.
# NOTE(review): the ElasticNet above sets max_iter explicitly while this model
# does not -- confirm the default iteration cap is sufficient here.
lasso_model = LassoCV()

In [108]:
# Fit the cross-validated Lasso on the scaled training features.
lasso_model.fit(X=scaled_X_train, y=y_train)

LassoCV()

In [109]:
# Regularisation strength chosen by LassoCV during fitting.
lasso_model.alpha_

121.23517289436131

In [110]:
# Predict sale prices for the held-out rows with the fitted Lasso model.
test_pred = lasso_model.predict(X=scaled_X_test)

In [111]:
# Test-split error metrics for the Lasso predictions; RMSE is derived from
# the mean squared error.
MSE = mean_squared_error(y_true=y_test, y_pred=test_pred)
RMSE = np.sqrt(MSE)
MAE = mean_absolute_error(y_true=y_test, y_pred=test_pred)

In [112]:
# Lasso mean absolute error on the test split.
MAE

14186.390457864803

In [113]:
# Lasso root mean squared error on the test split.
RMSE

20558.75100803518