# Creating Elastic Net model of Ames Housing Data.

***
## 1. Preparation
### Importing necessary libraries and functions.

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_absolute_error, mean_squared_error
from joblib import dump
import warnings

### Reading the data.

In [2]:
df = pd.read_csv("AMES_final_data.csv")

### First five rows in DataFrame.

In [3]:
df.head()

Unnamed: 0,Lot Frontage,Lot Area,Overall Qual,Overall Cond,Year Built,Year Remod/Add,Mas Vnr Area,BsmtFin SF 1,BsmtFin SF 2,Bsmt Unf SF,...,Sale Type_ConLw,Sale Type_New,Sale Type_Oth,Sale Type_VWD,Sale Type_WD,Sale Condition_AdjLand,Sale Condition_Alloca,Sale Condition_Family,Sale Condition_Normal,Sale Condition_Partial
0,141.0,31770,6,5,1960,1960,112.0,639.0,0.0,441.0,...,0,0,0,0,1,0,0,0,1,0
1,80.0,11622,5,6,1961,1961,0.0,468.0,144.0,270.0,...,0,0,0,0,1,0,0,0,1,0
2,81.0,14267,6,6,1958,1958,108.0,923.0,0.0,406.0,...,0,0,0,0,1,0,0,0,1,0
3,93.0,11160,7,5,1968,1968,0.0,1065.0,0.0,1045.0,...,0,0,0,0,1,0,0,0,1,0
4,74.0,13830,5,5,1997,1998,0.0,791.0,0.0,137.0,...,0,0,0,0,1,0,0,0,1,0


### Informations about DataFrame.

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2925 entries, 0 to 2924
Columns: 274 entries, Lot Frontage to Sale Condition_Partial
dtypes: float64(11), int64(263)
memory usage: 6.1 MB


### Separating data into X features and y labels.
The label we are trying to predict is SalePrice column.

In [5]:
X = df.drop(columns='SalePrice')
y = df.SalePrice

### Splitting up the data into X and y traing and test set with test proportion 10%.

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

### The dataset features has a variety of scales and units. For optimal regression performance, scaling the X features.

In [7]:
scaler = StandardScaler()
scaler.fit(X_train)

StandardScaler()

In [8]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

***
## 2. Creating Elastic Net Model and choosing best parameters using Grid Search

### Creating an instance of Elastic Net.

In [9]:
model = ElasticNet()

### The Elastic Net model has two main parameters, alpha and the L1 ratio. 

In [10]:
param_grid = {
    'alpha': [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 0.0, 1.0, 10.0, 100.0],
    'l1_ratio': [0, 0.01, 0.1, 0.7, 0.99, 1]
}

### Using GridSearch with Cross Validation for the best parameters basend on scaled training data.

In [11]:
grid = GridSearchCV(model, param_grid, scoring='neg_mean_squared_error', cv=5, verbose=10)

In [12]:
warnings.filterwarnings('ignore')
grid.fit(X_train, y_train)

Fitting 5 folds for each of 54 candidates, totalling 270 fits
[CV 1/5; 1/54] START alpha=1e-05, l1_ratio=0....................................
[CV 1/5; 1/54] END alpha=1e-05, l1_ratio=0;, score=-500459002.590 total time=   0.3s
[CV 2/5; 1/54] START alpha=1e-05, l1_ratio=0....................................
[CV 2/5; 1/54] END alpha=1e-05, l1_ratio=0;, score=-608487712.208 total time=   0.2s
[CV 3/5; 1/54] START alpha=1e-05, l1_ratio=0....................................
[CV 3/5; 1/54] END alpha=1e-05, l1_ratio=0;, score=-501778308.861 total time=   0.2s
[CV 4/5; 1/54] START alpha=1e-05, l1_ratio=0....................................
[CV 4/5; 1/54] END alpha=1e-05, l1_ratio=0;, score=-460777795.221 total time=   0.2s
[CV 5/5; 1/54] START alpha=1e-05, l1_ratio=0....................................
[CV 5/5; 1/54] END alpha=1e-05, l1_ratio=0;, score=-563935872.773 total time=   0.2s
[CV 1/5; 2/54] START alpha=1e-05, l1_ratio=0.01.................................
[CV 1/5; 2/54] END alpha=1e

[CV 4/5; 10/54] END alpha=0.0001, l1_ratio=0.7;, score=-460448448.044 total time=   0.2s
[CV 5/5; 10/54] START alpha=0.0001, l1_ratio=0.7................................
[CV 5/5; 10/54] END alpha=0.0001, l1_ratio=0.7;, score=-563934680.559 total time=   0.2s
[CV 1/5; 11/54] START alpha=0.0001, l1_ratio=0.99...............................
[CV 1/5; 11/54] END alpha=0.0001, l1_ratio=0.99;, score=-500772536.085 total time=   0.2s
[CV 2/5; 11/54] START alpha=0.0001, l1_ratio=0.99...............................
[CV 2/5; 11/54] END alpha=0.0001, l1_ratio=0.99;, score=-608529448.019 total time=   0.2s
[CV 3/5; 11/54] START alpha=0.0001, l1_ratio=0.99...............................
[CV 3/5; 11/54] END alpha=0.0001, l1_ratio=0.99;, score=-501801565.913 total time=   0.2s
[CV 4/5; 11/54] START alpha=0.0001, l1_ratio=0.99...............................
[CV 4/5; 11/54] END alpha=0.0001, l1_ratio=0.99;, score=-460938582.265 total time=   0.2s
[CV 5/5; 11/54] START alpha=0.0001, l1_ratio=0.99........

[CV 3/5; 20/54] END alpha=0.01, l1_ratio=0.01;, score=-497609611.171 total time=   0.2s
[CV 4/5; 20/54] START alpha=0.01, l1_ratio=0.01.................................
[CV 4/5; 20/54] END alpha=0.01, l1_ratio=0.01;, score=-459154646.407 total time=   0.3s
[CV 5/5; 20/54] START alpha=0.01, l1_ratio=0.01.................................
[CV 5/5; 20/54] END alpha=0.01, l1_ratio=0.01;, score=-552813133.579 total time=   0.3s
[CV 1/5; 21/54] START alpha=0.01, l1_ratio=0.1..................................
[CV 1/5; 21/54] END alpha=0.01, l1_ratio=0.1;, score=-481093367.105 total time=   0.3s
[CV 2/5; 21/54] START alpha=0.01, l1_ratio=0.1..................................
[CV 2/5; 21/54] END alpha=0.01, l1_ratio=0.1;, score=-607223965.178 total time=   0.3s
[CV 3/5; 21/54] START alpha=0.01, l1_ratio=0.1..................................
[CV 3/5; 21/54] END alpha=0.01, l1_ratio=0.1;, score=-497891876.374 total time=   0.3s
[CV 4/5; 21/54] START alpha=0.01, l1_ratio=0.1........................

[CV 2/5; 30/54] END alpha=0.1, l1_ratio=1;, score=-608461652.977 total time=   0.3s
[CV 3/5; 30/54] START alpha=0.1, l1_ratio=1.....................................
[CV 3/5; 30/54] END alpha=0.1, l1_ratio=1;, score=-501716142.446 total time=   0.3s
[CV 4/5; 30/54] START alpha=0.1, l1_ratio=1.....................................
[CV 4/5; 30/54] END alpha=0.1, l1_ratio=1;, score=-460620456.926 total time=   0.2s
[CV 5/5; 30/54] START alpha=0.1, l1_ratio=1.....................................
[CV 5/5; 30/54] END alpha=0.1, l1_ratio=1;, score=-563945122.215 total time=   0.3s
[CV 1/5; 31/54] START alpha=0.0, l1_ratio=0.....................................
[CV 1/5; 31/54] END alpha=0.0, l1_ratio=0;, score=-171461528776736.531 total time=   0.3s
[CV 2/5; 31/54] START alpha=0.0, l1_ratio=0.....................................
[CV 2/5; 31/54] END alpha=0.0, l1_ratio=0;, score=-13631730276726.803 total time=   0.2s
[CV 3/5; 31/54] START alpha=0.0, l1_ratio=0.....................................

[CV 2/5; 40/54] END alpha=1.0, l1_ratio=0.7;, score=-761276393.540 total time=   0.0s
[CV 3/5; 40/54] START alpha=1.0, l1_ratio=0.7...................................
[CV 3/5; 40/54] END alpha=1.0, l1_ratio=0.7;, score=-523774040.567 total time=   0.0s
[CV 4/5; 40/54] START alpha=1.0, l1_ratio=0.7...................................
[CV 4/5; 40/54] END alpha=1.0, l1_ratio=0.7;, score=-503161482.525 total time=   0.0s
[CV 5/5; 40/54] START alpha=1.0, l1_ratio=0.7...................................
[CV 5/5; 40/54] END alpha=1.0, l1_ratio=0.7;, score=-538580689.556 total time=   0.0s
[CV 1/5; 41/54] START alpha=1.0, l1_ratio=0.99..................................
[CV 1/5; 41/54] END alpha=1.0, l1_ratio=0.99;, score=-481227028.067 total time=   0.3s
[CV 2/5; 41/54] START alpha=1.0, l1_ratio=0.99..................................
[CV 2/5; 41/54] END alpha=1.0, l1_ratio=0.99;, score=-607675150.416 total time=   0.3s
[CV 3/5; 41/54] START alpha=1.0, l1_ratio=0.99...............................

[CV 2/5; 51/54] END alpha=100.0, l1_ratio=0.1;, score=-5621893195.817 total time=   0.0s
[CV 3/5; 51/54] START alpha=100.0, l1_ratio=0.1.................................
[CV 3/5; 51/54] END alpha=100.0, l1_ratio=0.1;, score=-4830035217.453 total time=   0.0s
[CV 4/5; 51/54] START alpha=100.0, l1_ratio=0.1.................................
[CV 4/5; 51/54] END alpha=100.0, l1_ratio=0.1;, score=-4756014537.674 total time=   0.0s
[CV 5/5; 51/54] START alpha=100.0, l1_ratio=0.1.................................
[CV 5/5; 51/54] END alpha=100.0, l1_ratio=0.1;, score=-4559926449.001 total time=   0.0s
[CV 1/5; 52/54] START alpha=100.0, l1_ratio=0.7.................................
[CV 1/5; 52/54] END alpha=100.0, l1_ratio=0.7;, score=-4064307157.229 total time=   0.0s
[CV 2/5; 52/54] START alpha=100.0, l1_ratio=0.7.................................
[CV 2/5; 52/54] END alpha=100.0, l1_ratio=0.7;, score=-4119048342.507 total time=   0.0s
[CV 3/5; 52/54] START alpha=100.0, l1_ratio=0.7..............

GridSearchCV(cv=5, estimator=ElasticNet(),
             param_grid={'alpha': [1e-05, 0.0001, 0.001, 0.01, 0.1, 0.0, 1.0,
                                   10.0, 100.0],
                         'l1_ratio': [0, 0.01, 0.1, 0.7, 0.99, 1]},
             scoring='neg_mean_squared_error', verbose=10)

### Combinantion of best parameters for model.

In [13]:
grid.best_params_

{'alpha': 100.0, 'l1_ratio': 1}

### Counting Predictions

In [14]:
y_pred = grid.predict(X_test)

***
## 3. Evaluating model performance using Mean Absolute Error (MAE), Mean Squared Error (MSE) and Root Mean Squared Error (RMSE).

### Mean Absolute Error

In [15]:
MAE = mean_absolute_error(y_test, y_pred)
MAE

15068.945537104504

### Mean Squred Error

In [16]:
MSE = mean_squared_error(y_test, y_pred)
MSE

392485885.54314977

### Root Mean Squared Error

In [17]:
RMSE = np.sqrt(MSE)
RMSE

19811.25653620057

### Saving the model

In [18]:
dump(grid, 'house_sell_price_model.joblib') 

['house_sell_price_model.joblib']