# Regression with Scikit

Deep Learning Regression on __House Price dataset__ 


1) Perform __<font color=red>Baseline</font>__ regression using Scikit learn library:
- KerasRegressor
- k-fold cross validation

NOTE:
- We can ignore the -ve sign of Root Mean Square as Scikit Library inverts to maximise instead of minimizing +ve number.

In [1]:
import pandas as pd
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

In [2]:
dataframe = pd.read_csv('Data/housing.csv', delim_whitespace=True, header=None)
dataset = dataframe.values

In [3]:
X = dataset[:, 0:13].astype(float)
y = dataset[:, 13]

In [4]:
def baseline_model():
    model = Sequential()
    model.add(Dense(13, input_dim=13, activation='relu'))
    model.add(Dense(1))
    
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    return model

In [5]:
estimator = KerasRegressor(build_fn=baseline_model, epochs=100, batch_size=5, verbose=0)
kfold = KFold(n_splits=10)
results = cross_val_score(estimator, X, y, cv=kfold)

In [6]:
print("Baseline: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Baseline: -44.78 (25.71) MSE


2) Perform __<font color=red>Standardization</font>__ followed by regression to improve performance:
- KerasRegressor
- Standardization, k-fold cross validation

In [7]:
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [8]:
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)))

pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, y, cv=kfold)

In [9]:
print("Standardized: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Standardized: -30.26 (31.47) MSE


3) Perform __<font color=red>Layer addition or reduction</font>__ followed by regression:
- KerasRegressor
- k-fold cross validation
- Increase and Decrease layers

<u>Larger Neural Network</u>:

In [10]:
def larger_model():
    model = Sequential()
    model.add(Dense(13, input_dim=13, activation='relu'))
    model.add(Dense(6, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    return model

In [11]:
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)))

pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, y, cv=kfold)

In [12]:
print("Larger model: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Larger model: -25.07 (19.69) MSE


<u>Wider Neural Network</u>:

In [13]:
def larger_model():
    model = Sequential()
    model.add(Dense(20, input_dim=13, activation='relu'))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    
    return model

In [14]:
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasRegressor(build_fn=baseline_model, epochs=50, batch_size=5, verbose=0)))

pipeline = Pipeline(estimators)
kfold = KFold(n_splits=10)
results = cross_val_score(pipeline, X, y, cv=kfold)

In [15]:
print("Wider model: %.2f (%.2f) MSE" % (results.mean(), results.std()))

Wider model: -27.35 (29.88) MSE
