In this course project, you will build a regression model using the deep learning Keras library, and then you will experiment with increasing the number of training epochs and changing number of hidden layers and you will see how changing these parameters impacts the performance of the model.

Import python libraries and modules

In [1]:
import pandas as pd
import numpy as np

In [2]:
import keras

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
from keras.models import Sequential
from keras.layers import Dense

Download data

In [4]:
concrete_data = pd.read_csv('https://cocl.us/concrete_data')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


Split data into predictors and target

In [5]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']]
target = concrete_data['Strength']

In [6]:
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [7]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [8]:
n_cols = predictors.shape[1]
n_cols

8

Build A Neural Network

In [9]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [18]:
mse = []

for i in range(50):
    #split train and test data
    predictors_train, predictors_test, target_train, target_test = train_test_split(predictors, target, test_size=0.30)
    
    #create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    #compile model
    model.compile(optimizer='adam', loss='mean_squared_error')

    #fit the model
    model.fit(predictors_train, target_train, epochs=50, verbose=0)
    
    #predict target
    target_pred = model.predict(predictors_test)
    
    #Evaluate mean squared error
    mse.append(mean_squared_error(target_test, target_pred))

Mean and Standard Deviation of MSE's

In [19]:
print("The mean is: {:.2f}".format(np.mean(mse)))
print("The Standard deviation is: {:.2f}".format(np.std(mse)))

The mean is: 354.34
The Standard deviation is: 347.04


Normalize the data

In [21]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


Build neural network with normalized data and find the mean and standard deviation.

In [22]:
mse_norm = []

for i in range(50):
    #split train and test data
    predictors_train, predictors_test, target_train, target_test = train_test_split(predictors, target, test_size=0.30)
    
    #create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    #compile model
    model.compile(optimizer='adam', loss='mean_squared_error')

    #fit the model
    model.fit(predictors_train, target_train, epochs=50, verbose=0)
    
    #predict target
    target_pred = model.predict(predictors_test)
    
    #Evaluate mean squared error
    mse_norm.append(mean_squared_error(target_test, target_pred))

In [23]:
print("The mean is: {:.2f}".format(np.mean(mse_norm)))
print("The Standard deviation is: {:.2f}".format(np.std(mse_norm)))

The mean is: 357.85
The Standard deviation is: 339.79


In [28]:
print("The mean sligthly INCREASES and the standard deviation slightly DECREASES compared to the unormalized data.")

The mean sligthly INCREASES and the standard deviation slightly DECREASES compared to the unormalized data.


Repeat procedure with 100 epochs

In [25]:
mse_norm2 = []

for i in range(50):
    #split train and test data
    predictors_train, predictors_test, target_train, target_test = train_test_split(predictors, target, test_size=0.30)
    
    #create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    #compile model
    model.compile(optimizer='adam', loss='mean_squared_error')

    #fit the model
    model.fit(predictors_train, target_train, epochs=100, verbose=0)
    
    #predict target
    target_pred = model.predict(predictors_test)
    
    #Evaluate mean squared error
    mse_norm2.append(mean_squared_error(target_test, target_pred))

In [26]:
print("The mean is: {:.2f}".format(np.mean(mse_norm2)))
print("The Standard deviation is: {:.2f}".format(np.std(mse_norm2)))

The mean is: 150.87
The Standard deviation is: 66.28


In [29]:
print("The mean DECREASES and the standard deviation DECREASES when the epochs are doubled.")

The mean DECREASES and the standard deviation DECREASES when the epochs are doubled.


Repeat procedure with 50 epochs and 3 hidden layers, 10 nodes and ReLU activation function.

In [30]:
mse_norm3 = []

for i in range(50):
    #split train and test data
    predictors_train, predictors_test, target_train, target_test = train_test_split(predictors, target, test_size=0.30)
    
    #create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    #compile model
    model.compile(optimizer='adam', loss='mean_squared_error')

    #fit the model
    model.fit(predictors_train, target_train, epochs=50, verbose=0)
    
    #predict target
    target_pred = model.predict(predictors_test)
    
    #Evaluate mean squared error
    mse_norm3.append(mean_squared_error(target_test, target_pred))

In [31]:
print("The mean is: {:.2f}".format(np.mean(mse_norm3)))
print("The Standard deviation is: {:.2f}".format(np.std(mse_norm3)))

The mean is: 123.45
The Standard deviation is: 48.15


In [32]:
print("The mean DECREASES and the standard deviation DECREASES when the three hidden layers are used instead of one.")

The mean DECREASES and the standard deviation DECREASES when the three hidden layers are used instead of one.
