# Build a Regression Model in Keras

### Table Of Contents

#### The dataset is about the compressive strength of different samples of concrete based on the volumes of the different ingredients that were used to make them. Ingredients include: cement, blast furnace slag, fly ash, water, superplasticizer, coarse aggregate and fine aggregate.

In [37]:
import time
import os
import pandas as pd
import numpy as np

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense

In [38]:
# Column names of the concrete dataset: eight predictors plus the target
# column "Strength".
COL_NAME_CEMENT = "Cement"
COL_NAME_BLAST_FURNACE_SLAG = "Blast Furnace Slag"
COL_NAME_FLY_ASH = "Fly Ash"
COL_NAME_WATER = "Water"
COL_NAME_SUPERPLASTICIZER = "Superplasticizer"
COL_NAME_COARSE_AGGREGATE = "Coarse Aggregate"
COL_NAME_FINE_AGGREGATE = "Fine Aggregate"
COL_NAME_AGE = "Age"
COL_NAME_STRENGTH = "Strength"

# Column names of the results table (one row per experiment).
# NOTE: despite the "RMSE" in its identifier, COL_NAME_RMSE labels the
# standard deviation of the MSE scores, not a root-mean-squared error.
COL_NAME_EXPERIMENT = "Experiment"
COL_NAME_MSE = "Mean MSE"
COL_NAME_RMSE = "Std Deviation MSE"

# Empty results dataframe with three columns:
# experiment name, mean of the MSEs, standard deviation of the MSEs.
header_of_df_mse_and_rmse = [COL_NAME_EXPERIMENT, COL_NAME_MSE, COL_NAME_RMSE]
df_mse_and_rmse = pd.DataFrame(columns=header_of_df_mse_and_rmse, data=[])


def get_round(score, num_of_digits=2):
    """Round ``score`` to ``num_of_digits`` decimal places (two by default)."""
    rounded_score = round(score, num_of_digits)
    return rounded_score


def get_mean(list_of_mse_scores):
    """Return the mean of the given scores, rounded with ``get_round``.

    Args:
        list_of_mse_scores: Sequence of numeric scores (list, tuple or
            NumPy array).

    Returns:
        The rounded mean, or ``None`` when the input is ``None`` or empty.
    """
    # Check the size explicitly instead of relying on truthiness: a NumPy
    # array with more than one element raises ValueError in a boolean context.
    if list_of_mse_scores is None or np.size(list_of_mse_scores) == 0:
        return None
    return get_round(np.mean(list_of_mse_scores))

def get_standard_deviation(list_of_mse_scores):
    """Return the standard deviation of the scores, rounded with ``get_round``.

    The value is computed with ``np.std`` using its default arguments.

    Args:
        list_of_mse_scores: Sequence of numeric scores (list, tuple or
            NumPy array).

    Returns:
        The rounded standard deviation, or ``None`` when the input is
        ``None`` or empty.
    """
    # Check the size explicitly instead of relying on truthiness: a NumPy
    # array with more than one element raises ValueError in a boolean context.
    if list_of_mse_scores is None or np.size(list_of_mse_scores) == 0:
        return None
    return get_round(np.std(list_of_mse_scores))


def build_model_with_one_hidden_layer(num_of_features=3):
    """Create and compile the baseline regression network.

    The network consists of:

    + One hidden Dense layer of 10 nodes with a ReLU activation, whose input
      shape is ``(num_of_features,)``.
    + A Dense output layer with a single node and no activation argument.

    It is compiled with the adam optimizer and the mean squared error loss.
    """
    hidden_layer = Dense(10, activation="relu", input_shape=(num_of_features,))
    output_layer = Dense(1)

    # Assemble the stack in one go and compile it before handing it back.
    network = Sequential([hidden_layer, output_layer])
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network


def build_model_with_three_hidden_layers(num_of_features=3):
    """Create and compile the deeper regression network.

    The network consists of:

    + Three hidden Dense layers, each of 10 nodes with a ReLU activation; the
      first one declares the input shape ``(num_of_features,)``.
    + A Dense output layer with a single node and no activation argument.

    It is compiled with the adam optimizer and the mean squared error loss.
    """
    # The first hidden layer carries the input shape; the other two are plain.
    layers = [Dense(10, activation="relu", input_shape=(num_of_features,))]
    layers.extend(Dense(10, activation="relu") for _ in range(2))
    layers.append(Dense(1))

    network = Sequential(layers)
    network.compile(optimizer='adam', loss='mean_squared_error')
    return network


def get_mean_squared_error(compiled_model, X, y, epochs=50, verbose=1, random_state=24):
    """Train ``compiled_model`` on a 70/30 split and return the test-set MSE.

    Steps:
      1. Split ``X`` / ``y`` into training and test sets, holding out 30% of
         the rows for testing, and print the shapes of both sets.
      2. Fit ``compiled_model`` on the training set.
      3. Predict on the test set and compute the mean squared error between
         the predictions and the actual targets.

    Args:
        compiled_model: An already compiled model exposing ``fit`` and
            ``predict``. ``fit`` is called on this object directly, so the
            caller's model is trained as a side effect.
        X: Predictors.
        y: Target values.
        epochs: Number of training epochs passed to ``fit``.
        verbose: Verbosity flag passed to ``fit``.
        random_state: Seed passed to ``train_test_split``. The default (24)
            keeps the previous behaviour, in which every call uses the same
            seed; pass a different value to obtain a different split.

    Returns:
        The mean squared error on the held-out test set.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=random_state)
    print("Training set: ", X_train.shape, y_train.shape)
    print("Testing set: ", X_test.shape, y_test.shape)

    # Use the model passed in as an argument. The previous code referenced a
    # global named `model`, which silently ignored `compiled_model` and failed
    # with NameError when no such global existed.
    compiled_model.fit(X_train, y_train, epochs=epochs, verbose=verbose)

    y_hat = compiled_model.predict(X_test)
    mse = mean_squared_error(y_test, y_hat)

    return mse


def get_mean_and_std_of_mse(df_X, df_y, compiled_model,
                            max_iteration=50, epochs=50, verbose=0):
    """Repeat the train/evaluate cycle and summarise the resulting test MSEs.

    Calls ``get_mean_squared_error`` ``max_iteration`` times, printing the
    progress and the wall-clock duration of each call. It then prints and
    returns the mean and the standard deviation of the collected scores, as
    produced by ``get_mean`` and ``get_standard_deviation``.

    NOTE(review): the same ``compiled_model`` object is handed to every
    repetition and is never rebuilt here, so if the callee trains it in place
    the repetitions are presumably not independent runs -- confirm that this
    is intended.

    Args:
        df_X: Predictors forwarded to ``get_mean_squared_error``.
        df_y: Targets forwarded to ``get_mean_squared_error``.
        compiled_model: Compiled model forwarded to ``get_mean_squared_error``.
        max_iteration: Number of repetitions.
        epochs: Training epochs per repetition.
        verbose: Verbosity flag forwarded to ``get_mean_squared_error``.

    Returns:
        A ``(mean_mse, std_mse)`` tuple.
    """
    mse_per_run = []
    for run_index in range(max_iteration):
        started_at = time.time()
        print("-" * 36)
        print("Processing current number of iteration : {}".format(run_index + 1))
        run_mse = get_mean_squared_error(
            compiled_model, df_X, df_y, epochs=epochs, verbose=verbose)
        mse_per_run.append(run_mse)
        elapsed_seconds = time.time() - started_at
        print("Duration (seconds): {}".format(elapsed_seconds))

    summary_template = "Finished - {} times.\nAnd the list of mean squared errors : {}"
    print(summary_template.format(max_iteration, mse_per_run))

    mean_mse = get_mean(mse_per_run)
    std_mse = get_standard_deviation(mse_per_run)

    print("-" * 72)
    result_template = ("The mean and the standard deviation of the mean squared errors are: "
                       "{} and {}, respectively")
    print(result_template.format(mean_mse, std_mse))

    return mean_mse, std_mse


def get_report(name_of_experiment, mean_mse, std_mse):
    """Build a one-row dataframe summarising a single experiment.

    The row holds the experiment name, the mean of the mean squared errors
    and their standard deviation, under the column names listed in
    ``header_of_df_mse_and_rmse``.
    """
    single_row = [name_of_experiment, mean_mse, std_mse]
    report = pd.DataFrame([single_row], columns=header_of_df_mse_and_rmse)
    return report
    

# Loading input corpus

Let's store the URL of the input corpus in a variable, so that we can re-use it when downloading the data.

In [41]:
url = 'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'

In [42]:
df = pd.read_csv(url)

Let's read input data into a dataframe

In [43]:
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [48]:
df.columns

Index(['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer',
       'Coarse Aggregate', 'Fine Aggregate', 'Age', 'Strength'],
      dtype='object')

In [49]:
df.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [50]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.5 KB


In [51]:
df.head(3)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27


So the first concrete sample has "540" cubic meter of cement, "0" cubic meter of blast furnace slag, "0" cubic meter of fly ash, "162" cubic meter of water, "2.5" cubic meter of superplasticizer, "1040" cubic meter of coarse aggregate, and "676" cubic meter of fine aggregate. Such a concrete mix, which is "28" days old, has a compressive strength of "79.99" MPa.

In [52]:
print("(row, column) = {}".format(df.shape))

(row, column) = (1030, 9)


So, there are 1030 samples in total; holding out 30% of them for testing leaves 721 samples to train our model on.

Let's check the data for any missing value

In [54]:
df.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

As you can see, the input corpus above has no missing values and looks clean enough to train the model on. However, we can still apply a normalization technique to it.

# Normalizing input data

In [56]:
list_of_column_names = df.columns
list_of_column_names

Index(['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer',
       'Coarse Aggregate', 'Fine Aggregate', 'Age', 'Strength'],
      dtype='object')

## Splitting into predictors and target

Filtering the list of column names of dataframe predictors

In [57]:
list_of_col_names_predictors = [x for x in list_of_column_names 
                                if x != COL_NAME_STRENGTH]

In [58]:
list_of_col_names_predictors

['Cement',
 'Blast Furnace Slag',
 'Fly Ash',
 'Water',
 'Superplasticizer',
 'Coarse Aggregate',
 'Fine Aggregate',
 'Age']

In [59]:
df_predictors = df[list_of_col_names_predictors]

In [60]:
df_target = df[[COL_NAME_STRENGTH]]

Reviewing the data in two dataframes: predictors and target

In [61]:
df_predictors.head(3)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270


In [62]:
df_target.head(3)

Unnamed: 0,Strength
0,79.99
1,61.89
2,40.27


# Applying normalization method

Recall that one way to normalize the data is by subtracting the mean from the individual predictors and dividing by the standard deviation.

In [64]:
# Standardize each predictor column: subtract its mean and divide by its
# standard deviation. The statistics are computed on the full `df_predictors`
# frame, i.e. before any train/test split is made.
df_predictors_norm = (df_predictors - df_predictors.mean())/df_predictors.std()

In [65]:
df_predictors_norm.head(3)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134


# A - Experiment with a baseline model

## Building and Training with the baseline model

In [66]:
num_of_features = len(df.columns) - 1
print("Number of features for input layer : ", num_of_features)

Number of features for input layer :  8


In [67]:
# Experiment A: one-hidden-layer network on the raw (un-normalized) predictors.
# max_iteration = number of train/evaluate repetitions,
# epochs = training epochs per repetition,
# verbose = verbosity flag forwarded to the training call.
max_iteration = 50
epochs = 50
verbose = 0

# Get the compiled model
model = build_model_with_one_hidden_layer(num_of_features=num_of_features)

# Mean and standard deviation of the test MSEs collected over all repetitions.
mean_mse, std_mse = get_mean_and_std_of_mse(df_predictors, 
                                            df_target, 
                                            model, 
                                            max_iteration=max_iteration, 
                                            epochs=epochs, verbose=verbose)

------------------------------------
Processing current number of iteration : 1
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 12.028974533081055
------------------------------------
Processing current number of iteration : 2
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 5.534287214279175
------------------------------------
Processing current number of iteration : 3
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 5.583523273468018
------------------------------------
Processing current number of iteration : 4
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 5.706511497497559
------------------------------------
Processing current number of iteration : 5
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 5.510452508926392
------------------------------------
Processing current number of iteration : 6
Trai

## Report the mean and the standard deviation of the mean squared errors

In [68]:
name_of_experiment = "Baseline-Raw (50 epochs)"

# Report the mean and the standard deviation of the mean squared errors
df_result_baseline = get_report(name_of_experiment, mean_mse, std_mse)
df_result_baseline

Unnamed: 0,Experiment,Mean MSE,Std Deviation MSE
0,Baseline-Raw (50 epochs),60.9,31.15


In [69]:
# Append this experiment's row to the accumulated results.
# `ignore_index=True` renumbers the rows of the stored frame itself; the
# previous `reset_index(drop=True)` only affected the displayed copy, so the
# stored frame kept the duplicate index label 0 for every appended row.
df_mse_and_rmse = pd.concat([df_mse_and_rmse, df_result_baseline], axis=0, ignore_index=True)

# Review the result dataframe
df_mse_and_rmse

Unnamed: 0,Experiment,Mean MSE,Std Deviation MSE
0,Baseline-Raw (50 epochs),60.9,31.15


# B - Experiment with Normalized Data

Repeat Part A but use a normalized version of the data. Recall that one way to normalize the data is by subtracting the mean from the individual predictors and dividing by the standard deviation.

How does the mean of the mean squared errors compare to that from Step A?

## Normalize the data

by subtracting the mean and dividing by the standard deviation.

## Before normalization

In [70]:
df_predictors.head(3)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270


# After normalization

In [71]:
df_predictors_norm.head(3)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134


## Building and Training with the baseline model after normalizing the data with 50 epochs

In [72]:
# Experiment B: same one-hidden-layer network as in part A, but fed with the
# normalized predictors (`df_predictors_norm`).
# max_iteration = number of train/evaluate repetitions,
# epochs = training epochs per repetition,
# verbose = verbosity flag forwarded to the training call.
max_iteration = 50
epochs = 50
verbose = 0

# Get the compiled model
model = build_model_with_one_hidden_layer(num_of_features=num_of_features)

# Mean and standard deviation of the test MSEs collected over all repetitions.
mean_mse, std_mse = get_mean_and_std_of_mse(df_predictors_norm, 
                                            df_target, 
                                            model, 
                                            max_iteration=max_iteration, 
                                            epochs=epochs, verbose=verbose)

------------------------------------
Processing current number of iteration : 1
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 8.05424427986145
------------------------------------
Processing current number of iteration : 2
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 5.512316942214966
------------------------------------
Processing current number of iteration : 3
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 5.534383773803711
------------------------------------
Processing current number of iteration : 4
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 5.5043625831604
------------------------------------
Processing current number of iteration : 5
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 5.4938740730285645
------------------------------------
Processing current number of iteration : 6
Trainin

### Report the mean and the standard deviation of the mean squared errors

In [73]:
name_of_experiment = "Normalized-1 Hidden Layers(50 epochs)"

# Report the mean and the standard deviation of the mean squared errors
df_result_baseline = get_report(name_of_experiment, mean_mse, std_mse)
df_result_baseline

Unnamed: 0,Experiment,Mean MSE,Std Deviation MSE
0,Normalized-1 Hidden Layers(50 epochs),48.24,36.87


In [74]:
# Append this experiment's row to the accumulated results.
# `ignore_index=True` renumbers the rows of the stored frame itself; the
# previous `reset_index(drop=True)` only affected the displayed copy, so the
# stored frame kept the duplicate index label 0 for every appended row.
df_mse_and_rmse = pd.concat([df_mse_and_rmse, df_result_baseline], axis=0, ignore_index=True)

# Review the result dataframe
df_mse_and_rmse

Unnamed: 0,Experiment,Mean MSE,Std Deviation MSE
0,Baseline-Raw (50 epochs),60.9,31.15
1,Normalized-1 Hidden Layers(50 epochs),48.24,36.87


# C. Increase the number of epochs

Repeat Part B but use 100 epochs this time for training.

How does the mean of the mean squared errors compare to that from Step B?

# Building and Training with the baseline model after normalizing the data with 100 epochs

In [75]:
# Experiment C: same setup as part B (one hidden layer, normalized
# predictors), but with 100 training epochs per repetition instead of 50.
# max_iteration = number of train/evaluate repetitions,
# verbose = verbosity flag forwarded to the training call.
max_iteration = 50
epochs = 100
verbose = 0

# Get the compiled model
model = build_model_with_one_hidden_layer(num_of_features=num_of_features)

# Mean and standard deviation of the test MSEs collected over all repetitions.
mean_mse, std_mse = get_mean_and_std_of_mse(df_predictors_norm, 
                                            df_target, 
                                            model, 
                                            max_iteration=max_iteration, 
                                            epochs=epochs, verbose=verbose)

------------------------------------
Processing current number of iteration : 1
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 11.011007308959961
------------------------------------
Processing current number of iteration : 2
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 9.673621416091919
------------------------------------
Processing current number of iteration : 3
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 10.909236192703247
------------------------------------
Processing current number of iteration : 4
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 10.72933578491211
------------------------------------
Processing current number of iteration : 5
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 13.2069833278656
------------------------------------
Processing current number of iteration : 6
Trai

## Report the mean and the standard deviation of the mean squared errors

In [76]:
name_of_experiment = "Normalized-1 Hidden Layers(100 epochs)"

# Report the mean and the standard deviation of the mean squared errors
df_result_baseline = get_report(name_of_experiment, mean_mse, std_mse)
df_result_baseline

Unnamed: 0,Experiment,Mean MSE,Std Deviation MSE
0,Normalized-1 Hidden Layers(100 epochs),49.96,15.96


In [77]:
# Append this experiment's row to the accumulated results.
# `ignore_index=True` renumbers the rows of the stored frame itself; the
# previous `reset_index(drop=True)` only affected the displayed copy, so the
# stored frame kept the duplicate index label 0 for every appended row.
df_mse_and_rmse = pd.concat([df_mse_and_rmse, df_result_baseline], axis=0, ignore_index=True)

# Review the result dataframe
df_mse_and_rmse

Unnamed: 0,Experiment,Mean MSE,Std Deviation MSE
0,Baseline-Raw (50 epochs),60.9,31.15
1,Normalized-1 Hidden Layers(50 epochs),48.24,36.87
2,Normalized-1 Hidden Layers(100 epochs),49.96,15.96


# D. Increase the number of hidden layers

Repeat part B but use a neural network with the following instead:

Three hidden layers, each of 10 nodes and ReLU activation function.
How does the mean of the mean squared errors compare to that from Step B?

## Building and Training with the model after normalizing the data with 50 epochs

In [78]:
# Experiment D: three-hidden-layer network on the normalized predictors,
# with the same repetition and epoch counts as part B.
# max_iteration = number of train/evaluate repetitions,
# epochs = training epochs per repetition,
# verbose = verbosity flag forwarded to the training call.
max_iteration = 50
epochs = 50
verbose = 0

# Get the compiled model
model = build_model_with_three_hidden_layers(num_of_features=num_of_features)

# Mean and standard deviation of the test MSEs collected over all repetitions.
mean_mse, std_mse = get_mean_and_std_of_mse(df_predictors_norm, 
                                            df_target, 
                                            model, 
                                            max_iteration=max_iteration, 
                                            epochs=epochs, 
                                            verbose=verbose)

------------------------------------
Processing current number of iteration : 1
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 11.61649227142334
------------------------------------
Processing current number of iteration : 2
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 7.670273542404175
------------------------------------
Processing current number of iteration : 3
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 7.561892509460449
------------------------------------
Processing current number of iteration : 4
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 8.30018663406372
------------------------------------
Processing current number of iteration : 5
Training set:  (721, 8) (721, 1)
Testing set:  (309, 8) (309, 1)
Duration (seconds): 8.256969213485718
------------------------------------
Processing current number of iteration : 6
Traini

## Report the mean and the standard deviation of the mean squared errors

In [79]:
name_of_experiment = "Normalized-3 Hidden Layers(50 epochs)"

# Report the mean and the standard deviation of the mean squared errors
df_result_baseline = get_report(name_of_experiment, mean_mse, std_mse)
df_result_baseline

Unnamed: 0,Experiment,Mean MSE,Std Deviation MSE
0,Normalized-3 Hidden Layers(50 epochs),39.88,12.73


In [80]:
# Append this experiment's row to the accumulated results.
# `ignore_index=True` renumbers the rows of the stored frame itself; the
# previous `reset_index(drop=True)` only affected the displayed copy, so the
# stored frame kept the duplicate index label 0 for every appended row.
df_mse_and_rmse = pd.concat([df_mse_and_rmse, df_result_baseline], axis=0, ignore_index=True)

# Review the result dataframe
df_mse_and_rmse

Unnamed: 0,Experiment,Mean MSE,Std Deviation MSE
0,Baseline-Raw (50 epochs),60.9,31.15
1,Normalized-1 Hidden Layers(50 epochs),48.24,36.87
2,Normalized-1 Hidden Layers(100 epochs),49.96,15.96
3,Normalized-3 Hidden Layers(50 epochs),39.88,12.73


# Discussion

As you can see, the mean squared error (MSE) tells us how close the predictions of a regression model are to the actual values in our testing set. The standard deviation of the MSEs over the 50 repetitions tells us how much that error varies from one repetition to the next.

Thus, according to the mean squared error, the smaller score, the closer we are finding the regression line of best fit.

Indeed, the model (D-Normalized-3 Hidden Layers(50 epochs)), which is trained with three hidden layers, each of 10 nodes and ReLU activation function, is the best one, because its mean of the mean squared errors is 39.88. Moreover, its error is lower by about 8.4 and 10.1 when compared with the models trained with one hidden layer for 50 epochs (48.24) and 100 epochs (49.96), respectively.

Also, when compared with the mean of the MSEs of the baseline model (A-Baseline-Raw (50 epochs)), which is 60.9, the mean of the MSEs of model (D) is lower by about 21.

However, it is interesting that the mean of the MSEs of model (B-Normalized-1 Hidden Layers(50 epochs)) is about 1.7 lower than the mean of the MSEs of model (C-Normalized-1 Hidden Layers(100 epochs)), which is trained on the same normalized data with the same model configuration, but for 100 epochs.

In conclusion, in order to get a better result, we could apply several techniques to tune the model, such as normalizing the input data, increasing the number of epochs, or increasing the number of hidden layers.