# Regression modelling with Keras

### Load data

In [1]:
!pip install numpy
!pip install pandas
!pip install keras
!pip install tensorflow
!pip install sklearn



In [17]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error

import warnings
warnings.simplefilter('ignore', FutureWarning)

In [31]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [32]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [33]:
#split data into predictors and target
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']]
target = concrete_data['Strength']

In [None]:
n_cols = predictors.shape[1]

### A. Build a baseline model (5 marks)

In [34]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Input(shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [35]:
#create list for mse values
mse_list = []

for i in range(1, 51):
    #splitting the data
    X_train, X_test, Y_train, Y_test = train_test_split(predictors, target, test_size=0.3)

    #build the model
    modelA = regression_model()

    #fit the model
    modelA.fit(X_train, Y_train, epochs=50, verbose=2)

    #make prediction
    Y_pred = modelA.predict(X_test)
    print('Y_pred: ', Y_pred)

    #evaluate the model
    mse = root_mean_squared_error(Y_test, Y_pred)
    mse_list.append(mse)

    #print successful end of iteration
    print('Iteration number {} has been completed.'.format(i))

mse_list = np.array(mse_list)

std_of_mse = np.std(mse_list)
mean_of_mse = np.mean(mse_list)

#print data
print('Model without normalization of data after 50 iterations of 50 epochs for each')
print('Length of MSE list: {}'.format(len(mse_list)))
print('Std of MSE: {:.2f}'.format(std_of_mse))
print('Mean of MSE: {:.2f}'.format(mean_of_mse))

Epoch 1/50
23/23 - 1s - 57ms/step - loss: 99413.5156
Epoch 2/50
23/23 - 0s - 3ms/step - loss: 67755.9297
Epoch 3/50
23/23 - 0s - 3ms/step - loss: 40085.3086
Epoch 4/50
23/23 - 0s - 2ms/step - loss: 21163.2754
Epoch 5/50
23/23 - 0s - 2ms/step - loss: 10088.1953
Epoch 6/50
23/23 - 0s - 3ms/step - loss: 4491.8379
Epoch 7/50
23/23 - 0s - 3ms/step - loss: 2085.9634
Epoch 8/50
23/23 - 0s - 5ms/step - loss: 1269.8187
Epoch 9/50
23/23 - 0s - 2ms/step - loss: 1031.5234
Epoch 10/50
23/23 - 0s - 3ms/step - loss: 979.4182
Epoch 11/50
23/23 - 0s - 2ms/step - loss: 959.9202
Epoch 12/50
23/23 - 0s - 3ms/step - loss: 948.8353
Epoch 13/50
23/23 - 0s - 2ms/step - loss: 936.4012
Epoch 14/50
23/23 - 0s - 3ms/step - loss: 924.9146
Epoch 15/50
23/23 - 0s - 2ms/step - loss: 912.2775
Epoch 16/50
23/23 - 0s - 2ms/step - loss: 900.2800
Epoch 17/50
23/23 - 0s - 3ms/step - loss: 887.6489
Epoch 18/50
23/23 - 0s - 3ms/step - loss: 875.5175
Epoch 19/50
23/23 - 0s - 3ms/step - loss: 864.5936
Epoch 20/50
23/23 - 0s - 

### B. Normalize the data (5 marks)

In [36]:
#normalize predictors data
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [38]:
print(predictors_norm.shape)
print(target.shape)

(1030, 8)
(1030,)


In [39]:
#create list for mse values
mse_list = []

for i in range(1, 51):
    #splitting the data
    X_train, X_test, Y_train, Y_test = train_test_split(predictors_norm, target, test_size=0.3) #use predictors_norm

    #build the model
    modelA = regression_model()

    #fit the model
    modelA.fit(X_train, Y_train, epochs=50, verbose=2)

    #make prediction
    Y_pred = modelA.predict(X_test)
    print('Y_pred: ', Y_pred)

    #evaluate the model
    mse = root_mean_squared_error(Y_test, Y_pred)
    mse_list.append(mse)

    #print successful end of iteration
    print('Iteration number {} has been completed.'.format(i))

mse_list = np.array(mse_list)

std_of_mse = np.std(mse_list)
mean_of_mse = np.mean(mse_list)

#print data
print('Model with normalization of data after 50 iterations of 50 epochs for each')
print('Length of MSE list: {}'.format(len(mse_list)))
print('Std of MSE: {:.2f}'.format(std_of_mse))
print('Mean of MSE: {:.2f}'.format(mean_of_mse))

Epoch 1/50
23/23 - 3s - 115ms/step - loss: 1630.1085
Epoch 2/50
23/23 - 0s - 3ms/step - loss: 1611.4648
Epoch 3/50
23/23 - 0s - 3ms/step - loss: 1593.1276
Epoch 4/50
23/23 - 0s - 3ms/step - loss: 1575.6387
Epoch 5/50
23/23 - 0s - 3ms/step - loss: 1558.1033
Epoch 6/50
23/23 - 0s - 3ms/step - loss: 1540.7900
Epoch 7/50
23/23 - 0s - 3ms/step - loss: 1523.3582
Epoch 8/50
23/23 - 0s - 2ms/step - loss: 1505.8903
Epoch 9/50
23/23 - 0s - 3ms/step - loss: 1488.0581
Epoch 10/50
23/23 - 0s - 3ms/step - loss: 1469.7026
Epoch 11/50
23/23 - 0s - 3ms/step - loss: 1451.3727
Epoch 12/50
23/23 - 0s - 3ms/step - loss: 1432.1534
Epoch 13/50
23/23 - 0s - 2ms/step - loss: 1412.6385
Epoch 14/50
23/23 - 0s - 3ms/step - loss: 1392.3943
Epoch 15/50
23/23 - 0s - 2ms/step - loss: 1371.4858
Epoch 16/50
23/23 - 0s - 3ms/step - loss: 1350.0288
Epoch 17/50
23/23 - 0s - 2ms/step - loss: 1328.3792
Epoch 18/50
23/23 - 0s - 2ms/step - loss: 1305.3970
Epoch 19/50
23/23 - 0s - 3ms/step - loss: 1282.5510
Epoch 20/50
23/23 -

### C. Increate the number of epochs (5 marks)

In [40]:
#create list for mse values
mse_list = []

for i in range(1, 51):
    #splitting the data
    X_train, X_test, Y_train, Y_test = train_test_split(predictors_norm, target, test_size=0.3) #use predictors_norm

    #build the model
    modelA = regression_model()

    #fit the model
    modelA.fit(X_train, Y_train, epochs=100, verbose=2)

    #make prediction
    Y_pred = modelA.predict(X_test)
    print('Y_pred: ', Y_pred)

    #evaluate the model
    mse = root_mean_squared_error(Y_test, Y_pred)
    mse_list.append(mse)

    #print successful end of iteration
    print('Iteration number {} has been completed.'.format(i))

mse_list = np.array(mse_list)

std_of_mse = np.std(mse_list)
mean_of_mse = np.mean(mse_list)

#print data
print('Model with normalization of data after 50 iterations of 100 epochs for each')
print('Length of MSE list: {}'.format(len(mse_list)))
print('Std of MSE: {:.2f}'.format(std_of_mse))
print('Mean of MSE: {:.2f}'.format(mean_of_mse))

Epoch 1/100
23/23 - 2s - 72ms/step - loss: 1588.5750
Epoch 2/100
23/23 - 0s - 4ms/step - loss: 1571.2708
Epoch 3/100
23/23 - 0s - 3ms/step - loss: 1554.2478
Epoch 4/100
23/23 - 0s - 4ms/step - loss: 1537.0444
Epoch 5/100
23/23 - 0s - 3ms/step - loss: 1519.9403
Epoch 6/100
23/23 - 0s - 4ms/step - loss: 1502.5659
Epoch 7/100
23/23 - 0s - 3ms/step - loss: 1485.0950
Epoch 8/100
23/23 - 0s - 3ms/step - loss: 1467.1216
Epoch 9/100
23/23 - 0s - 3ms/step - loss: 1448.7161
Epoch 10/100
23/23 - 0s - 3ms/step - loss: 1429.6111
Epoch 11/100
23/23 - 0s - 3ms/step - loss: 1410.2468
Epoch 12/100
23/23 - 0s - 3ms/step - loss: 1390.0996
Epoch 13/100
23/23 - 0s - 3ms/step - loss: 1369.3260
Epoch 14/100
23/23 - 0s - 3ms/step - loss: 1347.9296
Epoch 15/100
23/23 - 0s - 3ms/step - loss: 1326.0791
Epoch 16/100
23/23 - 0s - 3ms/step - loss: 1303.4011
Epoch 17/100
23/23 - 0s - 3ms/step - loss: 1279.8854
Epoch 18/100
23/23 - 0s - 3ms/step - loss: 1255.7335
Epoch 19/100
23/23 - 0s - 3ms/step - loss: 1231.3640
E

### D. Increase the number of hidden layers (5 marks)

In [41]:
# define regression model
def regression_model_D():
    # create model
    model = Sequential()
    model.add(Input(shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [42]:
#create list for mse values
mse_list = []

for i in range(1, 51):
    #splitting the data
    X_train, X_test, Y_train, Y_test = train_test_split(predictors_norm, target, test_size=0.3) #use predictors_norm

    #build the model
    modelA = regression_model_D()

    #fit the model
    modelA.fit(X_train, Y_train, epochs=50, verbose=2)

    #make prediction
    Y_pred = modelA.predict(X_test)
    print('Y_pred: ', Y_pred)

    #evaluate the model
    mse = root_mean_squared_error(Y_test, Y_pred)
    mse_list.append(mse)

    #print successful end of iteration
    print('Iteration number {} has been completed.'.format(i))

mse_list = np.array(mse_list)

std_of_mse = np.std(mse_list)
mean_of_mse = np.mean(mse_list)

#print data
print('Model with normalization of data after 50 iterations of 50 epochs for each and 3 hidden layers')
print('Length of MSE list: {}'.format(len(mse_list)))
print('Std of MSE: {:.2f}'.format(std_of_mse))
print('Mean of MSE: {:.2f}'.format(mean_of_mse))

Epoch 1/50
23/23 - 2s - 104ms/step - loss: 1552.6335
Epoch 2/50
23/23 - 0s - 3ms/step - loss: 1520.7571
Epoch 3/50
23/23 - 0s - 3ms/step - loss: 1474.1007
Epoch 4/50
23/23 - 0s - 3ms/step - loss: 1404.1620
Epoch 5/50
23/23 - 0s - 3ms/step - loss: 1304.7783
Epoch 6/50
23/23 - 0s - 3ms/step - loss: 1173.0258
Epoch 7/50
23/23 - 0s - 3ms/step - loss: 1002.8491
Epoch 8/50
23/23 - 0s - 3ms/step - loss: 812.7543
Epoch 9/50
23/23 - 0s - 3ms/step - loss: 615.1315
Epoch 10/50
23/23 - 0s - 3ms/step - loss: 453.0013
Epoch 11/50
23/23 - 0s - 3ms/step - loss: 338.0174
Epoch 12/50
23/23 - 0s - 3ms/step - loss: 275.2330
Epoch 13/50
23/23 - 0s - 3ms/step - loss: 242.9730
Epoch 14/50
23/23 - 0s - 3ms/step - loss: 223.8299
Epoch 15/50
23/23 - 0s - 3ms/step - loss: 211.0849
Epoch 16/50
23/23 - 0s - 3ms/step - loss: 201.0006
Epoch 17/50
23/23 - 0s - 2ms/step - loss: 193.2718
Epoch 18/50
23/23 - 0s - 2ms/step - loss: 186.3120
Epoch 19/50
23/23 - 0s - 2ms/step - loss: 180.7222
Epoch 20/50
23/23 - 0s - 3ms/st

### Summary

#### Part A

Model without normalization of data after 50 iterations of 50 epochs for each
<br>
Length of MSE list: 50
<br>
Std of MSE: 8.00
<br>
Mean of MSE: 17.15
<br>

#### Part B

Model with normalization of data after 50 iterations of 50 epochs for each
<br>
Length of MSE list: 50
<br>
Std of MSE: 1.98
<br>
Mean of MSE: 18.68
<br>

#### How does the mean of the mean squared errors compare to that from Step A?
Std of MSE for model B is lower than model A. Mean of MSE's is roughly the same.
<br>

#### Part C

Model with normalization of data after 50 iterations of 100 epochs for each
<br>
Length of MSE list: 50
<br>
Std of MSE: 0.53
<br>
Mean of MSE: 12.81
<br>

#### How does the mean of the mean squared errors compare to that from Step B?
Std of MSE for model C is lower than model B. Mean of MSE's for model C is lower than model B as well.
<br>

#### Part D

Model with normalization of data after 50 iterations of 50 epochs for each and 3 hidden layers
<br>
Length of MSE list: 50
<br>
Std of MSE: 0.59
<br>
Mean of MSE: 11.30
<br>

#### How does the mean of the mean squared errors compare to that from Step B?
Std of MSE for model D is lower than model B. Mean of MSE's for model D is lower than model B as well.
<br>