In [24]:
import pandas as pd 
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split

### Load the Dataset

In [5]:
concrete_data = pd.read_csv('concrete_data.csv')
concrete_data.head(10)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3
5,266.0,114.0,0.0,228.0,0.0,932.0,670.0,90,47.03
6,380.0,95.0,0.0,228.0,0.0,932.0,594.0,365,43.7
7,380.0,95.0,0.0,228.0,0.0,932.0,594.0,28,36.45
8,266.0,114.0,0.0,228.0,0.0,932.0,670.0,28,45.85
9,475.0,0.0,0.0,228.0,0.0,932.0,594.0,28,39.29


In [6]:
# show the data shape
concrete_data.shape

(1030, 9)

In [7]:
# show the summaries of data
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [None]:
# check null values of each columns
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [12]:
# Divide the data into predictors/features and target 
predictors = concrete_data[concrete_data.columns[concrete_data.columns != 'Strength']]
target = concrete_data['Strength']

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [33]:
# get the input shape for Neural Networks
n_cols = predictors.shape[1]
print(n_cols)

8


# A. Build a Baseline Model 

In [37]:
# create and compile Neural Network model with 1 hidden layer of 10 nodes and ReLu activation
def regression_model_1():
    model = Sequential()
    model.add(Dense(10, activation = 'relu', input_shape=(n_cols,)))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model 

In [25]:
# Split the model into training and test data 
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size= 0.3,shuffle=True, random_state=35)

In [53]:
mse_test = []
model_1 = regression_model_1()
# train the model using 50 epochs. Evaluate it's mse and store it on list
for i in range(50):
    # fit the model
    model_1.fit(X_train, y_train, epochs=50, verbose=2)

    mse_test.append(model_1.evaluate(X_test, y_test, verbose=0))

Epoch 1/50
23/23 - 0s - loss: 7854.4526 - 492ms/epoch - 21ms/step
Epoch 2/50
23/23 - 0s - loss: 2060.6052 - 30ms/epoch - 1ms/step
Epoch 3/50
23/23 - 0s - loss: 726.7102 - 14ms/epoch - 629us/step
Epoch 4/50
23/23 - 0s - loss: 524.4814 - 24ms/epoch - 1ms/step
Epoch 5/50
23/23 - 0s - loss: 444.7377 - 23ms/epoch - 992us/step
Epoch 6/50
23/23 - 0s - loss: 408.4376 - 33ms/epoch - 1ms/step
Epoch 7/50
23/23 - 0s - loss: 391.3077 - 35ms/epoch - 2ms/step
Epoch 8/50
23/23 - 0s - loss: 373.6928 - 31ms/epoch - 1ms/step
Epoch 9/50
23/23 - 0s - loss: 357.0819 - 35ms/epoch - 2ms/step
Epoch 10/50
23/23 - 0s - loss: 335.4656 - 46ms/epoch - 2ms/step
Epoch 11/50
23/23 - 0s - loss: 313.8058 - 46ms/epoch - 2ms/step
Epoch 12/50
23/23 - 0s - loss: 294.8991 - 32ms/epoch - 1ms/step
Epoch 13/50
23/23 - 0s - loss: 279.3205 - 35ms/epoch - 2ms/step
Epoch 14/50
23/23 - 0s - loss: 265.8289 - 33ms/epoch - 1ms/step
Epoch 15/50
23/23 - 0s - loss: 253.9833 - 33ms/epoch - 1ms/step
Epoch 16/50
23/23 - 0s - loss: 247.7630 -

In [56]:
# print the MSE values
print(f'Length of the Mean Squared Error on Testing Data: {len(mse_test)}')
print(f'Mean Squared Error on Testing Data: {mse_test}')

Length of the Mean Squared Error on Testing Data: 50
Mean Squared Error on Testing Data: [166.815673828125, 133.4031219482422, 129.51123046875, 130.17295837402344, 126.93428802490234, 125.98748779296875, 127.03550720214844, 126.26776123046875, 131.8694305419922, 128.63973999023438, 130.61331176757812, 126.93844604492188, 126.12234497070312, 125.5816421508789, 125.64351654052734, 125.5307846069336, 128.03533935546875, 125.53284454345703, 127.48933410644531, 126.06848907470703, 132.87474060058594, 129.180908203125, 128.40170288085938, 126.18888092041016, 125.54662322998047, 125.74403381347656, 126.49089813232422, 130.65826416015625, 125.88864135742188, 125.52574157714844, 126.34455871582031, 128.96788024902344, 141.51419067382812, 125.7817153930664, 126.41471862792969, 126.5291748046875, 125.92900848388672, 125.85660552978516, 127.38487243652344, 126.81639099121094, 125.86624145507812, 139.1544189453125, 126.512451171875, 130.18992614746094, 126.40203094482422, 127.52295684814453, 126.22

In [57]:
# function to calculate mean and std
def calculate_stats_pandas(values):
    series = pd.Series(values)
    mean = series.mean()
    std_dev = series.std()
    return mean, std_dev

In [58]:
mean, std_dev = calculate_stats_pandas(mse_test)
print(f'Mean: {mean}, Standard Deviation: {std_dev}')

Mean: 128.76924819946288, Standard Deviation: 6.368793029445932


### B. Normalize the Data 

In [59]:
#normalize the data 
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head(10)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069
5,-0.145138,0.464818,-0.846733,2.174405,-1.038638,-0.526262,-1.291914,0.701883
6,0.945704,0.244603,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
7,0.945704,0.244603,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,-0.279597
8,-0.145138,0.464818,-0.846733,2.174405,-1.038638,-0.526262,-1.291914,-0.279597
9,1.85474,-0.856472,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,-0.279597


In [60]:
# Split the normalized data into training and testing
X_train_norm, X_test_norm, y_train, y_test = train_test_split(predictors_norm, target, test_size= 0.3,shuffle=True, random_state=35)

In [61]:
mse_test_norm = []
model_1 = regression_model_1()
# train the model using 50 epochs. Evaluate it's mse and store it on list
for i in range(50):
    # fit the model
    model_1.fit(X_train_norm, y_train, epochs=50, verbose=2)

    mse_test_norm.append(model_1.evaluate(X_test_norm, y_test, verbose=0))

Epoch 1/50
23/23 - 0s - loss: 1579.8844 - 436ms/epoch - 19ms/step
Epoch 2/50
23/23 - 0s - loss: 1563.8160 - 33ms/epoch - 1ms/step
Epoch 3/50
23/23 - 0s - loss: 1548.3138 - 34ms/epoch - 1ms/step
Epoch 4/50
23/23 - 0s - loss: 1533.1370 - 34ms/epoch - 1ms/step
Epoch 5/50
23/23 - 0s - loss: 1518.0408 - 33ms/epoch - 1ms/step
Epoch 6/50
23/23 - 0s - loss: 1502.9729 - 27ms/epoch - 1ms/step
Epoch 7/50
23/23 - 0s - loss: 1487.8984 - 33ms/epoch - 1ms/step
Epoch 8/50
23/23 - 0s - loss: 1472.4619 - 37ms/epoch - 2ms/step
Epoch 9/50
23/23 - 0s - loss: 1456.7521 - 29ms/epoch - 1ms/step
Epoch 10/50
23/23 - 0s - loss: 1440.7041 - 34ms/epoch - 1ms/step
Epoch 11/50
23/23 - 0s - loss: 1424.0223 - 34ms/epoch - 1ms/step
Epoch 12/50
23/23 - 0s - loss: 1406.5713 - 32ms/epoch - 1ms/step
Epoch 13/50
23/23 - 0s - loss: 1388.6024 - 40ms/epoch - 2ms/step
Epoch 14/50
23/23 - 0s - loss: 1369.6981 - 21ms/epoch - 897us/step
Epoch 15/50
23/23 - 0s - loss: 1349.9269 - 33ms/epoch - 1ms/step
Epoch 16/50
23/23 - 0s - loss:

In [62]:
# print the MSE values
print(f'Lenght of the Mean Squared Error on Testing Data: {len(mse_test_norm)}')
print(f'Mean Squared Error on Testing Data: {mse_test_norm}')

Lenght of the Mean Squared Error on Testing Data: 50
Mean Squared Error on Testing Data: [447.8238220214844, 176.2723846435547, 135.54185485839844, 96.69510650634766, 78.44879150390625, 65.59135437011719, 57.647735595703125, 52.70603942871094, 50.060516357421875, 48.292781829833984, 47.13467788696289, 46.4781494140625, 46.20644760131836, 45.99002456665039, 45.66661834716797, 45.53849411010742, 45.60004806518555, 45.696102142333984, 45.435665130615234, 45.29425048828125, 45.30937576293945, 44.819068908691406, 44.30056381225586, 43.85103225708008, 43.57088851928711, 43.50185775756836, 43.16643142700195, 43.16703796386719, 43.19932556152344, 43.01506423950195, 43.0103874206543, 42.907779693603516, 42.98337173461914, 43.152679443359375, 43.247406005859375, 43.150306701660156, 43.319915771484375, 43.158355712890625, 43.272010803222656, 43.15522766113281, 43.161312103271484, 43.11011505126953, 43.25502014160156, 43.154563903808594, 43.205020904541016, 43.13936233520508, 43.101016998291016, 4

In [63]:
mean_norm, std_dev_norm = calculate_stats_pandas(mse_test_norm)
print(f'Mean: {mean_norm}, Standard Deviation: {std_dev_norm}')

Mean: 59.346998138427736, Standard Deviation: 60.94216089529918


# C. Increase the Number of Epochs

In [64]:
mse_test_norm_100 = []
model_1 = regression_model_1()
# train the model using 50 epochs. Evaluate it's mse and store it on list
for i in range(100):
    # fit the model
    model_1.fit(X_train_norm, y_train, epochs=50, verbose=2)

    mse_test_norm_100.append(model_1.evaluate(X_test_norm, y_test, verbose=0))

Epoch 1/50
23/23 - 0s - loss: 1563.0756 - 423ms/epoch - 18ms/step
Epoch 2/50
23/23 - 0s - loss: 1545.5836 - 18ms/epoch - 801us/step
Epoch 3/50
23/23 - 0s - loss: 1528.2236 - 42ms/epoch - 2ms/step
Epoch 4/50
23/23 - 0s - loss: 1511.2690 - 22ms/epoch - 951us/step
Epoch 5/50
23/23 - 0s - loss: 1494.1089 - 42ms/epoch - 2ms/step
Epoch 6/50
23/23 - 0s - loss: 1476.6835 - 27ms/epoch - 1ms/step
Epoch 7/50
23/23 - 0s - loss: 1458.8641 - 45ms/epoch - 2ms/step
Epoch 8/50
23/23 - 0s - loss: 1440.0626 - 33ms/epoch - 1ms/step
Epoch 9/50
23/23 - 0s - loss: 1420.9838 - 34ms/epoch - 1ms/step
Epoch 10/50
23/23 - 0s - loss: 1400.6422 - 33ms/epoch - 1ms/step
Epoch 11/50
23/23 - 0s - loss: 1379.7677 - 35ms/epoch - 2ms/step
Epoch 12/50
23/23 - 0s - loss: 1358.0315 - 31ms/epoch - 1ms/step
Epoch 13/50
23/23 - 0s - loss: 1335.0195 - 33ms/epoch - 1ms/step
Epoch 14/50
23/23 - 0s - loss: 1311.3224 - 36ms/epoch - 2ms/step
Epoch 15/50
23/23 - 0s - loss: 1286.9192 - 51ms/epoch - 2ms/step
Epoch 16/50
23/23 - 0s - los

In [65]:
# print the MSE values
print(f'Lenght of the Mean Squared Error on Testing Data: {len(mse_test_norm_100)}')
print(f'Mean Squared Error on Testing Data: {mse_test_norm_100}')

Lenght of the Mean Squared Error on Testing Data: 100
Mean Squared Error on Testing Data: [328.67919921875, 164.63722229003906, 127.77179718017578, 98.89012908935547, 78.58258056640625, 66.49052429199219, 60.13283920288086, 56.5690803527832, 53.91598892211914, 52.1021614074707, 50.755615234375, 49.709869384765625, 48.97825241088867, 47.94762420654297, 47.442840576171875, 47.46665954589844, 47.20407485961914, 46.73418045043945, 46.783355712890625, 46.68372344970703, 46.509315490722656, 46.377281188964844, 46.225189208984375, 46.206634521484375, 46.26139450073242, 46.22874450683594, 46.22720718383789, 46.156089782714844, 46.20582962036133, 46.344459533691406, 46.21070861816406, 46.22690200805664, 46.361610412597656, 46.2373161315918, 46.32127380371094, 46.36445617675781, 46.236114501953125, 46.19859313964844, 45.97062301635742, 45.85920333862305, 45.88783264160156, 45.845542907714844, 45.759403228759766, 45.74543762207031, 45.902984619140625, 45.77463150024414, 45.85932159423828, 46.0264

In [68]:
mean_norm, std_dev_norm = calculate_stats_pandas(mse_test_norm_100)
print(f'Mean: {mean_norm}, Standard Deviation: {std_dev_norm}')

Mean: 51.86822536468506, Standard Deviation: 32.1082756557209


# D. Increase the number of hidden layers

In [70]:
# create and compile Neural Network model with 3 hidden layer of 10 nodes and ReLu activation
def regression_model_2():
    model = Sequential()
    model.add(Dense(10, activation = 'relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation = 'relu'))
    model.add(Dense(10, activation = 'relu'))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mean_squared_error')
    return model 

In [71]:
mse_test_norm_2 = []
model_2 = regression_model_2()
# train the model using 50 epochs. Evaluate it's mse and store it on list
for i in range(50):
    # fit the model
    model_2.fit(X_train_norm, y_train, epochs=50, verbose=2)

    mse_test_norm_2.append(model_2.evaluate(X_test_norm, y_test, verbose=0))

Epoch 1/50
23/23 - 1s - loss: 1572.3689 - 678ms/epoch - 29ms/step
Epoch 2/50
23/23 - 0s - loss: 1552.6641 - 17ms/epoch - 722us/step
Epoch 3/50
23/23 - 0s - loss: 1533.8820 - 41ms/epoch - 2ms/step
Epoch 4/50
23/23 - 0s - loss: 1507.2904 - 26ms/epoch - 1ms/step
Epoch 5/50
23/23 - 0s - loss: 1464.8226 - 47ms/epoch - 2ms/step
Epoch 6/50
23/23 - 0s - loss: 1393.9875 - 31ms/epoch - 1ms/step
Epoch 7/50
23/23 - 0s - loss: 1286.5745 - 17ms/epoch - 757us/step
Epoch 8/50
23/23 - 0s - loss: 1131.7271 - 33ms/epoch - 1ms/step
Epoch 9/50
23/23 - 0s - loss: 934.8096 - 36ms/epoch - 2ms/step
Epoch 10/50
23/23 - 0s - loss: 723.2739 - 36ms/epoch - 2ms/step
Epoch 11/50
23/23 - 0s - loss: 545.6943 - 34ms/epoch - 1ms/step
Epoch 12/50
23/23 - 0s - loss: 424.9596 - 22ms/epoch - 952us/step
Epoch 13/50
23/23 - 0s - loss: 356.0887 - 51ms/epoch - 2ms/step
Epoch 14/50
23/23 - 0s - loss: 308.5545 - 39ms/epoch - 2ms/step
Epoch 15/50
23/23 - 0s - loss: 269.0582 - 23ms/epoch - 984us/step
Epoch 16/50
23/23 - 0s - loss: 

In [72]:
# print the MSE values
print(f'Lenght of the Mean Squared Error on Testing Data: {len(mse_test_norm_2)}')
print(f'Mean Squared Error on Testing Data: {mse_test_norm_2}')

Lenght of the Mean Squared Error on Testing Data: 50
Mean Squared Error on Testing Data: [137.7278594970703, 82.26424407958984, 62.85215759277344, 54.1864013671875, 49.655677795410156, 48.40275573730469, 47.87594985961914, 47.08615493774414, 45.96503829956055, 46.7481803894043, 46.10151672363281, 46.486000061035156, 46.00464630126953, 46.116939544677734, 46.14372253417969, 48.943389892578125, 47.483001708984375, 46.751625061035156, 47.26566696166992, 47.139278411865234, 48.29911804199219, 47.43731689453125, 48.39555358886719, 47.82919692993164, 47.60038375854492, 48.44916915893555, 47.29981231689453, 47.35810852050781, 47.667259216308594, 47.259361267089844, 48.00358200073242, 49.590599060058594, 47.23174285888672, 47.90895080566406, 47.15968704223633, 48.83523178100586, 47.67036819458008, 48.541969299316406, 47.37833023071289, 46.788421630859375, 47.5616455078125, 46.23583984375, 46.308048248291016, 48.01246643066406, 48.46013641357422, 47.86968231201172, 47.40244674682617, 47.6074295

In [73]:
mean_norm, std_dev_norm = calculate_stats_pandas(mse_test_norm_2)
print(f'Mean: {mean_norm}, Standard Deviation: {std_dev_norm}')

Mean: 50.46172912597656, Standard Deviation: 13.72380737336105
