#**Predictive Modeling of Concrete Compressive Strength**

###Importing Relevant Pakages

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from matplotlib import pyplot as plt
%matplotlib inline

###Loading & reading samples of the Data

In [2]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/concrete_data.csv')
df.sample(6)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
434,178.0,129.8,118.6,179.9,3.6,1007.3,746.8,28,39.16
349,213.5,0.0,174.2,154.6,11.7,1052.3,775.5,3,17.37
720,203.5,305.3,0.0,203.5,0.0,963.4,630.0,90,51.86
307,277.1,0.0,97.4,160.6,11.8,973.9,875.6,56,51.04
325,252.3,0.0,98.8,146.3,14.2,987.8,889.0,14,42.29
656,200.0,133.0,0.0,192.0,0.0,965.4,806.2,3,11.41


In [6]:
df.shape

(1030, 9)

In [7]:
df.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [8]:
df.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

###Splitting data into independent and dependent variables

In [3]:
# input variable
X = df.drop(['Strength'], axis=1)

# target variable
y = df.Strength

In [4]:
# view samples of input variable
X.sample(6)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
812,310.0,0.0,0.0,192.0,0.0,970.0,850.0,90
245,238.1,0.0,94.1,186.7,7.0,949.9,847.0,14
325,252.3,0.0,98.8,146.3,14.2,987.8,889.0,14
305,277.1,0.0,97.4,160.6,11.8,973.9,875.6,14
987,162.0,190.1,148.1,178.8,18.8,838.1,741.4,28
19,475.0,0.0,0.0,228.0,0.0,932.0,594.0,180


In [5]:
# view samples of traget variable
y.sample(6)

293    48.67
37     37.72
38     42.23
274    17.22
886    17.95
294     7.40
Name: Strength, dtype: float64

In [9]:
# number of predictors
X_cols = X.shape[1]
X_cols

8

###**Baseline Model**

In [11]:
# number of iteration
num_iter = 50

# list of mean square errors
mean_squared_errors = []

In [14]:
for _ in range(num_iter):
    # split the Data randomly
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

    # Build the neural network model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(X_cols,)))
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, epochs=50, verbose=2)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mse)
    print('*****************************************************************************')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
*****************************************************************************
Epoch 1/50
23/23 - 1s - loss: 246787.0625 - 666ms/epoch - 29ms/step
Epoch 2/50
23/23 - 0s - loss: 151910.9062 - 46ms/epoch - 2ms/step
Epoch 3/50
23/23 - 0s - loss: 92601.7734 - 46ms/epoch - 2ms/step
Epoch 4/50
23/23 - 0s - loss: 53071.3359 - 46ms/epoch - 2ms/step
Epoch 5/50
23/23 - 0s - loss: 27181.8535 - 45ms/epoch - 2ms/step
Epoch 6/50
23/23 - 0s - loss: 12485.8242 - 45ms/epoch - 2ms/step
Epoch 7/50
23/23 - 0s - loss: 5842.5664 - 45ms/epoch - 2ms/step
Epoch 8/50
23/23 - 0s - loss: 3474.1033 - 45ms/epoch - 2ms/step
Epoch 9/50
23/23 - 0s - loss: 2919.5842 - 45ms/epoch - 2ms/step
Epoch 10/50
23/23 - 0s - loss: 2780.4807 - 45ms/epoch - 2ms/step
Epoch 11/50
23/23 - 0s - loss: 2693.9453 - 46ms/epoch - 2ms/step
Epoch 12/50
23/23 - 0s - loss: 2610.3940 - 46ms/epoch - 2ms/step
Epoch 13/50
23/23 - 0s - loss: 2523.7661 - 45ms/epoch - 2ms/step
Epoch 14/50

In [15]:
# Report the mean and standard deviation of the mean squared errors
mean_mse = np.mean(mean_squared_errors)
std_dev_mse = np.std(mean_squared_errors)

In [16]:
print("Mean of Mean Squared Errors:", mean_mse)

Mean of Mean Squared Errors: 395.8481982132552


In [17]:
print("Standard Deviation of Mean Squared Errors:", std_dev_mse)

Standard Deviation of Mean Squared Errors: 522.239192131198


##**Data Normalization**

In [18]:
X_norm = (X - X.mean()) / X.std()
X_norm.sample(6)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
940,0.141926,-0.856472,0.820533,1.865333,0.719025,-1.976992,0.060112,-0.279597
714,-1.226412,0.325735,-0.846733,0.488555,-1.038638,-1.092149,2.114343,-0.612034
950,0.433774,-0.856472,1.308056,0.816359,0.769244,-2.209777,0.235975,-0.279597
298,-1.074268,-0.367363,1.095546,-1.089587,0.769244,1.387467,0.282123,0.860186
842,-1.24555,-0.856472,2.028402,-0.682173,0.970119,0.232542,-0.019713,-0.279597
639,1.13708,-0.856472,-0.846733,0.25441,-1.038638,0.669819,-0.356472,-0.279597


In [19]:
for _ in range(num_iter):
    # split the Data randomly
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.30)

    # Build the neural network model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(X_cols,)))
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, epochs=50, verbose=2)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mse)
    print('*****************************************************************************')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
*****************************************************************************
Epoch 1/50
23/23 - 1s - loss: 1648.6448 - 682ms/epoch - 30ms/step
Epoch 2/50
23/23 - 0s - loss: 1628.4122 - 47ms/epoch - 2ms/step
Epoch 3/50
23/23 - 0s - loss: 1609.2195 - 46ms/epoch - 2ms/step
Epoch 4/50
23/23 - 0s - loss: 1590.9175 - 46ms/epoch - 2ms/step
Epoch 5/50
23/23 - 0s - loss: 1573.4176 - 46ms/epoch - 2ms/step
Epoch 6/50
23/23 - 0s - loss: 1556.6089 - 45ms/epoch - 2ms/step
Epoch 7/50
23/23 - 0s - loss: 1540.0013 - 48ms/epoch - 2ms/step
Epoch 8/50
23/23 - 0s - loss: 1523.9275 - 45ms/epoch - 2ms/step
Epoch 9/50
23/23 - 0s - loss: 1508.0991 - 44ms/epoch - 2ms/step
Epoch 10/50
23/23 - 0s - loss: 1492.5205 - 46ms/epoch - 2ms/step
Epoch 11/50
23/23 - 0s - loss: 1476.7174 - 47ms/epoch - 2ms/step
Epoch 12/50
23/23 - 0s - loss: 1460.8276 - 46ms/epoch - 2ms/step
Epoch 13/50
23/23 - 0s - loss: 1444.9706 - 48ms/epoch - 2ms/step
Epoch 14/50
23/23 -

In [20]:
# Report the mean and standard deviation of the mean squared errors
mean_mse = np.mean(mean_squared_errors)
std_dev_mse = np.std(mean_squared_errors)

In [21]:
print("Mean of Mean Squared Errors:", mean_mse)

Mean of Mean Squared Errors: 395.83949862846436


In [22]:
print("Standard Deviation of Mean Squared Errors:", std_dev_mse)

Standard Deviation of Mean Squared Errors: 436.6803303394965


The mean of the mean squared error in the baseline model is sligthly higher than that of the mean of the mean squared error after normalization indicating a sligth improvement.

##**Increasing number of epoch**

In [23]:
for _ in range(num_iter):
    # split the Data randomly
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.30)

    # Build the neural network model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(X_cols,)))
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, epochs=100, verbose=2)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mse)
    print('*****************************************************************************')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 26/100
23/23 - 0s - loss: 1077.5750 - 45ms/epoch - 2ms/step
Epoch 27/100
23/23 - 0s - loss: 1050.5529 - 46ms/epoch - 2ms/step
Epoch 28/100
23/23 - 0s - loss: 1022.8411 - 46ms/epoch - 2ms/step
Epoch 29/100
23/23 - 0s - loss: 994.6829 - 46ms/epoch - 2ms/step
Epoch 30/100
23/23 - 0s - loss: 966.5614 - 46ms/epoch - 2ms/step
Epoch 31/100
23/23 - 0s - loss: 937.9660 - 47ms/epoch - 2ms/step
Epoch 32/100
23/23 - 0s - loss: 909.3209 - 45ms/epoch - 2ms/step
Epoch 33/100
23/23 - 0s - loss: 880.9496 - 45ms/epoch - 2ms/step
Epoch 34/100
23/23 - 0s - loss: 851.9911 - 47ms/epoch - 2ms/step
Epoch 35/100
23/23 - 0s - loss: 823.7481 - 46ms/epoch - 2ms/step
Epoch 36/100
23/23 - 0s - loss: 795.6357 - 46ms/epoch - 2ms/step
Epoch 37/100
23/23 - 0s - loss: 767.7329 - 47ms/epoch - 2ms/step
Epoch 38/100
23/23 - 0s - loss: 739.6974 - 45ms/epoch - 2ms/step
Epoch 39/100
23/23 - 0s - loss: 713.3749 - 44ms/epoch - 2ms/step
Epoch 40/100
23/23 - 0

In [24]:
# Report the mean and standard deviation of the mean squared errors
mean_mse = np.mean(mean_squared_errors)
std_dev_mse = np.std(mean_squared_errors)

In [25]:
print("Mean of Mean Squared Errors:", mean_mse)

Mean of Mean Squared Errors: 340.6318539759733


In [26]:
print("Standard Deviation of Mean Squared Errors:", std_dev_mse)

Standard Deviation of Mean Squared Errors: 393.1419720334168


The mean of mean square error after normalization with 50 epochs is higher than that of the mean of the mean square error with 100 epochs indicating a good improvement from 395.84 to 340.63

##**Increasing number of Hidden Layers**

In [27]:
for _ in range(num_iter):
    # split the Data randomly
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.30)

    # Build the neural network model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(X_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))

    # Compile the model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, epochs=50, verbose=2)

    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mse)
    print('*****************************************************************************')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
*****************************************************************************
Epoch 1/50
23/23 - 1s - loss: 1536.4608 - 1s/epoch - 51ms/step
Epoch 2/50
23/23 - 0s - loss: 1518.8156 - 58ms/epoch - 3ms/step
Epoch 3/50
23/23 - 0s - loss: 1506.1130 - 57ms/epoch - 2ms/step
Epoch 4/50
23/23 - 0s - loss: 1491.7346 - 58ms/epoch - 3ms/step
Epoch 5/50
23/23 - 0s - loss: 1467.2740 - 71ms/epoch - 3ms/step
Epoch 6/50
23/23 - 0s - loss: 1415.5104 - 58ms/epoch - 3ms/step
Epoch 7/50
23/23 - 0s - loss: 1318.8536 - 57ms/epoch - 2ms/step
Epoch 8/50
23/23 - 0s - loss: 1165.3186 - 58ms/epoch - 3ms/step
Epoch 9/50
23/23 - 0s - loss: 963.1011 - 56ms/epoch - 2ms/step
Epoch 10/50
23/23 - 0s - loss: 750.6574 - 58ms/epoch - 3ms/step
Epoch 11/50
23/23 - 0s - loss: 571.6301 - 56ms/epoch - 2ms/step
Epoch 12/50
23/23 - 0s - loss: 455.4113 - 58ms/epoch - 3ms/step
Epoch 13/50
23/23 - 0s - loss: 384.0781 - 56ms/epoch - 2ms/step
Epoch 14/50
23/23 - 0s - lo

In [28]:
# Report the mean and standard deviation of the mean squared errors
mean_mse = np.mean(mean_squared_errors)
std_dev_mse = np.std(mean_squared_errors)

In [29]:
print("Mean of Mean Squared Errors:", mean_mse)

Mean of Mean Squared Errors: 299.8551186865432


In [30]:
print("Standard Deviation of Mean Squared Errors:", std_dev_mse)

Standard Deviation of Mean Squared Errors: 362.7220557080756


The mean of mean square error after normalization with 50 epochs is higher than that of the mean of the mean square error with increased number of hidden layers indicating a great improvement from 395.84 to 299.86