In [1]:
import pandas as pd
import numpy as np

Load Concrete date

In [2]:
concrete_data = pd.read_csv('https://cocl.us/concrete_data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


Check the size of the data

In [3]:
concrete_data.shape

(1030, 9)

Check columns and contents

In [4]:
concrete_data.describe

<bound method NDFrame.describe of       Cement  Blast Furnace Slag  Fly Ash  Water  Superplasticizer  \
0      540.0                 0.0      0.0  162.0               2.5   
1      540.0                 0.0      0.0  162.0               2.5   
2      332.5               142.5      0.0  228.0               0.0   
3      332.5               142.5      0.0  228.0               0.0   
4      198.6               132.4      0.0  192.0               0.0   
5      266.0               114.0      0.0  228.0               0.0   
6      380.0                95.0      0.0  228.0               0.0   
7      380.0                95.0      0.0  228.0               0.0   
8      266.0               114.0      0.0  228.0               0.0   
9      475.0                 0.0      0.0  228.0               0.0   
10     198.6               132.4      0.0  192.0               0.0   
11     198.6               132.4      0.0  192.0               0.0   
12     427.5                47.5      0.0  228.0        

Let's check if we are missing values

In [5]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

Predictors are all columns exceft for Strength which is the target

In [6]:
concrete_data_cols = concrete_data.columns
predictors = concrete_data[concrete_data_cols[concrete_data_cols != 'Strength']]
target = concrete_data['Strength']

Normalize the data

In [7]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [8]:
n_cols = predictors_norm.shape[1] # number of predictors to be used to build the NN

Let's get started with Keras

In [9]:
import keras

Using TensorFlow backend.


In [10]:
from keras.models import Sequential
from keras.layers import Dense

Let's define a regression model, using 1 layer with 10 nodes, the ReLU activation function, adam as the optimizer and the mean squared error as the loss function

In [11]:
def regression_model():
    model = Sequential()
    model.add(Dense(10,activation='relu',input_shape=(n_cols,)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

Train and test the network

In [12]:
model = regression_model()
print(model.metrics_names)

Instructions for updating:
Colocations handled automatically by placer.
['loss']


Train the model using 50 epochs - loop 50 times reporting the mean squared errors

In [13]:
from sklearn.model_selection import train_test_split
scores = []
# fit the model
for i in range(50):
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.30, random_state=24)
    model.fit(predictors_norm, target, validation_split=0.3, epochs=100, verbose=2)
    ll = model.evaluate(X_test, y_test, verbose=0)
    scores.append(ll)
print(scores)
 

Instructions for updating:
Use tf.cast instead.
Train on 721 samples, validate on 309 samples
Epoch 1/100
 - 7s - loss: 1700.3725 - val_loss: 1215.9317
Epoch 2/100
 - 1s - loss: 1683.6511 - val_loss: 1203.8953
Epoch 3/100
 - 0s - loss: 1666.3435 - val_loss: 1191.6269
Epoch 4/100
 - 3s - loss: 1648.5649 - val_loss: 1179.3256
Epoch 5/100
 - 0s - loss: 1629.9793 - val_loss: 1166.4942
Epoch 6/100
 - 0s - loss: 1610.2907 - val_loss: 1153.3162
Epoch 7/100
 - 0s - loss: 1589.4978 - val_loss: 1139.7673
Epoch 8/100
 - 0s - loss: 1567.4039 - val_loss: 1125.4118
Epoch 9/100
 - 0s - loss: 1543.7777 - val_loss: 1110.5960
Epoch 10/100
 - 0s - loss: 1519.0121 - val_loss: 1094.9142
Epoch 11/100
 - 0s - loss: 1492.2582 - val_loss: 1078.6970
Epoch 12/100
 - 3s - loss: 1464.6044 - val_loss: 1061.9844
Epoch 13/100
 - 0s - loss: 1435.3541 - val_loss: 1044.2799
Epoch 14/100
 - 0s - loss: 1404.1178 - val_loss: 1026.0636
Epoch 15/100
 - 0s - loss: 1372.1164 - val_loss: 1006.6781
Epoch 16/100
 - 0s - loss: 133

Report mean and standard deviation of fitting iterates

In [14]:
import statistics as st
print(scores)
print("Mean loss is %f" % st.mean(scores))
print("With standard deviation %f" % st.stdev(scores))

[158.7778529688764, 110.65009883152243, 87.39101116094002, 74.7746073404948, 68.00199677797583, 64.85019133237573, 63.048426162077774, 60.354476718840864, 59.15689953554024, 58.31346231602542, 58.30685393941441, 58.841600782277126, 59.64688290592922, 60.40010589303322, 61.53562485284404, 62.52782283400255, 63.397343904069324, 64.25651876208852, 64.47174569781158, 65.60850063188177, 66.87738061799972, 67.75426833760777, 69.36499334539025, 70.13086986850381, 71.15185833285928, 71.9120979926347, 71.89967346191406, 72.50558775374033, 72.81433772114875, 72.61287617606253, 73.17353598585406, 72.87313163782015, 73.6789102400005, 73.85693702574301, 73.53408983841683, 74.03650127028185, 73.74006539255284, 74.51835308877395, 74.4971510507528, 74.72851799529732, 74.76518271733256, 75.57095788751991, 75.71264623746903, 75.53446308617453, 76.22345844750265, 75.40158545237915, 76.24621584500309, 76.3856807572942, 76.8938848748562, 76.27556308882136]
Mean loss is 72.579656
With standard deviation 15.