In [217]:
import pandas as pd
import numpy as np

In [218]:
# Remote location of the concrete compressive-strength dataset (CSV).
DATA_URL = 'https://ibm.box.com/shared/static/svl8tu7cmod6tizo6rk0ke4sbuhtpdfx.csv'

# Download the CSV and parse it into a DataFrame (requires network access).
concrete_data = pd.read_csv(DATA_URL)

#### Data Cleaning Process

In [219]:
concrete_data.columns

Index(['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer',
       'Coarse Aggregate', 'Fine Aggregate', 'Age', 'Strength'],
      dtype='object')

In [220]:
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [221]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


#### The target variable y is Strength, the value we want to predict

In [222]:
concrete_data.shape

(1030, 9)

In [223]:
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [224]:
# Column labels of the dataset as a pandas Index (the eight features plus 'Strength').
concrete_data_columns = concrete_data.columns

In [225]:
# Feature matrix: every column except the prediction target 'Strength'.
X = concrete_data.drop(columns=['Strength'])

In [226]:
X.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [227]:
# Target vector: the 'Strength' column, selected explicitly for all rows.
y = concrete_data.loc[:, 'Strength']

In [228]:
y.describe()

count    1030.000000
mean       35.817961
std        16.705742
min         2.330000
25%        23.710000
50%        34.445000
75%        46.135000
max        82.600000
Name: Strength, dtype: float64

## Normalize the data X

In [229]:
# Z-score standardization, column by column: subtract the column mean and
# divide by the column standard deviation (pandas' default sample std, ddof=1).
feature_means = X.mean()
feature_stds = X.std()
X_norm = (X - feature_means) / feature_stds

In [230]:
X_norm.describe()
# After standardization every column has a mean of approximately 0 and a standard deviation of 1

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,2.828374e-15,1.3796950000000001e-17,4.139084e-17,1.214131e-15,8.278168e-17,6.55355e-15,1.226549e-14,7.58832e-17
std,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
min,-1.714421,-0.8564718,-0.8467326,-2.798851,-1.038638,-2.211064,-2.239829,-0.707016
25%,-0.8496407,-0.8564718,-0.8467326,-0.7805147,-1.038638,-0.5262618,-0.5317114,-0.612034
50%,-0.0791135,-0.6014861,-0.8467326,0.1607513,0.0326992,-0.06326279,0.07383152,-0.2795973
75%,0.6586406,0.8003558,1.001791,0.4885554,0.6688058,0.7264077,0.6288606,0.1636517
max,2.476712,3.309068,2.279976,3.064159,4.351528,2.213149,2.731735,5.055221


## Building Keras Model

In [231]:
# Number of input features for the network. X_norm.shape is (1030, 8);
# only the column count (8) is needed.
n_cols = len(X_norm.columns)

In [232]:
n_cols

8

In [233]:
import keras

In [234]:
from keras.models import Sequential

In [235]:
from keras.layers import Dense

In [248]:
# define regression model
def regression_model(n_inputs=None):
    """Build and compile a small fully connected network for regression.

    The network has three hidden ReLU layers (20, 15 and 5 units) followed by
    a single output unit with no activation, and is compiled with the Adam
    optimizer and a mean-squared-error loss.

    Parameters
    ----------
    n_inputs : int, optional
        Number of input features. When omitted, the notebook-level variable
        ``n_cols`` is used, which keeps existing ``regression_model()`` calls
        working unchanged.

    Returns
    -------
    keras.models.Sequential
        A compiled, untrained model.
    """
    # Fall back to the notebook global only when the caller gives no width,
    # so the function can also be reused for other feature sets.
    if n_inputs is None:
        n_inputs = n_cols

    # create model
    model = Sequential()
    model.add(Dense(20, activation='relu', input_shape=(n_inputs,)))
    model.add(Dense(15, activation='relu'))
    model.add(Dense(5, activation='relu'))
    # Single unit without activation: the raw regression output.
    model.add(Dense(1))

    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [249]:
# Instantiate a fresh, compiled (but not yet trained) network.
model = regression_model()

In [250]:
# Train for 50 epochs with one log line per epoch (verbose=2).
# validation_split=0.3 makes Keras hold out the LAST 30% of the rows (taken
# before any shuffling) as the validation set, so val_loss is measured on the
# tail of the file rather than on a random sample.
# NOTE(review): no random seed is set, so the loss values vary between runs.
model.fit(X_norm, y, validation_split=0.3, epochs=50, verbose=2)

Train on 721 samples, validate on 309 samples
Epoch 1/50
 - 1s - loss: 1673.7900 - val_loss: 1210.6601
Epoch 2/50
 - 0s - loss: 1631.7443 - val_loss: 1177.9703
Epoch 3/50
 - 0s - loss: 1569.1149 - val_loss: 1128.7707
Epoch 4/50
 - 0s - loss: 1469.8308 - val_loss: 1051.0586
Epoch 5/50
 - 0s - loss: 1316.8231 - val_loss: 936.8368
Epoch 6/50
 - 0s - loss: 1105.4815 - val_loss: 784.2602
Epoch 7/50
 - 0s - loss: 849.2804 - val_loss: 607.6491
Epoch 8/50
 - 0s - loss: 601.0583 - val_loss: 444.3466
Epoch 9/50
 - 0s - loss: 418.1742 - val_loss: 335.7986
Epoch 10/50
 - 0s - loss: 330.1068 - val_loss: 270.9540
Epoch 11/50
 - 0s - loss: 281.9047 - val_loss: 238.4585
Epoch 12/50
 - 0s - loss: 255.0258 - val_loss: 217.7050
Epoch 13/50
 - 0s - loss: 236.5423 - val_loss: 203.2766
Epoch 14/50
 - 0s - loss: 222.5480 - val_loss: 193.7009
Epoch 15/50
 - 0s - loss: 212.2560 - val_loss: 184.9948
Epoch 16/50
 - 0s - loss: 202.8586 - val_loss: 180.3649
Epoch 17/50
 - 0s - loss: 195.4806 - val_loss: 174.9196
E

<keras.callbacks.History at 0x7f5d7073a2b0>