In [44]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [45]:
# Location of the concrete compressive-strength dataset used throughout the notebook.
CONCRETE_DATA_URL = 'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv'

# Read the CSV into a DataFrame and preview the first rows.
concrete_data = pd.read_csv(CONCRETE_DATA_URL)
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [46]:
concrete_data_columns = concrete_data.columns

# The regression target is the Strength column ...
target = concrete_data['Strength']

# ... and every remaining column is used as a predictor.
feature_columns = concrete_data_columns[concrete_data_columns != 'Strength']
predictors = concrete_data[feature_columns]

In [47]:
# Standardise each predictor column: subtract its mean, divide by its standard deviation.
# Both statistics are computed over every row of `predictors`.
feature_means = predictors.mean()
feature_stds = predictors.std()
predictors_norm = (predictors - feature_means) / feature_stds

# Number of input features; regression_model() reads this to size its input layer.
n_cols = len(predictors_norm.columns)

In [48]:
# Preview the first five target values as a quick sanity check.
target.head(n=5)

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [54]:
def regression_model():
    """Build and compile a fresh fully connected regression network.

    Architecture: three hidden Dense layers of 10 ReLU units each, followed
    by a single linear output unit. The input width is taken from the
    notebook-level variable ``n_cols``.

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer and
        mean-squared-error loss.
    """
    net = Sequential()

    # First hidden layer also declares the input shape.
    net.add(Dense(10, activation='relu', input_shape=(n_cols,)))

    # Two further hidden layers with the same width and activation.
    for _hidden in range(2):
        net.add(Dense(10, activation='relu'))

    # Single-unit output layer with no activation (linear regression output).
    net.add(Dense(1))

    net.compile(loss='mean_squared_error', optimizer='adam')
    return net

In [55]:
# Test-set mean squared error of each independent train/evaluate repetition.
errors = []

# Repeat the split -> train -> evaluate cycle 50 times so that the mean and
# spread of the error can be estimated.
for i in range(50):
    # Seed each split with the iteration index: every repetition is evaluated
    # on a different 70/30 train/test partition, yet the whole experiment
    # stays reproducible from run to run.
    X_train, X_test, y_train, y_test = train_test_split(
        predictors_norm, target, test_size=0.3, random_state=i
    )

    # Build a freshly initialised model so no weights carry over between repetitions.
    model = regression_model()

    # Fit for 100 epochs; validation_data is only used to report val_loss per epoch.
    model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, verbose=2)

    # Predict the held-out data and record its mean squared error.
    y_pred = model.predict(X_test)
    error = mean_squared_error(y_test, y_pred)
    errors.append(error)

print(errors)

Train on 721 samples, validate on 309 samples
Epoch 1/100
 - 6s - loss: 1630.5039 - val_loss: 1518.7762
Epoch 2/100
 - 0s - loss: 1601.1909 - val_loss: 1498.2448
Epoch 3/100
 - 0s - loss: 1581.4783 - val_loss: 1482.7708
Epoch 4/100
 - 0s - loss: 1564.4748 - val_loss: 1466.9401
Epoch 5/100
 - 0s - loss: 1544.8499 - val_loss: 1446.2228
Epoch 6/100
 - 0s - loss: 1517.8513 - val_loss: 1416.3577
Epoch 7/100
 - 0s - loss: 1478.0743 - val_loss: 1370.9118
Epoch 8/100
 - 0s - loss: 1415.1036 - val_loss: 1298.3458
Epoch 9/100
 - 0s - loss: 1314.6553 - val_loss: 1180.7697
Epoch 10/100
 - 0s - loss: 1157.1194 - val_loss: 1009.2302
Epoch 11/100
 - 0s - loss: 942.9294 - val_loss: 790.0608
Epoch 12/100
 - 0s - loss: 691.4447 - val_loss: 567.3171
Epoch 13/100
 - 0s - loss: 472.8861 - val_loss: 401.1506
Epoch 14/100
 - 0s - loss: 335.4817 - val_loss: 314.4849
Epoch 15/100
 - 0s - loss: 269.4416 - val_loss: 267.6315
Epoch 16/100
 - 0s - loss: 235.1219 - val_loss: 236.1459
Epoch 17/100
 - 0s - loss: 213.

In [56]:
# Increasing the number of hidden layers also improved the result.
# Report the mean and standard deviation of the collected errors.
mean_error = np.mean(errors)
std_error = np.std(errors)
print(mean_error, std_error)

87.30498563896677 21.429912809027215
