In [13]:
# Import the necessary libraries
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [14]:
# Download the datasets
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [15]:
# Split the data into predictors and target
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [16]:
# Save the number of predictors
n_cols = predictors.shape[1]

In [17]:
# Build the model
def regression_model():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

model = regression_model()

Experiement 1: Evaluate the model without normalizing the data

In [19]:
# Evaluate the model using mean squared error
def evaluate_model(predictors, model, num_of_ephoch):
  list_of_mean_squared_error = []
  for cycle in range(10):
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3)
    #Train and test the model at the same time and then Find mean_squared_error as last value in history
    mean_squared_error = model.fit(X_train, y_train, epochs=num_of_ephoch, verbose=0, validation_data=(X_test, y_test)).history['val_loss'][-1] 
    list_of_mean_squared_error.append(mean_squared_error)
    print('Cycle #{}: mean_squared_error {}'.format(cycle+1, mean_squared_error))
  print('The mean of the mean squared errors: {}'.format(np.mean(list_of_mean_squared_error)))
  print('The standard deviation of the mean squared errors: {}'.format(np.std(list_of_mean_squared_error)))

evaluate_model(predictors, model, 50)

Cycle #1: mean_squared_error 51.6797981262207
Cycle #2: mean_squared_error 59.267547607421875
Cycle #3: mean_squared_error 51.118648529052734
Cycle #4: mean_squared_error 56.66215515136719
Cycle #5: mean_squared_error 53.25983428955078
Cycle #6: mean_squared_error 49.9633903503418
Cycle #7: mean_squared_error 52.92972183227539
Cycle #8: mean_squared_error 53.59840393066406
Cycle #9: mean_squared_error 50.236854553222656
Cycle #10: mean_squared_error 49.43723678588867
The mean of the mean squared errors: 52.815359115600586
The standard deviation of the mean squared errors: 2.959980066389309


Experiemt 2: Evaluate the model after normalizing the data

In [20]:
# Normalize the data
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [21]:
evaluate_model(predictors_norm, model, 50)

Cycle #1: mean_squared_error 189.99476623535156
Cycle #2: mean_squared_error 141.95314025878906
Cycle #3: mean_squared_error 84.03067779541016
Cycle #4: mean_squared_error 84.41038513183594
Cycle #5: mean_squared_error 70.16065216064453
Cycle #6: mean_squared_error 52.967586517333984
Cycle #7: mean_squared_error 50.33774948120117
Cycle #8: mean_squared_error 46.861167907714844
Cycle #9: mean_squared_error 42.88547897338867
Cycle #10: mean_squared_error 40.91372299194336
The mean of the mean squared errors: 80.45153274536133
The standard deviation of the mean squared errors: 46.56341846735766


Experiement 3: Increase the number of epochs

In [23]:
evaluate_model(predictors_norm, model, 100)

Cycle #1: mean_squared_error 42.09233474731445
Cycle #2: mean_squared_error 42.12956237792969
Cycle #3: mean_squared_error 33.9011116027832
Cycle #4: mean_squared_error 34.44487380981445
Cycle #5: mean_squared_error 36.823822021484375
Cycle #6: mean_squared_error 36.806396484375
Cycle #7: mean_squared_error 38.90013885498047
Cycle #8: mean_squared_error 37.26557159423828
Cycle #9: mean_squared_error 33.609107971191406
Cycle #10: mean_squared_error 38.53830337524414
The mean of the mean squared errors: 37.45112228393555
The standard deviation of the mean squared errors: 2.8974998245020953


Experiement 4: Increase the layers of the model

In [25]:
def regression_model2():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

model2 = regression_model2()
evaluate_model(predictors_norm, model2, 50)

Cycle #1: mean_squared_error 96.51993560791016
Cycle #2: mean_squared_error 65.3797836303711
Cycle #3: mean_squared_error 49.16946029663086
Cycle #4: mean_squared_error 47.03170394897461
Cycle #5: mean_squared_error 43.9094352722168
Cycle #6: mean_squared_error 41.528907775878906
Cycle #7: mean_squared_error 37.78551483154297
Cycle #8: mean_squared_error 31.747276306152344
Cycle #9: mean_squared_error 33.96318054199219
Cycle #10: mean_squared_error 32.822845458984375
The mean of the mean squared errors: 47.98580436706543
The standard deviation of the mean squared errors: 18.74304420165298
