In [1]:
#import libraries
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Input
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

#import data and set up predictors and target
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
predictors = concrete_data[concrete_data.columns[concrete_data.columns != 'Strength']]
target = concrete_data['Strength']
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


## Part A: Build a Baseline Model

In [2]:
# define regression model
def create_model(nHiddenLayers,n_cols):
    model = Sequential()
    model.add(Input(shape=(n_cols,)))
    for i in range(nHiddenLayers):
        model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

mse = []

for i in range(10):
    # Split data as instructed in the assignment
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3)
    # Create and train the model
    modelA = create_model(1, predictors.shape[1])
    history = modelA.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=0)
    # Save and print the final loss (optional)
    mse.append(history.history['loss'][-1])
    #print("Mean Squared Error for Run", i, ":", mse[i])

print("Average Mean Squared Error:", np.mean(mse))
print("Standard Deviation of Mean Squared Error:", np.std(mse))


Average Mean Squared Error: 442.6389617919922
Standard Deviation of Mean Squared Error: 576.3282161354755


The mean and the standard deviation of the MSE are 442 and 576 respectively. As the standard deviation is high compared with the mean, the quality of the models fluctuates greatly.

## Part B: Normalize the data

In [3]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()

mse = []

for i in range(50):
    # Split data as instructed in the assignment
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3)
    # Create and train the model
    modelA = create_model(1, predictors_norm.shape[1])
    history = modelA.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=0)
    # Save and print the final loss (optional)
    mse.append(history.history['loss'][-1])
    #print("Mean Squared Error for Run", i, ":", mse[i])

print("Average Mean Squared Error:", np.mean(mse))
print("Standard Deviation of Mean Squared Error:", np.std(mse))



Average Mean Squared Error: 390.9417498779297
Standard Deviation of Mean Squared Error: 104.46469435893562


The mean and the standard deviation of the MSE are both lower than Part A, meaning that normalizing the data has improved the quality of the models.

## Part C: Increase the number of epochs


In [4]:
mse = []

for i in range(50):
    # Split data as instructed in the assignment
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3)
    # Create and train the model
    modelA = create_model(1, predictors_norm.shape[1])
    history = modelA.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=100, verbose=0)
    # Save and print the final loss (optional)
    mse.append(history.history['loss'][-1])
    #print("Mean Squared Error for Run", i, ":", mse[i])

print("Average Mean Squared Error:", np.mean(mse))
print("Standard Deviation of Mean Squared Error:", np.std(mse))


Average Mean Squared Error: 165.20567962646484
Standard Deviation of Mean Squared Error: 20.412178633849763


The mean and the standard deviation of the MSE are both lower than Part B, meaning that increasing the number of epochs has improved the quality of the models.

## Part D: Increase the number of hidden layers

In [5]:
mse = []

for i in range(50):
    # Split data as instructed in the assignment
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3)
    # Create and train the model
    modelD = create_model(3, predictors_norm.shape[1])
    history = modelD.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=0)
    # Save and print the final loss (optional)
    mse.append(history.history['loss'][-1])
    #print("Mean Squared Error for Run", i, ":", mse[i])

print("Average Mean Squared Error:", np.mean(mse))
print("Standard Deviation of Mean Squared Error:", np.std(mse))



Average Mean Squared Error: 119.15071426391602
Standard Deviation of Mean Squared Error: 11.670073163200593


The mean and the standard deviation of the MSE are both lower than Part B, meaning that increasing the number of hidden layers has improved the quality of the models.