# A. Building a baseline model

### Importing numpy & pandas libraries, and some packages from the keras and scikit-learn library

In [1]:
import numpy as np
import pandas as pd
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split

### Downloading the data

In [2]:
conc_dataset = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
conc_dataset.head(10)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3
5,266.0,114.0,0.0,228.0,0.0,932.0,670.0,90,47.03
6,380.0,95.0,0.0,228.0,0.0,932.0,594.0,365,43.7
7,380.0,95.0,0.0,228.0,0.0,932.0,594.0,28,36.45
8,266.0,114.0,0.0,228.0,0.0,932.0,670.0,28,45.85
9,475.0,0.0,0.0,228.0,0.0,932.0,594.0,28,39.29


### Splitting the data into predictors and Target

In [3]:
#Target is the "Strength" feature 
target_group = conc_dataset['Strength']

#predictors group are all columns exept the Target
predictors_group = conc_dataset[['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer','Coarse Aggregate', 'Fine Aggregate', 'Age']]

### Creating a function that defines the regression model of the neural network

In [4]:
def regression_model():
    #create model
    model = Sequential()
    #creating the first layer
    model.add(Dense(10, activation='relu', input_shape=(predictors_group.shape[1],)))
    model.add(Dense(1))
    #compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

### Build the model

In [5]:
my_model = regression_model()

### Train the model on the training data using 50 epochs

In [6]:
#creating a list to save the values of the mean squared errors
mse_list = []
#creating a list to save the value of the least  mean squared errors
least_mse = []
#a loop to repeat the splitting and fitting prossess 
for i in range(50):
    #Randomly split the data by holding 30% of the data for testing  
    x_train, x_test, y_train, y_test = train_test_split(predictors_group, target_group, test_size=0.3)
    #Train the model
    result = my_model.fit(x_train, y_train, epochs=50, verbose=0, validation_data=(x_test, y_test))
    #Find mean squared error as the last value
    mean_squared_error = result.history['val_loss'][-1]
    #Adding the value of the mean squared error to the list
    mse_list.append(mean_squared_error)
    print('Trial {}: Mean_Squared_Error= {}'.format(i+1, mean_squared_error))

Trial 1: Mean_Squared_Error= 361.363525390625
Trial 2: Mean_Squared_Error= 205.0968017578125
Trial 3: Mean_Squared_Error= 117.73285675048828
Trial 4: Mean_Squared_Error= 102.76799011230469
Trial 5: Mean_Squared_Error= 88.3550796508789
Trial 6: Mean_Squared_Error= 95.01924896240234
Trial 7: Mean_Squared_Error= 84.6137924194336
Trial 8: Mean_Squared_Error= 75.61719512939453
Trial 9: Mean_Squared_Error= 79.95269012451172
Trial 10: Mean_Squared_Error= 61.43897247314453
Trial 11: Mean_Squared_Error= 62.70022964477539
Trial 12: Mean_Squared_Error= 52.21890640258789
Trial 13: Mean_Squared_Error= 51.973655700683594
Trial 14: Mean_Squared_Error= 57.11802291870117
Trial 15: Mean_Squared_Error= 45.801780700683594
Trial 16: Mean_Squared_Error= 50.247982025146484
Trial 17: Mean_Squared_Error= 48.79481506347656
Trial 18: Mean_Squared_Error= 57.71278381347656
Trial 19: Mean_Squared_Error= 49.47074508666992
Trial 20: Mean_Squared_Error= 59.49860763549805
Trial 21: Mean_Squared_Error= 45.51941299438476

### Save the (least) Mean squared error of the best model in a list 

In [7]:
best_model = min(mse_list)
least_mse.append(best_model)
print("The best model has MSE of {}".format(best_model))

The best model has MSE of 43.93061828613281


In [8]:
print("The best model has MSE of {}".format(min(mse_list)))

The best model has MSE of 43.93061828613281


### The mean and the standard deviation of the mean squared errors

In [9]:
mean = np.mean(mse_list)
standard_deviation = np.std(mse_list)
print("The mean of the mean squared errors: {}".format(mean))
print("The standard deviation of the mean squared errors: {}".format(standard_deviation))

The mean of the mean squared errors: 66.94618522644043
The standard deviation of the mean squared errors: 49.26155614145892


# B. Normalize the data

### Normalize the data by subtracting the mean from the individual predictors and dividing by the standard deviation

In [10]:
norm_predictors_group = (predictors_group - predictors_group.mean())/predictors_group.std()
norm_predictors_group.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


### Re-build the model using the normalized data

In [11]:
def norm_regression_model():
    norm_model = Sequential()
    norm_model.add(Dense(10, activation='relu', input_shape=(norm_predictors_group.shape[1],)))
    norm_model.add(Dense(1))
    norm_model.compile(optimizer='adam', loss='mean_squared_error')
    return norm_model

In [12]:
my_model2 = norm_regression_model()

### Train the model on the normalized data using 50 epochs

In [13]:
norm_mse_list = []
for i in range(50):
    x_train, x_test, y_train, y_test = train_test_split(norm_predictors_group, target_group, test_size=0.3)
    result2 = my_model2.fit(x_train, y_train, epochs=50, verbose=0, validation_data=(x_test, y_test))
    norm_mean_squared_error = result2.history['val_loss'][-1]
    norm_mse_list.append(norm_mean_squared_error)
    print('Trial {}: Mean_Squared_Error= {}'.format(i+1, norm_mean_squared_error))  

Trial 1: Mean_Squared_Error= 338.5841369628906
Trial 2: Mean_Squared_Error= 172.47506713867188
Trial 3: Mean_Squared_Error= 111.29866790771484
Trial 4: Mean_Squared_Error= 85.40751647949219
Trial 5: Mean_Squared_Error= 63.31898498535156
Trial 6: Mean_Squared_Error= 50.841529846191406
Trial 7: Mean_Squared_Error= 54.464115142822266
Trial 8: Mean_Squared_Error= 54.92283630371094
Trial 9: Mean_Squared_Error= 51.69728469848633
Trial 10: Mean_Squared_Error= 42.894588470458984
Trial 11: Mean_Squared_Error= 51.44646072387695
Trial 12: Mean_Squared_Error= 51.52364730834961
Trial 13: Mean_Squared_Error= 38.75411605834961
Trial 14: Mean_Squared_Error= 43.13811492919922
Trial 15: Mean_Squared_Error= 41.55244445800781
Trial 16: Mean_Squared_Error= 37.06069564819336
Trial 17: Mean_Squared_Error= 42.796146392822266
Trial 18: Mean_Squared_Error= 40.457942962646484
Trial 19: Mean_Squared_Error= 39.9007682800293
Trial 20: Mean_Squared_Error= 37.617671966552734
Trial 21: Mean_Squared_Error= 38.949745178

### The mean and the standard deviation of the mean squared errors

In [14]:
mean2 = np.mean(norm_mse_list)
standard_deviation2 = np.std(norm_mse_list)
print("The mean of the mean squared errors: {}".format(mean2))
print("The standard deviation of the mean squared errors: {}".format(standard_deviation2))

The mean of the mean squared errors: 51.69189743041992
The standard deviation of the mean squared errors: 46.65467409080513


### As noteced: the mean of the mean squared errors *Decreased* compare to that from Step A.

# C. Increate the number of epochs

### Build a model

In [15]:
def norm_regression_model2():
    norm_model2 = Sequential()
    norm_model2.add(Dense(10, activation='relu', input_shape=(norm_predictors_group.shape[1],)))
    norm_model2.add(Dense(1))
    norm_model2.compile(optimizer='adam', loss='mean_squared_error')
    return norm_model2

In [16]:
my_model3 = norm_regression_model2()

### Train the model on the training data using 100 epochs

In [17]:
norm_mse_list2 = []
for i in range(50):  
    x_train, x_test, y_train, y_test = train_test_split(norm_predictors_group, target_group, test_size=0.3)
    #Train the model by 100 epochs
    result3 = my_model3.fit(x_train, y_train, epochs=100, verbose=0, validation_data=(x_test, y_test))
    norm_mean_squared_error2 = result3.history['val_loss'][-1]
    norm_mse_list2.append(norm_mean_squared_error2)
    print('Trial {}: Mean_Squared_Error= {}'.format(i+1, norm_mean_squared_error2))

Trial 1: Mean_Squared_Error= 142.4388885498047
Trial 2: Mean_Squared_Error= 106.27892303466797
Trial 3: Mean_Squared_Error= 59.50465774536133
Trial 4: Mean_Squared_Error= 48.6295166015625
Trial 5: Mean_Squared_Error= 46.90736389160156
Trial 6: Mean_Squared_Error= 44.44423294067383
Trial 7: Mean_Squared_Error= 47.41929244995117
Trial 8: Mean_Squared_Error= 40.99114990234375
Trial 9: Mean_Squared_Error= 42.116676330566406
Trial 10: Mean_Squared_Error= 40.61408233642578
Trial 11: Mean_Squared_Error= 42.98280334472656
Trial 12: Mean_Squared_Error= 37.788787841796875
Trial 13: Mean_Squared_Error= 35.175262451171875
Trial 14: Mean_Squared_Error= 39.14115905761719
Trial 15: Mean_Squared_Error= 35.52425765991211
Trial 16: Mean_Squared_Error= 36.46026611328125
Trial 17: Mean_Squared_Error= 40.53184509277344
Trial 18: Mean_Squared_Error= 39.26422882080078
Trial 19: Mean_Squared_Error= 36.87031173706055
Trial 20: Mean_Squared_Error= 38.363224029541016
Trial 21: Mean_Squared_Error= 44.152812957763

### The mean and the standard deviation of the mean squared error

In [18]:
mean3 = np.mean(norm_mse_list2)
standard_deviation3 = np.std(norm_mse_list2)
print("The mean of the mean squared errors: {}".format(mean3))
print("The standard deviation of the mean squared errors: {}".format(standard_deviation3))

The mean of the mean squared errors: 43.27115364074707
The standard deviation of the mean squared errors: 17.525611951789827


### As noteced: the mean of the mean squared errors keeps Decreasing as we doubled the epoches number

# D. Increase the number of hidden layers

### Create a new model with three hidden layers

In [19]:
def norm_regression_model3():
    norm_model3 = Sequential()
    #creating the first layer
    norm_model3.add(Dense(10, activation='relu', input_shape=(norm_predictors_group.shape[1],)))
    #creating the second layer
    norm_model3.add(Dense(10, activation='relu'))
    #creating the third layer
    norm_model3.add(Dense(10, activation='relu'))
    norm_model3.add(Dense(1))
    norm_model3.compile(optimizer='adam', loss='mean_squared_error')
    return norm_model3

In [20]:
my_model4 = norm_regression_model3()

### Train the model on the training data using 50 epochs

In [21]:
norm_mse_list3 = [] 
for i in range(50):  
    x_train, x_test, y_train, y_test = train_test_split(norm_predictors_group, target_group, test_size=0.3)
    result4 = my_model4.fit(x_train, y_train, epochs=50, verbose=0, validation_data=(x_test, y_test))
    norm_mean_squared_error3 = result4.history['val_loss'][-1]
    norm_mse_list3.append(norm_mean_squared_error3)
    print('Trial {}: Mean_Squared_Error= {}'.format(i+1, norm_mean_squared_error3))

Trial 1: Mean_Squared_Error= 122.92982482910156
Trial 2: Mean_Squared_Error= 74.23184967041016
Trial 3: Mean_Squared_Error= 51.39278793334961
Trial 4: Mean_Squared_Error= 47.779754638671875
Trial 5: Mean_Squared_Error= 34.919151306152344
Trial 6: Mean_Squared_Error= 40.46145248413086
Trial 7: Mean_Squared_Error= 38.42198181152344
Trial 8: Mean_Squared_Error= 29.904451370239258
Trial 9: Mean_Squared_Error= 35.60575485229492
Trial 10: Mean_Squared_Error= 31.198833465576172
Trial 11: Mean_Squared_Error= 31.262636184692383
Trial 12: Mean_Squared_Error= 30.03436851501465
Trial 13: Mean_Squared_Error= 27.6962890625
Trial 14: Mean_Squared_Error= 31.024824142456055
Trial 15: Mean_Squared_Error= 29.752666473388672
Trial 16: Mean_Squared_Error= 26.372848510742188
Trial 17: Mean_Squared_Error= 24.042396545410156
Trial 18: Mean_Squared_Error= 28.375654220581055
Trial 19: Mean_Squared_Error= 26.94432258605957
Trial 20: Mean_Squared_Error= 29.36333465576172
Trial 21: Mean_Squared_Error= 25.114419937

### The mean and the standard deviation of the mean squared error

In [22]:
mean4 = np.mean(norm_mse_list3)
standard_deviation4 = np.std(norm_mse_list3)
print("The mean of the mean squared errors: {}".format(mean4))
print("The standard deviation of the mean squared errors: {}".format(standard_deviation4))

The mean of the mean squared errors: 29.84962589263916
The standard deviation of the mean squared errors: 16.232435935939538


### The mean of the mean squared errors in case D is the least compaired to cases A, B and C. As the neural network became a little bit deeper.