# Part 1 

<h3>Import necessary libraries</h3>

In [2]:
# import numpy
import numpy as np
# import pandas
import pandas as pd
# import keras
from keras.models import Sequential
from keras.layers import Dense
# import train_test_split
from sklearn.model_selection import train_test_split
# import mean_squared_error 
from sklearn.metrics import mean_squared_error

### Gather and prepare data

In [3]:
# load the concrete dataset from the working directory and preview its first five rows
df = pd.read_csv('concrete_data.csv')
df.head(5)

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [4]:
# the target (y) is the 'Strength' column
target = df['Strength']
# the features (X) are every remaining column
features = df.drop(columns='Strength')

In [5]:
# number of input features; used as the width of the network's input layer
n_cols = len(features.columns)

### Prepare model

In [6]:
def regression_model(hidden_layers=1):
    """Build and compile a small fully connected regression network.

    The network starts with one 10-unit ReLU layer whose input width is
    ``n_cols`` (read from the notebook's global scope), followed by
    ``hidden_layers - 1`` further 10-unit ReLU layers and a single output
    unit created without an activation argument.

    Parameters
    ----------
    hidden_layers : int, default 1
        Total number of hidden layers. Values below 1 still produce one
        hidden layer, because the first layer is always added.

    Returns
    -------
    The ``Sequential`` model, compiled with the 'adam' optimizer and the
    'mean_squared_error' loss.
    """
    network = Sequential()
    # the first hidden layer also declares the input width
    network.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    # stack whatever hidden layers remain after the first one
    extra_layers = hidden_layers - 1
    while extra_layers > 0:
        network.add(Dense(10, activation='relu'))
        extra_layers -= 1
    # single-unit output layer for the regression target
    network.add(Dense(1))

    network.compile(optimizer='adam', loss='mean_squared_error')
    return network

### Fit and predict model

In [9]:
# collect one test-set mean squared error per repetition
mse_runs = []
# repeat the split / train / evaluate cycle 50 times
for i in range(50):
    # hold out a fresh random 30% of the rows for testing
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.3
    )
    # train a brand-new one-hidden-layer network for 50 epochs, silently
    model = regression_model()
    model.fit(X_train, y_train, epochs=50, verbose=0)
    # score the held-out rows
    y_pred = model.predict(X_test)
    mse_sample = mean_squared_error(y_test, y_pred)
    mse_runs.append(mse_sample)
# freeze the scores into a float array for the summary statistics
mse = np.array(mse_runs, dtype=float)

2021-12-17 14:01:27.269730: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [10]:
# report the average and the spread of the 50 baseline MSE scores
for label, value in (
    ('The mean for the mean squared error is:', mse.mean()),
    ('The standard deviation for the mean squared error is:', mse.std()),
):
    print(label, value)

The mean for the mean squared error is: 454.58146209451775
The standard deviation for the mean squared error is: 709.1830363478795


# Part 2

In [11]:
# standardize every feature column: subtract its mean, then divide by its standard deviation
# NOTE(review): the statistics are computed on the full dataset, before any
# train/test split, so held-out rows influence the scaling; confirm this is acceptable
normalized_features = features.sub(features.mean()).div(features.std())

In [12]:
# collect one test-set mean squared error per repetition
mse_norm_runs = []
# repeat the split / train / evaluate cycle 50 times
for i in range(50):
    # hold out a fresh random 30% of the rows, this time from the normalized features
    X_train, X_test, y_train, y_test = train_test_split(
        normalized_features, target, test_size=0.3
    )
    # train a brand-new one-hidden-layer network for 50 epochs, silently
    model = regression_model()
    model.fit(X_train, y_train, epochs=50, verbose=0)
    # score the held-out rows
    y_pred = model.predict(X_test)
    mse_sample = mean_squared_error(y_test, y_pred)
    mse_norm_runs.append(mse_sample)
# freeze the scores into a float array for the summary statistics
mse_norm = np.array(mse_norm_runs, dtype=float)

In [13]:
# report the average and the spread of the 50 normalized-data MSE scores
for label, value in (
    ('The mean for the mean squared error when normalized is:', mse_norm.mean()),
    ('The standard deviation for the mean squared error when normalized is:', mse_norm.std()),
):
    print(label, value)

The mean for the mean squared error when normalized is: 386.35213955986234
The standard deviation for the mean squared error when normalized is: 123.29635040625611


In [14]:
# comparison table: one row per experiment, with the mean and standard deviation of its MSE scores
results = pd.DataFrame(
    [
        [mse.mean(), mse.std()],
        [mse_norm.mean(), mse_norm.std()],
    ],
    index=['Non-norm', 'Normalized'],
    columns=['Mean', 'STD'],
)

In [15]:
# display the comparison table (a bare last expression is rendered as the cell output)
results

Unnamed: 0,Mean,STD
Non-norm,454.581462,709.183036
Normalized,386.35214,123.29635


<h2>Observation </h2>
As can be seen, the Mean of the Mean Squared Error is significantly lower when the data is normalized, meaning that our models are more accurate.
<br><br>
So too, the Standard Deviation of the Mean Squared Error is much smaller, meaning our models are more consistent. 

# Part 3

In [16]:
# collect one test-set mean squared error per repetition
mse_100e_runs = []
# repeat the split / train / evaluate cycle 50 times
for i in range(50):
    # hold out a fresh random 30% of the rows from the normalized features
    X_train, X_test, y_train, y_test = train_test_split(
        normalized_features, target, test_size=0.3
    )
    # train a brand-new one-hidden-layer network, now for 100 epochs, silently
    model = regression_model()
    model.fit(X_train, y_train, epochs=100, verbose=0)
    # score the held-out rows
    y_pred = model.predict(X_test)
    mse_sample = mean_squared_error(y_test, y_pred)
    mse_100e_runs.append(mse_sample)
# freeze the scores into a float array for the summary statistics
mse_100e = np.array(mse_100e_runs, dtype=float)

In [17]:
# summarize the 100-epoch runs as a single-row DataFrame
epochs_100 = pd.DataFrame({'Mean': mse_100e.mean(),
              'STD': mse_100e.std()}, index=['100 Epochs'])
# Add the row to the results table with pd.concat: DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0.
# Dropping any existing '100 Epochs' row first keeps this cell idempotent,
# so re-running it does not duplicate the row.
results = pd.concat(
    [results.drop(index=epochs_100.index, errors='ignore'), epochs_100]
)

In [18]:
# display the comparison table (a bare last expression is rendered as the cell output)
results

Unnamed: 0,Mean,STD
Non-norm,454.581462,709.183036
Normalized,386.35214,123.29635
100 Epochs,169.933014,28.235675


<h2>Observation </h2>
As can be seen, the Mean of the Mean Squared Error is significantly lower when 100 Epochs are used instead of 50, meaning that our models are more accurate.
<br><br>
So too, the Standard Deviation of the Mean Squared Error is much smaller, meaning our models are more consistent.

# Part 4 

In [19]:
# collect one test-set mean squared error per repetition
mse_3_layers_runs = []
# repeat the split / train / evaluate cycle 50 times
for i in range(50):
    # hold out a fresh random 30% of the rows from the normalized features
    X_train, X_test, y_train, y_test = train_test_split(
        normalized_features, target, test_size=0.3
    )
    # train a brand-new network with three hidden layers for 100 epochs, silently
    model = regression_model(3)
    model.fit(X_train, y_train, epochs=100, verbose=0)
    # score the held-out rows
    y_pred = model.predict(X_test)
    mse_sample = mean_squared_error(y_test, y_pred)
    mse_3_layers_runs.append(mse_sample)
# freeze the scores into a float array for the summary statistics
mse_3_layers = np.array(mse_3_layers_runs, dtype=float)

In [20]:
# summarize the three-hidden-layer runs as a single-row DataFrame
three_layers = pd.DataFrame({'Mean': mse_3_layers.mean(),
              'STD': mse_3_layers.std()}, index=['3 Hidden layers'])
# Add the row to the results table with pd.concat: DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0.
# Dropping any existing '3 Hidden layers' row first keeps this cell
# idempotent, so re-running it does not duplicate the row.
results = pd.concat(
    [results.drop(index=three_layers.index, errors='ignore'), three_layers]
)

In [21]:
# display the comparison table (a bare last expression is rendered as the cell output)
results

Unnamed: 0,Mean,STD
Non-norm,454.581462,709.183036
Normalized,386.35214,123.29635
100 Epochs,169.933014,28.235675
3 Hidden layers,87.030097,21.694391


<h2>Observation </h2>
As can be seen, the Mean of the Mean Squared Error is significantly lower when we use 3 hidden layers instead of 1, meaning that our models are more accurate.
<br><br>
So too, the Standard Deviation of the Mean Squared Error is smaller (about 21.7 versus 28.2), meaning our models are somewhat more consistent.