# Part A

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error


In [2]:
df = pd.read_csv('concrete_data.csv')
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
X = df.drop(columns='Strength')
y = df.Strength.values
n_cols = X.shape[1]

### Randomly split the data into a training and test sets by holding 30% of the data for testing.

In [4]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size = 0.3 , random_state = 42)

In [5]:
import keras
from keras.models import Sequential
from keras.layers import Dense

Using TensorFlow backend.


### One hidden layer of 10 nodes, and a ReLU activation function

In [6]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

### Train the model on the training data using 50 epochs.
###  Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength
###  Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

In [7]:
mse = []
for i in range(50):
    model = regression_model()
    model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=50, verbose=0)
    y_pred = model.predict(X_test)
    mse.append(mean_squared_error(y_test,y_pred))















###  Report the mean and the standard deviation of the mean squared errors.

In [8]:
mse = np.array(mse)
print('average of MSE= ',mse.mean())
print('standard deviation of MSE=' ,mse.std())

average of MSE=  394.980977099044
standard deviation of MSE= 409.29264859332187


# Part B
###  Repeat Part A but use a normalized version of the data. Recall that one way to normalize the data is by subtracting the mean from the individual predictors and dividing by the standard deviation.

In [9]:
X_norm = (X - X.mean()) / X.std()
X_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [10]:
X_train,X_test,y_train,y_test = train_test_split(X_norm,y,test_size = 0.3 , random_state = 42)

In [11]:
mse_norm = []
for i in range(50):
    model = regression_model()
    model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=50, verbose=0)
    y_pred = model.predict(X_test)
    mse_norm.append(mean_squared_error(y_test,y_pred))

In [12]:
mse_norm = np.array(mse_norm)
print('average of MSE with normalized data= ',mse_norm.mean())
print('standard deviation of MSE with normalized data= ' ,mse_norm.std())

average of MSE with normalized data=  351.18544646077515
standard deviation of MSE with normalized data=  119.93293482286714


### How does the mean of the mean squared errors compare to that from Step A?

In [13]:
print('the difference between original data and normalized data in MSE is = ',np.absolute(mse_norm.mean()- mse.mean()))

the difference between original data and normalized data in MSE is =  43.79553063826887


# Part C
### Repeat Part B but use 100 epochs this time for training.

In [14]:
mse_norm_100 = []
for i in range(50):
    model = regression_model()
    model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=100, verbose=0)
    y_pred = model.predict(X_test)
    mse_norm_100.append(mean_squared_error(y_test,y_pred))

In [15]:
mse_norm_100 = np.array(mse_norm_100)
print('average of MSE with normalized data= ',mse_norm_100.mean())
print('standard deviation of MSE with normalized data= ' ,mse_norm_100.std())

average of MSE with normalized data=  154.35332252168268
standard deviation of MSE with normalized data=  9.404532412033921


### How does the mean of the mean squared errors compare to that from Step B?

In [16]:
print('the difference between normalized data with 100 epoche and with 50 epoche in MSE is = ', \
      np.absolute(mse_norm_100.mean()- mse_norm.mean()))

the difference between normalized data with 100 epoche and with 50 epoche in MSE is =  207.94901198305638


# Part D
### Repeat part B but use a neural network with the following instead:

- Three hidden layers, each of 10 nodes and ReLU activation function.

In [17]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [18]:
mse_norm_3 = []
for i in range(50):
    model = regression_model()
    model.fit(X_train, y_train, validation_data=(X_test,y_test), epochs=50, verbose=0)
    y_pred = model.predict(X_test)
    mse_norm_3.append(mean_squared_error(y_test,y_pred))

In [19]:
mse_norm_3 = np.array(mse_norm_3)
print('average of MSE with normalized data= ',mse_norm_3.mean())
print('standard deviation of MSE with normalized data= ' ,mse_norm_3.std())

average of MSE with normalized data=  123.81288020974885
standard deviation of MSE with normalized data=  13.889504006639319


### How does the mean of the mean squared errors compare to that from Step B?

In [20]:
print('the difference between normalized data with 1 hidden layer and with 3 hidden layers in MSE is = ', \
      np.absolute(mse_norm_3.mean()- mse.mean()))

the difference between normalized data with 1 hidden layer and with 3 hidden layers in MSE is =  187.25082190134674
