# Import Keras and Packages


In [1]:
import numpy as np
import pandas as pd
import keras 
from keras.models import Sequential
from keras.layers import Dense, Input

# Import concrete strength data 

In [2]:
df = pd.read_csv('https://cocl.us/concrete_data')
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


### We will conduct a short EDA (Exploratory Data Analysis) 

In [3]:
# Checking for missing values 
df.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [4]:
#Checking the descriptive statistics
df.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


# Let's move on to the tasks 

# ___A. Build a baseline model (5 marks)___

Use the Keras library to build a neural network with the following:

- One hidden layer of 10 nodes, and a ReLU activation function

- Use the adam optimizer and the mean squared error  as the loss function.

1. Randomly split the data into a training and test sets by holding 30% of the data for testing. You can use the 
train_test_split
helper function from Scikit-learn.

2. Train the model on the training data using 50 epochs.

3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

5. Report the mean and the standard deviation of the mean squared errors.

### Split the data into features and target

In [5]:
features = df.copy().drop(columns=['Strength'])
target = df['Strength'].copy()

num_features = features.shape[1]

print(f'The features shape are {features.shape}')
print(f'The target shape is {target.shape}')

The features shape are (1030, 8)
The target shape is (1030,)


### Tast 1
Randomly split the data into a training and test sets by holding 30% of the data for testing. You can use the train_test_split helper function from Scikit-learn.

In [6]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test =  train_test_split(features, target, test_size=0.3, random_state = 10)

#Checking the shape
for sample, name in zip([x_train, x_test, y_train, y_test], ['train (x)', 'test (x)', 'train (y)', 'test(y)']):
    print(f'The shape of {name} is {sample.shape}')

The shape of train (x) is (721, 8)
The shape of test (x) is (309, 8)
The shape of train (y) is (721,)
The shape of test(y) is (309,)


#### Building the model

In [7]:
num_features = features.shape[1]

# Creating the object ANN 
model = Sequential()
#Adding the layers
#Adding the input layer
model.add(Input(shape=(num_features,)))
#Adding the hidden layer
model.add(Dense(10, activation='relu'))
#Adding the output layer 
model.add(Dense(1))
    
model.compile(optimizer = 'adam', loss = 'mean_squared_error')

### Task 2
Train the model on the training data using 50 epochs.

In [8]:
model.fit(x_train, y_train, validation_split=0.3, epochs=50, verbose=2)

Epoch 1/50
16/16 - 1s - 56ms/step - loss: 85593.4766 - val_loss: 65214.9688
Epoch 2/50
16/16 - 0s - 7ms/step - loss: 47880.9805 - val_loss: 34697.1328
Epoch 3/50
16/16 - 0s - 7ms/step - loss: 23993.5938 - val_loss: 16263.1143
Epoch 4/50
16/16 - 0s - 7ms/step - loss: 10308.2451 - val_loss: 6866.3740
Epoch 5/50
16/16 - 0s - 10ms/step - loss: 4314.4106 - val_loss: 3562.8284
Epoch 6/50
16/16 - 0s - 7ms/step - loss: 2755.7798 - val_loss: 2901.9707
Epoch 7/50
16/16 - 0s - 7ms/step - loss: 2428.4307 - val_loss: 2673.9377
Epoch 8/50
16/16 - 0s - 7ms/step - loss: 2211.2080 - val_loss: 2485.1560
Epoch 9/50
16/16 - 0s - 7ms/step - loss: 2023.3802 - val_loss: 2320.0491
Epoch 10/50
16/16 - 0s - 7ms/step - loss: 1865.6243 - val_loss: 2171.6340
Epoch 11/50
16/16 - 0s - 7ms/step - loss: 1732.2678 - val_loss: 2026.8043
Epoch 12/50
16/16 - 0s - 7ms/step - loss: 1613.8970 - val_loss: 1908.8574
Epoch 13/50
16/16 - 0s - 7ms/step - loss: 1513.9452 - val_loss: 1804.7732
Epoch 14/50
16/16 - 0s - 6ms/step - lo

<keras.src.callbacks.history.History at 0x28e5e3f0680>

### Task 3 
Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

In [9]:
from sklearn.metrics import mean_squared_error

predicted = model.predict(x_test)

mse = mean_squared_error(y_test, predicted)

print(round(mse, 2))

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step 
514.02


### Task 4 
Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.


In [10]:
list_of_mse = []

for i in range(50):
    #step 1
    x_train, x_test, y_train, y_test =  train_test_split(features, target, test_size=0.3)
    #step 2
    model.fit(x_train, y_train, validation_split=0.3, epochs=50, verbose=0)
    #step 3
    predicted = model.predict(x_test)
    mse = mean_squared_error(y_test, predicted)
    list_of_mse.append(mse)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

### Task 5
Report the mean and the standard deviation of the mean squared errors.

In [11]:
list_of_mse = np.array(list_of_mse)

mean_of_mse = round(list_of_mse.mean(), 2)
std_of_mse = round(list_of_mse.std(),2)

print(f'The mean of the list MSE is {mean_of_mse}. The standand deviation of the list MSE is {std_of_mse}.')

The mean of the list MSE is 59.15. The standand deviation of the list MSE is 21.7.


# ___B. Normalize the data (5 marks)___ 

Repeat Part A but use a normalized version of the data. Recall that one way to normalize the data is by subtracting the mean from the individual predictors and dividing by the standard deviation.

How does the mean of the mean squared errors compare to that from Step A?

### Standardisation of fearures

In [12]:
from sklearn.preprocessing import StandardScaler

standardisation = StandardScaler()
norm_features = standardisation.fit_transform(features)


print(f'This is typical example of standardisation of fearures: {norm_features[0]}.')

This is typical example of standardisation of fearures: [ 2.47791487 -0.85688789 -0.84714393 -0.91676439 -0.62044832  0.86315424
 -1.21767004 -0.27973311].


In [13]:
#repeat the process with standardisation of fearures
norm_list_of_mse = []

for i in range(50):
    #step 1
    x_train, x_test, y_train, y_test =  train_test_split(norm_features, target, test_size=0.3)
    #step 2
    model.fit(x_train, y_train, validation_split=0.3, epochs=50, verbose=0)
    #step 3
    predicted = model.predict(x_test)
    mse = mean_squared_error(y_test, predicted)
    norm_list_of_mse.append(mse)

norm_list_of_mse = np.array(norm_list_of_mse)

# Mean of mse using standardisation of fearures
norm_mean_of_mse = round(norm_list_of_mse.mean(), 2)

# Std of mse using standardisation of fearures
norm_std_of_mse = round(norm_list_of_mse.std(),2)

print(f'The mean of the list MSE with standardisation of fearures is {norm_mean_of_mse}')
print(f'The standand deviation of the list MSE with standardisation of fearures is: {norm_std_of_mse}.')

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [14]:
print('We can see than mean and standand deviation of MSE with and without standardisation of fearures')
print('Compare:')
print(f'Without standardisation: MSE: {mean_of_mse}, StD: {std_of_mse}')
print(f'With standardisation: MSE: {norm_mean_of_mse}, StD: {norm_std_of_mse}')
print('So, MSE was decreased, but StD was increased ')

We can see than mean and standand deviation of MSE with and without standardisation of fearures
Compare:
Without standardisation: MSE: 59.15, StD: 21.7
With standardisation: MSE: 53.79, StD: 63.91
So, MSE was decreased, but StD was increased 


# C. Increate the number of epochs (5 marks)

Repeat Part B but use 100 epochs this time for training.

How does the mean of the mean squared errors compare to that from Step B?

In [15]:
list_of_mse_100 = []

for i in range(50):
    #step 1
    x_train, x_test, y_train, y_test =  train_test_split(norm_features, target, test_size=0.3)
    #step 2
    model.fit(x_train, y_train, validation_split=0.3, epochs=100, verbose=0)
    #step 3
    predicted = model.predict(x_test)
    mse = mean_squared_error(y_test, predicted)
    list_of_mse_100.append(mse)

list_of_mse_100 = np.array(list_of_mse_100)

# Mean of mse using 100 epochs
mean_of_mse_100 = round(list_of_mse_100.mean(), 2)

# Std of mse using 100 epochs
std_of_mse_100 = round(list_of_mse_100.std(),2)

print(f'The mean of the list MSE with standardisation of fearures is {mean_of_mse_100}')
print(f'The standand deviation of the list MSE with standardisation of fearures is: {std_of_mse_100}.')

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [16]:
print('We can see than mean and standand deviation of MSE with 50 and 100 epochs')
print('Compare:')
print(f'With 50 epochs: MSE {norm_mean_of_mse} , StD: {norm_std_of_mse}')
print(f'With 100 epochs: MSE {mean_of_mse_100} , StD: {std_of_mse_100}')
print('So, MSE and StD are decreased ')

We can see than mean and standand deviation of MSE with 50 and 100 epochs
Compare:
With 50 epochs: MSE 53.79 , StD: 63.91
With 100 epochs: MSE 29.64 , StD: 3.11
So, MSE and StD are decreased 


# D. Increase the number of hidden layers (5 marks)

Repeat part B but use a neural network with the following instead:

- Three hidden layers, each of 10 nodes and ReLU activation function.

### Building the model

In [17]:
# Creating the object ANN 
model = Sequential()

#Adding the layers
#Adding the input layer
model.add(Input(shape=(num_features,)))
#Adding the three hidden layers
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(10, activation='relu'))
#Adding the output layer         
model.add(Dense(1))

model.compile(optimizer = 'adam', loss = 'mean_squared_error')

In [18]:
list_of_mse_3_layers = []

for i in range(50):
    #step 1
    x_train, x_test, y_train, y_test =  train_test_split(norm_features, target, test_size=0.3)
    #step 2
    model.fit(x_train, y_train, validation_split=0.3, epochs=50, verbose=0)
    #step 3
    predicted = model.predict(x_test)
    mse = mean_squared_error(y_test, predicted)
    list_of_mse_3_layers.append(mse)

list_of_mse_3_layers = np.array(list_of_mse_3_layers)

# Mean of mse using 100 epochs
mean_of_mse_3_layers = round(list_of_mse_3_layers.mean(), 2)

# Std of mse using 100 epochs
std_of_mse_3_layers = round(list_of_mse_3_layers.std(),2)

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [19]:
print('We can see than mean and standand deviation of MSE with 1 and 3 hidden layers')
print('Compare:')
print(f'With 1 hidden layer: MSE {norm_mean_of_mse} , StD: {norm_std_of_mse}')
print(f'With 3 1 hidden layers {mean_of_mse_3_layers} , StD: {std_of_mse_3_layers}')
print('So, MSE and StD are decreased ')

We can see than mean and standand deviation of MSE with 1 and 3 hidden layers
Compare:
With 1 hidden layer: MSE 53.79 , StD: 63.91
With 3 1 hidden layers 36.35 , StD: 29.12
So, MSE and StD are decreased 


In [20]:
metrics = pd.DataFrame({'MSE': [mean_of_mse, norm_mean_of_mse, mean_of_mse_100, mean_of_mse_3_layers], 
                       'StD': [std_of_mse, norm_std_of_mse, std_of_mse_100, std_of_mse_3_layers]},
                        index = ['Task A', 'Task B', 'Task C', 'Task D'])

metrics.head()

Unnamed: 0,MSE,StD
Task A,59.15,21.7
Task B,53.79,63.91
Task C,29.64,3.11
Task D,36.35,29.12


### We can conclude that in Task C we have the best model