A. Build a baseline model (5 marks)
Use the Keras library to build a neural network with the following:
- One hidden layer of 10 nodes, and a ReLU activation function
- Use the adam optimizer and the mean squared error  as the loss function.

0. Reload the Dataset

In [17]:
import pandas as pd
import warnings
# Suppress the specific warning related to input_shape/input_dim
warnings.filterwarnings('ignore', category=UserWarning, module='keras')
# Load the dataset
file_path = 'concrete_data.csv'  
data = pd.read_csv(file_path)
data.head()


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


1. Preprocess the Data and Split into Train/Test Sets

In [18]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Features (all columns except 'Strength') and target ('Strength')
X = data.drop('Strength', axis=1)
y = data['Strength']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data: 70% training, 30% testing
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

 2. Train the model on the training data using 50 epochs.

In [23]:
from keras.models import Sequential
from keras.layers import Dense, Input

# Build the neural network model
def build_model():
    model = Sequential()
    model.add(Dense(10, input_dim=X_train.shape[1], activation='relu'))  # One hidden layer with 10 nodes
    model.add(Dense(1))  # Output layer with one node (for regression)
    model.compile(optimizer='adam', loss='mean_squared_error')  # Using Adam optimizer and MSE loss
    return model

# Train the model
model = build_model()
history = model.fit(X_train, y_train, epochs=50, verbose=0)

# Evaluate the model on the test set
loss = model.evaluate(X_test, y_test)
print(f'Mean Squared Error on Test Set: {loss}')


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 625us/step - loss: 321.2509
Mean Squared Error on Test Set: 305.8531188964844


In [None]:
3. Evaluate the model on the test data and compute the mean squared error between the predicted concrete strength and the actual concrete strength. You can use the mean_squared_error function from Scikit-learn.

In [24]:
from sklearn.metrics import mean_squared_error

# Make predictions on the test set
y_pred = model.predict(X_test)
# Calculate Mean Squared Error
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
Mean Squared Error: 305.85313725320515


4. Repeat steps 1 - 3, 50 times, i.e., create a list of 50 mean squared errors.

In [25]:
import numpy as np

mse_list = []

for i in range(50):
    # Split the data (random seed changes in each loop)
    X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=i)
    
    # Build and train the model
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],)))  # Input layer
    model.add(Dense(10, activation='relu'))  # Hidden layer with 10 nodes
    model.add(Dense(1))  # Output layer for regression
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    # Train the model
    model.fit(X_train, y_train, epochs=50, verbose=0)
    
    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)

# Calculate the mean and standard deviation of the 50 MSE values
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)

print(f'Mean MSE over 50 runs: {mean_mse}')
print(f'Standard Deviation of MSE: {std_mse}')


[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

# B. Normalize the data
Repeat Part A but use a normalized version of the data. Recall that one way to normalize the data is by subtracting the mean from the individual predictors and dividing by the standard deviation.

In [27]:
from sklearn.preprocessing import StandardScaler
# Normalize the features (X) and target (y)
scaler_X = StandardScaler()
X_normalized = scaler_X.fit_transform(X)

mse_list_normalized = []

for i in range(50):
    # Split the data (random seed changes in each loop)
    X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.3, random_state=i)
    # Build and train the model
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],))) 
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')
    # Train the model
    model.fit(X_train, y_train, epochs=50, verbose=0)
    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list_normalized.append(mse)

# Calculate the mean and standard deviation of the 50 MSE values
mean_mse_normalized = np.mean(mse_list_normalized)
std_mse_normalized = np.std(mse_list_normalized)

print(f'Mean MSE over 50 runs (Normalized Data - Step B): {mean_mse_normalized}')
print(f'Standard Deviation of MSE (Normalized Data - Step B): {std_mse_normalized}')

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

Comparison of Step A and Step B

In [28]:
# Output the results from Step A (already calculated in the previous cell)
print(f'Mean MSE over 50 runs (Un-normalized Data - Step A): {mean_mse}')
print(f'Standard Deviation of MSE (Un-normalized Data - Step A): {std_mse}')

# Output the results from Step B (Normalized Data)
print(f'\nMean MSE over 50 runs (Normalized Data - Step B): {mean_mse_normalized}')
print(f'Standard Deviation of MSE (Normalized Data - Step B): {std_mse_normalized}')

# Calculate and output the differences
mean_difference = mean_mse - mean_mse_normalized
std_difference = std_mse - std_mse_normalized

print(f'\nDifference in Mean MSE (Step A - Step B): {mean_difference}')
print(f'Difference in Standard Deviation of MSE (Step A - Step B): {std_difference}')

Mean MSE over 50 runs (Un-normalized Data - Step A): 368.02635609752986
Standard Deviation of MSE (Un-normalized Data - Step A): 107.21681345426178

Mean MSE over 50 runs (Normalized Data - Step B): 351.85762978212784
Standard Deviation of MSE (Normalized Data - Step B): 95.89137081179463

Difference in Mean MSE (Step A - Step B): 16.16872631540201
Difference in Standard Deviation of MSE (Step A - Step B): 11.325442642467152


# C. Increate the number of epochs (5 marks) Repeat Part B but use 100 epochs this time for training.

In [29]:
# List to store the MSE values for 100 epochs
mse_list_100_epochs = []
for i in range(50):
    # Split the normalized data (random seed changes in each loop)
    X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.3, random_state=i)
    # Build and train the model
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],))) 
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1)) 
    model.compile(optimizer='adam', loss='mean_squared_error')
    # Train the model for 100 epochs
    model.fit(X_train, y_train, epochs=100, verbose=0) 
    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list_100_epochs.append(mse)

# Calculate the mean and standard deviation of the 50 MSE values (for 100 epochs)
mean_mse_100_epochs = np.mean(mse_list_100_epochs)
std_mse_100_epochs = np.std(mse_list_100_epochs)

print(f'Mean MSE over 50 runs (100 Epochs - Step C): {mean_mse_100_epochs}')
print(f'Standard Deviation of MSE (100 Epochs - Step C): {std_mse_100_epochs}')

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [None]:
Comparison of Step B (50 Epochs) and Step C (100 Epochs):

In [30]:
# Output the results from Step B (Normalized Data, 50 epochs)
print(f'\nMean MSE over 50 runs (50 Epochs - Step B): {mean_mse_normalized}')
print(f'Standard Deviation of MSE (50 Epochs - Step B): {std_mse_normalized}')

# Output the results from Step C (Normalized Data, 100 epochs)
print(f'\nMean MSE over 50 runs (100 Epochs - Step C): {mean_mse_100_epochs}')
print(f'Standard Deviation of MSE (100 Epochs - Step C): {std_mse_100_epochs}')

# Calculate and output the differences
mean_difference_epochs = mean_mse_normalized - mean_mse_100_epochs
std_difference_epochs = std_mse_normalized - std_mse_100_epochs

print(f'\nDifference in Mean MSE (50 Epochs - 100 Epochs): {mean_difference_epochs}')
print(f'Difference in Standard Deviation of MSE (50 Epochs - 100 Epochs): {std_difference_epochs}')



Mean MSE over 50 runs (50 Epochs - Step B): 351.85762978212784
Standard Deviation of MSE (50 Epochs - Step B): 95.89137081179463

Mean MSE over 50 runs (100 Epochs - Step C): 167.75647889687124
Standard Deviation of MSE (100 Epochs - Step C): 18.558949378349848

Difference in Mean MSE (50 Epochs - 100 Epochs): 184.1011508852566
Difference in Standard Deviation of MSE (50 Epochs - 100 Epochs): 77.33242143344478


# D: Increase the Number of Hidden Layers to Three
Repeat part B but use a neural network with the following instead: Three hidden layers, each of 10 nodes and ReLU activation function.

In [31]:
# List to store the MSE values for 50 epochs with 3 hidden layers (Step D)
mse_list_3_layers = []

for i in range(50):
    # Split the normalized data (random seed changes in each loop)
    X_train, X_test, y_train, y_test = train_test_split(X_normalized, y, test_size=0.3, random_state=i)
    
    # Build and train the model with 3 hidden layers
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],))) 
    model.add(Dense(10, activation='relu')) 
    model.add(Dense(10, activation='relu')) 
    model.add(Dense(10, activation='relu'))  
    model.add(Dense(1)) 
    model.compile(optimizer='adam', loss='mean_squared_error')
    
    # Train the model for 50 epochs
    model.fit(X_train, y_train, epochs=50, verbose=0)
    
    # Evaluate the model
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mse_list_3_layers.append(mse)

# Calculate the mean and standard deviation of the 50 MSE values (for 3 hidden layers)
mean_mse_3_layers = np.mean(mse_list_3_layers)
std_mse_3_layers = np.std(mse_list_3_layers)

print(f'Mean MSE over 50 runs (Three Hidden Layers - Step D): {mean_mse_3_layers}')
print(f'Standard Deviation of MSE (Three Hidden Layers - Step D): {std_mse_3_layers}')

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━

In [None]:
Comparison of Step B (One Hidden Layer) and Step D (Three Hidden Layers):

In [32]:
# Output the results from Step B (One Hidden Layer, 50 epochs)
print(f'\nMean MSE over 50 runs (One Hidden Layer - Step B): {mean_mse_normalized}')
print(f'Standard Deviation of MSE (One Hidden Layer - Step B): {std_mse_normalized}')
# Output the results from Step D (Three Hidden Layers, 50 epochs)
print(f'\nMean MSE over 50 runs (Three Hidden Layers - Step D): {mean_mse_3_layers}')
print(f'Standard Deviation of MSE (Three Hidden Layers - Step D): {std_mse_3_layers}')
# Calculate and output the differences
mean_difference_layers = mean_mse_normalized - mean_mse_3_layers
std_difference_layers = std_mse_normalized - std_mse_3_layers
print(f'\nDifference in Mean MSE (One Hidden Layer - Three Hidden Layers): {mean_difference_layers}')
print(f'Difference in Standard Deviation of MSE (One Hidden Layer - Three Hidden Layers): {std_difference_layers}')


Mean MSE over 50 runs (One Hidden Layer - Step B): 351.85762978212784
Standard Deviation of MSE (One Hidden Layer - Step B): 95.89137081179463

Mean MSE over 50 runs (Three Hidden Layers - Step D): 128.67373084907055
Standard Deviation of MSE (Three Hidden Layers - Step D): 18.927269532781448

Difference in Mean MSE (One Hidden Layer - Three Hidden Layers): 223.1838989330573
Difference in Standard Deviation of MSE (One Hidden Layer - Three Hidden Layers): 76.96410127901318
