## Predicting Concrete Compressive Strength Using Deep Learning: A Regression Approach with Keras

In [None]:
# All Libraries required for this lab are listed below.

!pip install numpy==2.0.2
!pip install pandas==2.2.2
!pip install tensorflow_cpu==2.18.0

In [28]:
import pandas as pd
import numpy as np
import keras

In [None]:
filepath='data/concrete_data.csv'
concrete_data = pd.read_csv(filepath)

concrete_data.head()

In [None]:
concrete_data.shape

In [None]:
concrete_data.describe()


In [None]:
concrete_data.isnull().sum()

In [33]:
concrete_data_columns = concrete_data.columns

In [34]:
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [None]:
predictors.head()

In [None]:
target.head()

In [None]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

In [38]:
n_cols = predictors_norm.shape[1] # number of predictors

##  Import Keras Packages

In [39]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Input

## Build a Neural Network

In [40]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Input(shape=(n_cols,)))  # Input layer
    model.add(Dense(10, activation='relu'))  # One hidden layer with 10 nodes and ReLU activation
    model.add(Dense(1))  # Output layer for regression (single output node)
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')  # Adam optimizer and MSE loss function
    return model

## Train and Test the Network

In [41]:
# build the model
baseline_model = regression_model()

In [None]:
# fit the model
baseline_model.fit(predictors_norm, target, validation_split=0.3, epochs=100, verbose=2)

## Randomly split the data into a training and test sets by holding 30% of the data for testing

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=42)

# Fit the model on the training data
baseline_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=2)

In [44]:
# Function to train and evaluate the model
def train_and_evaluate():
    # Split the data into training and test sets (30% test set)
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=None)
    
    # Build the model
    model = regression_model()
    
    # Train the model on the training data
    model.fit(X_train, y_train, epochs=50, verbose=0)  # Train for 50 epochs
    
    # Predict on the test set
    predictions = model.predict(X_test)
    
    # Compute the mean squared error
    mse = mean_squared_error(y_test, predictions)
    return mse

In [None]:
# Repeat the process 50 times
mse_list = [train_and_evaluate() for _ in range(50)]

# Compute the mean and standard deviation of the MSEs
mean_mse = np.mean(mse_list)
std_mse = np.std(mse_list)

# Report the results
print(f"Mean of Mean Squared Errors: {mean_mse}")
print(f"Standard Deviation of Mean Squared Errors: {std_mse}")

## B. Normalize the data 

In [46]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Normalize the data
scaler = StandardScaler()
predictors_norm = scaler.fit_transform(predictors)  # Normalize predictors

# Function to train and evaluate the model on normalized data
def train_and_evaluate_normalized():
    # Split the data into training and test sets (30% test set)
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=None)
    
    # Build the model
    model = regression_model()
    
    # Train the model on the training data
    model.fit(X_train, y_train, epochs=50, verbose=0)  # Train for 50 epochs
    
    # Predict on the test set
    predictions = model.predict(X_test)
    
    # Compute the mean squared error
    mse = mean_squared_error(y_test, predictions)
    return mse

# Repeat the process 50 times with normalized data
mse_list_normalized = [train_and_evaluate_normalized() for _ in range(50)]

# Compute the mean and standard deviation of the MSEs for normalized data
mean_mse_normalized = np.mean(mse_list_normalized)
std_mse_normalized = np.std(mse_list_normalized)

# Report the results
print(f"Mean of Mean Squared Errors (Normalized): {mean_mse_normalized}")
print(f"Standard Deviation of Mean Squared Errors (Normalized): {std_mse_normalized}")

#### The normalized data yields a more accurate model with a mean MSE approximately 7.59% lower than the non-normalized data.

## C. Increase the number of epochs to 100

In [None]:
# Function to train and evaluate the model on normalized data
def train_and_evaluate_normalized_100_epochs():
    # Split the data into training and test sets (30% test set)
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=None)
    
    # Build the model
    model = regression_model()
    
    # Train the model on the training data
    model.fit(X_train, y_train, epochs=100, verbose=0)  # Train for 100 epochs
    
    # Predict on the test set
    predictions = model.predict(X_test)
    
    # Compute the mean squared error
    mse = mean_squared_error(y_test, predictions)
    return mse

# Repeat the process 50 times with normalized data and 100 epochs
mse_list_normalized_100_epochs = [train_and_evaluate_normalized_100_epochs() for _ in range(50)]

# Compute the mean and standard deviation of the MSEs for normalized data
mean_mse_normalized_100_epochs = np.mean(mse_list_normalized_100_epochs)
std_mse_normalized_100_epochs = np.std(mse_list_normalized_100_epochs)

# Report the results
print(f"Mean of Mean Squared Errors (Normalized, 100 Epochs): {mean_mse_normalized_100_epochs}")
print(f"Standard Deviation of Mean Squared Errors (Normalized, 100 Epochs): {std_mse_normalized_100_epochs}")

#### Increasing the number of epochs from 50 to 100 led to a substantial reduction in mean MSE (approximately 54.18% improvement). This shows that additional training epochs helped the model learn more effectively, but care should be taken to ensure further increases do not lead to overfitting.

## D. Increase the number of hidden layers

In [49]:
def regression_model_three_layers():
    model = Sequential()
    model.add(Input(shape=(predictors_norm.shape[1],)))  # Input layer
    model.add(Dense(10, activation='relu'))  # First hidden layer
    model.add(Dense(10, activation='relu'))  # Second hidden layer
    model.add(Dense(10, activation='relu'))  # Third hidden layer
    model.add(Dense(1))  # Output layer for regression
    
    # Compile the model with the Adam optimizer and mean squared error loss function
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [None]:
# Function to train and evaluate the model on normalized data
def train_and_evaluate_normalized_100_epochs_three_layers():
    # Split the data into training and test sets (30% test set)
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, random_state=None)
    
    # Build the model
    model = regression_model_three_layers()
    
    # Train the model on the training data
    model.fit(X_train, y_train, epochs=100, verbose=0)  # Train for 100 epochs
    
    # Predict on the test set
    predictions = model.predict(X_test)
    
    # Compute the mean squared error
    mse = mean_squared_error(y_test, predictions)
    return mse

# Repeat the process 50 times with normalized data and 100 epochs
mse_list_normalized_100_epochs_three_layers = [train_and_evaluate_normalized_100_epochs_three_layers() for _ in range(50)]

# Compute the mean and standard deviation of the MSEs for normalized data
mean_mse_normalized_100_epochs_three_layers = np.mean(mse_list_normalized_100_epochs_three_layers)
std_mse_normalized_100_epochs_three_layers = np.std(mse_list_normalized_100_epochs_three_layers)

# Report the results
print(f"Mean of Mean Squared Errors (Normalized, 100 Epochs, 3 Layers): {mean_mse_normalized_100_epochs_three_layers}")
print(f"Standard Deviation of Mean Squared Errors (Normalized, 100 Epochs, 3 Layers): {std_mse_normalized_100_epochs_three_layers}")

#### The Mean Squared Error decreased from 363.039 (50 epochs, 1 layer) to 90.310 (100 epochs, 3 layers), reflecting a 75.12% improvement. This demonstrates that a deeper neural network trained for more epochs can significantly enhance the predictive power of the model in regression tasks. However, monitoring validation metrics is crucial to avoid overfitting with deeper networks and longer training.