# Load the dataset

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import pandas as pd
import numpy as np

df = pd.read_csv('concrete_data.csv')
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
df.shape  #check Number of datapoints

(1030, 9)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1030 entries, 0 to 1029
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Cement              1030 non-null   float64
 1   Blast Furnace Slag  1030 non-null   float64
 2   Fly Ash             1030 non-null   float64
 3   Water               1030 non-null   float64
 4   Superplasticizer    1030 non-null   float64
 5   Coarse Aggregate    1030 non-null   float64
 6   Fine Aggregate      1030 non-null   float64
 7   Age                 1030 non-null   int64  
 8   Strength            1030 non-null   float64
dtypes: float64(8), int64(1)
memory usage: 72.5 KB


there is no null values

# Split data into X and y

In [5]:
X = df.drop('Strength', axis=1)
y = df['Strength']

#  normalize the data by substracting the mean and dividing by the standard deviation

In [6]:
X_norm= (X - X.mean()) / X.std()
X_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


# Use the Keras library to build a neural network with the following:

- Three hidden layers, each of 10 nodes and ReLU activation function.

- Use the adam optimizer and the mean squared error  as the loss function

In [22]:
import keras
from keras.models import Sequential
from keras.layers import Dense

def ANN_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(X.shape[1],)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

#  split the data into a training and test sets by holding 30% of the data for testing

In [18]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.3, random_state=42)


#  call the function now to create our model. 

In [23]:
model = ANN_model()
epochs = 50
model.fit(X_train, y_train, epochs=epochs, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x1f92a483e80>

#  Evaluate the model on the test data 

In [24]:
loss_val = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)
loss_val



132.2569580078125

In [25]:
from sklearn.metrics import mean_squared_error
mean_square_error = mean_squared_error(y_test, y_pred)
mean = np.mean(mean_square_error)
standard_deviation = np.std(mean_square_error)
print(mean, standard_deviation)

132.25696130721505 0.0


In [26]:
# Number of repetitions
repetitions = 50

# Lists to store mean squared errors
mse_list = []

# Repeat the process 50 times
for _ in range(repetitions):
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_norm, y, test_size=0.3, random_state=42)

    model = ANN_model()
    # Train the model
    model.fit(X_train, y_train, epochs=50, verbose=0)

    # Predict on the test set
    y_pred = model.predict(X_test)

    # Calculate mean squared error and append to the list
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)




#  mean and the standard deviation of the mean squared errors 

In [27]:
mean_squared_errors = np.array(mse_list)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mse_list)

print("Below is the mean and standard deviation of the Normalized Data " + "\n")

print("Mean: "+str(mean))
print("Standard Deviation: "+str(standard_deviation))

Below is the mean and standard deviation of the Normalized Data 

Mean: 124.34538037998668
Standard Deviation: 13.810829624819563
