In [63]:
import copy

import pandas as pd
import numpy as np
import tensorflow

from keras import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split

# Read in, Normalize and Split Data

In [64]:
target_variable = 'Strength'

In [65]:
df_cement = pd.read_csv('concrete_data.csv')

print(sorted(list(df_cement)))

['Age', 'Blast Furnace Slag', 'Cement', 'Coarse Aggregate', 'Fine Aggregate', 'Fly Ash', 'Strength', 'Superplasticizer', 'Water']


In [66]:

def normalize(data_to_normalize):
    data_to_normalize_scaled = copy.copy(data_to_normalize)
    for feature in data_to_normalize.columns:
        mean_value_of_feature = np.mean(data_to_normalize[feature].values)
        std_dev_value_of_feature = np.std(data_to_normalize[feature].values)

        data_to_normalize_scaled[feature] = (data_to_normalize[feature] - mean_value_of_feature) / std_dev_value_of_feature

    return data_to_normalize_scaled

In [67]:
# Create a features and target dataframe, where df_features does not contain target variable and df_target contains only target variable
df_features = df_cement.drop(target_variable, axis=1)

df_target = df_cement[target_variable]

df_features_normalized = normalize(data_to_normalize=df_features)
df_features_normalized

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.477915,-0.856888,-0.847144,-0.916764,-0.620448,0.863154,-1.217670,-0.279733
1,2.477915,-0.856888,-0.847144,-0.916764,-0.620448,1.056164,-1.217670,-0.279733
2,0.491425,0.795526,-0.847144,2.175461,-1.039143,-0.526517,-2.240917,3.553066
3,0.491425,0.795526,-0.847144,2.175461,-1.039143,-0.526517,-2.240917,5.057677
4,-0.790459,0.678408,-0.847144,0.488793,-1.039143,0.070527,0.647884,4.978487
...,...,...,...,...,...,...,...,...
1025,-0.045645,0.488235,0.564545,-0.092171,0.451410,-1.323005,-0.065893,-0.279733
1026,0.392819,-0.856888,0.960068,0.676200,0.702626,-1.994680,0.496893,-0.279733
1027,-1.270088,0.759579,0.850635,0.521589,-0.017528,-1.036064,0.080107,-0.279733
1028,-1.168610,1.308065,-0.847144,-0.279579,0.853356,0.214641,0.191166,-0.279733


In [68]:
#Obtain split data in dataframe format, by sending df_features and df_target to helper function
df_features_train, df_features_test, df_target_train, df_target_test = train_test_split(df_features_normalized, df_target, test_size=0.3)

# Build Keras Model

In [69]:
input_shape = (df_features_train.shape[1],)

## Create Model Architecture

In [70]:
model = Sequential()
#Single hidden layer, 
model.add(Dense(10, input_shape=input_shape, activation="relu"))
model.add(Dense(1))

# Compile and Fit Model

In [71]:
model.compile(optimizer='adam', loss='mse', metrics=['mse'])

In [72]:
model.fit(df_features_train, df_target_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x29d57d450>

In [73]:
results = model.evaluate(x=df_features_test, y=df_target_test, return_dict=True)
print(results['mse'])

279.9909362792969


# Retrain Model Split and Train 50 Times

In [74]:
ls_mse_values = []

for iteration in range(0, 50):
    #Obtain split data in dataframe format, by sending df_features and df_target to helper function
    df_features_train, df_features_test, df_target_train, df_target_test = train_test_split(df_features, df_target, test_size=0.3)

    # Create a features and target dataframe, where df_features does not contain target variable and df_target contains only target variable
    df_features = df_cement.drop(target_variable, axis=1)

    df_target = df_cement[target_variable]

    df_features_normalized = normalize(data_to_normalize=df_features)

    #Obtain split data in dataframe format, by sending df_features and df_target to helper function
    df_features_train, df_features_test, df_target_train, df_target_test = train_test_split(df_features, df_target, test_size=0.3)

    input_shape = (df_features_train.shape[1],)

    #Create Model
    model = Sequential()
    #Single hidden layer, 
    model.add(Dense(10, input_shape=input_shape, activation="relu"))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mse', metrics=['mse'])

    model.fit(df_features_train, df_target_train, epochs=50, verbose=0)

    results = model.evaluate(x=df_features_test, y=df_target_test, return_dict=True)

    ls_mse_values.append(results['mse'])



## Get Mean and Standard Deviations of 50 Runs

In [77]:
avg_mse = np.mean(np.array(ls_mse_values))
std_dev_mse = np.std(np.array(ls_mse_values))

print("For the 50 runs, the mean MSE is 344.12 with a standard deviation of 361.04")
print("For the 50 runs, the mean MSE is {:.2f} with a standard deviation of {:.2f}".format(avg_mse, std_dev_mse))

For the 50 runs, the mean MSE is 344.12 with a standard deviation of 361.04
For the 50 runs, the mean MSE is 293.03 with a standard deviation of 374.86
