In [45]:
#!pip install numpy==1.21.4
#!pip install pandas==1.3.4
!pip install keras==2.1.6



# Import Libraries

In [46]:
import pandas as pd
import numpy as np

import warnings
warnings.simplefilter('ignore', FutureWarning)

In [47]:
import keras 

In [48]:
#import libraries for models
from keras.models import Sequential

In [49]:
# import layers 
from keras.layers import Dense

In [50]:
#train test split
from sklearn.model_selection import train_test_split

In [51]:
#import metrics MSE
from sklearn.metrics import mean_squared_error

# Load data in panda datafrane

In [52]:
df=pd.read_csv('concrete_data.csv')
df.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [53]:
#column names
col_names= df.columns
col_names

Index(['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer',
       'Coarse Aggregate', 'Fine Aggregate', 'Age', 'Strength'],
      dtype='object')

# Split in predictors  and target

In [54]:
#target 
target = df['Strength']

In [55]:
#predictors
predictors =df[col_names[col_names != 'Strength']] # all columns except Strength
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


# Check if all the data is there

In [56]:
#check if all the data is there
df.isnull().count()

Cement                1030
Blast Furnace Slag    1030
Fly Ash               1030
Water                 1030
Superplasticizer      1030
Coarse Aggregate      1030
Fine Aggregate        1030
Age                   1030
Strength              1030
dtype: int64

# How many columns / features?

In [57]:
df.shape

(1030, 9)

<!-- # instantiate model -->

# compute input shape

In [32]:
#number of column / features
ncols=predictors.shape[1]
ncols

8

# Regression model function

In [58]:
def regression_model():

    # create model
    model = Sequential()
    # input layer with input shape of the pretoctors +  hidden layer with 10 nodes 
    model.add(Dense(10, activation='relu', input_shape=(ncols,)))
    model.add(Dense(1))#output
    
    # compile model usign adam optimaizer 
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# B: Normalizing data set(Predictors)



In [59]:
#normalizing predictors
predictors_normalized= ( predictors - predictors.mean() ) / predictors.std()

# function of process 1-3 combined

In [60]:
# the step 1-3
def reg_model_process_normalized():
    #Randomly split the data into a training and test sets by holding 30% of the data for testing.
    X_train_norm, X_test_norm, y_train, y_test = train_test_split(predictors_normalized, target, test_size=0.30, random_state=42)
    #build model
    model=regression_model()  
    #fit with 50 epochs
    model.fit(X_train_norm ,y_train, validation_split=0.3, epochs=50, verbose=2)
    #predict
    y_pred=model.predict(X_test_norm)
    #compute the mean squared error between the predicted concrete strength and the actual concrete strength using the mean_squared_error
    mse = mean_squared_error(y_test, y_pred)
    return mse




In [61]:
mses_normalized=[]
i=0
for i in range(50):
    mses_normalized.append(reg_model_process_normalized())

Train on 504 samples, validate on 217 samples
Epoch 1/50
 - 2s - loss: 1636.4301 - val_loss: 1511.6440
Epoch 2/50
 - 0s - loss: 1623.0813 - val_loss: 1499.7216
Epoch 3/50
 - 0s - loss: 1610.1554 - val_loss: 1487.8621
Epoch 4/50
 - 0s - loss: 1597.4573 - val_loss: 1476.0742
Epoch 5/50
 - 0s - loss: 1584.7556 - val_loss: 1464.2788
Epoch 6/50
 - 0s - loss: 1571.7664 - val_loss: 1452.6988
Epoch 7/50
 - 0s - loss: 1559.0971 - val_loss: 1440.7689
Epoch 8/50
 - 0s - loss: 1546.1071 - val_loss: 1428.6096
Epoch 9/50
 - 0s - loss: 1532.8864 - val_loss: 1416.0301
Epoch 10/50
 - 0s - loss: 1519.2311 - val_loss: 1403.5235
Epoch 11/50
 - 0s - loss: 1505.4489 - val_loss: 1390.6468
Epoch 12/50
 - 0s - loss: 1491.4783 - val_loss: 1377.3670
Epoch 13/50
 - 0s - loss: 1476.9678 - val_loss: 1363.8371
Epoch 14/50
 - 0s - loss: 1462.1173 - val_loss: 1350.0174
Epoch 15/50
 - 0s - loss: 1447.0533 - val_loss: 1335.7797
Epoch 16/50
 - 0s - loss: 1431.5558 - val_loss: 1321.3101
Epoch 17/50
 - 0s - loss: 1416.0091


# How does the mean of the mean squared errors compare to that from Step A?

In [62]:
df_mses_normalized = pd.DataFrame(data=mses_normalized, columns=['MSE with Normalized_Predictors(StepA)'])


In [63]:
# Mean of Normalized predictor MSE - Step B
df_mses_normalized.mean()

MSE with Normalized_Predictors(StepA)    633.157048
dtype: float64

### Step A's MSE:650.679096 and MSE with Normalized_Predictors: 641.064394

Normalized number gives better score (less error)