# Predict Cement Strength

In [12]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')

## A (baseline model)

### Explore Data

In [3]:
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [5]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


### Identify Predictors and Targets

In [119]:
predictor_cols = concrete_data.columns[:-1]
target_cols = concrete_data.columns[-1]
print(f'features: {predictor_cols}\n\ntarget: {target_cols}')

features: Index(['Cement', 'Blast Furnace Slag', 'Fly Ash', 'Water', 'Superplasticizer',
       'Coarse Aggregate', 'Fine Aggregate', 'Age'],
      dtype='object')

target: Strength


In [120]:
predictors = concrete_data[predictor_cols]
targets = concrete_data[target_cols]

In [121]:
n_cols = len(predictor_cols)
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [122]:
targets.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

### Create Network

In [136]:
def network():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')

    return model    

### Create Train\Test Split

In [137]:
X_train, X_test, y_train, y_test = train_test_split(predictors, targets,test_size=0.3, train_size=0.7)

In [138]:
print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(721, 8) (309, 8)
(721,) (309,)


### Train

In [139]:
model = network()
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=1)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.src.callbacks.History at 0x7f0339545300>

### Evaluate Model

In [140]:
# evaluate the model
MSE = model.evaluate(X_test, y_test, verbose=0)
print('Mean Squared Error: {}'.format(MSE))  
# model.predict(X_test) # If you want to see individual predictions  

Mean Squared Error: 158.55186462402344


In [157]:
def repeat(num_MSEs=50):
    MSEs = []
    for i in range(num_MSEs):
        print(f'Calculating MSE {i+1} of {num_MSEs}')
        X_train, X_test, y_train, y_test = train_test_split(predictors, targets,test_size=0.3, train_size=0.7)
        reg_model = network()
        reg_model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=50, verbose=0)
        MSE = reg_model.evaluate(X_test, y_test, verbose=0)
        print(f'MSE: {MSE}')
        MSEs.append(MSE)
    avg_MSE = sum(MSEs) / num_MSEs
    return avg_MSE, MSEs
    
num_MSEs = 50 # 50
avg_MSE, MSEs = repeat(num_MSEs)
print(f'Average MSE for {num_MSEs}: {avg_MSE}')
print(f'Standard Deviation of MSEs: {np.std(MSEs)}')

Calculating MSE 1 of 50
MSE: 1097.5281982421875
Calculating MSE 2 of 50
MSE: 253.56646728515625
Calculating MSE 3 of 50
MSE: 261.31787109375
Calculating MSE 4 of 50
MSE: 211.30838012695312
Calculating MSE 5 of 50
MSE: 1110.8089599609375
Calculating MSE 6 of 50
MSE: 629.0878295898438
Calculating MSE 7 of 50
MSE: 94.53973388671875
Calculating MSE 8 of 50
MSE: 480.4676513671875
Calculating MSE 9 of 50
MSE: 110.02056884765625
Calculating MSE 10 of 50
MSE: 350.3126525878906
Calculating MSE 11 of 50
MSE: 94.47972106933594
Calculating MSE 12 of 50
MSE: 330.2684326171875
Calculating MSE 13 of 50
MSE: 144.80242919921875
Calculating MSE 14 of 50
MSE: 127.58815002441406
Calculating MSE 15 of 50
MSE: 314.4067077636719
Calculating MSE 16 of 50
MSE: 102.36164855957031
Calculating MSE 17 of 50
MSE: 270.2301330566406
Calculating MSE 18 of 50
MSE: 112.58744049072266
Calculating MSE 19 of 50
MSE: 203.0125732421875
Calculating MSE 20 of 50
MSE: 189.75669860839844
Calculating MSE 21 of 50
MSE: 335.7880249

In [152]:
print(f'MSEs list: {MSEs}')

MSEs list: [289.27825927734375, 138.51829528808594, 160.9105682373047, 1550.7999267578125, 340.1033630371094]


## B (Normalize Data)

### Normalize Predictors

In [153]:
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.795140,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.551340
3,0.491187,0.795140,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069
...,...,...,...,...,...,...,...,...
1025,-0.045623,0.487998,0.564271,-0.092126,0.451190,-1.322363,-0.065861,-0.279597
1026,0.392628,-0.856472,0.959602,0.675872,0.702285,-1.993711,0.496651,-0.279597
1027,-1.269472,0.759210,0.850222,0.521336,-0.017520,-1.035561,0.080068,-0.279597
1028,-1.168042,1.307430,-0.846733,-0.279443,0.852942,0.214537,0.191074,-0.279597


In [158]:
def repeat(num_MSEs=50):
    MSEs = []
    for i in range(num_MSEs):
        print(f'Calculating MSE {i+1} of {num_MSEs}')
        X_train_norm, X_test_norm, y_train, y_test = train_test_split(predictors_norm, targets, test_size=0.3, train_size=0.7)
        reg_model = network()
        reg_model.fit(X_train_norm, y_train, validation_data=(X_test_norm, y_test), epochs=50, verbose=0)
        MSE = reg_model.evaluate(X_test_norm, y_test, verbose=0)
        print(f'MSE: {MSE}')
        MSEs.append(MSE)
    avg_MSE = sum(MSEs) / num_MSEs
    return avg_MSE, MSEs
    
num_MSEs = 50 # 50
avg_MSE, MSEs = repeat(num_MSEs)
print(f'Average MSE for {num_MSEs}: {avg_MSE}')
print(f'Standard Deviation of MSEs: {np.std(MSEs)}')

Calculating MSE 1 of 50
MSE: 239.16571044921875
Calculating MSE 2 of 50
MSE: 383.5220947265625
Calculating MSE 3 of 50
MSE: 295.8877258300781
Calculating MSE 4 of 50
MSE: 322.47125244140625
Calculating MSE 5 of 50
MSE: 339.86761474609375
Calculating MSE 6 of 50
MSE: 259.4003601074219
Calculating MSE 7 of 50
MSE: 298.04547119140625
Calculating MSE 8 of 50
MSE: 298.1614990234375
Calculating MSE 9 of 50
MSE: 286.5115051269531
Calculating MSE 10 of 50
MSE: 313.9725341796875
Calculating MSE 11 of 50
MSE: 285.58587646484375
Calculating MSE 12 of 50
MSE: 341.1405334472656
Calculating MSE 13 of 50
MSE: 295.13653564453125
Calculating MSE 14 of 50
MSE: 287.7839050292969
Calculating MSE 15 of 50
MSE: 337.8100891113281
Calculating MSE 16 of 50
MSE: 355.80804443359375
Calculating MSE 17 of 50
MSE: 378.502197265625
Calculating MSE 18 of 50
MSE: 419.2716369628906
Calculating MSE 19 of 50
MSE: 511.1379089355469
Calculating MSE 20 of 50
MSE: 306.6430969238281
Calculating MSE 21 of 50
MSE: 412.862701416

## C (Increase epochs)

### Increase epochs

In [159]:
epochs = 100

In [None]:
def repeat(num_MSEs=50, epochs=50):
    MSEs = []
    for i in range(num_MSEs):
        print(f'Calculating MSE {i+1} of {num_MSEs}')
        X_train_norm, X_test_norm, y_train, y_test = train_test_split(predictors_norm, targets, test_size=0.3, train_size=0.7)
        reg_model = network()
        reg_model.fit(X_train_norm, y_train, validation_data=(X_test_norm, y_test), epochs=epochs, verbose=0)
        MSE = reg_model.evaluate(X_test_norm, y_test, verbose=0)
        print(f'MSE: {MSE}')
        MSEs.append(MSE)
    avg_MSE = sum(MSEs) / num_MSEs
    return avg_MSE, MSEs
    
num_MSEs = 50 # 50
avg_MSE, MSEs = repeat(num_MSEs, epochs=epochs)
print(f'Average MSE for {num_MSEs}: {avg_MSE}')
print(f'Standard Deviation of MSEs: {np.std(MSEs)}')

## D (Hidden Layers)

### Hidden Layers

In [163]:
def network():
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mean_squared_error')

    return model    


def repeat(num_MSEs=50, epochs=50):
    MSEs = []
    for i in range(num_MSEs):
        print(f'Calculating MSE {i+1} of {num_MSEs}')
        X_train_norm, X_test_norm, y_train, y_test = train_test_split(predictors_norm, targets, test_size=0.3, train_size=0.7)
        reg_model = network()
        reg_model.fit(X_train_norm, y_train, validation_data=(X_test_norm, y_test), epochs=epochs, verbose=0)
        MSE = reg_model.evaluate(X_test_norm, y_test, verbose=0)
        print(f'MSE: {MSE}')
        MSEs.append(MSE)
    avg_MSE = sum(MSEs) / num_MSEs
    return avg_MSE, MSEs
    
num_MSEs = 50 # 50
epochs = 50
avg_MSE, MSEs = repeat(num_MSEs, epochs=epochs)
print(f'Average MSE for {num_MSEs}: {avg_MSE}')
print(f'Standard Deviation of MSEs: {np.std(MSEs)}')

Calculating MSE 1 of 50
MSE: 133.46131896972656
Calculating MSE 2 of 50
MSE: 173.45700073242188
Calculating MSE 3 of 50
MSE: 134.04684448242188
Calculating MSE 4 of 50
MSE: 129.70494079589844
Calculating MSE 5 of 50
MSE: 112.15840148925781
Calculating MSE 6 of 50
MSE: 129.77734375
Calculating MSE 7 of 50
MSE: 126.45160675048828
Calculating MSE 8 of 50
MSE: 146.4435272216797
Calculating MSE 9 of 50
MSE: 146.7511749267578
Calculating MSE 10 of 50
MSE: 138.53726196289062
Calculating MSE 11 of 50
MSE: 144.56845092773438
Calculating MSE 12 of 50
MSE: 134.08062744140625
Calculating MSE 13 of 50
MSE: 129.0662078857422
Calculating MSE 14 of 50
MSE: 139.9160919189453
Calculating MSE 15 of 50
MSE: 135.40008544921875
Calculating MSE 16 of 50
MSE: 151.23500061035156
Calculating MSE 17 of 50
MSE: 137.7567901611328
Calculating MSE 18 of 50
MSE: 97.76695251464844
Calculating MSE 19 of 50
MSE: 150.49826049804688
Calculating MSE 20 of 50
MSE: 119.78180694580078
Calculating MSE 21 of 50
MSE: 140.0752105