In [1]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical

import pandas as pd
import numpy as np
import statistics
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
import itertools
import matplotlib.ticker as ticker
%matplotlib inline

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn import metrics

Using TensorFlow backend.


In [2]:
# Reading the data

data = pd.read_csv('concrete_data.csv')
data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
# Null Counts

data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

In [4]:
# Data Shape

data.shape

(1030, 9)

In [5]:
# Data description

data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [6]:
# Splitting the data into features and target variables

concrete_data_columns = data.columns

X = data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
y = data['Strength'] # Strength column

In [7]:
n_cols = X.shape[1] # number of predictors

In [8]:
X.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


# PART A - Build a baseline model

## Dynamic model for Parts - A,B,C,D 

In [9]:
# Repeating the steps 1-3 50 times
# Build model by specifying epochs, hidden layers and nodes
# Using ADAM optimizer and mean squared error loss with metrics to evaluate - accuracy
# Using rectified linear activation function

def deep_model(epochs, hidden_layers, nodes):
    mse = []
    model = Sequential()
    # Input layer
    model.add(Dense(5, activation = 'relu', input_shape = (n_cols,)))
    # Hidden layer
    for layer in range(0, int(hidden_layers)):
        model.add(Dense(int(nodes), activation = 'relu'))
    # Output layer
    model.add(Dense(1, activation = 'relu'))
        
    for i in range (0, 50):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 4)
        # Compiling the model
        model.compile(optimizer = 'adam', loss = 'mse', metrics = ['accuracy'])
        # Fitting the model
        model.fit(X_train, y_train, epochs = epochs, verbose = 0)
        # Making predictions on test data
        y_pred = model.predict(X_test)
        # Calculating mean squared error between actual and predicted output
        mse.append(mean_squared_error(y_test, y_pred))
    return mse

# Part A - Results

### No normalization, 10 nodes in hidden layer, 50 epochs, 70-30 split, ADAM optimizer, loss - 'mse'

In [10]:
# Model with 50 epochs, 1 hidden layer, 10 nodes in hidden layer
A = deep_model(50, 1, 10)

W0518 00:05:53.926729 13960 deprecation_wrapper.py:119] From C:\ProgramData\Anaconda3\envs\deepai\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0518 00:05:53.952297 13960 deprecation_wrapper.py:119] From C:\ProgramData\Anaconda3\envs\deepai\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0518 00:05:53.957310 13960 deprecation_wrapper.py:119] From C:\ProgramData\Anaconda3\envs\deepai\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0518 00:05:54.026998 13960 deprecation_wrapper.py:119] From C:\ProgramData\Anaconda3\envs\deepai\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0518 00:05:54.447614 13960 depr

In [11]:
print(A)
print('Mean of the 50 mean squared values is: ', statistics.mean(A))
print('Standard deviation of the 50 mean squared values is: ', statistics.stdev(A))

[120.40174333143992, 119.58039242072309, 124.93540963751859, 116.15032766527676, 115.46008428994139, 115.3323405516632, 116.89720295101627, 124.41677611769818, 103.61566845687615, 95.29378829634535, 93.33303467679197, 93.521451540664, 88.11340830605148, 87.92756248935, 88.60022290150704, 87.56772657494648, 90.52108871959055, 87.58057521677067, 87.70157025165, 91.7943769263405, 87.9348998666848, 87.35547232062653, 90.07011503702843, 86.9174346265975, 91.3153520556695, 86.60066679728683, 86.4177672719714, 84.73354980013772, 78.91353578181119, 71.3564455244895, 69.92334267264043, 68.26158067030889, 66.5526646316043, 61.84913427759023, 57.07851193877983, 54.257322936991216, 51.38239972543208, 49.69673445651606, 48.586293394617265, 48.878597709932166, 48.76285348642812, 47.69769991475013, 46.97124622224978, 47.18275185422689, 48.22216970173637, 47.23005405647586, 48.8421831289067, 47.25762387351184, 46.71574277292711, 48.59237327035506]
Mean of the 50 mean squared values is:  79.08602542200

# Part B - Using normalized version of data in X

In [12]:
# Model with 50 epochs, 1 hidden layer, 10 nodes in hidden layer and NORMALIZED data

X= preprocessing.StandardScaler().fit(X).transform(X)
B = deep_model(50, 1, 10)

In [13]:
print(B)
print('Mean of the 50 mean squared values is: ', statistics.mean(B))
print('Standard deviation of the 50 mean squared values is: ', statistics.stdev(B))

[183.12820774540745, 144.01187541189296, 125.25752288478814, 104.87452239709569, 80.21793064029384, 51.36884444962975, 45.85585882960775, 44.5341971562685, 43.720876293540286, 42.936776569180445, 42.09694617380269, 41.748371987277, 41.648552910376665, 41.68121533228257, 41.75695064870134, 41.50243562575346, 41.506070175979254, 41.947713220783264, 42.2117663930607, 42.748887782008595, 43.04718803694988, 42.8350444029872, 42.88347096972558, 42.979351382056926, 43.02047407352882, 42.98489401402928, 42.82017083480734, 42.839252740719, 42.53375381336428, 42.88189203209825, 42.92271979932429, 43.13971507470601, 42.9924744894949, 43.0218451234622, 43.074665563073914, 43.004753427040356, 42.860694253198325, 42.978999497113165, 42.98631435318016, 42.80232640252092, 42.76997201034852, 42.82751892109299, 42.16959904755944, 42.003010440736155, 42.14154716916302, 42.215770613019906, 41.97964671572024, 41.978035501664294, 42.073464756300126, 42.04832352990923]
Mean of the 50 mean squared values is: 

# Part C - Using 100 epochs, normalized version as in part B

In [14]:
# Model with 100 epochs, 1 hidden layer, 10 nodes in hidden layer and NORMALIZED data

C = deep_model(100, 1, 10)

In [15]:
print(C)
print('Mean of the 50 mean squared values is: ', statistics.mean(C))
print('Standard deviation of the 50 mean squared values is: ', statistics.stdev(C))

[133.77563786959362, 86.30963407371623, 48.20862047109878, 45.362296878041036, 43.929379715580374, 43.047918187334346, 43.29908745237857, 42.913190338761616, 42.94471743766458, 43.078064511676196, 42.74582585514485, 42.71165939076556, 43.54368718660766, 43.66623360625244, 43.873647282167965, 43.74150194377218, 44.056635909232696, 43.98193363438435, 43.90032036022947, 43.86335839390571, 43.75055545245054, 43.90110675477446, 43.821229180822904, 43.875283129940115, 43.84259387366033, 43.817079219835094, 43.898266934376664, 43.982609335689766, 43.89645730502285, 44.0609782076701, 44.17268333018463, 44.03301712720115, 44.2480407734384, 44.2488215890341, 44.28830457299446, 44.3509927963913, 44.29804834938841, 44.45181633096316, 44.20681226609291, 44.353022905205, 44.403674689964475, 44.55810731467934, 44.39258628482559, 44.53690394519882, 44.4070559203296, 44.48814421596825, 44.45212671004311, 44.45780276014926, 44.43776290413315, 44.45806045065717]
Mean of the 50 mean squared values is:  46

# Part D - 3 hidden layers with 10 nodes each and 100 epochs

In [16]:
# Model with 100 epochs, 3 hidden layer, 10 nodes in hidden layer and NORMALIZED data

D = deep_model(100, 3, 10)

In [17]:
print(D)
print('Mean of the 50 mean squared values is: ', statistics.mean(D))
print('Standard deviation of the 50 mean squared values is: ', statistics.stdev(D))

[128.42685829908245, 65.45586684738362, 46.71365336319597, 38.976332880126634, 37.666032220363235, 38.17997680138321, 37.13070795780619, 38.27433011588387, 37.34790940571843, 36.57042442366691, 36.39581998844115, 36.79137006525362, 36.45437685517406, 36.412303787569336, 36.49258568695622, 36.27058179348957, 35.48173106730136, 35.67390292545351, 35.67324316538575, 35.33624804680893, 35.577909526543614, 36.13203572485806, 35.4660188460776, 35.34628764190403, 34.81996984786653, 35.21163991109687, 33.98652786035466, 34.008129675210476, 34.02010588742941, 33.63388075856855, 33.783374986377176, 34.118767727266956, 33.585764712625426, 33.91899022500759, 33.9911149486202, 33.77668510490143, 33.86868396584175, 33.87957862505754, 33.22981273006945, 32.601226640241755, 32.81990343475184, 32.41683691907132, 32.29825831575743, 32.78832847746887, 33.01446714508953, 32.321052626840924, 31.922299532252005, 32.41685256792165, 32.347997683985994, 32.209950458262284]
Mean of the 50 mean squared values is