#Building a Regression model with keras library

### Importing the required lib and preprocessing the data

In [2]:
#i am importing all the imports required for the upcoming code
import numpy as np
import pandas as pd

In [4]:
concrete_data = pd.read_csv('concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [5]:
#describing the data
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [6]:
concrete_data.shape

(1030, 9)

In [7]:
#checking if there is any null values
concrete_data.isnull().sum()

Cement                0
Blast Furnace Slag    0
Fly Ash               0
Water                 0
Superplasticizer      0
Coarse Aggregate      0
Fine Aggregate        0
Age                   0
Strength              0
dtype: int64

As the data is clean without any empty values lets go ahead and start working on spliting out data into predictors and targets

In [8]:
concrete_data_cols = concrete_data.columns
predictors = concrete_data[concrete_data_cols[concrete_data_cols != 'Strength']]
target = concrete_data['Strength']

# Using the sklearn model to split the test and train model

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(predictors,target, test_size=0.30, random_state=8)

## Building the Model

In [17]:
#Import required from keras
from keras.models import Sequential
from keras.layers import Dense

#Creating the model
def regression_model():
    model = Sequential()
    model.add(Dense(10, activation = 'relu', input_shape = (predictors.shape[1],)))
    model.add(Dense(1))
    model.compile(optimizer = 'adam', loss = 'mean_squared_error')
    return model

model = regression_model()

#Fit the model
fitted = model.fit(X_train, y_train, validation_data = (X_test, y_test),epochs = 50, verbose = 0)

#Puting loss into Dataframe
loss = pd.DataFrame(fitted.history)

##Printing the result of the regression model

In [31]:
#printing the results
print('train [standard deviation / mean]:',round(loss['loss'].std(),2),'/',round(loss['loss'].mean(),2),
      '&','test [standart deviation / mean]:',round(loss['val_loss'].std(),2),'/',round(loss['val_loss'].mean(),2))

train [standard deviation / mean]: 6147.56 / 1583.3 & test [standart deviation / mean]: 3842.21 / 1070.63


#2.Normilizing the data

In [29]:
#Normalize predictors
TrainNorm = (X_train - X_train.mean())/X_train.std()
TestNorm = (X_test - X_test.mean())/X_test.std()

#Fit model on normalized data
fittedNorm = model.fit(TrainNorm, y_train,
                   validation_data = (TestNorm, y_test),
                   epochs = 50, verbose = 0)

dfNorm = pd.DataFrame(fittedNorm.history)

In [32]:
#printing result of the normilized data
print('train [standard deviation / mean]:',round(dfNorm['loss'].std(),2),'/',round(dfNorm['loss'].mean(),2),
      '&','test [standart deviation / mean]:',round(dfNorm['val_loss'].std(),2),'/',round(dfNorm['val_loss'].mean(),2))


train [standard deviation / mean]: 12.64 / 130.88 & test [standart deviation / mean]: 12.36 / 148.08


#3.Increate the number of epochs

In [34]:
#changing the epochs size to 100
fittedNorm_100 = model.fit(TrainNorm, y_train,
                   validation_data = (TestNorm, y_test),
                   epochs = 100, verbose = 0)
#now storing the history into a variable
dfNorm_100 = pd.DataFrame(fittedNorm_100.history)


In [35]:
#printing the result of the program with epochs_100
print('train [standard deviation / mean]:',round(dfNorm_100['loss'].std(),2),'/',round(dfNorm_100['loss'].mean(),2),
      '&','test [standart deviation / mean]:',round(dfNorm_100['val_loss'].std(),2),'/',round(dfNorm_100['val_loss'].mean(),2))


train [standard deviation / mean]: 4.16 / 69.58 & test [standart deviation / mean]: 4.58 / 86.64


#4.Increase the number of hidden layers

In [36]:
def regression_model():
    model_2 = Sequential()
    model_2.add(Dense(10, activation = 'relu', input_shape = (predictors.shape[1],)))
    model_2.add(Dense(10, activation = 'relu'))
    model_2.add(Dense(10, activation = 'relu'))
    model_2.add(Dense(1))
    model_2.compile(optimizer = 'adam', loss = 'mean_squared_error')
    return model_2

model_2 = regression_model()

#Fit the model
fitted_model_2 = model_2.fit(X_train, y_train, validation_data = (X_test, y_test),epochs = 50, verbose = 0)

#Puting loss into Dataframe
loss_model_2 = pd.DataFrame(fitted_model_2.history)

In [38]:
#printing the results of the model built
print('train [standard deviation / mean]:',round(loss_model_2['loss'].std(),2),'/',round(loss_model_2['loss'].mean(),2),
      '&','test [standart deviation / mean]:',round(loss_model_2['val_loss'].std(),2),'/',round(loss_model_2['val_loss'].mean(),2))


train [standard deviation / mean]: 7705.25 / 1555.62 & test [standart deviation / mean]: 2951.72 / 752.71
