# Keras Project

import, data load and processing, initialize the first model

In [1]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.utils import to_categorical
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np

Using TensorFlow backend.


In [2]:
#load the data
concrete_data = pd.read_csv('https://cocl.us/concrete_data')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [3]:
#split into features and target
concrete_data_columns = concrete_data.columns
features = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength']

In [4]:
#init the model as per instruction
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(8,)))
    #1 hidden layer with 10 nodes as per instruction
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

### First let's do it manually to see if everything works

In [5]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3)

In [6]:
model = regression_model()

In [7]:
model.fit(X_train, y_train, epochs=50, verbose=2)

Epoch 1/50
 - 0s - loss: 30784.5894
Epoch 2/50
 - 0s - loss: 15551.0500
Epoch 3/50
 - 0s - loss: 13056.3726
Epoch 4/50
 - 0s - loss: 10937.3023
Epoch 5/50
 - 0s - loss: 9267.1968
Epoch 6/50
 - 0s - loss: 8018.7064
Epoch 7/50
 - 0s - loss: 7063.0099
Epoch 8/50
 - 0s - loss: 6297.5538
Epoch 9/50
 - 0s - loss: 5642.6012
Epoch 10/50
 - 0s - loss: 5077.2302
Epoch 11/50
 - 0s - loss: 4567.1308
Epoch 12/50
 - 0s - loss: 4127.1998
Epoch 13/50
 - 0s - loss: 3695.7099
Epoch 14/50
 - 0s - loss: 3303.1441
Epoch 15/50
 - 0s - loss: 2955.5620
Epoch 16/50
 - 0s - loss: 2626.0471
Epoch 17/50
 - 0s - loss: 2337.8230
Epoch 18/50
 - 0s - loss: 2113.1054
Epoch 19/50
 - 0s - loss: 1890.5546
Epoch 20/50
 - 0s - loss: 1715.4171
Epoch 21/50
 - 0s - loss: 1566.7228
Epoch 22/50
 - 0s - loss: 1435.3728
Epoch 23/50
 - 0s - loss: 1314.0665
Epoch 24/50
 - 0s - loss: 1209.2034
Epoch 25/50
 - 0s - loss: 1119.1495
Epoch 26/50
 - 0s - loss: 1024.0677
Epoch 27/50
 - 0s - loss: 945.2876
Epoch 28/50
 - 0s - loss: 874.6202

<keras.callbacks.History at 0x7f061dd70940>

In [8]:
model.evaluate(X_test, y_test)



168.70741721341525

# Part A

In [9]:
#define a split & train function on non-normalized data, return the mean square error from the test data
model = regression_model()
def split_train_evaluate():
    #1 random split
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3)
    #fit with 50 epochs
    model.fit(X_train, y_train, epochs=50, verbose=0)
    #return the mean square error based on the test data
    return model.evaluate(X_test, y_test)

In [10]:
errors = []
#split, train and evaluate the model 50 times
for i in range(50):
    errors.append(split_train_evaluate())



In [11]:
#report the results
print("The mean of the mean square errors is:", np.mean(errors))
print("The standard deviation of the mean square errors is:", np.std(errors))

The mean of the mean square errors is: 46.802694510179045
The standard deviation of the mean square errors is: 10.138076957262614


# Part B

In [12]:
#normalize the data using MinMaxScaler from sklearn preprocessing
scaler = StandardScaler()
transform = scaler.fit_transform(features)
features_normalized = pd.DataFrame(transform,columns=features.columns)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [13]:
#define a split & train function on normalized data, return the mean square error from the test data
model = regression_model()
def split_train_evaluate():
    #1 random split
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3)
    #fit with 50 epochs
    model.fit(X_train, y_train, epochs=50, verbose=0)
    #return the mean square error based on the test data
    return model.evaluate(X_test, y_test)

In [14]:
errors = []
#split, train and evaluate the model 50 times
for i in range(50):
    errors.append(split_train_evaluate())



In [15]:
#report the results
print("The mean of the mean square errors is:", np.mean(errors))
print("The standard deviation of the mean square errors is:", np.std(errors))

The mean of the mean square errors is: 73.41971399387495
The standard deviation of the mean square errors is: 31.492504945316803


# Part C

In [16]:
#repeat normalization for full run
scaler = StandardScaler()
transform = scaler.fit_transform(features)
features_normalized = pd.DataFrame(transform,columns=features.columns)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [17]:
#define a split & train function on normalized data and with a 100 epochs, return the mean square error from the test data
model = regression_model()
def split_train_evaluate():
    #1 random split
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3)
    #fit with a 100 epochs
    model.fit(X_train, y_train, epochs=100, verbose=0)
    #return the mean square error based on the test data
    return model.evaluate(X_test, y_test)

In [18]:
errors = []
#split, train and evaluate the model 50 times
for i in range(50):
    errors.append(split_train_evaluate())



In [19]:
#report the results
print("The mean of the mean square errors is:", np.mean(errors))
print("The standard deviation of the mean square errors is:", np.std(errors))

The mean of the mean square errors is: 65.34272963465224
The standard deviation of the mean square errors is: 30.17768826081141


# Part D

In [20]:
#repeat normalization for full run
scaler = StandardScaler()
transform = scaler.fit_transform(features)
features_normalized = pd.DataFrame(transform,columns=features.columns)

  return self.partial_fit(X, y)
  return self.fit(X, **fit_params).transform(X)


In [21]:
#init the model as per instruction, this time with 3 hidden layers
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(8,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [22]:
#define a split & train function on normalized data and with a 50 epochs, return the mean square error from the test data
model = regression_model()
def split_train_evaluate():
    #1 random split
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3)
    #fit with 50 epochs
    model.fit(X_train, y_train, epochs=50, verbose=0)
    #return the mean square error based on the test data
    return model.evaluate(X_test, y_test)

In [23]:
errors = []
#split, train and evaluate the model 50 times
for i in range(50):
    errors.append(split_train_evaluate())



In [24]:
#report the results
print("The mean of the mean square errors is:", np.mean(errors))
print("The standard deviation of the mean square errors is:", np.std(errors))

The mean of the mean square errors is: 38.12203058792163
The standard deviation of the mean square errors is: 9.60381859939918
