## A. Build a baseline model

In [37]:
# Import all the required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import mean_squared_error


# Download data from the given link
concrete_data = pd.read_csv('https://cocl.us/concrete_data')
concrete_data.shape

(1030, 9)

In [38]:
# Separating data into predictors and target
concrete_data_columns = concrete_data.columns
predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']]
target = concrete_data['Strength']
predictors.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [39]:
# Note columns to build a model
n_cols = predictors.shape[1]
n_cols

8

In [40]:
# Build a model

def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# build the model
model = regression_model()


In [41]:
total_rounds = 50
mean_squared_errors = []
for i in range(0, total_rounds):
    # Split the labelled data into Train set & Train set
    X_train, X_test, y_train, y_test = train_test_split( predictors, target, test_size=0.3, random_state=i+2)
    
    #Train the model
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)

    # evaluate the model
    mse = model.evaluate(X_test, y_test, verbose=0)
    print("MSE for Round "+str(i+1)+": "+str(mse)+"\n")
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

Train on 504 samples, validate on 217 samples
Epoch 1/50
 - 0s - loss: 1396.2308 - val_loss: 970.2583
Epoch 2/50
 - 0s - loss: 1016.3853 - val_loss: 668.7350
Epoch 3/50
 - 0s - loss: 711.4084 - val_loss: 416.6903
Epoch 4/50
 - 0s - loss: 508.7454 - val_loss: 303.1306
Epoch 5/50
 - 0s - loss: 396.2853 - val_loss: 254.6479
Epoch 6/50
 - 0s - loss: 337.6540 - val_loss: 222.5295
Epoch 7/50
 - 0s - loss: 306.6736 - val_loss: 209.0795
Epoch 8/50
 - 0s - loss: 281.6236 - val_loss: 201.3846
Epoch 9/50
 - 0s - loss: 262.5533 - val_loss: 187.3416
Epoch 10/50
 - 0s - loss: 242.4723 - val_loss: 175.7151
Epoch 11/50
 - 0s - loss: 231.0506 - val_loss: 176.4926
Epoch 12/50
 - 0s - loss: 219.9827 - val_loss: 164.1790
Epoch 13/50
 - 0s - loss: 206.0455 - val_loss: 154.1881
Epoch 14/50
 - 0s - loss: 197.0292 - val_loss: 150.8615
Epoch 15/50
 - 0s - loss: 189.5102 - val_loss: 147.7283
Epoch 16/50
 - 0s - loss: 182.7801 - val_loss: 153.2810
Epoch 17/50
 - 0s - loss: 177.0564 - val_loss: 137.8013
Epoch 18/

In [42]:
mean_squared_errors = np.array(mean_squared_errors)
mean = np.mean(mean_squared_errors)
standard_deviation = np.std(mean_squared_errors)
print('For Part A')
print("Mean -> "+str(mean)+"\nStandard Deviation -> "+str(standard_deviation))

For Part A
Mean -> 48.31864657389873
Standard Deviation -> 7.718435437295102


## B. Normalize the data

In [43]:
# normalise the data
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()
print(predictors_norm.shape[1])


8


In [44]:
total_rounds = 50
mean_squared_errors = []
for i in range(0, total_rounds):
    # Split the labelled data into Train set & Train set
    X_train, X_test, y_train, y_test = train_test_split( predictors_norm, target, test_size=0.3, random_state=i+2)
    
    #Train the model
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)

    # evaluate the model
    mse = model.evaluate(X_test, y_test, verbose=0)
    print("MSE for Round "+str(i+1)+": "+str(mse)+"\n")
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

Train on 504 samples, validate on 217 samples
Epoch 1/50
 - 0s - loss: 1647.6952 - val_loss: 1540.0349
Epoch 2/50
 - 0s - loss: 1600.8935 - val_loss: 1491.5045
Epoch 3/50
 - 0s - loss: 1555.1465 - val_loss: 1446.4735
Epoch 4/50
 - 0s - loss: 1511.4479 - val_loss: 1405.8234
Epoch 5/50
 - 0s - loss: 1471.3445 - val_loss: 1366.1225
Epoch 6/50
 - 0s - loss: 1432.2846 - val_loss: 1328.2551
Epoch 7/50
 - 0s - loss: 1395.6245 - val_loss: 1290.9294
Epoch 8/50
 - 0s - loss: 1358.5345 - val_loss: 1255.6796
Epoch 9/50
 - 0s - loss: 1323.5003 - val_loss: 1221.3290
Epoch 10/50
 - 0s - loss: 1289.1465 - val_loss: 1188.0605
Epoch 11/50
 - 0s - loss: 1256.2654 - val_loss: 1155.2141
Epoch 12/50
 - 0s - loss: 1223.5395 - val_loss: 1123.6668
Epoch 13/50
 - 0s - loss: 1191.6782 - val_loss: 1093.6528
Epoch 14/50
 - 0s - loss: 1161.6617 - val_loss: 1063.3097
Epoch 15/50
 - 0s - loss: 1131.5867 - val_loss: 1034.2186
Epoch 16/50
 - 0s - loss: 1102.1767 - val_loss: 1006.3934
Epoch 17/50
 - 0s - loss: 1074.0732

In [45]:
mean_squared_errors = np.array(mean_squared_errors)
meanB = np.mean(mean_squared_errors)
standard_deviationB = np.std(mean_squared_errors)
print("Part_A\tMean -> "+str(mean)+"\tStandard Deviation -> "+str(standard_deviation))
print("Part_B\tMean -> "+str(meanB)+"\tStandard Deviation -> "+str(standard_deviationB))

Part_A	Mean -> 48.31864657389873	Standard Deviation -> 7.718435437295102
Part_B	Mean -> 58.2462396176907	Standard Deviation -> 67.81759827182972


_After normalizing the data, its observed that Mean of the Mean Squared Errors is decreased while the Standard Deviation is increased._

## C. Increase the number of epochs

In [46]:
total_rounds = 50
mean_squared_errors = []
for i in range(0, total_rounds):
    # Split the labelled data into Train set & Train set
    X_train, X_test, y_train, y_test = train_test_split( predictors_norm, target, test_size=0.3, random_state=i+2)
    
    #Train the model with 100 epochs (50 more than in Part B)
    model.fit(X_train, y_train, validation_split=0.3, epochs=100, verbose=2)

    # evaluate the model
    mse = model.evaluate(X_test, y_test, verbose=0)
    print("MSE for Round "+str(i+1)+": "+str(mse)+"\n")
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

Train on 504 samples, validate on 217 samples
Epoch 1/100
 - 0s - loss: 32.7615 - val_loss: 34.1274
Epoch 2/100
 - 0s - loss: 32.5078 - val_loss: 34.3092
Epoch 3/100
 - 0s - loss: 32.3202 - val_loss: 34.6965
Epoch 4/100
 - 0s - loss: 32.2233 - val_loss: 34.9997
Epoch 5/100
 - 0s - loss: 32.1099 - val_loss: 34.9967
Epoch 6/100
 - 0s - loss: 32.0883 - val_loss: 35.0007
Epoch 7/100
 - 0s - loss: 32.0219 - val_loss: 35.1344
Epoch 8/100
 - 0s - loss: 31.9867 - val_loss: 35.2331
Epoch 9/100
 - 0s - loss: 31.9401 - val_loss: 35.4177
Epoch 10/100
 - 0s - loss: 31.9525 - val_loss: 35.3683
Epoch 11/100
 - 0s - loss: 31.8544 - val_loss: 35.5095
Epoch 12/100
 - 0s - loss: 31.8670 - val_loss: 35.5181
Epoch 13/100
 - 0s - loss: 31.8354 - val_loss: 35.4879
Epoch 14/100
 - 0s - loss: 31.7884 - val_loss: 35.5090
Epoch 15/100
 - 0s - loss: 31.7860 - val_loss: 35.6894
Epoch 16/100
 - 0s - loss: 31.7189 - val_loss: 35.6483
Epoch 17/100
 - 0s - loss: 31.7403 - val_loss: 35.7466
Epoch 18/100
 - 0s - loss: 3

In [47]:
mean_squared_errors = np.array(mean_squared_errors)
meanC = np.mean(mean_squared_errors)
standard_deviationC = np.std(mean_squared_errors)
print("Part_B\tMean -> "+str(meanB)+"\tStandard Deviation -> "+str(standard_deviationB))
print("Part_C\tMean -> "+str(meanC)+"\tStandard Deviation -> "+str(standard_deviationC))

Part_B	Mean -> 58.2462396176907	Standard Deviation -> 67.81759827182972
Part_C	Mean -> 32.45877002686632	Standard Deviation -> 2.8102767111934073


_After increasing the epochs to double, it is observed that not only Mean of the Mean Squared Errors is decreased but the Standard Deviation is also decreased greatly._

## D. Increase the number of hidden layers

In [48]:
# Build a model with increased hidden layers

def new_regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

# build the model
model = new_regression_model()

In [49]:
total_rounds = 50
mean_squared_errors = []
for i in range(0, total_rounds):
    # Split the labelled data into Train set & Train set
    X_train, X_test, y_train, y_test = train_test_split( predictors_norm, target, test_size=0.3, random_state=i+2)
    
    #Train the model with only 50 epochs
    model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)

    # evaluate the model
    mse = model.evaluate(X_test, y_test, verbose=0)
    print("MSE for Round "+str(i+1)+": "+str(mse)+"\n")
    y_pred = model.predict(X_test)
    mean_square_error = mean_squared_error(y_test, y_pred)
    mean_squared_errors.append(mean_square_error)

Train on 504 samples, validate on 217 samples
Epoch 1/50
 - 0s - loss: 1586.4793 - val_loss: 1483.2456
Epoch 2/50
 - 0s - loss: 1570.4431 - val_loss: 1464.1621
Epoch 3/50
 - 0s - loss: 1549.0327 - val_loss: 1438.3080
Epoch 4/50
 - 0s - loss: 1518.7910 - val_loss: 1402.7303
Epoch 5/50
 - 0s - loss: 1477.0132 - val_loss: 1352.5410
Epoch 6/50
 - 0s - loss: 1418.2842 - val_loss: 1283.9806
Epoch 7/50
 - 0s - loss: 1338.1388 - val_loss: 1191.5652
Epoch 8/50
 - 0s - loss: 1232.7386 - val_loss: 1067.5129
Epoch 9/50
 - 0s - loss: 1097.4121 - val_loss: 916.3134
Epoch 10/50
 - 0s - loss: 935.1830 - val_loss: 744.4357
Epoch 11/50
 - 0s - loss: 757.7128 - val_loss: 571.8011
Epoch 12/50
 - 0s - loss: 583.5450 - val_loss: 423.9017
Epoch 13/50
 - 0s - loss: 444.4017 - val_loss: 317.4886
Epoch 14/50
 - 0s - loss: 348.8264 - val_loss: 256.0409
Epoch 15/50
 - 0s - loss: 295.0086 - val_loss: 226.2267
Epoch 16/50
 - 0s - loss: 263.4034 - val_loss: 210.7029
Epoch 17/50
 - 0s - loss: 246.7487 - val_loss: 202

In [50]:
mean_squared_errors = np.array(mean_squared_errors)
meanD = np.mean(mean_squared_errors)
standard_deviationD = np.std(mean_squared_errors)
print("Part_B\tMean -> "+str(meanB)+"\tStandard Deviation -> "+str(standard_deviationB))
print("Part_D\tMean -> "+str(meanC)+"\tStandard Deviation -> "+str(standard_deviationD))

Part_B	Mean -> 58.2462396176907	Standard Deviation -> 67.81759827182972
Part_D	Mean -> 32.45877002686632	Standard Deviation -> 25.89826725655455


_After increasing the hidden layers in the model, it is observed that both Mean of the Mean Squared Errors and Standard Deviation are decreased._

**It means that the model learns more accurately with normalised data, increase in epochs and hidden layers.** 