# Preprocessing Data

In [39]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import statistics 
from keras.layers.convolutional import Conv2D # to add convolutional layers
from keras.layers.convolutional import MaxPooling2D # to add pooling layers
from keras.layers import Flatten # to flatten data for fully connected layers
from keras.utils import to_categorical

In [40]:
concrete_data = pd.read_csv('https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DL0101EN/labs/data/concrete_data.csv')
concrete_data.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


In [41]:
concrete_data.describe()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age,Strength
count,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0,1030.0
mean,281.167864,73.895825,54.18835,181.567282,6.20466,972.918932,773.580485,45.662136,35.817961
std,104.506364,86.279342,63.997004,21.354219,5.973841,77.753954,80.17598,63.169912,16.705742
min,102.0,0.0,0.0,121.8,0.0,801.0,594.0,1.0,2.33
25%,192.375,0.0,0.0,164.9,0.0,932.0,730.95,7.0,23.71
50%,272.9,22.0,0.0,185.0,6.4,968.0,779.5,28.0,34.445
75%,350.0,142.95,118.3,192.0,10.2,1029.4,824.0,56.0,46.135
max,540.0,359.4,200.1,247.0,32.2,1145.0,992.6,365.0,82.6


In [42]:
concrete_data_columns = concrete_data.columns

predictors = concrete_data[concrete_data_columns[concrete_data_columns != 'Strength']] # all columns except Strength
target = concrete_data['Strength'] # Strength column

In [43]:
predictors.head()


Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360


In [44]:
target.head()

0    79.99
1    61.89
2    40.27
3    41.05
4    44.30
Name: Strength, dtype: float64

In [45]:
n_cols = predictors.shape[1] # number of predictors

In [46]:

# Spliting data into train and test
X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, shuffle= True)


# Question  A

In [47]:
# define regression model
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(1))    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [48]:
# build the model
model = regression_model()

In [49]:
# fit the model
model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)

Train on 504 samples, validate on 217 samples
Epoch 1/50
 - 1s - loss: 48824.2296 - val_loss: 35035.1900
Epoch 2/50
 - 0s - loss: 22158.7443 - val_loss: 14444.7104
Epoch 3/50
 - 0s - loss: 8575.1145 - val_loss: 5656.9746
Epoch 4/50
 - 0s - loss: 3820.8212 - val_loss: 2875.2962
Epoch 5/50
 - 0s - loss: 2594.9920 - val_loss: 2268.3612
Epoch 6/50
 - 0s - loss: 2421.4598 - val_loss: 2122.6247
Epoch 7/50
 - 0s - loss: 2351.7446 - val_loss: 2067.5120
Epoch 8/50
 - 0s - loss: 2277.4045 - val_loss: 2015.1545
Epoch 9/50
 - 0s - loss: 2207.0798 - val_loss: 1955.1985
Epoch 10/50
 - 0s - loss: 2137.8614 - val_loss: 1900.7209
Epoch 11/50
 - 0s - loss: 2066.5463 - val_loss: 1852.1631
Epoch 12/50
 - 0s - loss: 2004.5680 - val_loss: 1792.0040
Epoch 13/50
 - 0s - loss: 1934.7491 - val_loss: 1753.4886
Epoch 14/50
 - 0s - loss: 1872.5980 - val_loss: 1692.2984
Epoch 15/50
 - 0s - loss: 1810.1260 - val_loss: 1634.3303
Epoch 16/50
 - 0s - loss: 1744.4413 - val_loss: 1589.2037
Epoch 17/50
 - 0s - loss: 1660.

<keras.callbacks.History at 0x7fc1ec381b00>

In [50]:
# Predict on the test data: y_pred
y_pred = model.predict(X_test)

# Compute and print MSE
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error: {}".format(mse))

Mean Squared Error: 163.20515667090348


In [51]:
mse_list = []
for i in range(50):
    
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, shuffle= True)
    mse = mean_squared_error(y_test, y_pred)
    mse_list.append(mse)
    
    
mse_list

[386.54535660682643,
 428.8132389200628,
 409.2536963406737,
 368.6527877872977,
 425.62434115541123,
 343.3255904480354,
 472.82584860399146,
 361.79902473098895,
 451.7537243053626,
 449.6249244175566,
 419.8360339513665,
 413.0305769135587,
 388.56935320244736,
 411.6711421613654,
 411.1515155995765,
 471.3705603988281,
 412.5746721457057,
 382.3217845862902,
 438.4991925124855,
 367.05332760220915,
 430.33980730906654,
 402.9976190259993,
 414.85905210539477,
 406.1613241406269,
 384.7424184677943,
 422.3141439250246,
 432.0489294341329,
 386.2295238104445,
 423.96191346085726,
 406.70704776759266,
 419.16076502383135,
 361.47798468671175,
 460.5269388291223,
 418.1976680632636,
 417.03381330963225,
 404.5076889251194,
 372.85057229155797,
 379.8347188202574,
 448.6499170648154,
 366.21588440914996,
 461.7304781388444,
 424.3468127832682,
 439.76066027802193,
 366.4696309137515,
 407.5239810787242,
 414.54485477300636,
 448.7490759757901,
 429.53146294231516,
 385.1560299558354,
 4

In [52]:
# report  mean and the standard deviation
# Prints standard deviation 
print("Standard Deviation is % s " 
                % (statistics.stdev(mse_list))) 

print("Mean is % s " 
                % (statistics.mean(mse_list)))

Standard Deviation is 30.69861923570335 
Mean is 411.1428282642758 


# Question B

In [53]:
#normalize data
predictors_norm = (predictors - predictors.mean()) / predictors.std()
predictors_norm.head()

Unnamed: 0,Cement,Blast Furnace Slag,Fly Ash,Water,Superplasticizer,Coarse Aggregate,Fine Aggregate,Age
0,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,0.862735,-1.217079,-0.279597
1,2.476712,-0.856472,-0.846733,-0.916319,-0.620147,1.055651,-1.217079,-0.279597
2,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,3.55134
3,0.491187,0.79514,-0.846733,2.174405,-1.038638,-0.526262,-2.239829,5.055221
4,-0.790075,0.678079,-0.846733,0.488555,-1.038638,0.070492,0.647569,4.976069


In [54]:
n_cols = predictors_norm.shape[1] # number of predictors

In [55]:
X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, shuffle= True)

In [56]:
# fit the model
model.fit(X_train, y_train, validation_split=0.3, epochs=50, verbose=2)

Train on 504 samples, validate on 217 samples
Epoch 1/50
 - 0s - loss: 1495.1331 - val_loss: 1583.6771
Epoch 2/50
 - 0s - loss: 1470.2810 - val_loss: 1557.3611
Epoch 3/50
 - 0s - loss: 1441.4334 - val_loss: 1528.5282
Epoch 4/50
 - 0s - loss: 1411.3508 - val_loss: 1497.4633
Epoch 5/50
 - 2s - loss: 1378.4899 - val_loss: 1465.6549
Epoch 6/50
 - 0s - loss: 1345.3087 - val_loss: 1432.4937
Epoch 7/50
 - 0s - loss: 1310.8164 - val_loss: 1398.6730
Epoch 8/50
 - 0s - loss: 1276.4887 - val_loss: 1364.2415
Epoch 9/50
 - 0s - loss: 1241.3943 - val_loss: 1329.7744
Epoch 10/50
 - 0s - loss: 1206.3092 - val_loss: 1295.7470
Epoch 11/50
 - 1s - loss: 1171.5461 - val_loss: 1261.8407
Epoch 12/50
 - 0s - loss: 1138.0296 - val_loss: 1227.4128
Epoch 13/50
 - 0s - loss: 1103.7415 - val_loss: 1193.8384
Epoch 14/50
 - 0s - loss: 1071.1839 - val_loss: 1160.3653
Epoch 15/50
 - 0s - loss: 1038.0430 - val_loss: 1128.3649
Epoch 16/50
 - 0s - loss: 1006.3132 - val_loss: 1097.1034
Epoch 17/50
 - 0s - loss: 976.2272 

<keras.callbacks.History at 0x7fc1ec09f438>

In [57]:
# Predict on the test data: y_pred
y_pred = model.predict(X_test)

# Compute and print RMSE
Nmse = mean_squared_error(y_test, y_pred)
print(" Mean Squared Error: {}".format(mse))

 Mean Squared Error: 406.21400311379824


In [58]:
Nmse_list = []
for i in range(50):
    
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, shuffle= True)
    mse = mean_squared_error(y_test, y_pred)
    Nmse_list.append(mse)
    
    
Nmse_list

[605.335736999527,
 625.456398731581,
 653.6370611391258,
 618.8607068587113,
 582.6181981826719,
 607.2162941101079,
 597.9225342865025,
 639.07094641029,
 711.8820069462543,
 644.1969619974974,
 633.170512744409,
 681.5651845590578,
 601.0262128460589,
 604.8679649077138,
 641.7515812153731,
 589.1606616891193,
 662.8324908765834,
 661.580741140916,
 573.8369360877601,
 655.75398156909,
 595.8940965228718,
 662.6375762802419,
 577.5507101127049,
 658.3260614213424,
 577.529127533539,
 635.1444463504137,
 661.2138639436362,
 660.6920751444738,
 642.775164068665,
 574.3232562690449,
 691.0593856081915,
 574.9942305588124,
 691.1958458189762,
 569.0036387433721,
 634.7041992373516,
 668.2954740002745,
 562.0722578673228,
 627.9591855480785,
 558.3724149797204,
 561.2421247203363,
 639.2633735435096,
 595.7166246207535,
 569.7112398429753,
 625.4466742484445,
 651.1223668605026,
 683.1486000423027,
 701.9086431533805,
 575.2504319455093,
 605.6203689800063,
 624.9762296238895]

In [59]:
# report  mean and the standard deviation
# Prints standard deviation 
print("Standard Deviation is % s " 
                % (statistics.stdev(Nmse_list))) 

print("Mean is % s " 
                % (statistics.mean(Nmse_list)))

Standard Deviation is 41.29833412700124 
Mean is 624.9778560177799 


# Question C

In [60]:
# fit the model 100 epochs
model1 = regression_model()
model1.fit(X_train, y_train, validation_split=0.3, epochs=100, verbose=2)

Train on 504 samples, validate on 217 samples
Epoch 1/100
 - 1s - loss: 1550.6311 - val_loss: 1456.4155
Epoch 2/100
 - 0s - loss: 1539.6487 - val_loss: 1445.9152
Epoch 3/100
 - 0s - loss: 1528.2644 - val_loss: 1435.2055
Epoch 4/100
 - 0s - loss: 1516.7076 - val_loss: 1424.2443
Epoch 5/100
 - 0s - loss: 1504.9742 - val_loss: 1412.9120
Epoch 6/100
 - 0s - loss: 1492.7072 - val_loss: 1401.3310
Epoch 7/100
 - 0s - loss: 1480.0853 - val_loss: 1389.3386
Epoch 8/100
 - 0s - loss: 1467.0342 - val_loss: 1376.5548
Epoch 9/100
 - 0s - loss: 1453.1445 - val_loss: 1363.2059
Epoch 10/100
 - 0s - loss: 1438.7753 - val_loss: 1349.0308
Epoch 11/100
 - 0s - loss: 1423.4500 - val_loss: 1334.2691
Epoch 12/100
 - 0s - loss: 1407.1970 - val_loss: 1318.8805
Epoch 13/100
 - 0s - loss: 1390.5681 - val_loss: 1302.1931
Epoch 14/100
 - 0s - loss: 1372.4277 - val_loss: 1285.1233
Epoch 15/100
 - 0s - loss: 1353.7848 - val_loss: 1267.1164
Epoch 16/100
 - 0s - loss: 1334.2605 - val_loss: 1248.2961
Epoch 17/100
 - 0s 

<keras.callbacks.History at 0x7fc28c960320>

In [61]:
# Predict on the test data: y_pred
y_pred = model1.predict(X_test)

# Compute and print RMSE
Nmse = mean_squared_error(y_test, y_pred)
print(" Mean Squared Error: {}".format(mse))

 Mean Squared Error: 624.9762296238895


In [62]:
Nmse_list = []
for i in range(50):
    
    X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, shuffle= True)
    mse = np.sqrt(mean_squared_error(y_test, y_pred))
    Nmse_list.append(mse)
    
    
Nmse_list

[22.038028497017326,
 22.3887836420074,
 21.627519592659368,
 20.331377896321985,
 22.031513590193722,
 21.809347930070626,
 22.141184861954613,
 22.261568965589596,
 22.80003254596999,
 22.382090090888585,
 23.084892387467256,
 22.05114017975416,
 22.425233943505287,
 21.33890250622632,
 22.379691543865576,
 22.89533599148827,
 21.990313801994137,
 20.234188971363082,
 22.51829866334692,
 24.39074851532772,
 20.60416533852088,
 21.54201084415925,
 22.68946003823464,
 22.46644910409977,
 22.6067807565032,
 22.532432142901595,
 23.058484308251266,
 22.149183720850157,
 23.194324478653762,
 21.376024811368787,
 22.13493577020451,
 21.97106281589691,
 23.902103550173816,
 20.6037568606301,
 21.423310274257133,
 23.573438492347666,
 22.612332751562494,
 23.168161664734093,
 22.951174679201827,
 22.123751945787788,
 23.09991141125784,
 22.845840928174383,
 21.86834060123185,
 22.962513089389372,
 21.70005215739133,
 22.256363505576342,
 22.207413947712908,
 20.79943913733367,
 21.7816472787

In [63]:
# report  mean and the standard deviation
# Prints standard deviation 
print("Standard Deviation is % s " 
                % (statistics.stdev(Nmse_list))) 

print("Mean is % s " 
                % (statistics.mean(Nmse_list)))

Standard Deviation is 0.8684060768570312 
Mean is 22.205059452816712 


# Question D

In [64]:
target = to_categorical(target, )
X_train, X_test, y_train, y_test = train_test_split(predictors_norm, target, test_size=0.3, shuffle= True)

n_cols = predictors_norm.shape[1]
n_output = target.shape[1]

print(n_cols)
print(n_output)

8
83


In [65]:
y_train

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [78]:
# define RN model
    model = Sequential()
    model.add(Dense(10, activation='relu', input_shape=(n_cols,)))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(10, activation='relu'))
    model.add(Dense(n_output, activation='softmax'))
    
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])
    model.summary()
  

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_33 (Dense)             (None, 10)                90        
_________________________________________________________________
dense_34 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_35 (Dense)             (None, 10)                110       
_________________________________________________________________
dense_36 (Dense)             (None, 83)                913       
Total params: 1,223
Trainable params: 1,223
Non-trainable params: 0
_________________________________________________________________


In [79]:
#fit model
model.fit(X_train, y_train, epochs=50, verbose=2)

Epoch 1/50
 - 2s - loss: 0.0119 - mean_squared_error: 0.0119
Epoch 2/50
 - 0s - loss: 0.0119 - mean_squared_error: 0.0119
Epoch 3/50
 - 0s - loss: 0.0119 - mean_squared_error: 0.0119
Epoch 4/50
 - 0s - loss: 0.0119 - mean_squared_error: 0.0119
Epoch 5/50
 - 0s - loss: 0.0119 - mean_squared_error: 0.0119
Epoch 6/50
 - 0s - loss: 0.0119 - mean_squared_error: 0.0119
Epoch 7/50
 - 0s - loss: 0.0119 - mean_squared_error: 0.0119
Epoch 8/50
 - 0s - loss: 0.0119 - mean_squared_error: 0.0119
Epoch 9/50
 - 0s - loss: 0.0119 - mean_squared_error: 0.0119
Epoch 10/50
 - 0s - loss: 0.0118 - mean_squared_error: 0.0118
Epoch 11/50
 - 0s - loss: 0.0118 - mean_squared_error: 0.0118
Epoch 12/50
 - 0s - loss: 0.0118 - mean_squared_error: 0.0118
Epoch 13/50
 - 0s - loss: 0.0118 - mean_squared_error: 0.0118
Epoch 14/50
 - 0s - loss: 0.0118 - mean_squared_error: 0.0118
Epoch 15/50
 - 0s - loss: 0.0118 - mean_squared_error: 0.0118
Epoch 16/50
 - 0s - loss: 0.0118 - mean_squared_error: 0.0118
Epoch 17/50
 - 0s

<keras.callbacks.History at 0x7fc1483cfe48>

In [72]:
#predict model model
#model.predict(X_test)

In [84]:
_ , mean_squared_error = model.evaluate(X_test, y_test)
print('mean_squared_error:%s' % (mean_squared_error))

mean_squared_error:0.011815901586490542


In [86]:
mse_list = []
for i in range(50):
    
    X_train, X_test, y_train, y_test = train_test_split(predictors, target, test_size=0.3, shuffle= True)
    _ , mean_squared_error = model.evaluate(X_test, y_test)
    mse_list.append(mean_squared_error)
    
    
mse_list



[0.02378445752808963,
 0.023160603036049113,
 0.02347253015246785,
 0.023472530411670894,
 0.023940421086298996,
 0.02378445752808963,
 0.02362849396988026,
 0.02378445746178187,
 0.023706475748984945,
 0.023628493903572505,
 0.02386243930719431,
 0.023706475748984945,
 0.023550512184747602,
 0.023940421086298996,
 0.023706475748984945,
 0.023316566787153773,
 0.023394548632566212,
 0.02355051219077558,
 0.023394548632566212,
 0.02362849396988026,
 0.02362849396988026,
 0.023316566787153773,
 0.02355051219077558,
 0.023550512184747602,
 0.02386243930719431,
 0.02355051219077558,
 0.023706475748984945,
 0.023706475748984945,
 0.023238584881461553,
 0.023628493903572505,
 0.02355051219077558,
 0.023628493903572505,
 0.02386243930719431,
 0.023472530411670894,
 0.023706475748984945,
 0.02347253034536314,
 0.023472530411670894,
 0.023394548632566212,
 0.02355051212446782,
 0.02378445752808963,
 0.02347253034536314,
 0.02378445752808963,
 0.023940421086298996,
 0.023706475748984945,
 0.0237

In [87]:
# report  mean
print("Mean mse is % s " 
                % (statistics.mean(mse_list)))

Mean mse is 0.023611337949422377 
