In [1]:
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

from time import time

from run_notebook import execute_notebook

Using TensorFlow backend.


In [2]:
# Run the dataset notebook so that its variables become available here.
# NOTE(review): presumably this is what defines _X, _Y_encoded and _seed used by
# later cells (none of them is assigned anywhere in this notebook) -- confirm
# against ./data/dataset-sonar.ipynb.
execute_notebook('./data/dataset-sonar.ipynb')

Shape:
(207, 61)
_array:
[[0.0453 0.0523 0.0843 0.0689 0.1183 0.2583 0.2156 0.3481 0.3337 0.2872
  0.4918 0.6552 0.6919 0.7797 0.7464 0.9444 1.0 0.8874 0.8024 0.7818 0.5212
  0.4052 0.3957 0.3914 0.325 0.32 0.3271 0.2767 0.4423 0.2028 0.3788 0.2947
  0.1984 0.2341 0.1306 0.4182 0.3835 0.1057 0.184 0.197 0.1674 0.0583
  0.1401 0.1628 0.0621 0.0203 0.053 0.0742 0.0409 0.0061 0.0125 0.0084
  0.0089 0.0048 0.0094 0.0191 0.014 0.0049 0.0052 0.0044 'R']
 [0.0262 0.0582 0.1099 0.1083 0.0974 0.228 0.2431 0.3771 0.5598 0.6194
  0.6333 0.706 0.5544 0.532 0.6479 0.6931 0.6759 0.7551 0.8929 0.8619
  0.7974 0.6737 0.4293 0.3648 0.5331 0.2413 0.507 0.8533 0.6036 0.8514
  0.8512 0.5045 0.1862 0.2709 0.4232 0.3043 0.6116 0.6756 0.5375 0.4719
  0.4647 0.2587 0.2129 0.2222 0.2111 0.0176 0.1348 0.0744 0.013 0.0106
  0.0033 0.0232 0.0166 0.0095 0.018 0.0244 0.0316 0.0164 0.0095 0.0078 'R']
 [0.01 0.0171 0.0623 0.0205 0.0205 0.0368 0.1098 0.1276 0.0598 0.1264
  0.0881 0.1992 0.0184 0.2261 0.1729 0.2131 0.0

In [3]:
def create_baseline():
    """Build and compile the baseline binary classifier.

    Architecture: 60 input features -> Dense(60, relu) -> Dense(1, sigmoid).
    Compiled with binary cross-entropy loss, the Adam optimizer and accuracy
    as the reported metric.

    Returns:
        The compiled Sequential model.
    """
    layers = (
        Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    )
    network = Sequential()
    for layer in layers:
        network.add(layer)
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

def create_smaller():
    """Build and compile a narrower variant of the baseline classifier.

    Architecture: 60 input features -> Dense(30, relu) -> Dense(1, sigmoid).
    The hidden layer has 30 units instead of the baseline's 60; loss,
    optimizer and metric match the baseline.

    Returns:
        The compiled Sequential model.
    """
    layers = (
        Dense(30, input_dim=60, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    )
    network = Sequential()
    for layer in layers:
        network.add(layer)
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

def create_larger():
    """Build and compile a deeper variant of the baseline classifier.

    Architecture: 60 input features -> Dense(60, relu) -> Dense(30, relu)
    -> Dense(1, sigmoid). Loss, optimizer and metric match the baseline.

    Returns:
        The compiled Sequential model.
    """
    layers = (
        Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'),
        Dense(30, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    )
    network = Sequential()
    for layer in layers:
        network.add(layer)
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

In [4]:
# Wrap the baseline model builder so it can be used as a scikit-learn estimator.
_estimator = KerasClassifier(
    build_fn=create_baseline,
    epochs=100,
    batch_size=5,
    verbose=0,
)

In [5]:
# Shuffled, stratified 10-fold splitter shared by every experiment below.
# NOTE(review): _seed is not assigned in this notebook; presumably it comes
# from the dataset notebook executed earlier -- confirm.
_kfold = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=_seed,
)

In [6]:
# Cross-validate the baseline estimator on the unscaled inputs and report
# the wall-clock duration in minutes.
_start = time()
_results = cross_val_score(estimator=_estimator, X=_X, y=_Y_encoded, cv=_kfold)
_end = time()
_elapsed_minutes = (_end - _start) / 60
print('time: {:.2f} minutes'.format(_elapsed_minutes))

time: 0.92 minutes


In [7]:
# Mean and standard deviation of the cross-validation scores.
_score_mean = _results.mean()
_score_std = _results.std()
print('Baseline: {:.2%} ({:.2%})'.format(_score_mean, _score_std))

Baseline: 81.09% (9.23%)


In [8]:
# It is a good practice to prepare your data before modeling. 
# Neural network models benefit especially from consistent input values, both in scale and distribution. 
# An effective data preparation scheme for tabular data when building neural network models is standardization. 
# This is where the data is rescaled such that the mean value for each attribute is 0 and the standard deviation is 1.
# This preserves Gaussian and Gaussian-like distributions whilst normalizing the central tendencies for each attribute.

In [9]:
# Ordered (name, step) pairs for the standardized pipeline; filled in below.
_estimators = list()

In [10]:
# Step 1: standardize the input features.
_scaler_step = ('Standardize', StandardScaler())
_estimators.append(_scaler_step)

In [11]:
# Step 2: the wrapped baseline network defined earlier.
_mlp_step = ('mlp', _estimator)
_estimators.append(_mlp_step)

In [12]:
# Chain the scaler and the network into a single estimator.
_pipeline = Pipeline(steps=_estimators)

In [13]:
# Cross-validate the standardized pipeline and report the wall-clock
# duration in minutes.
_start = time()
_results = cross_val_score(estimator=_pipeline, X=_X, y=_Y_encoded, cv=_kfold)
_end = time()
_elapsed_minutes = (_end - _start) / 60
print('time: {:.2f} minutes'.format(_elapsed_minutes))



time: 0.95 minutes


In [14]:
# Mean and standard deviation of the cross-validation scores.
_score_mean = _results.mean()
_score_std = _results.std()
print('Standardized: {:.2%} ({:.2%})'.format(_score_mean, _score_std))

Standardized: 83.54% (5.44%)


In [15]:
# There are many things to tune on a neural network, such as the weight initialization, activation functions,
# optimization procedure and so on. 
# One aspect that may have an outsized effect is the structure of the network itself, called the network topology. 
# In this section we take a look at two experiments on the structure of the network: 
# making it smaller and making it larger.

In [16]:
# I suspect that there is a lot of redundancy in the input variables for this problem. 
# The data describes the same signal from different angles. 
# Perhaps some of those angles are more relevant than others. 
# We can force a type of feature extraction by the network by restricting the representational space in the 
# first hidden layer.

# In this experiment we take our baseline model with 60 neurons in the hidden layer and reduce it by half to 30. 
# This will put pressure on the network during training to pick out the most important structure in the input data 
# to model.

In [17]:
# Ordered (name, step) pairs for the smaller-network pipeline; filled in below.
_estimators_small = list()

In [18]:
# Scaler first, then the 30-unit network wrapped for scikit-learn.
_estimators_small.extend([
    ('Standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_smaller, epochs=100, batch_size=5, verbose=0)),
])

In [19]:
# Chain the scaler and the smaller network into a single estimator.
_pipeline_small = Pipeline(steps=_estimators_small)

In [20]:
# Cross-validate the smaller-network pipeline and report the wall-clock
# duration in minutes.
_start = time()
_results = cross_val_score(estimator=_pipeline_small, X=_X, y=_Y_encoded, cv=_kfold)
_end = time()
_elapsed_minutes = (_end - _start) / 60
print('time: {:.2f} minutes'.format(_elapsed_minutes))



time: 0.99 minutes


In [21]:
# Mean and standard deviation of the cross-validation scores.
_score_mean = _results.mean()
_score_std = _results.std()
print('Small: {:.2%} ({:.2%})'.format(_score_mean, _score_std))

Small: 83.99% (6.26%)


In [22]:
# A neural network topology with more layers offers more opportunity for the network to extract key features 
# and recombine them in useful nonlinear ways.
# The idea here is that the network is given the opportunity to model all input variables before being bottlenecked 
# and forced to halve the representational capacity, much like we did in the experiment above with the smaller network. Instead of squeezing the representation of the inputs themselves, we have an additional hidden layer to aid in the process.

In [23]:
# Scaler first, then the two-hidden-layer network wrapped for scikit-learn.
_estimators_large = [
    ('Standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_larger, epochs=100, batch_size=5, verbose=0)),
]

In [24]:
# Chain the scaler and the larger network into a single estimator.
_pipeline_large = Pipeline(steps=_estimators_large)

In [25]:
# Cross-validate the larger-network pipeline and report the wall-clock
# duration in minutes.
_start = time()
_results = cross_val_score(estimator=_pipeline_large, X=_X, y=_Y_encoded, cv=_kfold)
_end = time()
_elapsed_minutes = (_end - _start) / 60
print('time: {:.2f} minutes'.format(_elapsed_minutes))



time: 1.16 minutes


In [26]:
# Report mean and standard deviation of the cross-validation scores for the
# larger network. The label is 'Large' because _results at this point holds
# the scores of _pipeline_large, not of the smaller model.
print('Large: {:.2%} ({:.2%})'.format(_results.mean(), _results.std()))

Small: 83.54% (5.44%)
