Import libraries

In [3]:
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

Random seed

In [4]:
# Seed NumPy's global random number generator with a fixed value.
# NOTE(review): only NumPy is seeded here; whether the Keras backend's own RNG
# is affected is not visible in this notebook — confirm before relying on exact scores.
seed = 7
numpy.random.seed(seed=seed)

In [5]:
# Read the sonar CSV (the file has no header row) and expose it as a raw array.
dataframe = pandas.read_csv("sonar.all-data.csv", header=None)
dataset = dataframe.values
# Columns 0-59 are cast to float and used as inputs; column 60 is kept as the raw label.
X, Y = dataset[:, :60].astype(float), dataset[:, 60]

In [6]:
# Fit a label encoder on the raw labels, then convert them to integer class codes.
encoder = LabelEncoder().fit(Y)
encoded_Y = encoder.transform(Y)

In [9]:
# Count how many samples carry each encoded class label and display the mapping.
unique, counts = numpy.unique(encoded_Y, return_counts=True)
{label: count for label, count in zip(unique, counts)}

{0: 111, 1: 97}

In [10]:
def create_baseline():
    """Build and compile the baseline network.

    The model takes 60 inputs, has a single 60-unit ReLU hidden layer and a
    one-unit sigmoid output layer. Both layers use the 'normal' kernel
    initializer.

    Returns:
        A compiled Keras ``Sequential`` model using binary cross-entropy loss,
        the Adam optimizer and accuracy as the reported metric.
    """
    hidden = Dense(60, input_dim=60, kernel_initializer='normal', activation='relu')
    output = Dense(1, kernel_initializer='normal', activation='sigmoid')
    model = Sequential([hidden, output])
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [11]:
# Wrap the baseline model builder in the Keras scikit-learn classifier wrapper.
estimator = KerasClassifier(
    build_fn=create_baseline,
    epochs=100,
    batch_size=5,
    verbose=0,
)

In [12]:
# 10-fold stratified splitter, shuffled with the fixed seed.
kfold = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=seed,
)

In [13]:
# Cross-validate the baseline estimator on the unscaled features.
results = cross_val_score(estimator, X, y=encoded_Y, cv=kfold)

W0423 13:55:53.539087 140114495031104 deprecation.py:506] From /home/arvind/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1633: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
W0423 13:55:53.626345 140114495031104 deprecation.py:323] From /home/arvind/.local/lib/python3.6/site-packages/tensorflow_core/python/ops/nn_impl.py:183: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
W0423 13:55:53.788334 140114495031104 deprecation.py:323] From /home/arvind/.local/lib/python3.6/site-packages/keras/optimizers.py:550: BaseResourceVariable.constraint (from tensorflow.python.ops.resource_variable_ops) is deprecated and will 

In [14]:
# Report the mean and standard deviation of the cross-validation scores as percentages.
print("Baseline: %.2f%% (%.2f%%)" % (100 * results.mean(), 100 * results.std()))

Baseline: 80.24% (9.61%)


Binary classification with standardization

In [15]:
import numpy
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [16]:
# Seed NumPy's global random number generator with the same fixed value used earlier in the notebook.
seed = 7
numpy.random.seed(seed=seed)

In [17]:
# Read the sonar data; the file has no header row.
dataframe = pandas.read_csv(
    'sonar.all-data.csv',
    header=None,
)

In [18]:
# Split the raw array: columns 0-59 become float inputs, column 60 stays as the raw label.
dataset = dataframe.values
X, Y = dataset[:, :60].astype(float), dataset[:, 60]

In [19]:
# Fit the label encoder on the raw labels and convert them to integer class codes in one step.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)

In [26]:
def create_baseline():
    """Build and compile the baseline network used in the standardized pipeline.

    This redefines ``create_baseline`` with the same architecture as the
    earlier definition in this notebook: 60 inputs, one 60-unit ReLU hidden
    layer and a one-unit sigmoid output, both with the 'normal' kernel
    initializer.

    Returns:
        A compiled Keras ``Sequential`` model using binary cross-entropy loss,
        the Adam optimizer and accuracy as the reported metric.
    """
    layers = (
        Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    )
    model = Sequential()
    for layer in layers:
        model.add(layer)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [27]:
# Re-seed NumPy, then list the pipeline steps: feature standardization
# followed by the Keras classifier built from create_baseline.
numpy.random.seed(seed)
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)),
]

In [28]:
# Chain the listed steps into a single scikit-learn pipeline.
pipeline = Pipeline(steps=estimators)

In [29]:
# 10-fold stratified splitter, shuffled with the fixed seed.
kfold = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=seed,
)

In [30]:
# Cross-validate the standardize + baseline-network pipeline.
results = cross_val_score(pipeline, X, y=encoded_Y, cv=kfold)

In [31]:
# Report the mean and standard deviation of the cross-validation scores as percentages.
print("Standardized: %.2f%% (%.2f%%)" % (100 * results.mean(), 100 * results.std()))

Standardized: 85.07% (7.46%)


Evaluate a smaller network

In [33]:
def create_smaller():
    """Build and compile a network with a narrower hidden layer.

    The model takes 60 inputs, has a single 30-unit ReLU hidden layer and a
    one-unit sigmoid output layer. Both layers use the 'normal' kernel
    initializer.

    Returns:
        A compiled Keras ``Sequential`` model using binary cross-entropy loss,
        the Adam optimizer and accuracy as the reported metric.
    """
    hidden = Dense(30, input_dim=60, kernel_initializer='normal', activation='relu')
    output = Dense(1, kernel_initializer='normal', activation='sigmoid')
    model = Sequential([hidden, output])
    model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model

In [34]:
# Remove the last step (the 'mlp' classifier) from the shared list in place;
# the removed step is returned and shown as the cell output.
# NOTE: this mutates `estimators`, so running the cell again removes another step.
estimators.pop()

('mlp', <keras.wrappers.scikit_learn.KerasClassifier at 0x7f6e65347550>)

In [35]:
# Display the remaining steps to confirm that only the scaler is left.
estimators

[('standardize', StandardScaler(copy=True, with_mean=True, with_std=True))]

In [36]:
# Build the step list from scratch instead of appending to the shared list.
# Appending is not idempotent: running the cell twice would leave two 'mlp'
# steps, which Pipeline rejects. Rebinding always yields exactly one scaler
# followed by the smaller network.
estimators = [
    ("standardize", StandardScaler()),
    ("mlp", KerasClassifier(build_fn=create_smaller, epochs=100, batch_size=5, verbose=0)),
]

In [37]:
# Chain the current steps into a single scikit-learn pipeline.
pipeline = Pipeline(steps=estimators)

In [38]:
# Cross-validate the pipeline that uses the smaller network.
results = cross_val_score(pipeline, X, y=encoded_Y, cv=kfold)

In [39]:
# Report the mean and standard deviation of the cross-validation scores as percentages.
print("Smaller: %.2f%% (%.2f%%)" % (100 * results.mean(), 100 * results.std()))

Smaller: 85.07% (6.33%)


Evaluate a larger network

In [40]:
def create_larger():
    """Build and compile a network with two hidden layers.

    The model takes 60 inputs, passes them through a 60-unit and then a
    30-unit ReLU hidden layer, and ends in a one-unit sigmoid output layer.
    All layers use the 'normal' kernel initializer.

    Returns:
        A compiled Keras ``Sequential`` model using binary cross-entropy loss,
        the Adam optimizer and accuracy as the reported metric.
    """
    stack = [
        Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'),
        Dense(30, kernel_initializer='normal', activation='relu'),
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ]
    model = Sequential(stack)
    model.compile(loss="binary_crossentropy", optimizer='adam', metrics=['accuracy'])
    return model

In [41]:
# Remove the last step (the 'mlp' classifier) from the shared list in place;
# the removed step is returned and shown as the cell output.
# NOTE: this mutates `estimators`, so running the cell again removes another step.
estimators.pop()

('mlp', <keras.wrappers.scikit_learn.KerasClassifier at 0x7f6e4e01bc18>)

In [42]:
# Display the remaining steps to confirm that only the scaler is left.
estimators

[('standardize', StandardScaler(copy=True, with_mean=True, with_std=True))]

In [43]:
# Build the step list from scratch instead of appending to the shared list.
# Appending is not idempotent: running the cell twice would leave two 'mlp'
# steps, which Pipeline rejects. Rebinding always yields exactly one scaler
# followed by the larger network.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_larger, epochs=100, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)

In [44]:
# Cross-validate the pipeline that uses the larger network.
results = cross_val_score(pipeline, X, y=encoded_Y, cv=kfold)

In [45]:
# Report the mean and standard deviation of the cross-validation scores as percentages.
print("Larger: %.2f%% (%.2f%%)" % (100 * results.mean(), 100 * results.std()))

Larger: 83.62% (6.35%)
