In [23]:
import numpy  as np
import pandas
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [24]:
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)

In [25]:
DATAPATH = "/home/ubuntu/fastai/data/sonar/"

In [26]:
# load dataset
dataframe = pandas.read_csv(DATAPATH+"sonar.all-data", header=None)
print ("dataframe:", type(dataframe), dataframe.shape)
dataset = dataframe.values
print ("dataset:", type(dataset), dataset.shape)
# split into input (X) and output (Y) variables
X = dataset[:,0:60].astype(float)
Y = dataset[:,60]

dataframe: <class 'pandas.core.frame.DataFrame'> (208, 61)
dataset: <class 'numpy.ndarray'> (208, 61)


In [29]:
print (type(X), X.shape, X[0,:])
print ("max(X)", np.amax(X), ", min(X)", np.min(X))
print (type(Y), Y.shape, Y[0:10], np.unique(Y))

<class 'numpy.ndarray'> (208, 60) [ 0.02    0.0371  0.0428  0.0207  0.0954  0.0986  0.1539  0.1601  0.3109
  0.2111  0.1609  0.1582  0.2238  0.0645  0.066   0.2273  0.31    0.2999
  0.5078  0.4797  0.5783  0.5071  0.4328  0.555   0.6711  0.6415  0.7104
  0.808   0.6791  0.3857  0.1307  0.2604  0.5121  0.7547  0.8537  0.8507
  0.6692  0.6097  0.4943  0.2744  0.051   0.2834  0.2825  0.4256  0.2641
  0.1386  0.1051  0.1343  0.0383  0.0324  0.0232  0.0027  0.0065  0.0159
  0.0072  0.0167  0.018   0.0084  0.009   0.0032]
max(X) 1.0 , min(X) 0.0
<class 'numpy.ndarray'> (208,) ['R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R' 'R'] ['M' 'R']


In [15]:
# encode class values as integers
encoder = LabelEncoder()
#http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.LabelEncoder.html
#Encode labels with value between 0 and n_classes-1
print (type(encoder))

encoder.fit(Y)
encoded_Y = encoder.transform(Y)
print (type(encoded_Y), encoded_Y.shape, encoded_Y[0:10])

<class 'sklearn.preprocessing.label.LabelEncoder'>
<class 'numpy.ndarray'> (208,) [1 1 1 1 1 1 1 1 1 1]


In [16]:
# baseline model
def create_baseline():
	# create model
	model = Sequential()
	model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model

In [30]:
# evaluate model with standardized dataset
estimator = KerasClassifier(build_fn=create_baseline, nb_epoch=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Results: 74.92% (7.40%)


In [31]:
print (type(estimator))

<class 'keras.wrappers.scikit_learn.KerasClassifier'>


In [33]:
# evaluate baseline model with standardized dataset
numpy.random.seed(seed)
estimators = []
estimators.append(('standardize', StandardScaler()))
#http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.StandardScaler.html
#Standardize features by deducting the mean and scaling to unit variance
#results in normal distribution centered about zero

estimators.append(('mlp', KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 84.09% (7.53%)


In [34]:
# smaller model
def create_smaller():
	# create model
	model = Sequential()
	model.add(Dense(30, input_dim=60, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	return model


estimators = []#list
estimators.append(('standardize', StandardScaler()))
#append tuple to list
estimators.append(('mlp', KerasClassifier(build_fn=create_smaller, epochs=100, batch_size=5, verbose=0)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Smaller: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


Smaller: 85.06% (7.03%)


In [40]:
print (type(estimators), len(estimators), type(estimators[0]))
print (type(estimators[0][0]), type(estimators[0][1]))
print (type(estimators[1][0]), type(estimators[1][1]))

<class 'list'> 2 <class 'tuple'>
<class 'str'> <class 'sklearn.preprocessing.data.StandardScaler'>
<class 'str'> <class 'keras.wrappers.scikit_learn.KerasClassifier'>


In [44]:
# larger model
def create_larger():
	# create model
	model = Sequential()
	model.add(Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'))
	model.add(Dense(30, kernel_initializer='normal', activation='relu'))
	model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
	# Compile model
	model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
	model.summary()
	return model


estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', KerasClassifier(build_fn=create_larger, epochs=1, batch_size=5, verbose=2)))
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Larger: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_141 (Dense)            (None, 60)                3660      
_________________________________________________________________
dense_142 (Dense)            (None, 30)                1830      
_________________________________________________________________
dense_143 (Dense)            (None, 1)                 31        
Total params: 5,521
Trainable params: 5,521
Non-trainable params: 0
_________________________________________________________________
Epoch 1/1
0s - loss: 0.6806 - acc: 0.6720
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_144 (Dense)            (None, 60)                3660      
_________________________________________________________________
dense_145 (Dense)            (None, 30)                1830      
________________________________