# Overview

This template is based on: [Binary Classification Tutorial with the Keras Deep Learning Library](https://machinelearningmastery.com/binary-classification-tutorial-with-the-keras-deep-learning-library/).

In [1]:
import numpy
import pandas as pd
from pandas import Series, DataFrame
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import sys

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Make the local `conrpt` checkout importable.
# The location defaults to the original hard-coded path but can be overridden
# with the CONRPT_PATH environment variable so the notebook also runs on
# machines where the checkout lives elsewhere.
import os

conrpt_dir = os.environ.get('CONRPT_PATH', '/gits/conrpt')
if conrpt_dir not in sys.path:  # keep sys.path free of duplicates when the cell is re-run
    sys.path.append(conrpt_dir)
import conrpt

In [3]:
# Fix the random seed for reproducibility. `seed` is also passed as
# random_state to the StratifiedKFold splitters in the evaluation cells.
# NOTE(review): only NumPy's global RNG is seeded here; no seed for the
# TensorFlow backend is set anywhere visible in this notebook, so the
# reported accuracies may still vary between runs - confirm.
seed = 7
numpy.random.seed(seed)

## Load and prepare data for use in Keras

In [4]:
# Load the sonar dataset. pandas is used for loading because it easily handles
# strings (the output variable), whereas loading the data directly with NumPy
# would be more difficult.
# The 60 input variables are the strength of the returns at different angles.
# It is a binary classification problem that requires a model to
# differentiate rocks from metal cylinders.
dataframe = pd.read_csv("sonar.csv", header=None)
dataset = dataframe.values
# Split into input (X) and output (Y) variables: the first 60 columns are the
# numeric features, column 60 holds the string class label.
X = numpy.asarray(dataset[:, :60], dtype=float)
Y = dataset[:, 60]

In [5]:
# Preview a handful of feature columns together with the label column (60).
# All of the variables are continuous and generally in the range of
# 0 to 1. The output variable is a string “M” for mine and “R” for
# rock, which will need to be converted to integers 0 and 1
# (the encodings used below map “M” to 0 and “R” to 1).
dataframe.iloc[:,[1,3,4,5,21,22,23,58,59,60]].head()

Unnamed: 0,1,3,4,5,21,22,23,58,59,60
0,0.0371,0.0207,0.0954,0.0986,0.5071,0.4328,0.555,0.009,0.0032,R
1,0.0523,0.0689,0.1183,0.2583,0.4052,0.3957,0.3914,0.0052,0.0044,R
2,0.0582,0.1083,0.0974,0.228,0.6737,0.4293,0.3648,0.0095,0.0078,R
3,0.0171,0.0205,0.0205,0.0368,0.369,0.5556,0.4846,0.004,0.0117,R
4,0.0666,0.0394,0.059,0.0649,0.4292,0.573,0.5399,0.0107,0.0094,R


### Recode string output as integer

In [6]:
# Encode class values as integers. The output variable holds string labels
# that must be converted into the integer values 0 and 1. scikit-learn's
# LabelEncoder learns the required encoding from the entire dataset and
# applies it to create the new output variable; fit_transform() performs the
# fit() and transform() steps in a single call.
encoder = LabelEncoder()
encoded_Y = encoder.fit_transform(Y)

In [7]:
# Encoded_Y (results from the first method, LabelEncoder).
# In the output below the leading rows, which are labelled 'R' in the data
# preview, are encoded as 1; the remaining rows are encoded as 0.
encoded_Y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Alternate recoding method

In [8]:
# Alternate option: encode by creating a dummy variable that references the
# text value ('R' -> 1, anything else -> 0).
# Column 60 is selected as a 1-D Series (iloc[:, 60], not iloc[:, [60]]) so
# numpy.where returns a 1-D array that maps cleanly onto a single new column
# instead of relying on pandas accepting an (n, 1) 2-D array.
dataframe['y'] = numpy.where(dataframe.iloc[:, 60] == 'R', 1, 0)
dataframe.iloc[:,[1,3,4,5,21,22,23,58,59,60,61]].head()

Unnamed: 0,1,3,4,5,21,22,23,58,59,60,y
0,0.0371,0.0207,0.0954,0.0986,0.5071,0.4328,0.555,0.009,0.0032,R,1
1,0.0523,0.0689,0.1183,0.2583,0.4052,0.3957,0.3914,0.0052,0.0044,R,1
2,0.0582,0.1083,0.0974,0.228,0.6737,0.4293,0.3648,0.0095,0.0078,R,1
3,0.0171,0.0205,0.0205,0.0368,0.369,0.5556,0.4846,0.004,0.0117,R,1
4,0.0666,0.0394,0.059,0.0649,0.4292,0.573,0.5399,0.0107,0.0094,R,1


In [9]:
# Encoded_Y (results from the second method): read back the dummy variable
# created in the previous cell, selecting the 'y' column by name.
encoded_Y = dataframe['y'].values
encoded_Y

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Create a baseline neural network model

In [10]:
# baseline model
def create_baseline():
    """Build and compile the baseline network: 60 inputs -> 60 ReLU -> 1 sigmoid.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    # A single fully connected hidden layer with the same number of neurons
    # as input variables - a good default starting point.
    hidden = Dense(60, input_dim=60, kernel_initializer='normal', activation='relu')
    # A single sigmoid output neuron yields a probability in the range 0 to 1
    # that can easily be converted to a crisp class value.
    output = Dense(1, kernel_initializer='normal', activation='sigmoid')
    network = Sequential([hidden, output])
    # Logarithmic loss (binary_crossentropy) is the preferred loss for binary
    # classification; Adam performs the gradient descent and accuracy is
    # collected while the model trains.
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network

## Evaluate model using scikit-learn and stratified k-fold cross validation

In [11]:
# Evaluate the baseline model on the raw (unscaled) inputs X with stratified
# 5-fold cross validation; standardization is only introduced in the next
# section. The number of training epochs and the batch size are passed to
# the KerasClassifier, again using reasonable default values.
estimator = KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, encoded_Y, cv=kfold)
# Report mean and standard deviation of the per-fold accuracy, then the raw scores.
print("Results: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
print(results)

Results: 80.27% (2.85%)
[0.83720931 0.76190477 0.80487806 0.8292683  0.78048781]


## Demonstrate data preparation schemes; improve performance

In [12]:
# Re-run the baseline model with data preparation: neural network models are
# especially suited to consistent input values, both in scale and
# distribution, so the inputs are standardized before training.
numpy.random.seed(seed)
# scikit-learn's StandardScaler performs the standardization of the Sonar
# dataset; chaining it with the classifier in a Pipeline lets
# cross_val_score evaluate both steps together.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_baseline, epochs=100, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)
# NOTE(review): 5 folds here versus 10 folds in the smaller/larger
# experiments, so those scores are not directly comparable with this one.
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Standardized: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Standardized: 83.16% (4.02%)


## Adjust topology, reduce neurons by half

In [13]:
# There is a lot of redundancy in the input variables for this problem.
# Perhaps some of those angles are more relevant than others. We can 
# force a type of feature extraction by the network by restricting the 
# representational space in the first hidden layer.

# Take baseline model with 60 neurons in the hidden layer and reduce by 
# half to 30. This will put pressure on the network during training to 
# pick out the most important structure in the input data to model.

# smaller model
def create_smaller():
    """Build and compile the reduced network: 60 inputs -> 30 ReLU -> 1 sigmoid.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    layers = [
        # Hidden layer with half as many neurons (30) as the 60 inputs.
        Dense(30, input_dim=60, kernel_initializer='normal', activation='relu'),
        # Single sigmoid output neuron for the binary prediction.
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ]
    network = Sequential(layers)
    # Compile with the same loss, optimizer and metric as create_baseline.
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network
# Standardize the inputs, then train the smaller network; evaluated with
# stratified 10-fold cross validation.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_smaller, epochs=100, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Smaller: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Smaller: 86.52% (7.48%)


## Adjust topology, add an additional layer

In [14]:
# Instead of squeezing the representation of the inputs themselves, add an 
# additional hidden layer to aid in the process.

# larger model
def create_larger():
    """Build and compile the deeper network: 60 inputs -> 60 ReLU -> 30 ReLU -> 1 sigmoid.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    layers = [
        # First hidden layer, as wide as the 60 inputs.
        Dense(60, input_dim=60, kernel_initializer='normal', activation='relu'),
        # Additional, narrower hidden layer.
        Dense(30, kernel_initializer='normal', activation='relu'),
        # Single sigmoid output neuron for the binary prediction.
        Dense(1, kernel_initializer='normal', activation='sigmoid'),
    ]
    network = Sequential(layers)
    # Compile with the same loss, optimizer and metric as create_baseline.
    network.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return network
# Standardize the inputs, then train the larger network; evaluated with
# stratified 10-fold cross validation.
estimators = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_larger, epochs=100, batch_size=5, verbose=0)),
]
pipeline = Pipeline(estimators)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(pipeline, X, encoded_Y, cv=kfold)
print("Larger: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))

Larger: 86.54% (8.63%)


## Make predictions

In [15]:
# Build the final model with create_larger() rather than repeating its layer
# definitions (60 inputs -> 60 ReLU -> 30 ReLU -> 1 sigmoid), so the
# architecture is defined in one place only.
# NOTE(review): the model is fitted on the raw X, without the StandardScaler
# step used in the cross-validated pipelines, and the following cells predict
# on the same rows it was trained on - the resulting scores describe
# training-set fit, not generalisation.
model = create_larger()
model.fit(X, encoded_Y, epochs=150, batch_size=5, verbose=0)

<keras.callbacks.History at 0x7fd2d5eaceb8>

In [16]:
# Continuous sigmoid outputs, one probability per row of X.
prediction_conts = model.predict(X)
# Crisp 0/1 labels: threshold the probabilities at 0.5, which is what
# Sequential.predict_classes() did for a single sigmoid output. That method
# was removed from later Keras releases, and deriving the labels from
# prediction_conts also avoids a second forward pass over X.
prediction_class = (prediction_conts > 0.5).astype('int32')

In [17]:
# Round the first (only) value of every prediction row to the nearest whole
# number, collecting the results in a plain Python list.
predictions_rounded = []
for row in prediction_conts:
    predictions_rounded.append(round(row[0]))

In [18]:
# Attach both prediction variants to the dataframe so they can be compared
# with the observed label column 'y'.
# NOTE: despite its name, 'p_conts' holds the rounded values from
# predictions_rounded, not the raw probabilities in prediction_conts.
dataframe['p_conts'] = predictions_rounded
dataframe['p_class'] = prediction_class

In [19]:
# Preview the observed label next to the two prediction columns.
dataframe[['y','p_conts','p_class']].head()

Unnamed: 0,y,p_conts,p_class
0,1,1.0,1
1,1,1.0,1
2,1,1.0,1
3,1,1.0,1
4,1,1.0,1


In [20]:
# Evaluate those predictions using conrpt.
# An explicit .copy() is passed because conrpt adds helper columns to the
# frame it receives; handing it a column selection of `dataframe` directly
# triggers pandas' SettingWithCopyWarning.
conrpt.conrpt(dataframe[['y','p_conts','p_class']].copy())


Notes: ObservedPos: 97, ObservedNeg: 111, & ObservedTot: 208, Prevalence: 46.635


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df['srtr'] = numpy.random.randint(1, 101, size=len(df))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  df[new_col_name] = numpy.where(df['srtr'] < coin, 1, 0)


Unnamed: 0,Results,Perfect,p_conts,p_class,25coin,50coin,75coin
0,TestedPos,97.0,97.0,97.0,62.0,103.0,152.0
1,TestedNeg,111.0,111.0,111.0,146.0,105.0,56.0
2,TestedTot,208.0,208.0,208.0,208.0,208.0,208.0
3,TruePos,97.0,97.0,97.0,31.0,52.0,76.0
4,TrueNeg,111.0,111.0,111.0,80.0,60.0,35.0
5,FalesPos,0.0,0.0,0.0,66.0,45.0,21.0
6,FalseNeg,0.0,0.0,0.0,31.0,51.0,76.0
7,Sensitivity,1.0,1.0,1.0,0.32,0.536,0.784
8,Specificity,1.0,1.0,1.0,0.721,0.541,0.315
9,PosPredVal,1.0,1.0,1.0,0.32,0.536,0.784


In [22]:
# Confusion table: observed label 'y' (rows) against the thresholded class
# prediction 'p_class' (columns).
pd.crosstab(dataframe['y'], dataframe['p_class'])

p_class,0,1
y,Unnamed: 1_level_1,Unnamed: 2_level_1
0,111,0
1,0,97


In [23]:
# Confusion table: observed label 'y' (rows) against the rounded prediction
# 'p_conts' (columns).
pd.crosstab(dataframe['y'], dataframe['p_conts'])

p_conts,0.0,1.0
y,Unnamed: 1_level_1,Unnamed: 2_level_1
0,111,0
1,0,97
