# C11: Project: Binary Classification of Sonar Returns

## 1. Download Dataset

In a terminal:

```
$ cd dlwp/data_set/
$ ./get_sonar_data.sh
```

The data file named "sonar.data" will be downloaded to this directory.

## 2. Preparation

Import the classes and functions we will need

In [21]:
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [22]:
# Pin NumPy's global random generator to a fixed seed so runs are repeatable.
seed = 7
np.random.seed(seed)

In [23]:
# Load the sonar CSV (the file has no header row) as a raw value array.
dataframe = pd.read_csv('./data_set/sonar.data', header=None)
dataset = dataframe.values

# The first 60 columns are cast to float and used as inputs;
# column 60 holds the class label.
X = dataset[:, 0:60].astype(float)
Y = dataset[:, 60]

# Map the class labels to integer codes.
encoder = LabelEncoder().fit(Y)
Y_enc = encoder.transform(Y)

## 3. Baseline Model

Create baseline model

In [24]:
def create_baseline_model():
    """Build and compile the baseline network.

    One hidden layer of 60 ReLU units over the 60 input features, followed
    by a single sigmoid output unit. The model is compiled with binary
    cross-entropy loss, the Adam optimizer, and accuracy as the metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    # NOTE(review): ``init`` looks like the Keras 1.x spelling of the weight
    # initializer argument -- confirm the installed Keras version accepts it.
    hidden = Dense(60, input_dim=60, init='normal', activation='relu')
    output = Dense(1, init='normal', activation='sigmoid')

    network = Sequential()
    for layer in (hidden, output):
        network.add(layer)

    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

# Wrap the model builder so scikit-learn utilities can drive it.
# NOTE(review): ``nb_epoch`` looks like the Keras 1.x name for the epoch
# count -- confirm it is honoured by the installed Keras version.
estimator = KerasClassifier(
    build_fn=create_baseline_model,
    nb_epoch=100,
    batch_size=5,
    verbose=0,
)

Evaluate baseline model

In [25]:
# 10-fold stratified cross-validation; the shuffle is seeded so the fold
# assignment is reproducible.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y_enc, cv=kfold)

# Report the mean and standard deviation of the per-fold scores as
# percentages. Calling print(...) with one pre-formatted string behaves the
# same on Python 2 and Python 3, unlike the bare print statement.
print("Baseline Acc: %.2f%% (%.2f%%)"
      % (results.mean() * 100, results.std() * 100))

Baseline Acc: 81.68% (5.67%)


## 4. Improve Performance With Data Preprocessing

Add a StandardScaler preprocessing step in front of the model using a scikit-learn Pipeline

In [26]:
# Chain feature standardization in front of the wrapped Keras classifier.
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp', estimator))
pipeline = Pipeline(estimators)

In [27]:
# Cross-validate the whole pipeline with the same folds as before.
results = cross_val_score(pipeline, X, Y_enc, cv=kfold)

# Calling print(...) with one pre-formatted string behaves the same on
# Python 2 and Python 3, unlike the bare print statement.
print("Model with StandardScaler Acc: %.2f%% (%.2f%%)"
      % (results.mean() * 100, results.std() * 100))

Model with StandardScaler Acc: 82.66% (5.83%)


## 5. Tuning Layers and Neurons in the Model

Change the structure of the network (called its topology) by making it smaller or larger.

### 5.1 Evaluate a Smaller Network

In [28]:
def create_smaller_model():
    """Build and compile a network with a narrower hidden layer.

    The hidden layer has 30 ReLU units over the 60 input features and is
    followed by a single sigmoid output unit. The model is compiled with
    binary cross-entropy loss, the Adam optimizer, and accuracy as the
    metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    # NOTE(review): ``init`` looks like the Keras 1.x spelling of the weight
    # initializer argument -- confirm the installed Keras version accepts it.
    hidden = Dense(30, input_dim=60, init='normal', activation='relu')
    output = Dense(1, init='normal', activation='sigmoid')

    network = Sequential()
    for layer in (hidden, output):
        network.add(layer)

    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [30]:
# Re-seed NumPy's global random generator before the next experiment.
seed = 7
np.random.seed(seed)

In [31]:
# Standardize the inputs, then train the smaller network.
# Note that ``pipeline`` is rebound here to the plain list of steps; the
# assembled Pipeline object is bound to ``estimator``.
pipeline = []
pipeline.append(('standardize', StandardScaler()))
pipeline.append((
    'mlp',
    KerasClassifier(
        build_fn=create_smaller_model,
        nb_epoch=100,
        batch_size=5,
        verbose=0,
    ),
))
estimator = Pipeline(pipeline)

In [33]:
# Evaluate the smaller network with seeded 10-fold stratified
# cross-validation.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y_enc, cv=kfold)

# Calling print(...) with one pre-formatted string behaves the same on
# Python 2 and Python 3, unlike the bare print statement.
print("Smaller Network Acc: %.2f%% (%.2f%%)"
      % (results.mean() * 100, results.std() * 100))

Smaller Network Acc: 87.00% (4.34%)


Wow! Much better than before!

### 5.2 Evaluate a Larger Network

In [35]:
def create_large_model():
    """Build and compile a network with an extra hidden layer.

    Two hidden ReLU layers (60 units, then 30 units) sit over the 60 input
    features and feed a single sigmoid output unit. The model is compiled
    with binary cross-entropy loss, the Adam optimizer, and accuracy as the
    metric.

    Returns:
        The compiled ``Sequential`` model.
    """
    # NOTE(review): ``init`` looks like the Keras 1.x spelling of the weight
    # initializer argument -- confirm the installed Keras version accepts it.
    stack = (
        Dense(60, input_dim=60, init='normal', activation='relu'),
        Dense(30, init='normal', activation='relu'),
        Dense(1, init='normal', activation='sigmoid'),
    )

    network = Sequential()
    for layer in stack:
        network.add(layer)

    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

# Re-seed NumPy's global random generator; reuse ``seed`` rather than
# repeating the literal so the two cannot drift apart.
seed = 7
np.random.seed(seed)

# Standardize the inputs, then train the larger network.
# ``pipeline`` is the plain list of steps; the assembled Pipeline object is
# bound to ``estimator``.
pipeline = [
    ('standardize', StandardScaler()),
    ('mlp', KerasClassifier(build_fn=create_large_model, nb_epoch=100, batch_size=5, verbose=0))
]
estimator = Pipeline(pipeline)

# Evaluate with seeded 10-fold stratified cross-validation.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
results = cross_val_score(estimator, X, Y_enc, cv=kfold)

# Calling print(...) with one pre-formatted string behaves the same on
# Python 2 and Python 3, unlike the bare print statement.
# NOTE: the label text is left unchanged so it still matches the recorded
# output that follows this cell.
print("Larger Netwotk Acc: %.2f%% (%.2f%%)"
      % (results.mean() * 100, results.std() * 100))


Larger Netwotk Acc: 86.47% (3.82%)
