# Keras: Tabular Classify Binary

*Detecting Naval Mines with Binary Classification of Sonar Data.*

![mines](../images/mines.png)

In [None]:
import aiqc
from aiqc import datum

---

## Example Data

This dataset is comprised of:

* *Features* = sonar readings that have been bounced off a distant object. 
* *Label* = either a rock or metal structure (potentially a naval mine).

Reference [Example Datasets](example_datasets.ipynb) for more information.

In [3]:
# Fetch the 'sonar.csv' example dataset through aiqc's `datum` helper.
# The result is kept in `df` and reused by both API walkthroughs below.
df = datum.to_pandas('sonar.csv')

In [4]:
# Preview the first rows: feature columns `a`..`bh` plus the `object` label column.
df.head()

Unnamed: 0,a,b,c,d,e,f,g,h,i,j,...,az,ba,bb,bc,bd,be,bf,bg,bh,object
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


---

In [None]:
import keras
from keras.callbacks import History
from keras.layers import Dense, Dropout, Input
from keras.models import Sequential

from sklearn.preprocessing import LabelBinarizer, PowerTransformer

## a) High-Level API

Reference [High-Level API Docs](api_high_level.ipynb) for more information including how to work with non-tabular data.

In [5]:
# One call that declares the label, the features, their encoders and the
# train/validation/test split for the sonar DataFrame loaded above.
splitset = aiqc.Pipeline.Tabular.make(
    # --- Data source ---
    df_or_path = df
    , dtype = None

    # --- Label preprocessing ---
    , label_column = 'object'
    , label_interpolater = None
    , label_encoder = dict(sklearn_preprocess = LabelBinarizer(sparse_output=False))

    # --- Feature preprocessing ---
    # Everything except the label column is treated as a feature.
    , feature_cols_excluded = 'object'
    , feature_interpolaters = None
    , feature_window = None
    , feature_encoders = dict(
        sklearn_preprocess = PowerTransformer(method='yeo-johnson', copy=False)
        , dtypes = ['float64']
    )
    , feature_reshape_indices = None

    # --- Stratification ---
    # Same proportions as the Low-Level API walkthrough further down
    # (size_test = 0.22, size_validation = 0.12) so that both sections
    # train and evaluate on comparably sized splits.
    , size_test = 0.22
    , size_validation = 0.12
    , fold_count = None
    , bin_count = None
)


___/ featurecoder_index: 0 \_________

=> The column(s) below matched your filter(s) featurecoder filters.

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'aa', 'ab', 'ac', 'ad', 'ae', 'af', 'ag', 'ah', 'ai', 'aj', 'ak', 'al', 'am', 'an', 'ao', 'ap', 'aq', 'ar', 'as', 'at', 'au', 'av', 'aw', 'ax', 'ay', 'az', 'ba', 'bb', 'bc', 'bd', 'be', 'bf', 'bg', 'bh']

=> Done. All feature column(s) have featurecoder(s) associated with them.
No more Featurecoders can be added to this Encoderset.



Reference this great blog for machine learning cookbooks: [MachineLearningMastery.com "Binary Classification"](https://machinelearningmastery.com/binary-classification-tutorial-with-the-keras-deep-learning-library/).

In [6]:
def fn_build(features_shape, label_shape, **hp):
    """Assemble the (uncompiled) Keras network for the sonar classifier.

    Args:
        features_shape: forwarded to ``Input(shape=...)`` as the shape of one sample.
        label_shape: only ``label_shape[0]`` is read; it sets the number of output units.
        **hp: hyperparameters for this run; ``hp['neuron_count']`` is required
            and sets the width of both hidden layers.

    Returns:
        A ``Sequential`` model named ``'Sonar'``. Compilation happens in ``fn_train``.

    Raises:
        KeyError: if ``neuron_count`` is missing from ``hp``.
    """
    # NOTE: `Input` must be imported from keras.layers together with
    # Dense/Dropout, otherwise this function fails with a NameError.
    width = hp['neuron_count']

    model = Sequential(name='Sonar')
    model.add(Input(shape=features_shape))
    model.add(Dense(width, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.30))
    model.add(Dense(width, activation='relu', kernel_initializer='he_uniform'))
    # Sigmoid output to match the binary classification analysis type.
    model.add(Dense(units=label_shape[0], activation='sigmoid', kernel_initializer='glorot_uniform'))
    return model

In [7]:
def fn_train(model, loser, optimizer, samples_train, samples_evaluate, **hp):
    """Compile `model` and fit it on the training samples.

    Args:
        model: the network returned by `fn_build`.
        loser: loss handed to `model.compile(loss=...)`.
        optimizer: optimizer handed to `model.compile(optimizer=...)`.
        samples_train: mapping with 'features' and 'labels' used for fitting.
        samples_evaluate: mapping with 'features' and 'labels' used as validation data.
        **hp: hyperparameters for this run; `hp['epochs']` is required.

    Returns:
        The same `model` object after fitting.
    """
    model.compile(loss=loser, optimizer=optimizer, metrics=['accuracy'])

    train_x, train_y = samples_train['features'], samples_train['labels']
    validation_pair = (samples_evaluate['features'], samples_evaluate['labels'])

    fit_options = dict(
        validation_data=validation_pair,
        verbose=0,  # silent training
        batch_size=3,
        epochs=hp['epochs'],
        callbacks=[History()],
    )
    model.fit(train_x, train_y, **fit_options)
    return model

In [8]:
# Candidate values per hyperparameter. `fn_build` reads 'neuron_count'
# and `fn_train` reads 'epochs'.
hyperparameters = dict(
    neuron_count=[25, 50],
    epochs=[75, 150],
)

In [9]:
# Bundle the model functions, the hyperparameter candidates and the split
# into a queue of training jobs (executed in the next cell).
queue = aiqc.Experiment.make(
    # --- Analysis type ---
    library="keras",
    analysis_type="classification_binary",

    # --- Model functions ---
    fn_build=fn_build,
    fn_train=fn_train,
    fn_lose=None,  # auto
    fn_optimize=None,  # auto
    fn_predict=None,  # auto

    # --- Training options ---
    repeat_count=2,
    hyperparameters=hyperparameters,
    pick_percent=None,

    # --- Data source ---
    splitset_id=splitset.id,
    foldset_id=None,
    hide_test=False,
)

In [10]:
# Execute every queued training job; the progress bar below reports 8 of them.
queue.run_jobs()

🔮 Training Models 🔮: 100%|██████████████████████████████████████████| 8/8 [01:32<00:00, 11.58s/it]


For more information on visualization of performance metrics, reference the [Visualization & Metrics](visualization.html) documentation.

---

## b) Low-Level API

Reference [Low-Level API Docs](api_low_level.ipynb) for more information including how to work with non-tabular data and defining optimizers.

In [11]:
# Wrap the `df` DataFrame in an aiqc tabular Dataset; the Label and Feature
# below are both created from this object.
dataset = aiqc.Dataset.Tabular.from_pandas(df)

In [12]:
# Name of the target column; used to create the Label and to exclude it from the Feature.
label_column = 'object'

In [13]:
# Declare the `label_column` of the dataset as the Label.
label = dataset.make_label(columns=[label_column])

In [14]:
# Encode the label with scikit-learn's LabelBinarizer, requesting dense (non-sparse) output.
labelcoder = label.make_labelcoder(sklearn_preprocess = LabelBinarizer(sparse_output=False))

In [15]:
# Create the Feature from the dataset, excluding the label column.
feature = dataset.make_feature(exclude_columns=[label_column])

In [16]:
# Container that the feature encoder(s) defined in the next cell are attached to.
encoderset = feature.make_encoderset()

In [17]:
# Yeo-Johnson power transform applied to the columns selected by the
# float64 dtype filter (the output below lists the matched columns).
featurecoder_0 = encoderset.make_featurecoder(
    sklearn_preprocess=PowerTransformer(method='yeo-johnson', copy=False),
    dtypes=['float64'],
)


___/ featurecoder_index: 0 \_________

=> The column(s) below matched your filter(s) featurecoder filters.

['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'aa', 'ab', 'ac', 'ad', 'ae', 'af', 'ag', 'ah', 'ai', 'aj', 'ak', 'al', 'am', 'an', 'ao', 'ap', 'aq', 'ar', 'as', 'at', 'au', 'av', 'aw', 'ax', 'ay', 'az', 'ba', 'bb', 'bc', 'bd', 'be', 'bf', 'bg', 'bh']

=> Done. All feature column(s) have featurecoder(s) associated with them.
No more Featurecoders can be added to this Encoderset.



In [18]:
# Split the samples into train/validation/test using the Feature and Label
# declared above; the test and validation fractions are given explicitly.
splitset = aiqc.Splitset.make(
    feature_ids=[feature.id],
    label_id=label.id,
    size_test=0.22,
    size_validation=0.12,
)

In [19]:
def fn_build(features_shape, label_shape, **hp):
    """Assemble the (uncompiled) Keras network for the sonar classifier.

    Args:
        features_shape: forwarded to ``Input(shape=...)`` as the shape of one sample.
        label_shape: only ``label_shape[0]`` is read; it sets the number of output units.
        **hp: hyperparameters for this run; ``hp['neuron_count']`` is required
            and sets the width of all three hidden layers.

    Returns:
        A ``Sequential`` model named ``'Sonar'``. Compilation happens in ``fn_train``.

    Raises:
        KeyError: if ``neuron_count`` is missing from ``hp``.
    """
    width = hp['neuron_count']

    model = Sequential(name='Sonar')
    # Declare the input shape explicitly (as the High-Level API builder does)
    # instead of leaving `features_shape` unused.
    model.add(Input(shape=features_shape))
    model.add(Dense(width, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.30))
    model.add(Dense(width, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dropout(0.30))
    model.add(Dense(width, activation='relu', kernel_initializer='he_uniform'))
    # Sigmoid output to match the binary classification analysis type.
    model.add(Dense(units=label_shape[0], activation='sigmoid', kernel_initializer='glorot_uniform'))
    return model

In [20]:
def fn_train(model, loser, optimizer, samples_train, samples_evaluate, **hp):
    """Compile `model` and fit it on the training samples.

    Args:
        model: the network returned by `fn_build`.
        loser: loss handed to `model.compile(loss=...)`.
        optimizer: optimizer handed to `model.compile(optimizer=...)`.
        samples_train: mapping with 'features' and 'labels' used for fitting.
        samples_evaluate: mapping with 'features' and 'labels' used as validation data.
        **hp: hyperparameters for this run; `hp['epochs']` is required.

    Returns:
        The same `model` object after fitting.
    """
    model.compile(loss=loser, optimizer=optimizer, metrics=['accuracy'])

    train_x, train_y = samples_train['features'], samples_train['labels']
    validation_pair = (samples_evaluate['features'], samples_evaluate['labels'])

    fit_options = dict(
        validation_data=validation_pair,
        verbose=0,  # silent training
        batch_size=3,
        epochs=hp['epochs'],
        callbacks=[History()],
    )
    model.fit(train_x, train_y, **fit_options)
    return model

In [21]:
# Pair the build/train functions with the library and analysis type.
algorithm = aiqc.Algorithm.make(
    library="keras",
    analysis_type="classification_binary",
    fn_build=fn_build,
    fn_train=fn_train,
)

In [22]:
# Candidate values per hyperparameter. `fn_build` reads 'neuron_count'
# and `fn_train` reads 'epochs'.
hyperparameters = dict(
    neuron_count=[25, 50],
    epochs=[75, 150],
)

In [23]:
# Attach the candidate hyperparameter values to the algorithm.
hyperparamset = algorithm.make_hyperparamset(hyperparameters=hyperparameters)

In [24]:
# Queue the training jobs for this algorithm on the split and hyperparameter
# set created above; `repeat_count = 2` is passed through unchanged.
queue = algorithm.make_queue(
    splitset_id=splitset.id,
    hyperparamset_id=hyperparamset.id,
    repeat_count=2,
)

In [25]:
# Execute every queued training job; the progress bar below reports 8 of them.
queue.run_jobs()

🔮 Training Models 🔮: 100%|██████████████████████████████████████████| 8/8 [01:08<00:00,  8.53s/it]


For more information on visualization of performance metrics, reference the [Visualization & Metrics](visualization.html) documentation.