# Course 4 - Project - Part 7: Dense network

<a name="top-7"></a>
This notebook is concerned with *Part 7: Dense network*.

**Contents:**
* [Step 0: Loading data](#step-7.0)
* [Step 1: 1-layer dense network](#step-7.1)
* [Step 2: 2-layer dense network](#step-7.2)

## Step 0: Loading data<a name="step-7.0"></a> ([top](#top-7))
---

We load the training, validation and test sets together with the extracted high-level features.

In [1]:
# Standard library.
import os
import pathlib
import typing as T

# 3rd party.
import numpy as np

# Project.
import utils

In [2]:
separator = '-' * 80
data_dir = pathlib.Path.cwd() / 'data'

# Training set.
path_train = data_dir / 'swissroads-features-train.npz'
data_train = utils.load(path_train)
print(separator, f'Dataset: train\n{utils.info(data_train)}', sep='\n')

# Validation set.
path_valid = data_dir / 'swissroads-features-valid.npz'
data_valid = utils.load(path_valid)
print(separator, f'Dataset: valid\n{utils.info(data_valid)}', sep='\n')

# Test set.
path_test = data_dir / 'swissroads-features-test.npz'
data_test = utils.load(path_test)
print(separator, f'Dataset: test\n{utils.info(data_test)}', sep='\n')

--------------------------------------------------------------------------------
Dataset: train
data: shape=(280, 224, 224, 3), dtype=float32
label_idxs: shape=(280,), dtype=int64
label_strs: shape=(6,), dtype=<U10
names: shape=(280,), dtype=<U19
features: shape=(280, 1280), dtype=float32
--------------------------------------------------------------------------------
Dataset: valid
data: shape=(139, 224, 224, 3), dtype=float32
label_idxs: shape=(139,), dtype=int64
label_strs: shape=(6,), dtype=<U10
names: shape=(139,), dtype=<U19
features: shape=(139, 1280), dtype=float32
--------------------------------------------------------------------------------
Dataset: test
data: shape=(50, 224, 224, 3), dtype=float32
label_idxs: shape=(50,), dtype=int64
label_strs: shape=(6,), dtype=<U10
names: shape=(50,), dtype=<U19
features: shape=(50, 1280), dtype=float32


In [3]:
# The label names are shared by all data sets; fail early if they ever differ.
label_strs = data_train['label_strs']
assert (
    np.all(data_train['label_strs'] == data_valid['label_strs']) and
    np.all(data_train['label_strs'] == data_test['label_strs'])
)

# Naming convention: X = raw data, y = label indices, F = extracted features,
# N = sample names.
X_train = data_train['data']
y_train = data_train['label_idxs']
F_train = data_train['features']
N_train = data_train['names']

X_valid = data_valid['data']
y_valid = data_valid['label_idxs']
F_valid = data_valid['features']
# Names must come from the validation set (previously read from the training set).
N_valid = data_valid['names']

X_test = data_test['data']
y_test = data_test['label_idxs']
F_test = data_test['features']
N_test = data_test['names']

## Step 1: 1-layer dense network<a name="step-7.1"></a> ([top](#top-7))
---

In [91]:
import pandas as pd

In [5]:
# Seed handed to the dropout layers and kernel initializers of the models.
RANDOM_SEED = 0

In [146]:
import tensorflow as tf
import tensorflow.keras as keras
from sklearn.model_selection import ParameterGrid
from tensorflow.keras import activations
from tensorflow.keras import initializers
from tensorflow.keras import losses
from tensorflow.keras import metrics
from tensorflow.keras import optimizers

# Number of target classes; used as the number of units of the output layer.
n_classes = len(label_strs)
    

def build_model(dropout_rate: float = 0.0, l2_alpha: float = 0.01):
    """\
    Build, compile and return the 1-layer dense Keras model.

    Architecture: input of 1280 features -> drop-out -> softmax output layer
    with ``n_classes`` units and an L2 penalty on its kernel.

    :param dropout_rate: rate given to the drop-out layer.
    :param l2_alpha: factor given to the L2 kernel regularizer.
    :return: the compiled ``keras.Sequential`` model.

    .. seealso:: https://keras.io/scikit-learn-api/
    """
    network = keras.Sequential([
        # Explicit input layer: this makes building a network easier.
        keras.layers.InputLayer(input_shape=(1280,)),
        # Drop-out applied directly to the input features.
        keras.layers.Dropout(dropout_rate, seed=RANDOM_SEED),
        # Output layer.
        keras.layers.Dense(
            units=n_classes,
            activation=activations.softmax,
            kernel_initializer=initializers.VarianceScaling(scale=1.0, seed=RANDOM_SEED),
            kernel_regularizer=keras.regularizers.l2(l=l2_alpha),
        ),
    ])

    # Uncomment to print the network summary.
    # network.summary()

    network.compile(
        optimizer=optimizers.Adam(),  # use defaults
        loss=losses.sparse_categorical_crossentropy,
        metrics=['acc'],  # cannot use metrics.sparse_categorical_accuracy
    )
    return network
    

# Wrap the model factory so that it exposes the scikit-learn estimator API
# (set_params / fit / score), which the manual grid search relies on.
model = keras.wrappers.scikit_learn.KerasClassifier(build_fn=build_model)

In [147]:
# Define the grid of values to search.
param_grid = {
    'dropout_rate': [0.0, 0.1],  # disable: 0.0 (candidates 0.2-0.5 are currently left out of the search)
    'l2_alpha': [0.0, 0.001, 0.01, 0.1],  # disable: 0.0, default: 0.01
}

# End training when the validation loss has not improved for 10 consecutive epochs (optional).
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=0)

In [148]:
# from sklearn.model_selection import GridSearchCV

# gscv = GridSearchCV(model, param_grid, iid=False, cv=3)
# gscv.fit(F_train, y_train, batch_size=32, epochs=100, verbose=0,
#          callbacks=[early_stopping],
#          validation_data=(F_valid, y_valid), shuffle=True)

In [149]:
# pd.DataFrame(gscv.cv_results_)

In [150]:
from sklearn.utils.class_weight import compute_class_weight

# Map every class index found in the training labels to its 'balanced' weight,
# so that under-represented classes count more during training.
y_classes = np.unique(y_train)
class_weight_ = dict(zip(
    y_classes,
    # `classes` and `y` are passed by keyword: recent scikit-learn releases
    # reject them as positional arguments, older ones accept both forms.
    compute_class_weight('balanced', classes=y_classes, y=y_train),
))
class_weight_

{0: 0.7070707070707071,
 1: 0.7291666666666666,
 2: 0.9150326797385621,
 3: 1.4583333333333333,
 4: 1.1111111111111112,
 5: 1.8666666666666667}

In [151]:
gs_results = []

grid = ParameterGrid(param_grid)

for i, params_dict in enumerate(grid, start=1):
    # '\r' rewrites the same output line with the running configuration number.
    print(f'\rconfiguration: {i}', end='', flush=True)

    # Set the parameters.
    model.set_params(**params_dict)

    # Fit the classifier to the data, weighting the classes to compensate for
    # the class imbalance.
    history = model.fit(
        F_train, y_train, batch_size=32, epochs=100, verbose=0,
        callbacks=[early_stopping],
        validation_data=(F_valid, y_valid), shuffle=True, class_weight=class_weight_)

    # Store the scores next to a copy of the parameters, so that the dict
    # handed out by the grid (and passed to set_params) is not mutated.
    gs_result = dict(params_dict)
    # The accuracies are recomputed with model.score(): due to dropout, etc. the
    # last-epoch values in history.history['acc'] / history.history['val_acc']
    # will not be exactly the same.
    gs_result['train_accuracy'] = model.score(F_train, y_train)
    gs_result['valid_accuracy'] = model.score(F_valid, y_valid)

    # Save result.
    gs_results.append(gs_result)



In [130]:
# Tabulate the grid-search results, best validation accuracy first.
df_unsorted = pd.DataFrame(gs_results)
df_results = df_unsorted.sort_values(by='valid_accuracy', ascending=False)
df_results.head()

Unnamed: 0,dropout_rate,l2_alpha,train_accuracy,valid_accuracy
7,0.1,0.1,1.0,0.913669
4,0.1,0.0,1.0,0.906475
6,0.1,0.01,1.0,0.906475
2,0.0,0.01,1.0,0.906475
1,0.0,0.001,1.0,0.899281


**Comment:** The best result is 91.4 % accuracy on the validation set.

We want to use a random forest classifier.

In this part it makes sense to merge the training and the validation sets for cross-validation (since we would not make use of the validation set otherwise).

## Step 2: 2-layer dense network<a name="step-7.2"></a> ([top](#top-7))
---

In [133]:
def build_model2(input_shape=(1280,), n_hidden=1, n_hidden_neurons=30, n_classes=n_classes, dropout_rate: float = 0.0, l2_alpha: float = 0.0):
    """\
    Build, compile and return a Keras model with hidden dense layers.

    Architecture: drop-out on the input -> ``n_hidden`` ReLU dense layers of
    ``n_hidden_neurons`` units each -> softmax output layer. With
    ``n_hidden=0`` the network reduces to the 1-layer model.

    :param input_shape: shape of one input sample (without the batch axis).
    :param n_hidden: number of hidden dense layers.
    :param n_hidden_neurons: number of units in each hidden layer.
    :param n_classes: number of units of the softmax output layer.
    :param dropout_rate: rate given to the drop-out layer.
    :param l2_alpha: factor of the L2 kernel regularizer applied to every
        dense layer.
    :return: the compiled ``keras.Sequential`` model.

    .. seealso:: https://keras.io/scikit-learn-api/
    """
    # Create model.
    model = keras.Sequential()

    # Add drop-out layer; it also declares the input shape of the network.
    model.add(keras.layers.Dropout(dropout_rate, input_shape=input_shape, seed=RANDOM_SEED))

    # Add hidden layer(s). These parameters used to be accepted but ignored,
    # which made this model identical to the 1-layer one.
    for _ in range(n_hidden):
        model.add(keras.layers.Dense(
            units=n_hidden_neurons, activation=activations.relu,
            # scale=2.0 is the usual variance scaling for ReLU units.
            kernel_initializer=initializers.VarianceScaling(scale=2.0, seed=RANDOM_SEED),
            kernel_regularizer=keras.regularizers.l2(l=l2_alpha)
        ))

    # Add output layer.
    model.add(keras.layers.Dense(
        units=n_classes, activation=activations.softmax,
        kernel_initializer=initializers.VarianceScaling(scale=1.0, seed=RANDOM_SEED),
        kernel_regularizer=keras.regularizers.l2(l=l2_alpha)
    ))

    # Uncomment to print the network summary.
    # model.summary()

    # Compile the model.
    model.compile(
        optimizer=optimizers.Adam(),  # use defaults
        loss=losses.sparse_categorical_crossentropy,
        metrics=['acc']  # cannot use metrics.sparse_categorical_accuracy
    )

    return model
    

# Wrap the factory of this step (build_model2, not the 1-layer build_model)
# so that it exposes the scikit-learn estimator API.
model = keras.wrappers.scikit_learn.KerasClassifier(build_fn=build_model2)

We want to tune the regularization strength of the logistic regression classifier with cross-validated grid search.

**Note:** We have imbalanced classes (e.g. 22.63% bike vs. 8.83% van).

In [None]:
# NOTE(review): `y_train_large` is not defined in the visible part of this
# notebook - presumably the merged training + validation labels; confirm.
df_counts = (pd
    .Series(data=y_train_large)
    .value_counts()
    # value_counts() orders by frequency; re-order by class index so that the
    # rows line up with `label_strs` before the labels are attached.
    .sort_index()
    .to_frame(name='count')
    .set_index(label_strs)
)
df_counts['fraction'] = df_counts['count'] / df_counts['count'].sum()
df_counts.style.format({'fraction': '{:.2%}'})

In [None]:
# Create the estimator: a single-step pipeline whose 'svm' step starts out as a
# LinearSVC.
# NOTE(review): `Pipeline`, `LinearSVC` and `RANDOM_STATE` are not imported or
# defined in the visible part of this notebook (only `RANDOM_SEED` is) - confirm.
svm_pipe = Pipeline([
    ('svm', LinearSVC(random_state=RANDOM_STATE)),
])

In [None]:
# Candidate values: 17 logarithmically spaced values of C between 1e-4 and 1e4,
# and a handful of gamma values for the RBF kernel.
Cs = np.logspace(-4, 4, num=2 * 8 + 1)  # C defaults to 1.0.
gammas = [0.01, 0.1, 1.0, 10.0, 'scale']

# Setup the cross-validated grid search.
# NOTE(review): this rebinds `grid`, which an earlier cell bound to a
# ParameterGrid.
grid = [
    # LinearSVC (minimize: squared hinge loss, strategy: one-vs-rest)
    {
        'svm__C': Cs,
        'svm__class_weight':[None, 'balanced']
    },
    # SVC (kernel: linear, minimize: hinge loss, strategy: one-vs-one)
    {
        'svm': [SVC(random_state=RANDOM_STATE)],
        'svm__kernel': ['linear'],
        'svm__C': Cs,
        'svm__class_weight':[None, 'balanced']
    },
    # SVC (kernel: RBF, minimize: hinge loss, strategy: one-vs-one)
    {
        'svm': [SVC(random_state=RANDOM_STATE)],
        'svm__kernel': ['rbf'],
        'svm__C': Cs,
        'svm__gamma': gammas,
        'svm__class_weight':[None, 'balanced']
    }
]

# NOTE(review): `SVC`, `StratifiedKFold`, `GridSearchCV` and `RANDOM_STATE` are
# not imported or defined in the visible part of this notebook - confirm.
# NOTE(review): `random_state` is passed without `shuffle=True`; confirm whether
# shuffled folds were intended.
cv = StratifiedKFold(n_splits=10, random_state=RANDOM_STATE)
svm_gscv = GridSearchCV(svm_pipe, grid, n_jobs=-1, iid=False, refit=True, cv=cv, return_train_score=True)

In [None]:
# Fit/evaluate the estimator (the trailing ';' suppresses the cell output).
# NOTE(review): `F_train_large` / `y_train_large` are not defined in the visible
# part of this notebook - presumably the merged training + validation data; confirm.
svm_gscv.fit(F_train_large, y_train_large);

In [None]:
# Gather the cross-validation results into a data frame, best mean test score first.
cv_results = svm_gscv.cv_results_

# Column name in the data frame -> key in `cv_results_`.
column_keys = {
    'svm': 'param_svm',
    'kernel': 'param_svm__kernel',
    'C': 'param_svm__C',
    'gamma': 'param_svm__gamma',
    'class_weight': 'param_svm__class_weight',
    'mean_train_score': 'mean_train_score',
    'mean_test_score': 'mean_test_score',
    'std_test_score': 'std_test_score',
    'params': 'params',
}

df_results = pd.DataFrame(
    {column: cv_results[key] for column, key in column_keys.items()}
).sort_values(by='mean_test_score', ascending=False)

In [None]:
# Show the first rows of the results table.
df_results.head()