In [None]:
import sklearn
import tensorflow as tf
from tensorflow import keras

# Report the versions of the main libraries this notebook runs against.
version_report = (
    ("sklearn: {}", sklearn),
    ("tensorflow: {}", tf),
)
for template, module in version_report:
    print(template.format(module.__version__))

import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

In [None]:
# Ask TensorFlow which physical GPU devices it can see; the result is shown as the cell output.
tf.config.experimental.list_physical_devices('GPU')

### Fashion MNIST dataset

In [None]:
# Load the Fashion MNIST train/test splits bundled with Keras.
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Divide by 255.0 (presumably mapping 8-bit pixel intensities to [0, 1] — confirm against the dataset docs).
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0

# The first n_valid samples of the full training set become the validation set,
# the remainder is used for training.
n_valid = 5000
X_valid, y_valid = X_train_full[:n_valid], y_train_full[:n_valid]
X_train, y_train = X_train_full[n_valid:], y_train_full[n_valid:]

# Seed both random number generators used downstream.
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
# Print the shape of the training array, then display the values of sample 10 as the cell output.
print ( X_train.shape )
X_train[10]

In [None]:
# Index of the training sample to visualise (also used further down to look up its label).
i_entry = 20

# Draw the sample as an image with the "binary" colour map and hide the axes.
fig, ax = plt.subplots()
ax.imshow( X_train[i_entry], cmap="binary" )
ax.axis('off')
plt.show()

In [None]:
# Human-readable class names; a label value from y_train is used as the index into this list.
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
               "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]
# Class name of the training sample selected by i_entry.
class_names[ y_train[i_entry] ]

### Scale inputs

In [None]:
# Per-pixel mean and standard deviation, computed over the training samples only (axis 0)
# and then applied unchanged to the validation and test sets.
X_mean = X_train.mean( axis=0 )
X_std = X_train.std( axis=0 )

# A pixel that is constant across the training set has zero standard deviation;
# dividing by it would produce NaN/inf inputs. Use 1 for those pixels instead, so they
# are only mean-centred. Pixels with a non-zero std are scaled exactly as before.
X_std_safe = np.where( X_std > 0, X_std, 1.0 )

X_train_scaled = ( X_train - X_mean ) / X_std_safe
X_valid_scaled = ( X_valid - X_mean ) / X_std_safe
X_test_scaled  = ( X_test - X_mean ) / X_std_safe

# Display one scaled sample as a sanity check.
X_train_scaled[10]

### Define model build function

In [None]:
def build_model(n_hidden=1, n_neurons=50, learning_rate=5e-4, input_shape=[28,28], dropout=0.20):
    """Build and compile a dense softmax classifier with optional dropout.

    Architecture: Flatten -> n_hidden x [Dropout, Dense(elu, he_normal)]
    -> Dropout -> Dense(10, softmax). Dropout layers are only inserted when
    ``dropout > 0``. The chosen hyperparameters are printed before building.

    Args:
        n_hidden: Number of hidden Dense layers.
        n_neurons: Number of units in each hidden layer.
        learning_rate: Learning rate passed to the Nadam optimizer.
        input_shape: Shape of a single input sample, passed to the Flatten layer.
            The list default is never mutated here.
        dropout: Dropout rate applied in front of every Dense layer;
            a value <= 0 disables dropout entirely.

    Returns:
        The compiled Sequential model (sparse categorical cross-entropy loss,
        Nadam optimizer, accuracy metric).
    """
    print( "Building model with:" )
    print( "Number of hidden layers: {}".format(n_hidden) )
    print( "Number of neurons per layer: {}".format(n_neurons) )
    print( "Learning rate: {}".format(learning_rate) )
    print( "Input shape: {}".format(input_shape) )
    print( "Dropout rate: {}".format(dropout) )

    use_dropout = dropout > 0.

    model = keras.models.Sequential()
    model.add( keras.layers.Flatten(input_shape=input_shape) )
    for _ in range(n_hidden):
        if use_dropout:
            model.add( keras.layers.Dropout(rate=dropout) )
        model.add( keras.layers.Dense(n_neurons, activation="elu", kernel_initializer="he_normal") )
    # Dropout in front of the output layer as well.
    if use_dropout:
        model.add( keras.layers.Dropout(rate=dropout) )
    model.add( keras.layers.Dense(10, activation="softmax") )

    # `learning_rate` is the supported keyword; the old `lr` alias is deprecated
    # and rejected by newer Keras optimizers.
    # Alternative optimizer:
    #optimizer = keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9, nesterov=True)
    optimizer = keras.optimizers.Nadam(learning_rate=learning_rate)
    model.compile( loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])

    return model

### Define training callbacks

In [None]:
def get_run_logdir(log_dir):
    """Return a path under ``log_dir`` named after the current local time.

    The last path component has the form ``run_YYYY_MM_DD-HH_MM_SS``.
    Only the path string is built here; no directory is created.
    """
    from time import strftime
    timestamped_name = strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(log_dir, timestamped_name)

In [None]:
def callbacks(patience=10, log_dir=""):
    """Assemble the list of Keras callbacks used for training.

    Args:
        patience: Patience handed to EarlyStopping (created with
            ``restore_best_weights=True``); a value <= 0 leaves early stopping out.
        log_dir: Root directory for TensorBoard logs. When non-empty, a
            time-stamped run directory below it is printed and a TensorBoard
            callback pointing at it is added; when empty, TensorBoard is left out.

    Returns:
        A new list with the selected callbacks (possibly empty).
    """
    selected = []

    # Optional early stopping.
    if patience > 0:
        selected.append(
            keras.callbacks.EarlyStopping( patience=patience, restore_best_weights=True )
        )

    # Optional TensorBoard logging into a per-run directory.
    if log_dir:
        run_logdir = get_run_logdir(log_dir)
        print ( "Log dir: {}".format(run_logdir) )
        selected.append( keras.callbacks.TensorBoard( run_logdir ) )

    return selected

In [None]:
# Root directory for the TensorBoard logs; callbacks() derives a time-stamped run directory below it.
log_dir="fashion_mnist_logs"
# Early stopping with patience 10 plus TensorBoard logging.
callbacks_ = callbacks(patience=10, log_dir=log_dir)
# Display the created callback objects as the cell output.
callbacks_

### Iterate over different learning rates

In [None]:
def find_max_learning_rate( lr_init=1e-4, lr_end=5e-2, steps=20, epochs=30, model_build_fn=build_model, *build_fn_args, **build_fn_kwargs ):
    """Train a fresh model at each learning rate of a geometric scan.

    The learning rate starts at ``lr_init`` and is multiplied by a constant
    factor after every run so that it reaches ``lr_end`` after ``steps``
    multiplications, i.e. ``steps + 1`` models are trained. Every model is
    fitted on the module-level ``X_train_scaled`` / ``y_train`` and validated
    on ``X_valid_scaled`` / ``y_valid``, using the callbacks returned by
    ``callbacks(patience=10)``.

    Args:
        lr_init: First learning rate of the scan.
        lr_end: Last learning rate of the scan.
        steps: Number of multiplicative steps between ``lr_init`` and
            ``lr_end``. With ``steps == 0`` a single model is trained at ``lr_init``.
        epochs: Maximum number of epochs for each fit.
        model_build_fn: Callable returning a compiled model; it must accept a
            ``learning_rate`` keyword argument.
        *build_fn_args: Extra positional arguments forwarded to ``model_build_fn``.
        **build_fn_kwargs: Extra keyword arguments forwarded to ``model_build_fn``
            (must not contain ``learning_rate``, which is set by the scan).

    Returns:
        dict with the keys ``'learning_rate'``, ``'loss'``, ``'accuracy'``,
        ``'val_loss'`` and ``'val_accuracy'``. ``'learning_rate'`` holds one
        value per run; every other key holds, per run, the corresponding entry
        of the fit's ``history`` dict.
    """
    history_keys_ = ('loss', 'accuracy', 'val_loss', 'val_accuracy')
    results_ = { 'learning_rate': [] }
    for key_ in history_keys_:
        results_[key_] = []

    # Constant ratio between consecutive learning rates. With steps == 0 there is
    # a single run at lr_init, so the ratio is irrelevant (and 1/steps is undefined).
    c_ = (lr_end/lr_init) ** (1/steps) if steps > 0 else 1.0
    lr_ = lr_init
    for _ in range( steps + 1 ):
        # Release the global Keras state left behind by the previously built model,
        # so that building many models in this loop does not keep accumulating it.
        keras.backend.clear_session()

        results_['learning_rate'].append( lr_ )
        model_ = model_build_fn( *build_fn_args, **build_fn_kwargs, learning_rate=lr_ )
        callbacks_ = callbacks(patience=10)
        history_ = model_.fit( X_train_scaled, y_train, epochs=epochs, validation_data=(X_valid_scaled, y_valid), callbacks=callbacks_ )
        for key_ in history_keys_:
            results_[key_].append( history_.history[key_] )
        # Update learning rate
        lr_ = lr_ * c_

    return results_

In [None]:
# Model hyperparameters held fixed during the scan; they are forwarded to build_model.
scan_model_kwargs = dict(
    n_hidden=5,
    n_neurons=100,
    input_shape=[28,28],
    dropout=0.40,
)

# Scan the learning rate from 1e-4 to 2e-2 in 10 multiplicative steps, up to 20 epochs per run.
results = find_max_learning_rate(
    lr_init=1e-4,
    lr_end=2e-2,
    steps=10,
    epochs=20,
    model_build_fn=build_model,
    **scan_model_kwargs
)

In [None]:
# Plot one curve per scanned learning rate for the selected metric.
metrics_ = 'val_loss'

# Label every curve with its learning rate in scientific notation. Rounding to
# 4 decimals (as done previously) can give two nearby learning rates, e.g.
# 1.0e-4 and 1.36e-4, the same column label.
columns = ["lr_{:.2e}".format(lr_) for lr_ in results['learning_rate']]

# One Series per run: the number of rows follows the longest history instead of a
# hard-coded epoch count, and shorter runs are padded with NaN.
df = pd.DataFrame(
    { col_: pd.Series( results[ metrics_ ][i_lr_] ) for i_lr_, col_ in enumerate(columns) },
    columns=columns
)

ax = df.plot( figsize=(12,10) )
ax.set_xlabel( "epoch" )
ax.set_ylabel( metrics_ )
ax.set_yscale('log')
plt.show()

### Hyperparameter scan

In [None]:
#from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV

def build_fn_(n_hidden, n_neurons):
    """Build a model where only depth and width vary; the other hyperparameters are fixed."""
    return build_model(n_hidden, n_neurons, learning_rate=8e-4, input_shape=[28,28], dropout=0.40)

# Wrap the build function so that it can be used as a scikit-learn estimator.
keras_clf = keras.wrappers.scikit_learn.KerasClassifier( build_fn_ )

# Grid of hyperparameter values to scan.
hidden_layer_counts = np.arange(1,5)
neuron_counts = [10,20,50,100]
param_grid = [
    { "n_hidden": hidden_layer_counts, "n_neurons": neuron_counts }
    ]

grid_search = GridSearchCV( keras_clf, param_grid, cv=4 )

# Early stopping only (no TensorBoard logging) for the scan.
callbacks_ = callbacks(patience=10)
print ( callbacks_ )

# Extra keyword arguments handed to GridSearchCV.fit along with the training data.
fit_options = dict( epochs=100, validation_data=(X_valid_scaled, y_valid), callbacks=callbacks_ )
grid_search.fit( X_train_scaled, y_train, **fit_options )

### Build model

In [None]:
# Hyperparameters of the model that is trained below.
final_model_kwargs = dict(
    n_hidden=5,
    n_neurons=100,
    learning_rate=8e-4,
    input_shape=[28,28],
    dropout=0.40,
)
model = build_model( **final_model_kwargs )
model.summary()

In [None]:
# Early stopping (patience 10) plus TensorBoard logging below "fashion_mnist_logs".
log_dir = "fashion_mnist_logs"
callbacks_ = callbacks(patience=10, log_dir=log_dir)
print ( callbacks_ )

# Fit for at most 100 epochs, validating on the held-out validation split.
validation_set = (X_valid_scaled, y_valid)
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=100,
    validation_data=validation_set,
    callbacks=callbacks_,
)
# Display the recorded training history as the cell output.
history.history

In [None]:
# One column per entry of the training history, plotted on a shared axis.
history_df = pd.DataFrame( history.history )
history_df.plot( figsize=(12,10) )

### Evaluate on training data (without dropout)

In [None]:
# Score the trained model on the scaled training set (per the section heading: evaluation without dropout).
model.evaluate( X_train_scaled, y_train )

In [None]:
# Print the built-in documentation of the scikit-learn wrapper class used in the hyperparameter scan.
# NOTE(review): looks like a leftover exploration aid — consider dropping it from the finished notebook.
help(keras.wrappers.scikit_learn.KerasClassifier)