# Install, Import, and Log In

In [None]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import fashion_mnist

# Set the random seeds
os.environ['TF_CUDNN_DETERMINISTIC'] = '1' 
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
tf.random.set_seed(hash("by removing stochasticity") % 2**32 - 1)

### Step 0: Install W&B

In [11]:
%%capture
!pip install wandb

### Step 1: Import W&B and Login

In [None]:
import wandb
from wandb.keras import WandbCallback

project_name = 'project-fashion-mnist'
# %env WANDB_API_KEY='1af04e33e3d441eb82eb612e2c001eddec29bccb'

wandb.login(relogin=True)

> Side note: If this is your first time using W&B or you are not logged in, the link that appears after running `wandb.login` will take you to sign-up/login page. Signing up is easy!

# Download and Prepare the Dataset


In [105]:
from class_names import class_names

(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Subsetting train data and normalizing to [0., 1.]
x_train, x_test = x_train / 255., x_test / 255.

input_shape = (28, 28, 1)
num_classes = len(class_names)

print('Shape of x_train: ', x_train.shape)
print('Shape of y_train: ', y_train.shape)
print('Shape of x_test: ', x_test.shape)
print('Shape of y_test: ', y_test.shape)
print('Num of classes: ', num_classes)

Shape of x_train:  (60000, 28, 28)
Shape of y_train:  (60000,)
Shape of x_test:  (10000, 28, 28)
Shape of y_test:  (10000,)
Num of classes:  10


array([[[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]],

       ...,

       [[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0.

# Define the Model

Here, we define a standard CNN (with convolution and max-pooling) in Keras.

In [None]:
def Model():
    inputs = keras.layers.Input(shape=input_shape)

    x = keras.layers.Conv2D(filters=28, kernel_size=(3, 3), activation='relu')(inputs)
    x = keras.layers.Conv2D(filters=28, kernel_size=(3, 3), activation='relu')(x)
    x = keras.layers.MaxPooling2D(pool_size=2)(x)

    x = keras.layers.Conv2D(filters=28, kernel_size=(3, 3), activation='relu')(x)
    x = keras.layers.Conv2D(filters=28, kernel_size=(3, 3), activation='relu')(x)
    x = keras.layers.GlobalAveragePooling2D()(x)

    x = keras.layers.Dense(128, activation='relu')(x)
    x = keras.layers.Dense(28, activation='relu')(x)

    outputs = keras.layers.Dense(num_classes, activation='softmax')(x)

    return keras.models.Model(inputs=inputs, outputs=outputs, )

# Train the Model

In [None]:
# Initialize wandb with your project name
run = wandb.init(project=project_name,
                 config={
                     "learning_rate": 0.001,
                     "epochs": 5,
                     "batch_size": 32,
                     "loss_function": "sparse_categorical_crossentropy",
                     "architecture": "CNN",
                     "dataset": "fashion_mnist"
                 })
config = wandb.config  # We'll use this to configure our experiment

# Initialize model like you usually do.
tf.keras.backend.clear_session()
model = Model()
model.summary()

# Compile model like you usually do.
optimizer = tf.keras.optimizers.Adam(config.learning_rate) 
model.compile(optimizer, config.loss_function, metrics=['acc'])

In [None]:
# We focus on a subset of images, since this is for human review
val_images, val_labels = x_test[:32], y_test[:32]

_ = model.fit(x_train, y_train,
              epochs=config.epochs, 
              batch_size=config.batch_size,
              validation_data=(x_test, y_test),
              callbacks=[WandbCallback(data_type='image', 
                                       validation_data=(val_images, val_labels), 
                                       labels=class_names)])

run.finish()

In [None]:
model.save('./fashion_mnist.hd5')

# Test Hyperparameters with Sweeps

In [None]:
# sweep_configuration = {
#     'method': 'bayes',
#     'name': 'sweep',
#     'metric': {
#         'goal': 'minimize', 
#         'name': 'best_epoch'
#     },
#     'parameters': {
#         'loss_function': {
#             'values': ['sparse_categorical_crossentropy'],
#             'distribution': 'categorical'
#         },
#         'epochs': {
#             'max': 20,
#             'min': 5,
#             'distribution': 'int_uniform'
#         },
#         'learning_rate': {
#             'max': 0.006,
#             'min': 0.0015,
#             'distribution': 'uniform'
#         },
#         'batch_size': {
#             'max': 2048,
#             'min': 512,
#             'distribution': 'int_uniform'
#         }
#      }
# }

# sweep_id = wandb.sweep(sweep=sweep_configuration, project=project_name)
# wandb.agent(f'eshcharc/{project_name}/{sweep_id}')

In [146]:
import keras_tuner as kt

def model_builder(hp):
  hp_conv_units = hp.Int('hp_conv_units', min_value=32, max_value=128, step=16)
  hp_kernel = hp.Int('hp_kernel', min_value=3, max_value=10, step=1)
  hp_dropout_rate = hp.Choice('hp_dropout_rate', [0.25, 0.5, 0.75])
  hp_pool_size = hp.Int('hp_pool_size', min_value=3, max_value=5)
  hp_dense_units = hp.Int('hp_dense_units', min_value=100, max_value=200, step=10)
  hp_learning_rate = hp.Float('learning_rate', min_value=0.001, max_value=0.005, sampling="log")
  ht_activation = hp.Choice("activation", ["relu", "tanh"])

  model = keras.Sequential([
    keras.layers.Input(shape=input_shape),

    keras.layers.Conv2D(filters=hp_conv_units, kernel_size=hp_kernel, activation=ht_activation),
    # keras.layers.Dropout(rate=hp_dropout_rate),
    keras.layers.Conv2D(filters=hp_conv_units, kernel_size=hp_kernel, activation=ht_activation),
    # keras.layers.Dropout(rate=hp_dropout_rate),
    # keras.layers.MaxPooling2D(pool_size=hp_pool_size),

    keras.layers.Conv2D(filters=hp_conv_units, kernel_size=hp_kernel, activation=ht_activation),
    # keras.layers.Dropout(rate=hp_dropout_rate),
    keras.layers.Conv2D(filters=hp_conv_units, kernel_size=hp_kernel, activation=ht_activation),
    # keras.layers.Dropout(rate=hp_dropout_rate),
    keras.layers.GlobalAveragePooling2D(),

    keras.layers.Dense(units=hp_dense_units, activation='softmax')
  ])

  model.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
  
  return model

model = model_builder(kt.HyperParameters())
# model.summary()

In [150]:
tuner = kt.Hyperband(model_builder,
    objective='val_accuracy',
    max_epochs=10,
    factor=3,
    directory='hp_search',
    project_name='fashion_item_predict2')

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)

tuner.search(x_train[:300], y_train[:300], epochs=10, validation_split=0.2, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.get_config().values)

INFO:tensorflow:Reloading Oracle from existing project hp_search\fashion_item_predict2\oracle.json
INFO:tensorflow:Reloading Tuner from hp_search\fashion_item_predict2\tuner0.json
INFO:tensorflow:Oracle triggered exit
<built-in method values of dict object at 0x0000026F3E038280>


In [None]:
model = tuner.hypermodel.build(best_hps)
history = model.fit(x_train, y_train, epochs=50, validation_split=0.2)

val_acc_per_epoch = history.history['val_accuracy']
best_epoch = val_acc_per_epoch.index(max(val_acc_per_epoch)) + 1
print('Best epoch: %d' % (best_epoch,))