# Install, import and initialize
First, we import the basic libraries for handling the dataset and training the model.

In [1]:
import os
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras.datasets import fashion_mnist

# Set the random seeds.
# The built-in hash() of a str is salted per interpreter process (unless
# PYTHONHASHSEED is pinned), so seeds derived from hash("...") differ on every
# kernel restart and defeat reproducibility. The previous expression
# `hash(s) % 2**32 - 1` could also evaluate to -1, which np.random.seed rejects.
# A fixed integer seed avoids both problems.
os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Import W&B and login
Log in to the Weights & Biases service with your API token.

In [2]:
import wandb
from wandb.keras import WandbCallback

project_name = 'project-fashion-mnist'
# SECURITY: never paste an API key into the notebook. Provide it through the
# WANDB_API_KEY environment variable (set outside version control), or enter it
# at the login prompt. Any key that was ever committed here should be revoked
# at https://wandb.ai/authorize.
# %env WANDB_API_KEY=<your-api-key>

wandb.login(relogin=True)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: C:\Users\Avni Eshchar/.netrc


True

# Download and prepare the dataset
We download the dataset and normalize it to the 0-1 range

In [3]:
from class_names import class_names

# Fetch Fashion-MNIST as a (train, test) pair of (images, labels) tuples.
train_split, test_split = fashion_mnist.load_data()
x_train, y_train = train_split
x_test, y_test = test_split

# Scale pixel values into [0., 1.]
x_train = x_train / 255.
x_test = x_test / 255.

# Model input shape and the number of target classes
input_shape = (28, 28, 1)
num_classes = len(class_names)

# Report the array shapes of both splits
print('Shape of x_train: ', x_train.shape)
print('Shape of y_train: ', y_train.shape)
print('Shape of x_test: ', x_test.shape)
print('Shape of y_test: ', y_test.shape)

Shape of x_train:  (60000, 28, 28)
Shape of y_train:  (60000,)
Shape of x_test:  (10000, 28, 28)
Shape of y_test:  (10000,)


# Define a model

Here, we define a standard CNN (with convolution and max-pooling) in Keras.

In [4]:
def Model():
    """Build the baseline CNN classifier.

    The network is two stages of two 3x3, 28-filter ReLU convolutions each:
    the first stage ends in 2x2 max-pooling, the second in global average
    pooling. Two ReLU dense layers (128 and 28 units) follow, then a softmax
    layer with ``num_classes`` outputs.

    Reads the module-level ``input_shape`` and ``num_classes``.

    Returns:
        An uncompiled ``keras.models.Model``.
    """
    layers = keras.layers
    # Every convolution in the network shares the same settings.
    conv_kwargs = dict(filters=28, kernel_size=(3, 3), activation='relu')

    inputs = layers.Input(shape=input_shape)

    # Stage 1: conv -> conv -> max-pool
    features = layers.Conv2D(**conv_kwargs)(inputs)
    features = layers.Conv2D(**conv_kwargs)(features)
    features = layers.MaxPooling2D(pool_size=2)(features)

    # Stage 2: conv -> conv -> global average pool
    features = layers.Conv2D(**conv_kwargs)(features)
    features = layers.Conv2D(**conv_kwargs)(features)
    features = layers.GlobalAveragePooling2D()(features)

    # Classifier head
    features = layers.Dense(128, activation='relu')(features)
    features = layers.Dense(28, activation='relu')(features)
    outputs = layers.Dense(num_classes, activation='softmax')(features)

    return keras.models.Model(inputs=inputs, outputs=outputs)

# Train the model

In [7]:
# Initialize wandb with your project name
# The config dict records the run's hyperparameters; "learning_rate" and
# "loss_function" are read back below when compiling, and "epochs" and
# "batch_size" by the training cell. "architecture" and "dataset" are not read
# by any cell shown in this notebook.
run = wandb.init(project=project_name,
                 config={
                     "learning_rate": 0.001,
                     "epochs": 5,
                     "batch_size": 32,
                     "loss_function": "sparse_categorical_crossentropy",
                     "architecture": "CNN",
                     "dataset": "fashion_mnist"
                 })
config = wandb.config  # We'll use this to configure our experiment

# Initialize model like you usually do.
# The Keras backend session is cleared first, then the baseline CNN is built.
keras.backend.clear_session()
model = Model()
model.summary()

# Compile model like you usually do.
# Optimizer learning rate and loss name both come from the W&B config;
# accuracy is tracked under the metric name 'acc'.
optimizer = tf.keras.optimizers.Adam(config.learning_rate) 
model.compile(optimizer, config.loss_function, metrics=['acc'])

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 28, 28, 1)]       0         
                                                                 
 conv2d (Conv2D)             (None, 26, 26, 28)        280       
                                                                 
 conv2d_1 (Conv2D)           (None, 24, 24, 28)        7084      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 12, 12, 28)       0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 10, 10, 28)        7084      
                                                                 
 conv2d_3 (Conv2D)           (None, 8, 8, 28)          7084      
                                                             

In [8]:
# We focus on a subset of images, since this is for human review
# (only the first 32 test images and labels are handed to WandbCallback).
val_images, val_labels = x_test[:32], y_test[:32]

# Train with the epoch count and batch size stored in the W&B config.
# Keras validates on the full test split, while the callback receives the
# 32-sample subset defined above.
# NOTE(review): the test split doubles as validation data here — confirm that is intended.
_ = model.fit(x_train, y_train,
              epochs=config.epochs, 
              batch_size=config.batch_size,
              validation_data=(x_test, y_test),
              callbacks=[WandbCallback(data_type='image', 
                                       validation_data=(val_images, val_labels), 
                                       labels=class_names)])

# Close the W&B run opened by wandb.init.
run.finish()



Epoch 1/5




INFO:tensorflow:Assets written to: d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best\assets


INFO:tensorflow:Assets written to: d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best\assets
[34m[1mwandb[0m: Adding directory to artifact (d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best)... Done. 0.0s


Epoch 2/5




INFO:tensorflow:Assets written to: d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best\assets


INFO:tensorflow:Assets written to: d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best\assets
[34m[1mwandb[0m: Adding directory to artifact (d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best)... Done. 0.0s


Epoch 3/5




INFO:tensorflow:Assets written to: d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best\assets


INFO:tensorflow:Assets written to: d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best\assets
[34m[1mwandb[0m: Adding directory to artifact (d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best)... Done. 0.0s


Epoch 4/5




INFO:tensorflow:Assets written to: d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best\assets


INFO:tensorflow:Assets written to: d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best\assets
[34m[1mwandb[0m: Adding directory to artifact (d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best)... Done. 0.0s


Epoch 5/5




INFO:tensorflow:Assets written to: d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best\assets


INFO:tensorflow:Assets written to: d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best\assets
[34m[1mwandb[0m: Adding directory to artifact (d:\dev\elbit\ML-course\project-2\wandb\run-20230110_195259-13niyo3n\files\model-best)... Done. 0.0s




0,1
acc,▁▅▇▇█
epoch,▁▃▅▆█
loss,█▄▃▂▁
val_acc,▁▃▅▇█
val_loss,█▆▃▂▁

0,1
acc,0.86772
best_epoch,4.0
best_val_loss,0.35881
epoch,4.0
loss,0.35585
val_acc,0.8714
val_loss,0.35881


## Hyperparameters search
At first, I attempted to use W&B Sweeps for the hyperparameter search, but it failed every time, probably because of configuration issues.

So I looked into the most commonly recommended tuners and settled on keras-tuner as one of them.

The model is declared in a `model_builder` function, along with the hyperparameters to tweak.

In [10]:
import keras_tuner as kt
import keras
from keras.layers import Input, Conv2D, MaxPooling2D, Dropout, Dense, Flatten
from keras.losses import SparseCategoricalCrossentropy

def model_builder(hp):
  """Build and compile a tunable CNN for KerasTuner.

  Search space registered on ``hp``:
    * ``hp_conv_units``  - base filter count, 32..64 in steps of 4
    * ``hp_kernel``      - base kernel size, 3..6
    * ``hp_pool_size``   - pooling window, 3..5
    * ``hp_dense_units`` - hidden dense width, 100..200 in steps of 10
    * ``learning_rate``  - Adam learning rate, 0.001..0.01 (log sampling)
    * ``activation``     - convolution activation, "relu" or "tanh"

  The network has three conv + max-pool stages. Stage ``k`` multiplies both
  the base filter count and the base kernel size by 1, 2 and 4 respectively.
  A flatten, one ReLU dense layer and a softmax output layer follow.

  Reads the module-level ``input_shape`` and ``num_classes``.

  Args:
    hp: a ``keras_tuner.HyperParameters`` instance.

  Returns:
    A compiled ``keras.Sequential`` model tracking the 'accuracy' metric.
  """
  # Register the hyperparameters (registration order kept stable).
  base_filters = hp.Int('hp_conv_units', min_value=32, max_value=64, step=4)
  base_kernel = hp.Int('hp_kernel', min_value=3, max_value=6, step=1)
  pool_size = hp.Int('hp_pool_size', min_value=3, max_value=5)
  dense_units = hp.Int('hp_dense_units', min_value=100, max_value=200, step=10)
  learning_rate = hp.Float('learning_rate', min_value=0.001, max_value=0.01, sampling="log")
  conv_activation = hp.Choice("activation", ["relu", "tanh"])

  # Feature extractor: three conv/pool stages with filters and kernel scaled together.
  stack = [Input(shape=input_shape)]
  for scale in (1, 2, 4):
    stack.append(Conv2D(filters=base_filters * scale,
                        kernel_size=base_kernel * scale,
                        activation=conv_activation,
                        padding="SAME"))
    stack.append(MaxPooling2D(pool_size=pool_size, padding="SAME"))

  # Classifier head.
  stack.append(Flatten())
  stack.append(Dense(units=dense_units, activation='relu'))
  stack.append(Dense(units=num_classes, activation='softmax'))

  model = keras.Sequential(stack)

  optimizer = keras.optimizers.Adam(learning_rate=learning_rate)
  # The output layer already applies softmax, hence from_logits=False.
  loss = tf.keras.losses.SparseCategoricalCrossentropy(
      from_logits=False,
      ignore_class=None,
      name='sparse_categorical_crossentropy'
  )
  model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

  return model

# Build once with a fresh HyperParameters object and print the layer summary
# as a sanity check.
# NOTE: this rebinds the global `model` used by the earlier training cells.
model = model_builder(kt.HyperParameters())
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 28, 28, 32)        320       
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 10, 10, 32)       0         
 2D)                                                             
                                                                 
 conv2d_5 (Conv2D)           (None, 10, 10, 64)        73792     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 4, 4, 64)         0         
 2D)                                                             
                                                                 
 conv2d_6 (Conv2D)           (None, 4, 4, 128)         1179776   
                                                                 
 max_pooling2d_3 (MaxPooling  (None, 2, 2, 128)        0

## Perform a Hyper parameters search
In the beginning, I used the whole dataset for the hyperparameter search. That took quite a long time (30+ hours).

I realized that in ML, it is always a question of available time and computer resources, which (in my case) I had neither.

Actually, in hyper search, I only need to reach the best parameters' set, not to have the most trained model (which should come later).

I guessed that working with only a subset of the data, as long as it was large enough for training to converge, would achieve that goal.

So, sampling a fraction of the dataset (of my choosing) allowed me to save time and run several tweaking cycles of the model.

In [9]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

# Requires `kt` and `model_builder` from the hyperparameter-model cell; running
# this cell before that one raises NameError: name 'kt' is not defined.
# Hyperband search over the space declared in model_builder, ranked by
# validation accuracy. Results go under hp_search/predict_fashion_item.
# NOTE(review): overwrite=True presumably restarts the search instead of
# resuming an earlier one — confirm against the KerasTuner docs.
tuner = kt.Hyperband(model_builder,
    objective='val_accuracy',
    max_epochs=10,
    directory='hp_search',
    overwrite=True,
    project_name='predict_fashion_item')

# Early stopping watches val_accuracy with a patience of 3 epochs.
es = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=3)
# NOTE(review): save_best_only is not set, so `monitor` probably does not gate
# when checkpoints are written — confirm against the ModelCheckpoint docs.
mc = ModelCheckpoint('./fashion_mnist.hd5', monitor='val_accuracy', verbose=1)
# Search on a random 1/24 sample of the training set, drawn without
# replacement, to keep the search time manageable.
dataset_fraction = 1 / 24
idx  = np.random.choice(len(x_train), int(len(x_train) * dataset_fraction), replace=False)
X_samples = x_train[idx]
y_samples = y_train[idx]

# validation_split=0.2 holds out 20% of the sample for validation.
tuner.search(X_samples, y_samples, epochs=10, validation_split=0.2, callbacks=[es, mc])

# Get the optimal model with best params
# NOTE(review): presumably this model carries the weights learned on the sampled
# subset during the search rather than a fresh initialisation — confirm.
model = tuner.get_best_models()[0]

NameError: name 'kt' is not defined

# Fit model with the whole dataset based on best params

In [7]:
# model = tuner.hypermodel.build(best_hps)
# Continue training on the full training set, holding out 20% for validation
# and reusing the early-stopping callback `es` from the search cell.
history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=[es],
)

# Report the 1-based epoch with the highest validation accuracy
# (the earliest epoch wins on ties).
val_acc_per_epoch = history.history['val_accuracy']
top_val_acc = max(val_acc_per_epoch)
best_epoch = 1 + val_acc_per_epoch.index(top_val_acc)
print('Best epoch: %d' % (best_epoch,))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Best epoch: 6


# Evaluate model

In [8]:
# Evaluate the model on the test data using `evaluate`
# `results` is printed as-is; per the label below it holds the test loss
# followed by the test accuracy.
print("Evaluate on test data")
results = model.evaluate(x_test, y_test, batch_size=128)
print("test loss, test acc:", results)

Evaluate on test data
test loss, test acc: [0.3160328269004822, 0.9085999727249146]


# Save Trained Best Model

In [9]:
# Persist the trained model. This is the same path the ModelCheckpoint callback
# writes to during the hyperparameter search, so that checkpoint is overwritten.
# NOTE(review): the extension is '.hd5' (not '.h5' / '.hdf5') and the logged
# output shows an `assets` directory, so this appears to be saved as a directory
# rather than a single HDF5 file — confirm the intended format.
model.save('./fashion_mnist.hd5')



INFO:tensorflow:Assets written to: ./fashion_mnist.hd5\assets


INFO:tensorflow:Assets written to: ./fashion_mnist.hd5\assets
