# EfficientNet

This notebook trains a network based on the EfficientNet architecture. The variant and version are chosen in the configuration cell (default: EfficientNetV2-B3).


After training, the model is serialized and uploaded to the W&B project.

In [None]:
import wandb
import pathlib
import shutil
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from typing import List


def load_data(run) -> List[tf.data.Dataset]:
    """
    Downloads the dataset splits from a wandb artifact and loads them.

    Args:
        run: wandb run on which the artifact is declared as an input
            (via ``run.use_artifact``).

    Returns:
        A list ``[train, test, val]`` of tf.data.Datasets, in that order.
    """
    artifact_name = "letters_splits_tfds"
    artifact = run.use_artifact(f"master-thesis/{artifact_name}:latest")

    # Reuse an earlier download when the expected directory already exists;
    # otherwise download and use whatever directory wandb reports.
    artifact_dir = pathlib.Path(
        f"./artifacts/{artifact.name.replace(':', '-')}"
    ).resolve()
    if not artifact_dir.exists():
        artifact_dir = pathlib.Path(artifact.download()).resolve()

    # Prefer tf.data.Dataset.load and fall back to tf.data.experimental.load
    # when it is missing. Detecting the attribute directly avoids parsing
    # tf.__version__, where looking at the minor number alone would
    # misclassify releases with a different major version.
    load_function = getattr(tf.data.Dataset, "load", None)
    if load_function is None:
        load_function = tf.data.experimental.load

    return [
        load_function(str(artifact_dir / split), compression="GZIP")
        for split in ("train", "test", "val")
    ]


def get_number_of_classes(ds: tf.data.Dataset) -> int:
    """
    Returns the number of distinct label values in a dataset.

    Args:
        ds: dataset whose elements are ``(x, y)`` pairs; only ``y`` is read.

    Returns:
        The count of unique label values, or 0 for an empty dataset.
    """
    # Collect unique labels element by element. np.unique flattens its
    # input, so this works for batched label arrays as well as for scalar
    # labels from an unbatched dataset, and an empty dataset yields 0
    # instead of an error from concatenating nothing.
    seen_labels = set()
    for labels in ds.map(lambda x, y: y).as_numpy_iterator():
        seen_labels.update(np.unique(labels).tolist())
    return len(seen_labels)


def preprocess_dataset(ds: tf.data.Dataset, batch_size: int, cache: bool = True) -> tf.data.Dataset:
    """
    Scales the inputs, re-batches the dataset and optionally caches it.

    Args:
        ds: dataset of ``(x, y)`` pairs; it is unbatched before re-batching.
        batch_size: size of the batches in the returned dataset.
        cache: when True, the result is cached and prefetched.

    Returns:
        The transformed dataset.
    """

    def _scale(x, y):
        # Cast to float32 and divide by 255 (presumably 8-bit image data).
        return tf.cast(x, tf.float32) / 255.0, y

    rebatched = ds.map(_scale).unbatch().batch(batch_size)
    if not cache:
        return rebatched
    return rebatched.cache().prefetch(buffer_size=tf.data.AUTOTUNE)


def calculate_model_size_on_disk(path: str) -> int:
    """
    Returns the size, in bytes, of the file located at ``path``.
    """
    file_stats = pathlib.Path(path).stat()
    return file_stats.st_size


def calculate_model_num_parameters(model: tf.keras.Model) -> int:
    """
    Returns the parameter count reported by ``model.count_params()``.
    """
    parameter_count = model.count_params()
    return parameter_count


def calculate_model_flops() -> str:
    """
    Placeholder for a FLOPs calculation.

    Not implemented: the function does no work and returns None.
    """
    return None


def plot_history(history):
    """
    Plots training and validation accuracy per epoch and shows the figure.

    Args:
        history: object whose ``history`` mapping holds the "loss",
            "accuracy" and "val_accuracy" series (one value per epoch).
    """
    _, axes = plt.subplots(1, 1, figsize=(15, 10))

    # Epochs are numbered from 1; the "loss" series determines their count.
    epoch_numbers = range(1, len(history.history["loss"]) + 1)
    for metric_name in ("accuracy", "val_accuracy"):
        axes.plot(epoch_numbers, history.history[metric_name], label=metric_name)

    axes.set_xlabel("Epoch")
    axes.set_ylabel("Accuracy")
    axes.legend(loc="lower right")

    plt.show()

def evaluate_model(model, ds_test, model_name):
    """
    Evaluates the model on the test set and logs the results to wandb.

    Logged values: test loss, test accuracy, number of parameters, size of
    ``./artifacts/<model_name>.h5`` on disk, and the "GFLOPs" entry of the
    active run's summary.

    Args:
        model: compiled model; ``evaluate`` must return (loss, accuracy).
        ds_test: dataset passed to ``model.evaluate``.
        model_name: base name of the serialized model file.
    """
    # The "GFLOPs" summary entry has to exist already; presumably it is
    # written by the WandbCallback when compute_flops is enabled - confirm.
    run_summary = wandb.run.summary
    flops = run_summary["GFLOPs"]

    serialized_path = f"./artifacts/{model_name}.h5"
    disk_size = calculate_model_size_on_disk(serialized_path)
    num_parameters = calculate_model_num_parameters(model)

    test_loss, test_acc = model.evaluate(ds_test)

    results = {
        "test loss": test_loss,
        "test accuracy": test_acc,
        "number of parameters": num_parameters,
        "disk size": disk_size,
        "model flops": flops,
    }
    wandb.log(results)


def evaluate_diacritics_performance(model, ds_test):
    """
    Evaluates test loss and accuracy per diacritic letter and logs to wandb.

    For every class id in the diacritics table the test set is filtered to
    that label, evaluated, and the loss, accuracy and letter are stored
    under the class id (as a string). All results are logged in one
    ``wandb.log`` call.

    Args:
        model: compiled model; ``evaluate`` must return (loss, accuracy).
        ds_test: dataset of ``(x, y)`` pairs.
            NOTE(review): the filter compares ``y`` with a single class id,
            so each element appears to be expected to carry one label -
            confirm against the caller.
    """
    # Class id -> letter for the diacritic characters in the label set.
    diacritics = {
        62: "ą",
        63: "ć",
        64: "ę",
        65: "ł",
        66: "ń",
        67: "ó",
        68: "ś",
        69: "ź",
        70: "ż",
        71: "Ą",
        72: "Ć",
        73: "Ę",
        74: "Ł",
        75: "Ń",
        76: "Ó",
        77: "Ś",
        78: "Ź",
        79: "Ż",
    }

    # log test accuracy on these classes separately to wandb
    diacritics_acc = {}
    for diacritic_label, diacritic_char in diacritics.items():
        ds_test_diacritic = ds_test.filter(lambda x, y: y == diacritic_label)
        test_loss, test_acc = model.evaluate(ds_test_diacritic)
        # wandb.log only accepts string keys, so the integer class id is
        # converted before it is used as the entry name.
        diacritics_acc[str(diacritic_label)] = {
            "loss": test_loss,
            "accuracy": test_acc,
            "label": diacritic_char,
        }

    wandb.log(diacritics_acc)


In [None]:
# Use the "mixed_float16" policy globally; intended to speed up training.
tf.keras.mixed_precision.set_global_policy(policy="mixed_float16")

In [None]:
# Default hyperparameters; passed to wandb.init as the run configuration.
defaults = {
    "batch_size": 32 * 4,
    "epochs": 100,
    "optimizer": "sgd",
    "learning_rate": 0.01,
    "momentum": 0.9,
    "efficientnet_name": "b3",
    "efficientnet_version": 2,
}

MODEL_NAME = f"efficientnetv2-{defaults['efficientnet_name']}"
run = wandb.init(
    project="master-thesis",
    job_type="training",
    name=MODEL_NAME,
    config=defaults,
)

# Hyperparameters are read back from wandb.config rather than from
# `defaults` directly.
opt_name = wandb.config.optimizer
lr = wandb.config.learning_rate
momentum = wandb.config.momentum
bs = wandb.config.batch_size
epochs = wandb.config.epochs
efficientnet_name = wandb.config.efficientnet_name
efficientnet_version = wandb.config.efficientnet_version

In [None]:
# Load the train / test / validation splits from the W&B artifact.
ds_train, ds_test, ds_val = load_data(run)

# The class count is derived from the labels of the validation split.
num_classes = get_number_of_classes(ds_val)

print(f"There are {num_classes} classes")
print(f"Training set has {len(ds_train)} batches")
print(f"Test set has {len(ds_test)} batches")
print(f"Validation set has {len(ds_val)} batches")

# Re-batch every split to the configured batch size. The training and test
# splits are additionally cached and prefetched; validation is not.
ds_train = ds_train.unbatch().batch(bs)
ds_train = ds_train.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
ds_test = ds_test.unbatch().batch(bs)
ds_test = ds_test.cache().prefetch(buffer_size=tf.data.AUTOTUNE)
ds_val = ds_val.unbatch().batch(bs)

In [None]:
from tensorflow.keras.applications import efficientnet_v2, efficientnet

def get_efficientnet_model(input_shape, num_classes, model_name="b0", efficientnet_version=2) -> tf.keras.Sequential:
    """
    Builds an untrained EfficientNet classifier.

    The backbone is created without its top, without pretrained weights and
    with global max pooling, followed by a Dense layer with ``num_classes``
    units and a float32 softmax activation.

    Args:
        input_shape: input shape passed to the backbone constructor.
        num_classes: number of units of the final Dense layer.
        model_name: backbone variant, case-insensitive. "b0"-"b3", "s", "m"
            and "l" for version 2; "b0"-"b7" otherwise.
        efficientnet_version: 2 selects the EfficientNetV2 family; any other
            value selects the original EfficientNet family.

    Returns:
        The assembled (uncompiled) tf.keras.Sequential model.

    Raises:
        ValueError: if ``model_name`` is not a variant of the chosen family.
    """
    # Each family lives in its own module and uses its own class-name
    # prefix, so both are selected together.
    if efficientnet_version == 2:
        effnet_module = efficientnet_v2
        class_prefix = "EfficientNetV2"
        supported_variants = ("b0", "b1", "b2", "b3", "s", "m", "l")
    else:
        effnet_module = efficientnet
        class_prefix = "EfficientNet"
        supported_variants = tuple(f"b{index}" for index in range(8))

    variant = str(model_name).lower()
    if variant not in supported_variants:
        # Fail early with a clear message instead of handing None to
        # tf.keras.Sequential.
        raise ValueError(
            f"Unsupported model_name {model_name!r} for EfficientNet version "
            f"{efficientnet_version!r}; expected one of {supported_variants}"
        )

    build_backbone = getattr(effnet_module, f"{class_prefix}{variant.upper()}")
    base_model = build_backbone(
        include_top=False,
        input_shape=input_shape,
        weights=None,
        pooling="max",
    )

    model = tf.keras.Sequential(
        [
            base_model,
            tf.keras.layers.Dense(num_classes),
            tf.keras.layers.Activation('softmax', dtype='float32')  # to work correctly with mixed precision
        ]
    )
    return model

In [None]:
def train(config=None):
    """
    Trains one EfficientNet model inside a wandb run (sweep entry point).

    Reads optimizer, learning_rate, batch_size and epochs from
    ``wandb.config``. The module-level names ``momentum``,
    ``efficientnet_name`` and ``efficientnet_version`` are read as well, so
    the cell that defines them has to be executed first.

    Args:
        config: configuration forwarded to ``wandb.init``; may be None.
    """
    with wandb.init(
        project="master-thesis",
        job_type="sweep",
        config=config,
        settings=wandb.Settings(start_method="thread"),
    ) as run:
        # hyperparameters
        opt_name = wandb.config.optimizer
        lr = wandb.config.learning_rate
        bs = wandb.config.batch_size
        epochs = wandb.config.epochs

        # The data is loaded once; the test split is not used for training.
        ds_train, _, ds_val = load_data(run)

        num_classes = get_number_of_classes(ds_val)
        ds_train = preprocess_dataset(ds_train, batch_size=bs)
        ds_val = preprocess_dataset(ds_val, batch_size=bs)

        model = get_efficientnet_model(
            input_shape=[32, 32, 1],
            num_classes=num_classes,
            model_name=efficientnet_name,
            efficientnet_version=efficientnet_version,
        )

        # Only some optimizers take a `momentum` argument; passing it to the
        # others (adam, adagrad, adadelta, nadam) makes their construction
        # fail, so it is added only where it is supported.
        optimizer_config = {'learning_rate': lr}
        if str(opt_name).lower() in ("sgd", "rmsprop"):
            optimizer_config['momentum'] = momentum

        opt = tf.keras.optimizers.get({
            'class_name': opt_name,
            'config': optimizer_config,
        })

        model.compile(
            optimizer=opt,
            # The model already ends in a softmax layer, so the loss receives
            # probabilities, not logits.
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
            metrics=["accuracy"],
        )

        wandb_callback = wandb.keras.WandbCallback(
            save_model=False,
            compute_flops=True,
        )

        model.fit(
            ds_train,
            epochs=epochs,
            validation_data=ds_val,
            callbacks=[wandb_callback],
        )

In [None]:
# Sweep definition: random search that maximizes "val_accuracy" over batch
# size, learning rate and optimizer, with a fixed number of epochs.
sweep_config = dict(
    method="random",
    metric=dict(goal="maximize", name="val_accuracy"),
    parameters=dict(
        batch_size=dict(values=[32, 64, 128, 256, 512]),
        epochs=dict(value=60),
        learning_rate=dict(values=[1e-2, 1e-3, 1e-4, 1e-5]),
        optimizer=dict(
            values=["adam", "sgd", "adagrad", "adadelta", "rmsprop", "nadam"]
        ),
    ),
)

# Register the sweep with the W&B project; the returned id is what the
# agent uses to request runs.
sweep_id = wandb.sweep(sweep_config, project="master-thesis")

In [None]:
# Run the sweep agent: calls train() for up to 20 sweep runs.
wandb.agent(sweep_id, function=train, count=20)