# Neural architecture search using sweep

After training, the model is serialized and uploaded to the W&B project.

In [2]:
import wandb
import tensorflow as tf
import numpy as np
import pathlib
import shutil
from typing import List

def load_data(run) -> List[tf.data.Dataset]:
    """
    Download the dataset splits from a W&B artifact and load them.

    Args:
        run: Active W&B run; the artifact is requested through
            ``run.use_artifact`` so it is recorded as an input of the run.

    Returns:
        The ``train``, ``test`` and ``val`` datasets, in that order.
    """
    artifact_name = "letters_splits_tfds"
    artifact = run.use_artifact(f"master-thesis/{artifact_name}:latest")

    # Skip the download when the expected local directory is already present.
    artifact_dir = pathlib.Path(
        f"./artifacts/{artifact.name.replace(':', '-')}"
    ).resolve()
    if not artifact_dir.exists():
        artifact_dir = pathlib.Path(artifact.download()).resolve()

    # Prefer tf.data.Dataset.load and fall back to tf.data.experimental.load
    # on TensorFlow releases that do not provide it yet. Detecting the
    # attribute is more robust than parsing the minor component of
    # tf.__version__, which ignores the major version entirely.
    load_function = getattr(tf.data.Dataset, "load", None)
    if load_function is None:
        load_function = tf.data.experimental.load

    return [
        load_function(str(artifact_dir / split), compression="GZIP")
        for split in ("train", "test", "val")
    ]

def get_number_of_classes(ds: tf.data.Dataset) -> int:
    """
    Count the distinct label values present in a dataset.

    The dataset is iterated once in full. Its elements must be
    ``(features, labels)`` pairs whose labels are arrays of at least one
    dimension, because the per-element labels are joined with
    ``np.concatenate``.
    """
    labels_only = ds.map(lambda _features, label: label)
    all_labels = np.concatenate(list(labels_only.as_numpy_iterator()))
    return np.unique(all_labels).size

def preprocess_dataset(ds: tf.data.Dataset, batch_size: int, cache: bool = True) -> tf.data.Dataset:
    """
    Scale the inputs and re-batch a dataset of ``(x, y)`` pairs.

    Args:
        ds: Batched dataset; it is unbatched before being batched again.
        batch_size: Size of the batches in the returned dataset.
        cache: When true, the result is cached and prefetched with
            ``tf.data.AUTOTUNE``; when false, neither is applied.

    Returns:
        The dataset with ``x`` cast to float32 and divided by 255.0.
    """

    def _scale(features, label):
        return tf.cast(features, tf.float32) / 255.0, label

    rebatched = ds.map(_scale).unbatch().batch(batch_size)
    if not cache:
        return rebatched
    return rebatched.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

def calculate_model_size_on_disk(path: str) -> int:
    """Return the size in bytes of the file at ``path``."""
    file_stats = pathlib.Path(path).stat()
    return file_stats.st_size

def calculate_model_num_parameters(model: tf.keras.Model) -> int:
    """Return the parameter count reported by ``model.count_params()``."""
    total_params = model.count_params()
    return total_params

def calculate_model_flops(model: tf.keras.Model) -> str:
    """
    Placeholder for a FLOPs calculation.

    Not implemented: ``model`` is ignored and ``None`` is returned, despite
    the ``str`` return annotation.
    """
    return None

In [3]:
# Report the devices TensorFlow can see. Both lines use the stable
# tf.config.list_physical_devices API rather than mixing it with the
# deprecated tf.config.experimental alias.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("Available devices: ", tf.config.list_physical_devices())

Num GPUs Available:  0
Available devices:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [None]:
# Set mixed precision policy for faster training
# tf.keras.mixed_precision.set_global_policy('mixed_float16')

In [None]:
def get_model(
    input_shape: tuple,
    num_classes: int,
    num_conv_layers: int,
    num_filters: int,
    kernel_size: int,
    pool_size: int,
    dropout_rate: float,
    dense_layers: List[int],
) -> tf.keras.Model:
    """
    Build a CNN classifier from the given hyperparameters.

    The network is ``num_conv_layers`` blocks of Conv2D -> MaxPooling2D ->
    Dropout, then Flatten, then one Dense -> Dropout pair per entry of
    ``dense_layers``, and finally a ``num_classes``-way softmax.

    Args:
        input_shape: Shape of one input sample, without the batch dimension.
        num_classes: Number of output classes.
        num_conv_layers: Number of convolutional blocks.
        num_filters: Filters in every Conv2D layer.
        kernel_size: Kernel size of every Conv2D layer.
        pool_size: Pool size of every MaxPooling2D layer.
        dropout_rate: Rate used by every Dropout layer.
        dense_layers: Units of each hidden Dense layer, in order.

    Returns:
        An uncompiled model whose outputs are softmax probabilities.
    """
    inputs = tf.keras.Input(shape=input_shape)
    x = inputs

    # NOTE(review): every pooling layer shrinks the spatial dimensions, so
    # deep stacks with a large pool_size on a small input may leave nothing
    # to pool - confirm the sweep ranges against the input size.
    for _ in range(num_conv_layers):
        x = tf.keras.layers.Conv2D(
            num_filters, kernel_size, padding="same", activation="relu"
        )(x)
        x = tf.keras.layers.MaxPooling2D(pool_size)(x)
        x = tf.keras.layers.Dropout(dropout_rate)(x)
    x = tf.keras.layers.Flatten()(x)

    for num_units in dense_layers:
        x = tf.keras.layers.Dense(num_units, activation="relu")(x)
        x = tf.keras.layers.Dropout(dropout_rate)(x)

    # Produce logits first and apply softmax in a separate float32 layer so
    # the output works correctly with mixed precision. Previously this
    # Activation layer was constructed but never connected to the graph.
    logits = tf.keras.layers.Dense(num_classes)(x)
    outputs = tf.keras.layers.Activation("softmax", dtype="float32")(logits)

    return tf.keras.Model(inputs=inputs, outputs=outputs)


In [None]:
def train(config=None):
    """
    Run one sweep trial: build, train and evaluate a CNN, logging to W&B.

    Hyperparameters are read from ``wandb.config`` after the run has been
    initialised. The best model by validation accuracy is checkpointed to
    ``./artifacts/<run name>.h5``; test metrics, parameter count, checkpoint
    size and (when available) FLOPs are logged at the end.

    Args:
        config: Optional configuration forwarded to ``wandb.init``.
    """
    with wandb.init(
        project="master-thesis",
        job_type="sweep",
        config=config,
        settings=wandb.Settings(start_method="thread"),
    ) as run:
        cfg = wandb.config
        model_path = f"./artifacts/{run.name}.h5"
        batch_size = cfg.batch_size

        ds_train, ds_test, ds_val = load_data(run)

        num_classes = get_number_of_classes(ds_val)
        ds_train = preprocess_dataset(ds_train, batch_size=batch_size)
        ds_val = preprocess_dataset(ds_val, batch_size=batch_size)
        ds_test = preprocess_dataset(ds_test, batch_size=batch_size, cache=False)

        # A size of 0 means "omit this dense layer".
        dense_layers = [
            units
            for units in (
                cfg.dense_layer_1,
                cfg.dense_layer_2,
                cfg.dense_layer_3,
                cfg.dense_layer_4,
            )
            if units != 0
        ]

        model = get_model(
            input_shape=(28, 28, 1),
            num_classes=num_classes,
            num_conv_layers=cfg.num_conv_layers,
            num_filters=cfg.num_filters,
            kernel_size=cfg.kernel_size,
            pool_size=cfg.pool_size,
            dropout_rate=cfg.dropout_rate,
            dense_layers=dense_layers,
        )

        opt = tf.keras.optimizers.get({
            'class_name': cfg.optimizer,
            'config': {
                'learning_rate': cfg.learning_rate,
                'beta_1': cfg.beta1,
                'beta_2': cfg.beta2,
                'epsilon': cfg.epsilon
            }
        })

        # get_model ends in a softmax, so the loss receives probabilities,
        # not logits; from_logits=True would apply softmax a second time.
        model.compile(
            optimizer=opt,
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
            metrics=["accuracy"],
        )

        # save the best model
        checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=model_path,
            save_weights_only=False,
            monitor="val_accuracy",
            mode="max",
            save_best_only=True,
        )
        wandb_callback = wandb.keras.WandbCallback(
            save_model=False,
            compute_flops=True,
        )
        early_stop = tf.keras.callbacks.EarlyStopping(
            monitor="val_accuracy", patience=5
        )

        model.fit(
            ds_train,
            epochs=cfg.epochs,
            validation_data=ds_val,
            callbacks=[wandb_callback, checkpoint_callback, early_stop],
        )

        # The W&B callback stores the FLOPs estimate in the run summary.
        # NOTE(review): the key appears to be spelled "GFLOPs" by the
        # callback - confirm against the installed wandb version. Both
        # spellings are tried, and a missing value no longer aborts the run.
        flops = None
        for flops_key in ("GFLOPs", "GFLOPS"):
            try:
                flops = wandb.run.summary[flops_key]
            except KeyError:
                continue
            break

        disk_size = calculate_model_size_on_disk(model_path)
        num_parameters = calculate_model_num_parameters(model)

        # NOTE(review): this evaluates the in-memory model, not the
        # checkpoint file whose size is reported - confirm that is intended.
        test_loss, test_acc = model.evaluate(ds_test)

        metrics = {
            "test loss": test_loss,
            "test accuracy": test_acc,
            "number of parameters": num_parameters,
            "disk size": disk_size,
        }
        if flops is not None:
            metrics["model flops"] = flops
        wandb.log(metrics)

In [None]:
# Sweep definition: Bayesian search that maximizes "val_accuracy", with
# Hyperband early termination.
sweep_config = dict(
    method="bayes",
    metric=dict(goal="maximize", name="val_accuracy"),
    early_terminate=dict(type="hyperband", min_iter=3),
    parameters={
        # training setup
        "batch_size": {"values": [32, 64, 128, 256]},
        "epochs": {"value": 20},
        "learning_rate": {"values": [1e-2, 1e-3, 1e-4]},
        # optimizer (fixed)
        "optimizer": {"value": "adam"},
        "beta1": {"value": 0.9},
        "beta2": {"value": 0.999},
        "epsilon": {"value": 0.1},
        # convolutional part
        "num_conv_layers": {"values": [1, 2, 3, 4, 5]},
        "num_filters": {"values": [32, 64, 128, 256, 512]},
        "kernel_size": {"value": 3},
        "pool_size": {"values": [2, 3, 4]},
        "dropout_rate": {"values": [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]},
        # dense head; train() omits a layer whose size is 0
        "dense_layer_1": {"values": [32, 64, 128, 256, 512]},
        "dense_layer_2": {"values": [64, 128, 256, 512, 1024]},
        "dense_layer_3": {"values": [128, 256, 512, 1024, 2048, 4096]},
        "dense_layer_4": {"values": [0, 256, 512, 1024, 2048]},
    },
)


# Launch the sweep controller: register the sweep definition under the
# "master-thesis" project and keep the returned id for the agent call.
sweep_id = wandb.sweep(sweep_config, project="master-thesis")

In [None]:
# Start an agent for the sweep that calls `train` for each trial;
# `count=15` limits how many trials this agent runs.
wandb.agent(sweep_id, train, count=15)