# Baseline network + hyperparameter sweep

This notebook trains the baseline network with the exact same architecture as the one in the paper [Recognition of handwritten Latin characters with diacritics using CNN](https://journals.pan.pl/dlibra/publication/136210/edition/119099/content/bulletin-of-the-polish-academy-of-sciences-technical-sciences-recognition-of-handwritten-latin-characters-with-diacritics-using-cnn-lukasik-edyta-charytanowicz-malgorzata-milosz-marek-tokovarov-michail-kaczorowska-monika-czerwinski-dariusz-zientarski-tomasz-2021-69-no-1?language=en).

Model architecture description:

- Input layer (32x32 grayscale image) 

- feature extraction
    - Conv2d (padding = 1, kernel = (3,3), stride = 1, activation = relu) 
    - MaxPool layer ( kernel=(2,2), stride=2) 
    - Conv2d layer (padding = 1, kernel = (3,3), stride = 1, activation = relu) 
    - MaxPool layer ( kernel=(2,2), stride=2) 

- dense layers
    - Dense(5376)
    - Dense(256)
    - Dense( number of classes - here in paper 89 classes)




After training, the model is serialized and uploaded to the W&B project.

In [2]:
import wandb
import tensorflow as tf
import numpy as np
import pathlib
import shutil
from typing import List

def load_data(run) -> List[tf.data.Dataset]:
    """
    Download the dataset splits from a W&B artifact and load them as tf.data.Datasets.

    Args:
        run: Active W&B run; the artifact is declared as an input of this run
            through ``run.use_artifact``.

    Returns:
        The loaded splits in the order ``[train, test, val]``.
    """
    artifact_name = "letters_splits_tfds"
    artifact = run.use_artifact(f"master-thesis/{artifact_name}:latest")

    # Skip the download when the expected local directory is already present.
    artifact_dir = pathlib.Path(
        f"./artifacts/{artifact.name.replace(':', '-')}"
    ).resolve()
    if not artifact_dir.exists():
        artifact_dir = pathlib.Path(artifact.download()).resolve()

    # Feature-detect the loader instead of parsing tf.__version__: older
    # releases only expose tf.data.experimental.load, and a check on the minor
    # version alone misclassifies any release outside the 2.x series.
    load_function = getattr(tf.data.Dataset, "load", None)
    if load_function is None:
        load_function = tf.data.experimental.load

    return [
        load_function(str(artifact_dir / split), compression="GZIP")
        for split in ("train", "test", "val")
    ]

def get_number_of_classes(ds: tf.data.Dataset) -> int:
    """
    Count the distinct label values found in a dataset of (features, labels) pairs.

    The label elements are concatenated, so each one must be at least
    one-dimensional (e.g. a batch of labels).
    """
    only_labels = ds.map(lambda _features, label: label)
    label_batches = [batch for batch in only_labels.as_numpy_iterator()]
    distinct_labels = np.unique(np.concatenate(label_batches))
    return len(distinct_labels)

def preprocess_dataset(ds: tf.data.Dataset, batch_size: int, cache: bool = True) -> tf.data.Dataset:
    """
    Scale the inputs to float32 divided by 255 and re-batch the dataset to ``batch_size``.

    When ``cache`` is true the batched dataset is also cached and prefetched
    with an auto-tuned buffer; otherwise neither step is applied.
    """

    def _scale(features, label):
        # normalize
        return tf.cast(features, tf.float32) / 255.0, label

    rebatched = ds.map(_scale).unbatch().batch(batch_size)
    if not cache:
        return rebatched
    return rebatched.cache().prefetch(buffer_size=tf.data.AUTOTUNE)

def calculate_model_size_on_disk(path: str) -> int:
    """
    Return the size in bytes that a saved model occupies on disk.

    Args:
        path: Path to a single model file, or to a directory holding the model
            (for example a model exported as a directory tree).

    Returns:
        The file size for a file; for a directory, the summed size of every
        regular file found recursively inside it.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    model_path = pathlib.Path(path)
    if model_path.is_dir():
        # stat() on a directory reports the size of the directory entry itself,
        # not of its contents, so add up the contained files instead.
        return sum(
            entry.stat().st_size
            for entry in model_path.rglob("*")
            if entry.is_file()
        )
    return model_path.stat().st_size

def calculate_model_num_parameters(model: tf.keras.Model) -> int:
    """Return the parameter count reported by ``model.count_params()``."""
    total_params = model.count_params()
    return total_params

def calculate_model_flops(summary) -> float:
    """
    Look up the logged FLOPs figure in a run summary.

    The key "GFLOPs" is preferred and "GFLOPS" is the fallback; the value
    stored under the first key present is returned as-is. Returns None when
    neither key exists.
    """
    for flops_key in ("GFLOPs", "GFLOPS"):
        if flops_key in summary.keys():
            return summary.get(flops_key)
    return None

In [3]:
# Report the devices TensorFlow can see, to confirm whether a GPU is available.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))
all_devices = tf.config.list_physical_devices()
print("Available devices: ", all_devices)

Num GPUs Available:  0
Available devices:  [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [None]:
# Set mixed precision policy for faster training
# tf.keras.mixed_precision.set_global_policy('mixed_float16')

In [None]:
def train(config=None):
    """
    Run one sweep trial: build a fully-convolutional classifier from the W&B
    config, train it and log the metrics to W&B.

    The network is a stack of 3x3 ReLU convolutions (one per entry of
    ``filters``, each optionally followed by 2x2 max pooling), then a 3x3
    convolution with one channel per class, global average pooling and a
    softmax. When ``last_activation`` is "softmax", softmax is therefore
    applied both before and after the pooling.

    Config keys read from the run:
        epochs, filters, padding, max_pool, last_activation: required.
        optimizer: optimizer identifier, defaults to "adam".
        learning_rate: optional; when absent the optimizer's own default is kept.
        batch_size: defaults to 32.

    Args:
        config: Optional default configuration forwarded to ``wandb.init``.
    """
    with wandb.init(
        project="master-thesis",
        job_type="sweep",
        config=config,
        settings=wandb.Settings(start_method="thread"),
    ) as run:
        cfg = run.config

        # Training hyperparameters. These are looked up with fallbacks so that a
        # sweep which does not define them still runs instead of failing on the
        # config lookup.
        opt_name = cfg.get("optimizer", "adam")
        lr = cfg.get("learning_rate")
        bs = cfg.get("batch_size", 32)
        epochs = cfg.epochs

        # The test split is loaded by load_data but is not used during the sweep.
        ds_train, _ds_test, ds_val = load_data(run)

        # Must run before preprocessing: it concatenates the label batches of
        # the dataset as loaded.
        num_classes = get_number_of_classes(ds_val)
        ds_train = preprocess_dataset(ds_train, batch_size=bs)
        ds_val = preprocess_dataset(ds_val, batch_size=bs)

        # Architecture hyperparameters.
        filters = cfg.filters
        padding = cfg.padding  # "same" or "valid"
        max_pool = cfg.max_pool  # True or False
        last_activation = cfg.last_activation  # "softmax" or None

        model = tf.keras.Sequential([tf.keras.layers.InputLayer(input_shape=(32, 32, 1))])

        for num_filters in filters:
            model.add(
                tf.keras.layers.Conv2D(
                    num_filters, kernel_size=(3, 3), activation="relu", padding=padding
                )
            )
            if max_pool:
                model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))

        # Classification head. These layers have to be added to the model;
        # merely constructing them leaves the network without an output layer.
        # NOTE(review): this convolution uses the default "valid" padding, so
        # with padding="valid", max pooling and three entries in `filters` the
        # feature map looks too small (2x2) for a 3x3 kernel - confirm whether
        # padding="same" is wanted here.
        model.add(
            tf.keras.layers.Conv2D(
                num_classes, kernel_size=(3, 3), activation=last_activation
            )
        )
        model.add(tf.keras.layers.GlobalAveragePooling2D())
        model.add(tf.keras.layers.Activation("softmax"))

        # Honour the configured optimizer and learning rate instead of always
        # compiling with a default Adam.
        optimizer = tf.keras.optimizers.get(opt_name)
        if lr is not None:
            optimizer.learning_rate = lr

        model.compile(
            optimizer=optimizer,
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"],
        )

        wandb_callback = wandb.keras.WandbCallback(
            save_model=False,
            compute_flops=True,
        )

        model.fit(
            ds_train,
            epochs=epochs,
            validation_data=ds_val,
            callbacks=[wandb_callback],
        )

In [None]:
# define sweep parameters
sweep_config = {
    "method": "random",
    # Rank runs by validation accuracy rather than by accuracy on the training data.
    "metric": {
        "goal": "maximize",
        "name": "val_accuracy",
    },
    "parameters": {
        "epochs": {"value": 3},
        # train() reads these three keys from the run config, so they have to
        # be part of the sweep; they are held fixed here.
        "optimizer": {"value": "adam"},
        "learning_rate": {"value": 0.001},
        "batch_size": {"value": 32},
        # Number of filters of each convolution, one entry per layer.
        "filters": {
            "values": [
                [8, 128],
                [16, 256],
                [32, 512],
                [8, 128, 256],
                [16, 256, 512],
                [32, 512, 1024],
                [1024, 2048],
            ]
        },
        "padding": {"values": ["same", "valid"]},
        "last_activation": {"values": ["softmax", None]},
        "max_pool": {"values": [True, False]},
    },
}


# launch sweep controller
sweep_id = wandb.sweep(sweep_config, project="master-thesis")

In [None]:
# Start a sweep agent that calls `train` for at most 15 runs of the sweep.
wandb.agent(sweep_id, function=train, count=15)