<a href="https://colab.research.google.com/github/albertaillet/KTH-EQ2425/blob/master/project3/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%pip install wandb --quiet # if using colab

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical, set_random_seed
from tensorflow.random import set_seed

# for type hinting
from typing import List
from numpy import ndarray

In [None]:
# Load the CIFAR-10 train/test splits through the Keras dataset helper.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# One-hot encode both label arrays (needed by the categorical cross-entropy loss).
y_train, y_test = map(to_categorical, (y_train, y_test))

In [None]:
import wandb
from wandb.keras import WandbCallback

# Log this session in to Weights & Biases before any run or sweep is created.
wandb.login()

In [None]:
def create_config(
    input_shape: tuple = (32, 32, 3),
    num_classes: int = 10,
    activation: str = "relu",
    output_activation: str = "softmax",
    conv_kernel_sizes: List[tuple] = [(5, 5), (3, 3), (3, 3)],
    conv_strides: List[int] = [1, 1, 1],
    conv_filters: List[int] = [24, 48, 96],
    conv_activate: List[bool] = [True, True, False],
    pool_kernel_sizes: List[tuple] = [(2, 2), (2, 2), (2, 2)],
    pool_strides: List[int] = [2, 2, 2],
    fully_connected_sizes: List[int] = [512],
    dropout: bool = False,
    dropout_rate: float = 0.3,
    batch_normalization: bool = False,
    batch_size: int = 64,
    learning_rate: float = 1e-3,
    model_name: str = "cifar10_model",
    data_shuffling: bool = False,
    epochs: int = 300,
    optimizer: str = "sgd",
    monitor: str = "val_loss",
    patience: int = 15,
    seed: int = 1,
    sweep: bool = False,
) -> dict:
    """
    Creates a dictionary of hyperparameters for the model.

    Every argument except ``sweep`` becomes one entry of the returned dictionary,
    keyed by the argument name. List values are copied, so mutating the returned
    config never alters this function's default lists (or the caller's lists).

    :param input_shape: shape of the input images
    :param num_classes: number of classes in the dataset
    :param activation: activation function to use on the convolutional layers and the fully connected layers
    :param output_activation: activation function to use on the output layer
    :param conv_kernel_sizes: list of kernel sizes for the convolutional layers
    :param conv_strides: list of strides for the convolutional layers
    :param conv_filters: list of number of filters for the convolutional layers
    :param conv_activate: list of booleans indicating whether to use the activation function on the convolutional layers
    :param pool_kernel_sizes: list of kernel sizes for the pooling layers
    :param pool_strides: list of strides for the pooling layers
    :param fully_connected_sizes: list of sizes for the fully connected layers
    :param dropout: whether to use dropout
    :param dropout_rate: dropout rate
    :param batch_normalization: whether to use batch normalization
    :param batch_size: batch size
    :param learning_rate: learning rate
    :param model_name: name of the model
    :param data_shuffling: whether to shuffle the data
    :param epochs: number of epochs
    :param optimizer: optimizer to use
    :param monitor: metrics to monitor
    :param patience: patience for early stopping
    :param seed: seed for the random number generators
    :param sweep: whether to create a sweep config or a normal config
    :return: config of hyperparameters; when ``sweep`` is true, every value that is
        not already a dict is wrapped as ``{'value': v}`` and dict values
        (e.g. ``{'values': [...]}``) are passed through unchanged
    """
    # Snapshot the arguments first, while the parameters are the only local
    # names, so the snapshot holds exactly the hyperparameters in declaration order.
    arguments = dict(locals())
    as_sweep = arguments.pop("sweep")

    # Copy list values: the defaults above are shared list objects, and handing
    # them out by reference would let one caller corrupt later calls.
    config = {
        key: list(value) if isinstance(value, list) else value
        for key, value in arguments.items()
    }
    if not as_sweep:
        return config

    # Sweep format: fixed settings are wrapped, caller-supplied dicts are kept.
    return {
        key: value if isinstance(value, dict) else {"value": value}
        for key, value in config.items()
    }

def create_and_train_model(
    input_shape: tuple,
    num_classes: int,
    activation: str,
    output_activation: str,
    conv_kernel_sizes: List[tuple],
    conv_strides: List[int],
    conv_filters: List[int],
    conv_activate: List[bool],
    pool_kernel_sizes: List[tuple],
    pool_strides: List[int],
    fully_connected_sizes: List[int],
    dropout: bool,
    dropout_rate: float,
    batch_normalization: bool,
    batch_size: int,
    learning_rate: float,
    model_name: str,
    optimizer: str,
    seed: int,
    **kwargs,
) -> models.Model:
    """
    Creates and compiles a convolutional classifier for the CIFAR10 dataset.

    Despite its name, this function does not train the model: it seeds the random
    number generators, builds the network and compiles it. The caller is expected
    to call ``fit`` on the returned model.

    :param input_shape: shape of the input images
    :param num_classes: number of classes in the dataset
    :param activation: activation function to use on the convolutional layers and the fully connected layers
        ("leaky_relu" is handled with a dedicated LeakyReLU layer after the convolutions)
    :param output_activation: activation function to use on the output layer
    :param conv_kernel_sizes: list of kernel sizes for the convolutional layers
    :param conv_strides: list of strides for the convolutional layers
    :param conv_filters: list of number of filters for the convolutional layers
    :param conv_activate: list of booleans indicating whether to use the activation function on the convolutional layers
    :param pool_kernel_sizes: list of kernel sizes for the pooling layers
    :param pool_strides: list of strides for the pooling layers
    :param fully_connected_sizes: list of sizes for the fully connected layers
    :param dropout: whether to use dropout
    :param dropout_rate: dropout rate
    :param batch_normalization: whether to use batch normalization
    :param batch_size: batch size (accepted so a full config can be unpacked; not used while building the model)
    :param learning_rate: learning rate applied to the optimizer
    :param model_name: name of the model
    :param optimizer: optimizer to use
    :param seed: seed for the random number generators
    :param kwargs: any further config entries; they are accepted and ignored
    :return: the compiled (untrained) model
    """
    # set random seed for reproducibility
    set_seed(seed)
    set_random_seed(seed)
    np.random.seed(seed)

    # Create model
    model = models.Sequential(name=model_name)
    model.add(layers.Input(shape=input_shape))

    # Normalize the pixel values to the range of [-0.5, 0.5].
    model.add(layers.Lambda(lambda x: (x / 255.0) - 0.5, name="normalize"))

    # Convolutional layers. The per-layer lists are consumed in lockstep, so the
    # number of conv/pool stages equals the length of the shortest list.
    conv_stages = zip(
        conv_kernel_sizes,
        conv_strides,
        conv_filters,
        conv_activate,
        pool_kernel_sizes,
        pool_strides,
    )
    for index, (kernel_size, stride, filters, activate, pool_kernel_size, pool_stride) in enumerate(
        conv_stages, start=1
    ):
        # Add convolutional layer
        model.add(
            layers.Conv2D(
                kernel_size=kernel_size,
                strides=stride,
                padding="valid",
                filters=filters,
                name=f"conv_{index}",
            )
        )
        if activate:
            # Add activation
            if activation == "leaky_relu":
                model.add(layers.LeakyReLU())
            else:
                model.add(layers.Activation(activation))

            # Add batch normalization (only on stages that are activated)
            if batch_normalization:
                model.add(layers.BatchNormalization())

        # Add pooling
        model.add(
            layers.MaxPooling2D(
                pool_size=pool_kernel_size,
                strides=pool_stride,
                name=f"pool_{index}",
            ),
        )

    # Flatten the output of the convolutional layers.
    model.add(layers.Flatten(name="flatten"))

    # Fully connected layers.
    for index, size in enumerate(fully_connected_sizes, start=1):
        model.add(layers.Dense(size, activation=activation, name=f"fc_{index}"))
        if dropout:
            model.add(layers.Dropout(dropout_rate))
        if batch_normalization:
            model.add(layers.BatchNormalization())

    # Output layer
    model.add(layers.Dense(num_classes, activation=output_activation, name=f"fc_{len(fully_connected_sizes)+1}"))

    # Resolve the optimizer and apply the configured learning rate. Passing the
    # bare optimizer name to compile() would silently use that optimizer's
    # default learning rate and ignore `learning_rate`.
    optimizer_instance = tf.keras.optimizers.get(optimizer)
    optimizer_instance.learning_rate = learning_rate

    # Compile the model.
    model.compile(
        optimizer=optimizer_instance,
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    return model

In [None]:
# Hyperparameters overridden for this single run; create_config supplies the rest.
partial_config = dict(
    learning_rate=1e-3,
    epochs=1,
    batch_size=64,
    dropout=False,
    batch_normalization=False,
    data_shuffling=False,
    seed=1,
    optimizer="sgd",
)
full_config = create_config(**partial_config)

# Build the network from the complete config and print its layer summary.
model = create_and_train_model(**full_config)
model.summary()

In [None]:
# Start a Weights & Biases run for the single-run experiment and attach the
# complete hyperparameter dictionary to it as the run config.
wandb.init(
    project="project3", 
    entity="eq2425_2022p3_aillet_bonato",
    config = full_config,
)

In [None]:
# Early stopping on the metric and patience taken from the run's hyperparameters.
ES = tf.keras.callbacks.EarlyStopping(
    monitor=full_config['monitor'],
    patience=full_config['patience'],
)

# Train the model, logging to W&B through its Keras callback.
# NOTE(review): the test split is also used as validation data, so early
# stopping is driven by test-set performance — confirm this is intended.
model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_test, y_test),
    batch_size=wandb.config['batch_size'],
    epochs=wandb.config['epochs'],
    shuffle=wandb.config['data_shuffling'],
    callbacks=[WandbCallback(), ES],
)


In [None]:
# Finish the current W&B run so the sweep section below starts from a clean state.
wandb.run.finish()

## Sweeping 

In [None]:
def train_function():
    """
    Runs one training job using the hyperparameters in ``wandb.config``.

    Starts a W&B run, builds the model from the run's config, and fits it on the
    notebook-level CIFAR10 arrays with early stopping and W&B logging.
    Intended to be passed to ``wandb.agent`` as the sweep's training function.
    """
    wandb.init()
    run_config = wandb.config

    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor=run_config['monitor'],
        patience=run_config['patience'],
    )

    # The config holds every argument the model builder needs.
    network = create_and_train_model(**run_config)

    fit_options = dict(
        epochs=run_config['epochs'],
        batch_size=run_config['batch_size'],
        validation_data=(X_test, y_test),
        shuffle=run_config['data_shuffling'],
        callbacks=[WandbCallback(), early_stopping],
    )
    network.fit(X_train, y_train, **fit_options)

In [None]:
# Sweep definition. Plain values are held fixed for every run; entries given as
# {"values": [...]} are the hyperparameters the grid search iterates over.
partial_config = {
    "learning_rate": 1e-3,
    "epochs": 300,
    "batch_size": 64,
    "data_shuffling": False,
    "seed": 1,
    "optimizer": "sgd",
    "conv_filters": [64, 128, 256],
    "fully_connected_sizes": [512],
    "conv_kernel_sizes": [(5, 5), (3, 3), (3, 3)],
    "activation": {"values": ["relu", "leaky_relu"]},
    "dropout": {"values": [True, False]},
    "batch_normalization": {"values": [True, False]},
}

# create_config fills in the remaining hyperparameters and emits the sweep format.
sweep_config = dict(
    name="sweep_4_C_D_E",
    method="grid",
    parameters=create_config(**partial_config, sweep=True),
)

# Register the sweep with W&B and keep its id for the agent.
sweep_id = wandb.sweep(
    sweep_config,
    project="project3",
    entity="eq2425_2022p3_aillet_bonato",
)

In [None]:
# Start the sweep: launch a W&B agent for sweep_id that uses train_function
# as the training entry point for the runs it executes.
wandb.agent(sweep_id, function=train_function)