<a href="https://colab.research.google.com/github/albertaillet/KTH-EQ2425/blob/master/project3/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the wandb client used for experiment logging below (quiet pip output).
%pip install wandb --quiet # if using colab

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from tensorflow.random import set_seed

# for type hinting
from typing import List
from numpy import ndarray

In [None]:
# Fetch the CIFAR-10 train/test splits shipped with Keras.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Convert both label arrays to categorical (one-hot) form, as required by the
# "categorical_crossentropy" loss used when compiling the model.
y_train, y_test = map(to_categorical, (y_train, y_test))

In [None]:
import wandb
from wandb.keras import WandbCallback

# Log in to Weights & Biases so the run created later can upload its metrics.
wandb.login()

In [None]:
def create_and_train_model(
    input_shape: tuple=(32, 32, 3),
    num_classes: int=10,
    activation: str="relu",
    output_activation: str='softmax',
    conv_kernel_sizes: List[tuple]=[(5, 5), (3, 3), (3, 3)],
    conv_strides: List[int]=[1, 1, 1],
    conv_filters: List[int]=[24, 48, 96],
    conv_activate: List[bool]=[True, True, False],
    pool_kernel_sizes: List[tuple]=[(2, 2), (2, 2), (2, 2)],
    pool_strides: List[int]=[2, 2, 2],
    fully_connected_sizes: List[int]=[512],
    dropout: bool=False,
    dropout_rate: float=0.3,
    batch_normalization: bool=False,
    batch_size: int=64,
    learning_rate: float=1e-3,
    model_name: str="cifar10_model",
    data_shuffling: bool=False,
    epochs: int=300,
    seed: int=1,
) -> models.Model:
    """
    Builds and compiles a CNN classifier (default sizes target CIFAR10).

    Despite its name, this function does not train the model: it returns the
    compiled model and the caller is expected to call ``fit`` on it.

    The list-valued defaults are only read, never mutated, inside this function.

    :param input_shape: shape of the input images
    :param num_classes: number of classes in the dataset
    :param activation: activation function to use on the convolutional layers and the fully connected layers
    :param output_activation: activation function to use on the output layer
    :param conv_kernel_sizes: list of kernel sizes for the convolutional layers
    :param conv_strides: list of strides for the convolutional layers
    :param conv_filters: list of number of filters for the convolutional layers
    :param conv_activate: list of booleans indicating whether to use the activation function on the convolutional layers
    :param pool_kernel_sizes: list of kernel sizes for the pooling layers
    :param pool_strides: list of strides for the pooling layers
    :param fully_connected_sizes: list of sizes for the fully connected layers
    :param dropout: whether to add a dropout layer after each fully connected layer
    :param dropout_rate: dropout rate
    :param batch_normalization: whether to use batch normalization
    :param batch_size: not used here; accepted so a full run config can be passed with ``**``
    :param learning_rate: learning rate of the SGD optimizer
    :param model_name: name of the model
    :param data_shuffling: not used here; accepted so a full run config can be passed with ``**``
    :param epochs: not used here; accepted so a full run config can be passed with ``**``
    :param seed: seed for the random number generators
    :return: the compiled (untrained) Keras model
    """
    # set random seed for reproducibility
    set_seed(seed)
    np.random.seed(seed)

    # Create model
    model = models.Sequential(name=model_name)
    model.add(layers.Input(shape=input_shape))

    # Shift and scale the inputs: maps pixel values in [0, 255] to [-0.5, 0.5].
    model.add(layers.Lambda(lambda x: (x / 255.0) - 0.5, name="normalize"))

    # Convolutional layers, each followed by max pooling.
    # zip stops at the shortest list, so the number of conv/pool stages is the
    # length of the shortest of the six per-layer lists.
    conv_specs = zip(
        conv_kernel_sizes,
        conv_strides,
        conv_filters,
        conv_activate,
        pool_kernel_sizes,
        pool_strides,
    )
    for i, (kernel_size, stride, filters, activate, pool_kernel_size, pool_stride) in enumerate(conv_specs, start=1):
        model.add(
            layers.Conv2D(
                kernel_size=kernel_size,
                strides=stride,
                padding="valid",
                filters=filters,
                activation=activation if activate else None,
                name=f"conv_{i}",
            )
        )
        # Batch normalization is only inserted after activated conv layers.
        if activate and batch_normalization:
            model.add(layers.BatchNormalization())
        model.add(
            layers.MaxPooling2D(
                pool_size=pool_kernel_size,
                strides=pool_stride,
                name=f"pool_{i}",
            ),
        )

    # Flatten the output of the convolutional layers.
    model.add(layers.Flatten(name="flatten"))

    # Fully connected layers, named fc_1, fc_2, ...
    for i, size in enumerate(fully_connected_sizes, start=1):
        model.add(layers.Dense(size, activation=activation, name=f"fc_{i}"))
        if dropout:
            model.add(layers.Dropout(dropout_rate))
        if batch_normalization:
            model.add(layers.BatchNormalization())

    # Output layer; its name continues the fc_<n> numbering.
    model.add(layers.Dense(num_classes, activation=output_activation, name=f"fc_{len(fully_connected_sizes)+1}"))

    # Compile the model.
    # Build the optimizer explicitly so that `learning_rate` is honoured; the
    # string shortcut "sgd" would silently fall back to the Keras default rate.
    model.compile(
        optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate),
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )

    return model

In [None]:
# Start a Weights & Biases run. Every key in the config is later forwarded as a
# keyword argument to create_and_train_model, so further arguments of that
# function can be overridden here (for example conv_filters=[64, 128, 256]).
wandb.init(
    project="project3",
    entity="eq2425_2022p3_aillet_bonato",
    config=dict(
        learning_rate=1e-3,
        epochs=300,
        batch_size=64,
        dropout=False,
        batch_normalization=False,
        data_shuffling=False,
        seed=1,
    ),
)

In [None]:
# Build and compile the network, passing every entry of the run config as a
# keyword argument. Despite its name, create_and_train_model does not train;
# training happens in the separate model.fit call.
model = create_and_train_model(**wandb.config)
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Train the model with the hyper-parameters stored in the run config.
# The test split is used as validation data, and WandbCallback reports the
# training progress to the active wandb run.
model.fit(
    x=X_train,
    y=y_train,
    batch_size=wandb.config['batch_size'],
    epochs=wandb.config['epochs'],
    shuffle=wandb.config['data_shuffling'],
    validation_data=(X_test, y_test),
    callbacks=[WandbCallback()],
)

In [None]:
# Mark the active wandb run as finished now that training is done.
wandb.run.finish()