# Weights & Biases

[![Open in Colab](https://lab.aef.me/files/assets/colab-badge.svg)](https://colab.research.google.com/github/adamelliotfields/lab/blob/main/files/wandb.ipynb)
[![Open in Kaggle](https://lab.aef.me/files/assets/open-in-kaggle.svg)](https://kaggle.com/kernels/welcome?src=https://github.com/adamelliotfields/lab/blob/main/files/wandb.ipynb)
[![Render nbviewer](https://lab.aef.me/files/assets/nbviewer_badge.svg)](https://nbviewer.org/github/adamelliotfields/lab/blob/main/files/wandb.ipynb)

Experiment tracking with [W&B](https://wandb.ai).

This notebook starts with a couple of scikit-learn estimators to demonstrate the basics, followed by a couple of Keras models to demonstrate model checkpointing and hyperparameter sweeps.

**Resources**

* [Alerts](https://docs.wandb.ai/guides/runs/alert)
* [Environment variables](https://docs.wandb.ai/guides/track/environment-variables)
* Artifacts:
  - [TTL](https://docs.wandb.ai/guides/artifacts/ttl)
  - [Webhooks](https://docs.wandb.ai/guides/artifacts/project-scoped-automations)
* Integrations:
  - [🤗 Transformers](https://docs.wandb.ai/guides/integrations/huggingface)
  - [🤗 Diffusers](https://docs.wandb.ai/guides/integrations/diffusers)
  - [Keras](https://docs.wandb.ai/guides/integrations/keras)
  - [TensorBoard](https://docs.wandb.ai/guides/integrations/tensorboard)
  - [Lightning](https://docs.wandb.ai/guides/integrations/lightning)
  - [LightGBM](https://docs.wandb.ai/guides/integrations/lightgbm)
  - [Sklearn](https://docs.wandb.ai/guides/integrations/scikit)
  - [OpenAI](https://docs.wandb.ai/guides/integrations/openai-api)

In [None]:
%pip install -q wandb

In [None]:
import os

# set before the TensorFlow/Keras imports in the next cell
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
os.environ["KERAS_BACKEND"] = "tensorflow"

# Colab only: the import fails elsewhere and the except branch leaves the environment untouched
try:
    from google.colab import userdata

    # disable saving notebook if scratchpad
    # os.environ["WANDB_DISABLE_CODE"] = "true"

    os.environ["WANDB_DISABLE_GIT"] = "true"
    # read the API key from Colab's user secrets
    os.environ["WANDB_API_KEY"] = userdata.get("WANDB_API_KEY")
    # presumably a mounted Google Drive folder so dataset downloads persist; verify the mount exists
    os.environ["TFDS_DATA_DIR"] = "/content/drive/MyDrive/tensorflow_datasets"
except ImportError:
    pass

# can also use `wandb.login` for interactive login
# outside Colab the key must already be present in the environment
assert os.environ.get("WANDB_API_KEY"), "missing WANDB_API_KEY"

In [None]:
import io
import wandb
import subprocess

import numpy as np
import pandas as pd
import tensorflow as tf
import plotly.express as px
import matplotlib.pyplot as plt
import tensorflow_datasets as tfds

from PIL import Image as PILImage
from yellowbrick.classifier import ConfusionMatrix

from wandb.sklearn import plot_precision_recall, plot_feature_importances
from wandb.sklearn import plot_class_proportions, plot_learning_curve, plot_roc
from wandb.integration.keras import WandbMetricsLogger, WandbModelCheckpoint

from keras import (
    Input,
    Model,
    Sequential,
    initializers,
    layers,
    losses,
    optimizers,
)

from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.datasets import load_diabetes, load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier
from sklearn.metrics import (
    mean_absolute_percentage_error,
    mean_squared_error,
    mean_absolute_error,
    r2_score,
)

In [None]:
# @title Config
# W&B entity and project passed to the top-level `wandb.init` calls below and to `wandb.sweep`
WANDB_ENTITY = "adamelliotfields"  # @param {type:"string"}
WANDB_PROJECT = "test"  # @param {type:"string"}

## Iris Classification

In [None]:
# load the Iris dataset and hold out 20% of the rows for evaluation
iris = load_iris()
X_iris = iris.data
y_iris = iris.target

(
    X_iris_train,
    X_iris_test,
    y_iris_train,
    y_iris_test,
) = train_test_split(X_iris, y_iris, test_size=0.2, random_state=42)

# one frame with the features plus a `target` column, then swap the numeric codes for class names
iris_df = pd.DataFrame(
    np.column_stack([X_iris, y_iris]),
    columns=[*iris.feature_names, "target"],
)
iris_df["target"] = pd.Categorical.from_codes(y_iris, iris.target_names)

In [None]:
# sepal width vs. length, colored by class, with a per-class OLS trendline and marginal plots
scatter_options = dict(
    x="sepal width (cm)",
    y="sepal length (cm)",
    color="target",
    trendline="ols",
    marginal_x="box",
    marginal_y="violin",
)
fig = px.scatter(iris_df, **scatter_options)
del scatter_options  # keep the notebook namespace the same as before
fig.show()

In [None]:
# alternative estimators to try:
# classifier = DecisionTreeClassifier()
# classifier = RandomForestClassifier(n_estimators=300, min_samples_split=5, min_samples_leaf=2, random_state=42)

# `fit` returns the estimator itself, so construction and training can be chained
classifier = KNeighborsClassifier(n_neighbors=3).fit(X_iris_train, y_iris_train)

# per-class probabilities on the held-out rows (used by the ROC and precision-recall plots)
y_probas = classifier.predict_proba(X_iris_test)

In [None]:
# yellowbrick confusion matrix
# `is_fitted=True` is passed because the classifier was already trained in the previous cell
cm = ConfusionMatrix(classifier, classes=iris.target_names, cmap="Blues", is_fitted=True)
cm.fit(X_iris_train, y_iris_train)
cm.score(X_iris_test, y_iris_test)

# save as PIL image
# write the PNG into an in-memory buffer before `plt.show()`, then rewind it so PIL reads from the start
buf = io.BytesIO()
plt.savefig(buf, format="png")
plt.show()
buf.seek(0)
img = PILImage.open(buf)

In [None]:
# Log the Iris experiment: hyperparameters, figures, the dataset and the built-in sklearn plots.
# returns a run instance, which can also be accessed on `wandb.run`
wandb.init(
    group="iris",
    tags=["CPU"],
    job_type="train",
    entity=WANDB_ENTITY,
    project=WANDB_PROJECT,
    notes="KNN classifier",
    config=classifier.get_params(),
)

# log additional information
# (test_size matches the value passed to `train_test_split` for Iris)
wandb.config.update(
    {
        "test_size": 0.2,
        "model": "KNeighborsClassifier",
    }
)

# renders an interactive Plotly figure (in the dashboard)
# wandb.log is shorthand for wandb.run.log
wandb.log({"Plotly": wandb.Plotly(fig)})

# renders a static image
wandb.log({"Confusion Matrix": wandb.Image(img)})

# create a dataset artifact and additionally attach the raw CSV
# the same table is also logged to the run under the "data" key
iris_df.to_csv("iris.csv", index=False)
iris_table = wandb.Table(dataframe=iris_df)
iris_artifact = wandb.Artifact("data", type="dataset")
iris_artifact.add(iris_table, "table")
iris_artifact.add_file("iris.csv")
wandb.log({"data": iris_table})
wandb.log_artifact(iris_artifact)

# built-in wandb plots for scikit-learn
plot_class_proportions(y_iris_train, y_iris_test, iris.target_names)
plot_learning_curve(classifier, X_iris_train, y_iris_train, random_state=42)
plot_roc(y_iris_test, y_probas, iris.target_names)
plot_precision_recall(y_iris_test, y_probas, iris.target_names)
# plot_feature_importances(classifier, iris.feature_names)  # only for trees

# must call finish in a notebook (if not using context)
wandb.finish()

## Diabetes Regression

In [None]:
# load the diabetes regression dataset
diabetes = load_diabetes()
X_diabetes = diabetes.data
y_diabetes = diabetes.target

# no `test_size` is given here, so `train_test_split` uses its default hold-out fraction
(
    X_diabetes_train,
    X_diabetes_test,
    y_diabetes_train,
    y_diabetes_test,
) = train_test_split(X_diabetes, y_diabetes, random_state=42)

# features and target side by side in a single frame
diabetes_df = pd.DataFrame(
    np.column_stack([X_diabetes, y_diabetes]),
    columns=[*diabetes.feature_names, "target"],
)

In [None]:
# gradient-boosted regressor with shallow trees
regressor = GradientBoostingRegressor(
    n_estimators=100,
    max_depth=2,
    subsample=0.9,
    min_samples_leaf=2,
    min_samples_split=10,
    random_state=42,
)

# `fit` returns the estimator, so training and prediction can be chained
y_pred = regressor.fit(X_diabetes_train, y_diabetes_train).predict(X_diabetes_test)

# logging these will automatically plot them
r2, mse, mae, mape = (
    metric(y_diabetes_test, y_pred)
    for metric in (
        r2_score,
        mean_squared_error,
        mean_absolute_error,
        mean_absolute_percentage_error,
    )
)

In [None]:
# Log the diabetes regression run: estimator hyperparameters plus the four evaluation metrics.
# use a context manager so you don't need to call `finish`
with wandb.init(
    tags=["CPU"],
    job_type="train",
    group="diabetes",
    entity=WANDB_ENTITY,
    project=WANDB_PROJECT,
    config=regressor.get_params(),
    notes="GradientBoostingRegressor",
) as run:
    wandb.config.update(
        {
            # the diabetes split passes no `test_size`, so scikit-learn's default of 0.25 was used;
            # record the fraction that was actually held out rather than the Iris value (0.2)
            "test_size": 0.25,
            "model": "GradientBoostingRegressor",
        }
    )

    # use a slash to group
    run.log({"metrics/R2": r2, "metrics/MSE": mse, "metrics/MAE": mae, "metrics/MAPE": mape})

## MNIST Image Classification

In [None]:
# @title Config
# Hyperparameters for the CNN run in this section.
SEED = 42  # seeds the training-set shuffle and the output-layer initializer
EPOCHS = 10  # passed to `model.fit`
VERBOSE = 1  # verbosity for `model.fit` and the checkpoint callback
DROPOUT = 0.1  # dropout rate applied before the output layer
MAX_FILTERS = 8  # filters in the last conv layer; the first two use a quarter and a half of this
BATCH_SIZE = 128  # batch size applied to the train/val/test datasets
LEARNING_RATE = 0.0015  # Adam learning rate
ACTIVATION = "leaky_relu"  # activation for the conv layers

In [None]:
# @title CNN
def get_cnn(seed=42, classes=10, dropout=0.1, max_filters=8, activation="relu"):
    """Build a three-stage convolutional classifier for 28x28x1 inputs.

    Args:
        seed: Seed for the output layer's RandomNormal kernel initializer.
        classes: Number of units in the output layer; more than 2 selects
            softmax, otherwise sigmoid.
        dropout: Dropout rate applied to the flattened features.
        max_filters: Filter count of the last conv layer; the first two conv
            layers use ``max_filters // 4`` and ``max_filters // 2``.
        activation: Activation used by every conv layer.

    Returns:
        An uncompiled Keras ``Model`` named ``"CNN"``.
    """
    inputs = Input(shape=(28, 28, 1), name="input")

    # filter count for each 3x3 "same"-padded conv stage, in order
    stage_filters = (max_filters // 4, max_filters // 2, max_filters)
    last_stage = len(stage_filters)

    features = inputs
    for stage, filters in enumerate(stage_filters, start=1):
        conv = layers.Conv2D(
            filters,
            3,
            padding="same",
            activation=activation,
            name=f"conv{stage}",
        )
        features = conv(features)
        # only the first two stages are followed by 2x2 max pooling
        if stage < last_stage:
            features = layers.MaxPooling2D(2, name=f"pool{stage}")(features)

    features = layers.Flatten(name="flatten")(features)
    features = layers.Dropout(dropout, name="dropout")(features)

    if classes > 2:
        head_activation = "softmax"
    else:
        head_activation = "sigmoid"

    head = layers.Dense(
        classes,
        activation=head_activation,
        kernel_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01, seed=seed),
        name="output",
    )
    outputs = head(features)

    return Model(inputs, outputs=outputs, name="CNN")

In [None]:
# load the MNIST train and test splits along with the dataset metadata
# (`as_supervised=True` presumably yields (image, label) pairs; confirm against the TFDS docs)
(mnist_train, mnist_test), mnist_info = tfds.load(
    "mnist",
    with_info=True,
    as_supervised=True,
    split=["train", "test"],
)

# first 55,000 training examples are shuffled (seeded) for training; the next 5,000 are the validation set
# all three pipelines are batched with BATCH_SIZE and prefetched
# fmt: off
X_train = mnist_train.take(55000).shuffle(seed=SEED, buffer_size=mnist_train.cardinality()).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
X_val = mnist_train.skip(55000).take(5000).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
X_test = mnist_test.take(10000).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
# fmt: on

In [None]:
# preprocessing: x * (2 / 255) - 1, i.e. 0 maps to -1 and 255 maps to 1
augment = Sequential(
    [layers.Rescaling(scale=2.0 / 255, offset=-1)],
    name="augment",
)

cnn = get_cnn(
    seed=SEED,
    dropout=DROPOUT,
    max_filters=MAX_FILTERS,
    activation=ACTIVATION,
)

# wrap rescaling and the CNN into one model so the rescaling is part of the model itself
x_input = Input(shape=(28, 28, 1), name="input")
x = cnn(augment(x_input))

model = Model(inputs=x_input, outputs=x, name="CNN-MNIST")
model.compile(
    optimizer=optimizers.Adam(learning_rate=LEARNING_RATE),
    loss=losses.SparseCategoricalCrossentropy(),
    metrics=["accuracy"],
)
model.summary()

In [None]:
# Train the CNN inside a W&B run, logging per-epoch metrics and model checkpoints.
with wandb.init(
    tags=["T4"],
    group="mnist",
    job_type="train",
    entity=WANDB_ENTITY,
    project=WANDB_PROJECT,
    notes="Run without hyperparameter sweep",
    config={"activation": ACTIVATION, "dropout": DROPOUT, "learning_rate": LEARNING_RATE},
) as run:
    wandb.config.update(
        {
            "epochs": EPOCHS,
            "optimizer": "Adam",
            "model": "CNNClassifier",
            "batch_size": BATCH_SIZE,
        }
    )

    model.fit(
        X_train,
        epochs=EPOCHS,
        verbose=VERBOSE,
        validation_data=X_val,
        callbacks=[
            WandbMetricsLogger(log_freq="epoch"),
            # creates {username}/{project}/run_{id}_model:v{epoch}
            WandbModelCheckpoint(
                "cnn-mnist-v{epoch}.model.keras",
                verbose=VERBOSE,
                # judge "best" on the held-out validation set; training accuracy
                # would favor whichever epoch fit the training data hardest
                monitor="val_accuracy",
                save_best_only=True,
            ),
        ],
    )

    # also upload artifact manually
    # creates {username}/{project}/cnn-mnist:v0
    model.save("cnn-mnist.model.keras")
    artifact = wandb.Artifact("cnn-mnist", type="model")
    artifact.add_file("cnn-mnist.model.keras")
    run.log_artifact(artifact)

### Sweep

The sweep controller runs on W&B's cloud; the agent runs on your machine and gets parameters from the controller. Each sweep has an ID, and you can provide that ID to agents on multiple machines to parallelize the sweep.

You can also create a sweep config from an [existing project](https://docs.wandb.ai/guides/sweeps/existing-project) using hyperparameters you've already logged.

In [None]:
# @title Config
# Fixed settings for the MLP sweep; dropout, activation, learning rate and epochs come from the sweep config.
SEED = 42  # seed for the MLP output-layer initializer
EPOCHS = 10  # supplied to the sweep as a constant `epochs` parameter
VERBOSE = 1  # verbosity for `model.fit`
BATCH_SIZE = 128  # logged to the run config; the datasets were already batched when they were built
HIDDEN_LAYERS = 3  # number of Dense+Dropout pairs in the MLP
HIDDEN_UNITS = 64  # units per hidden Dense layer

In [None]:
# @title MLP
def get_mlp(seed=42, classes=10, dropout=0.1, activation="relu", hidden_layers=2, hidden_units=32):
    """Build a fully connected classifier for 28x28x1 inputs.

    Args:
        seed: Seed for the output layer's RandomNormal kernel initializer.
        classes: Number of units in the output layer; more than 2 selects
            softmax, otherwise sigmoid.
        dropout: Dropout rate applied after every hidden Dense layer.
        activation: Activation used by the hidden Dense layers.
        hidden_layers: Number of Dense+Dropout pairs.
        hidden_units: Units in each hidden Dense layer.

    Returns:
        An uncompiled Keras ``Model`` named ``"MLP"``.
    """
    # the model accepts image-shaped input and flattens it internally
    inputs = Input(shape=(28, 28, 1), name="input")
    hidden = layers.Flatten(name="flatten")(inputs)

    for index in range(hidden_layers):
        dense = layers.Dense(hidden_units, activation=activation, name=f"dense_{index}")
        hidden = dense(hidden)
        hidden = layers.Dropout(dropout, name=f"dropout_{index}")(hidden)

    if classes > 2:
        head_activation = "softmax"
    else:
        head_activation = "sigmoid"

    head = layers.Dense(
        classes,
        activation=head_activation,
        kernel_initializer=initializers.RandomNormal(mean=0.0, stddev=0.01, seed=seed),
        name="output",
    )
    outputs = head(hidden)

    return Model(inputs, outputs=outputs, name="MLP")

In [None]:
# @title Trainer
def train(config=None):
    """Run one sweep trial: start a W&B run, build the MLP and fit it on MNIST.

    Dropout, activation, learning rate and epoch count are read from
    ``wandb.config``; the remaining settings come from module-level constants.

    Args:
        config: Optional config forwarded to ``wandb.init``.
    """
    # project and entity passed to `wandb.sweep` further down
    run_options = dict(
        tags=["CPU"],
        config=config,
        group="mnist",
        job_type="sweep",
        notes="Hyperparameter sweep",
    )

    # settings that are fixed for every trial, logged next to the swept ones
    fixed_settings = {
        "optimizer": "Adam",
        "model": "MLPClassifier",
        "batch_size": BATCH_SIZE,
        "hidden_units": HIDDEN_UNITS,
        "hidden_layers": HIDDEN_LAYERS,
    }

    with wandb.init(**run_options):
        wandb.config.update(fixed_settings)

        # same rescaling as the CNN: x * (2 / 255) - 1
        augment = Sequential(
            [layers.Rescaling(scale=2.0 / 255, offset=-1)],
            name="augment",
        )
        mlp = get_mlp(
            seed=SEED,
            dropout=wandb.config.dropout,
            activation=wandb.config.activation,
            hidden_layers=HIDDEN_LAYERS,
            hidden_units=HIDDEN_UNITS,
        )

        x_input = Input(shape=(28, 28, 1), name="input")
        outputs = mlp(augment(x_input))

        model = Model(inputs=x_input, outputs=outputs, name="MLP-MNIST")
        model.compile(
            optimizer=optimizers.Adam(learning_rate=wandb.config.learning_rate),
            loss=losses.SparseCategoricalCrossentropy(),
            metrics=["accuracy"],
        )
        model.fit(
            X_train,
            epochs=wandb.config.epochs,
            validation_data=X_val,
            verbose=VERBOSE,
            callbacks=[WandbMetricsLogger(log_freq="epoch")],
        )

In [None]:
# initializes the sweep controller on W&B and returns the ID
sweep_id = wandb.sweep(
    {
        # "method": "random",
        "method": "grid",
        "metric": {
            "name": "epoch/val_accuracy",  # the metric as it appears in the dashboard, not what you pass to `model.compile`
            "goal": "maximize",
        },
        # the grid below has 4 activations x 2 dropouts x 2 learning rates = 16 combinations
        "parameters": {
            "activation": {"values": ["relu", "leaky_relu", "swish", "gelu"]},
            "epochs": {"value": EPOCHS},
            "dropout": {"values": [0.0, 0.2]},
            "learning_rate": {"values": [0.001, 0.002]},
            #   "learning_rate": {
            #       "distribution": "uniform",
            #       "min": 0.001,
            #       "max": 0.1
            #   }
        },
    },
    entity=WANDB_ENTITY,
    project=WANDB_PROJECT,
)

# connect to controller to get parameters to pass to train function
# `count=16` matches the size of the grid defined above
wandb.agent(sweep_id, function=train, count=16)

# use CLI to stop sweep
# can also pause, resume, and cancel
subprocess.run(["wandb", "sweep", "--stop", f"{WANDB_ENTITY}/{WANDB_PROJECT}/{sweep_id}"])