In [1]:
from dotenv import load_dotenv
import random
import wandb
from wandb.sklearn import plot_precision_recall, plot_feature_importances
from wandb.sklearn import plot_class_proportions, plot_learning_curve, plot_roc

import numpy as np
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
load_dotenv()

True

In [None]:
    def train_pytorch(config=None):
    # Initialize a new wandb run
    with wandb.init(config=config) as run:
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = run.config

        loader = build_dataset(config.batch_size)
        network = build_network(config.fc_layer_size, config.dropout)
        optimizer = build_optimizer(network, config.optimizer, config.learning_rate)

        for epoch in range(config.epochs):
            avg_loss = train_epoch(network, loader, optimizer)
            run.log({"loss": avg_loss, "epoch": epoch})


def build_dataset(batch_size):
    """Build a DataLoader over every fifth sample of the MNIST training set.

    Args:
        batch_size: Batch size handed to ``torch.utils.data.DataLoader``.

    Returns:
        A ``DataLoader`` over the subsampled dataset.
    """
    # NOTE(review): this needs ``torch`` and torchvision's ``datasets`` /
    # ``transforms`` in scope. The notebook's visible import cells bind
    # ``datasets`` to ``sklearn.datasets`` instead -- confirm the imports.
    normalize = transforms.Normalize((0.1307,), (0.3081,))
    preprocessing = transforms.Compose([transforms.ToTensor(), normalize])

    # Downloads the MNIST training split into the current directory if needed.
    mnist_train = datasets.MNIST(
        ".", train=True, download=True, transform=preprocessing
    )

    # Keep indices 0, 5, 10, ... only.
    every_fifth = range(0, len(mnist_train), 5)
    return torch.utils.data.DataLoader(
        torch.utils.data.Subset(mnist_train, indices=every_fifth),
        batch_size=batch_size,
    )


def build_network(fc_layer_size, dropout):
    """Build a fully connected classifier with a single hidden layer.

    Args:
        fc_layer_size: Width of the hidden ``nn.Linear`` layer.
        dropout: Probability passed to ``nn.Dropout``.

    Returns:
        The ``nn.Sequential`` model after ``.to(device)``.
    """
    # 784 inputs -- presumably flattened 28x28 MNIST images; 10 output classes.
    layers = [
        nn.Flatten(),
        nn.Linear(784, fc_layer_size),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(fc_layer_size, 10),
        nn.LogSoftmax(dim=1),  # log-probabilities, as consumed by nll_loss
    ]
    model = nn.Sequential(*layers)
    return model.to(device)


def build_optimizer(network, optimizer, learning_rate):
    """Create the optimizer selected by name for ``network``'s parameters.

    Args:
        network: Module whose ``parameters()`` are optimized.
        optimizer: Optimizer name, either ``"sgd"`` or ``"adam"``.
        learning_rate: Value passed to the optimizer as ``lr``.

    Returns:
        An ``optim.SGD`` (momentum 0.9) or ``optim.Adam`` instance.

    Raises:
        ValueError: If ``optimizer`` is not one of the supported names.
    """
    if optimizer == "sgd":
        return optim.SGD(network.parameters(), lr=learning_rate, momentum=0.9)
    if optimizer == "adam":
        return optim.Adam(network.parameters(), lr=learning_rate)
    # Fail loudly: an unknown name used to fall through and be returned as a
    # plain string, which only blew up later at ``optimizer.zero_grad()``.
    raise ValueError(
        f"Unsupported optimizer {optimizer!r}; expected 'sgd' or 'adam'"
    )


def train_epoch(network, loader, optimizer, run=None):
    """Train ``network`` for one pass over ``loader``, logging each batch loss.

    Args:
        network: Model called on every batch; its output goes to ``F.nll_loss``.
        loader: Sized iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        run: Optional W&B run to log to. Defaults to the currently active run
            (``wandb.run``); a temporary run is opened only if none is active.

    Returns:
        The sum of the per-batch losses divided by ``len(loader)``.
    """
    from contextlib import nullcontext

    if run is None:
        run = wandb.run

    # Do not call ``wandb.init()`` while a run is already open: leaving the
    # resulting ``with`` block would finish (or replace) the caller's run
    # after the first epoch. Reuse the active run without taking ownership of
    # it, and only open -- and later finish -- a run when there is none.
    run_scope = nullcontext(run) if run is not None else wandb.init()

    cumu_loss = 0
    with run_scope as active_run:
        for data, target in loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()

            # Forward pass
            loss = F.nll_loss(network(data), target)
            batch_loss = loss.item()
            cumu_loss += batch_loss

            # Backward pass + weight update
            loss.backward()
            optimizer.step()
            active_run.log({"batch loss": batch_loss})

    return cumu_loss / len(loader)

In [4]:
import random

import wandb

def train():
    """Simulate a training job and log synthetic metrics to a W&B run.

    Opens a run in the ``ml-training`` project, logs one ``acc``/``loss`` pair
    per simulated epoch (epochs 2 up to ``config["epochs"] - 1``), then
    finishes the run once, after the last epoch.
    """
    config = {
        "learning_rate": 0.02,
        "architecture": "CNN",
        "dataset": "CIFAR-100",
        "epochs": 10,
    }

    # The context manager finishes the run exactly once, after the loop (and
    # also if an exception is raised). ``run.finish()`` used to sit inside
    # the loop, closing the run right after the first logged step.
    with wandb.init(project="ml-training", config=config) as run:
        # Simulate training.
        epochs = config["epochs"]  # single source of truth for the epoch count
        offset = random.random() / 5
        # Starts at 2, so ``random.random() / epoch`` is at most 0.5.
        for epoch in range(2, epochs):
            acc = 1 - 2 ** -epoch - random.random() / epoch - offset
            loss = 2 ** -epoch + random.random() / epoch + offset

            run.log({"acc": acc, "loss": loss})

0,1
acc,▁▄▅▆▇▇██
loss,█▄▂▃▁▂▁▁

0,1
acc,0.91899
loss,0.06936


In [2]:
# Plugin names / IDs; the cell's last expression displays them sorted.
plugins = [
    "Key Promoter X",
    "net.seesharpsoft.intellij.plugins.csv",
    "org.jetbrains.junie",
    "fleet.ai",
    "org.jetbrains.aidebugger",
    "com.intellij.ml.llm",
    "com.codeium.rd",
    "sourcery.pycharm-plugin",
    "com.koxudaxi.pydantic",
    "mobi.hsz.idea.gitignore",
    "com.intellij.mcpServer",
    # NOTE(review): "he" looks like an unfinished entry and is absent from
    # this cell's recorded output -- confirm whether it belongs here.
    "he",
]
sorted(plugins)

['Key Promoter X',
 'com.codeium.rd',
 'com.intellij.mcpServer',
 'com.intellij.ml.llm',
 'com.koxudaxi.pydantic',
 'fleet.ai',
 'mobi.hsz.idea.gitignore',
 'net.seesharpsoft.intellij.plugins.csv',
 'org.jetbrains.aidebugger',
 'org.jetbrains.junie',
 'sourcery.pycharm-plugin']

In [6]:
# This script needs these libraries to be installed:
#   numpy, sklearn

import wandb
from wandb.sklearn import plot_precision_recall, plot_feature_importances
from wandb.sklearn import plot_class_proportions, plot_learning_curve, plot_roc

import numpy as np
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split


def sklearn_train(test_size=0.2, random_state=None):
    """Train a random forest on the breast-cancer dataset and log it to W&B.

    Args:
        test_size: Fraction of the data held out for testing (passed to
            ``train_test_split``).
        random_state: Optional seed forwarded to both ``train_test_split`` and
            ``RandomForestClassifier``. The default ``None`` keeps the
            original unseeded behaviour; pass an int for reproducible runs.
    """
    # load and process data
    wbcd = datasets.load_breast_cancer()
    labels = wbcd.target_names

    X_train, X_test, y_train, y_test = train_test_split(
        wbcd.data, wbcd.target, test_size=test_size, random_state=random_state
    )

    # train model
    model = RandomForestClassifier(random_state=random_state)
    model.fit(X_train, y_train)

    # class probabilities for the ROC / precision-recall plots
    y_probas = model.predict_proba(X_test)

    # extra configuration logged alongside the model hyperparameters
    update_configs = {
        "test_size": test_size,
        "train_len": len(X_train),
        "test_len": len(X_test),
    }

    # Start a new wandb run with the model hyperparameters. The context
    # manager finishes the run even if one of the plotting calls raises
    # (finishing is necessary in notebooks).
    with wandb.init(project='ml-training', config=model.get_params()) as run:
        # Add additional configs to wandb
        run.config.update(update_configs)

        # log additional visualisations to wandb
        plot_class_proportions(y_train, y_test, labels)
        plot_learning_curve(model, X_train, y_train)
        plot_roc(y_test, y_probas, labels)
        plot_precision_recall(y_test, y_probas, labels)
        plot_feature_importances(model)


sklearn_train()

In [5]:
# This is secret and shouldn't be checked into version control

KeyError: 'WANDB_MODE'

In [None]:
# Open a run.
#
# Leaving the ``with`` block waits for logged data to finish uploading;
# if an exception is raised inside it, the run is marked failed.
with wandb.init(entity="", project="my-project-name") as run:
    # Save model inputs and hyperparameters.
    run.config.learning_rate = 0.01

    # Run your experiment code.
    # NOTE(review): ``num_epochs``, ``loss`` and ``model`` are not defined in
    # the visible cells -- they look like placeholders to fill in.
    for epoch in range(num_epochs):
        # Do some training...

        # Log metrics over time to visualize model performance.
        run.log({"loss": loss})

    # Upload model outputs as artifacts.
    run.log_artifact(model)
