In [2]:
import wandb
import random

# Start a new W&B run to track this script.
# The run is used as a context manager so it is always finished when the block
# exits -- including when the loop raises or the cell is interrupted -- instead
# of relying on a trailing wandb.finish() that is skipped on error.
with wandb.init(
    # W&B project where this run will be logged
    project="my-awesome-project",

    # Hyperparameters and run metadata recorded with the run
    config={
        "learning_rate": 0.02,
        "architecture": "CNN",
        "dataset": "CIFAR-100",
        "epochs": 10,
    },
) as run:
    # Simulate training.
    # Read the epoch count back from the run config so the value that is
    # logged and the value that drives the loop cannot drift apart.
    epochs = run.config["epochs"]
    offset = random.random() / 5
    # Starts at 2 (not 0) so that `random.random() / epoch` never divides by zero.
    for epoch in range(2, epochs):
        acc = 1 - 2 ** -epoch - random.random() / epoch - offset
        loss = 2 ** -epoch + random.random() / epoch + offset

        # Log this epoch's metrics to W&B
        run.log({"acc": acc, "loss": loss})

[34m[1mwandb[0m: Currently logged in as: [33mnohail2000[0m ([33mtalktalk[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113474100016497, max=1.0…

VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
acc,▁▆▅█▇▇▇█
loss,█▄▂▂▂▂▁▁

0,1
acc,0.8383
loss,0.19657


In [4]:
# Make sure this session is logged in to W&B. The call's return value is shown
# as the cell result (True in the recorded output below).
wandb.login()

True

In [5]:
import random

# Launch 5 simulated experiments
total_runs = 5
for run_idx in range(total_runs):
    # 🐝 1️⃣ Start a new run to track this experiment.
    # Using the run as a context manager guarantees it is finished before the
    # next iteration starts, even if the simulated training loop raises.
    with wandb.init(
        # Project where this run will be logged
        project="basic-intro",
        # Explicit run name (otherwise W&B assigns a random one, like sunshine-lollypop-10)
        name=f"experiment_{run_idx}",
        # Hyperparameters and run metadata recorded with the run
        config={
            "learning_rate": 0.02,
            "architecture": "CNN",
            "dataset": "CIFAR-100",
            "epochs": 10,
        },
    ) as run:
        # This simple block simulates a training loop logging metrics.
        # The epoch count comes from the run config so the logged value and
        # the loop bound stay in sync.
        epochs = run.config["epochs"]
        offset = random.random() / 5
        # Starts at 2 (not 0) so that `random.random() / epoch` never divides by zero.
        for epoch in range(2, epochs):
            acc = 1 - 2 ** -epoch - random.random() / epoch - offset
            loss = 2 ** -epoch + random.random() / epoch + offset

            # 🐝 2️⃣ Log metrics from your script to W&B
            run.log({"acc": acc, "loss": loss})

VBox(children=(Label(value='0.015 MB of 0.015 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
acc,▁▃▃█▇▆▇▇
loss,█▃▂▃▁▃▂▂

0,1
acc,0.76882
loss,0.2391


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113261966642717, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=0.7650273224043715, max=1.0…

0,1
acc,▁▆▅▆▆█▇▇
loss,█▇▅▂▂▂▁▁

0,1
acc,0.70116
loss,0.22101


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113150600027034, max=1.0…

VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
acc,▁▅█▇▇███
loss,█▂▂▂▁▁▁▁

0,1
acc,0.80287
loss,0.13859


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011114068988859395, max=1.0…

VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
acc,▁▆▇▆▆█▇▇
loss,█▃▂▃▂▃▁▂

0,1
acc,0.90908
loss,0.07966


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113007122185081, max=1.0…

VBox(children=(Label(value='0.008 MB of 0.008 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
acc,▁▅▅▆█▇█▇
loss,▇█▄▂▂▁▃▃

0,1
acc,0.75834
loss,0.25138


In [6]:
#@title
import wandb
import math
import random
import torch, torchvision
import torch.nn as nn
import torchvision.transforms as T

# Run on the first CUDA device when PyTorch reports one is available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

def get_dataloader(is_train, batch_size, slice=5):
    """Build a DataLoader over a strided subset of MNIST.

    Args:
        is_train: Passed to MNIST as ``train``; also decides whether the
            loader shuffles (shuffled when truthy, in order otherwise).
        batch_size: Batch size handed to the DataLoader.
        slice: Stride of the subset -- every ``slice``-th sample is kept,
            starting at index 0.

    Returns:
        A ``torch.utils.data.DataLoader`` over the subset, created with
        ``pin_memory=True`` and ``num_workers=2``.

    The dataset is stored under the current directory and downloaded if it is
    not already there (``download=True``).
    """
    mnist = torchvision.datasets.MNIST(
        root=".",
        train=is_train,
        transform=T.ToTensor(),
        download=True,
    )
    kept_indices = range(0, len(mnist), slice)
    subset = torch.utils.data.Subset(mnist, indices=kept_indices)
    return torch.utils.data.DataLoader(
        dataset=subset,
        batch_size=batch_size,
        shuffle=bool(is_train),
        pin_memory=True,
        num_workers=2,
    )

def get_model(dropout):
    """Create the small fully connected classifier and move it to `device`.

    The network flattens its input, maps 28*28 features to 256 hidden units
    (followed by batch norm, ReLU and dropout), then maps to 10 outputs.

    Args:
        dropout: Probability passed to the ``nn.Dropout`` layer.

    Returns:
        The ``nn.Sequential`` model, already placed on the module-level `device`.
    """
    layers = [
        nn.Flatten(),
        nn.Linear(28*28, 256),
        nn.BatchNorm1d(256),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(256, 10),
    ]
    return nn.Sequential(*layers).to(device)

def validate_model(model, valid_dl, loss_func, log_images=False, batch_idx=0):
    """Evaluate `model` on `valid_dl` and optionally log one batch as a table.

    Args:
        model: Module to evaluate; it is switched to eval mode and left there.
        valid_dl: DataLoader yielding ``(images, labels)`` batches.
        loss_func: Callable taking ``(outputs, labels)``. Its result is
            multiplied by the batch size before being summed, which presumes
            it returns a per-batch mean -- confirm against the caller.
        log_images: When true, the batch at position `batch_idx` is sent to
            `log_image_table` together with predictions and softmax scores.
        batch_idx: Index of the batch to log, so the same batch is logged
            on every call.

    Returns:
        ``(loss, accuracy)``: the accumulated loss and the number of correct
        predictions, each divided by ``len(valid_dl.dataset)``.
    """
    model.eval()
    loss_sum = 0.
    n_correct = 0
    with torch.inference_mode():
        for step, batch in enumerate(valid_dl):
            images, labels = batch
            images = images.to(device)
            labels = labels.to(device)

            # Forward pass; weight the batch loss by the batch size
            outputs = model(images)
            batch_loss = loss_func(outputs, labels)
            loss_sum += batch_loss*labels.size(0)

            # Predicted class = index of the largest output along dim 1
            predicted = torch.max(outputs.data, 1).indices
            n_correct += predicted.eq(labels).sum().item()

            # Log a single, fixed batch of images to the dashboard
            if log_images and step == batch_idx:
                scores = outputs.softmax(dim=1)
                log_image_table(images, predicted, labels, scores)

    mean_loss = loss_sum / len(valid_dl.dataset)
    accuracy = n_correct / len(valid_dl.dataset)
    return mean_loss, accuracy

def log_image_table(images, predicted, labels, probs):
    """Log a wandb.Table with one row per sample: image, pred, target, scores.

    Args:
        images: Batch of images; only index 0 of each image is rendered,
            multiplied by 255.
        predicted: Predicted label per sample.
        labels: Target label per sample.
        probs: Per-sample scores; each row is unpacked into the ten
            ``score_0`` .. ``score_9`` columns.

    All four inputs are moved to the CPU before the rows are built. The table
    is logged under the key ``"predictions_table"`` with ``commit=False``.
    """
    # 🐝 Create a wandb Table to hold images, labels and predictions
    columns = ["image", "pred", "target"]
    columns += [f"score_{i}" for i in range(10)]
    table = wandb.Table(columns=columns)

    rows = zip(images.to("cpu"), predicted.to("cpu"), labels.to("cpu"), probs.to("cpu"))
    for img, pred, targ, prob in rows:
        picture = wandb.Image(img[0].numpy()*255)
        table.add_data(picture, pred, targ, *prob.numpy())

    wandb.log({"predictions_table": table}, commit=False)

In [6]:
# Accuracy at or below this value fires a W&B Alert and stops the run
acc_threshold = 0.3

# Start a wandb run.
# Using the run as a context manager marks it finished when the block exits,
# whether the loop completes, breaks out after an alert, or raises.
with wandb.init(project="pytorch-intro") as run:
    # Simulating a model training loop
    for training_step in range(1000):

        # Generate a random number for accuracy.
        # NOTE: this is the sum of two draws from [0, 1), so the simulated
        # "accuracy" ranges over [0, 2) and can exceed 1 (the recorded output
        # contains values such as 1.471).
        accuracy = round(random.random() + random.random(), 3)
        print(f'Accuracy is: {accuracy}, {acc_threshold}')

        # 🐝 Log accuracy to wandb
        run.log({"Accuracy": accuracy})

        # 🔔 If the accuracy is at or below the threshold, fire a W&B Alert and stop the run
        if accuracy <= acc_threshold:
            # 🐝 Send the wandb Alert
            run.alert(
                title='Low Accuracy',
                text=f'Accuracy {accuracy} at step {training_step} is below the acceptable threshold, {acc_threshold}',
            )
            print('Alert triggered')
            break

Accuracy is: 0.465, 0.3
Accuracy is: 1.471, 0.3
Accuracy is: 1.059, 0.3
Accuracy is: 0.726, 0.3
Accuracy is: 0.805, 0.3
Accuracy is: 1.191, 0.3
Accuracy is: 0.907, 0.3
Accuracy is: 1.581, 0.3
Accuracy is: 1.584, 0.3
Accuracy is: 0.955, 0.3
Accuracy is: 1.065, 0.3
Accuracy is: 1.245, 0.3
Accuracy is: 0.852, 0.3
Accuracy is: 1.396, 0.3
Accuracy is: 1.088, 0.3
Accuracy is: 1.45, 0.3
Accuracy is: 0.463, 0.3
Accuracy is: 1.267, 0.3
Accuracy is: 1.137, 0.3
Accuracy is: 0.963, 0.3
Accuracy is: 0.221, 0.3
Alert triggered


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
Accuracy,▂▇▅▄▄▆▅██▅▅▆▄▇▅▇▂▆▆▅▁

0,1
Accuracy,0.221
