In [1]:
# Optional Google Colab setup, left disabled: mount Google Drive, then copy the
# project's *.py modules and the conf directory from MyDrive/CA4 into the
# current working directory. Uncomment when running on Colab.
# from google.colab import drive
# drive.mount('/content/drive')
# !cp /content/drive/MyDrive/CA4/*.py .
# !cp -r /content/drive/MyDrive/CA4/conf .

In [2]:
!pip install hydra-core --no-binary :all:
!pip install wandb



In [3]:
%load_ext autoreload
%autoreload 2
from train import run
import os
import os.path as osp
from hydra import initialize, initialize_config_module, initialize_config_dir, compose
from omegaconf import OmegaConf

In [4]:
with initialize(version_base=None, config_path="conf"):
    cfg = compose(config_name="config")
    print(cfg)
    data_root = cfg.data.data_root
    if not osp.exists(data_root):
        os.makedirs(data_root, exist_ok=True)
        data_root_father = osp.dirname(data_root)
        !wget https://www.cis.upenn.edu/~jshi/ped_html/PennFudanPed.zip -O {data_root_father}/data.zip
        !unzip {data_root_father}/data.zip -d {data_root_father}

{'output_dir': 'tmp/output/first', 'wandb': {'project': 'pedestrian-detection', 'name': 'debug'}, 'model': {'in_channels': 3, 'out_channels': 1, 'kernel_size': 3, 'main_channel': 8, 'depth': 3, 'spatial_scale_factor': 2, 'channel_scale_factor': 4}, 'data': {'data_root': 'tmp/data/PennFudanPed'}, 'train': {'batch_size': 8, 'num_epochs': 300, 'num_workers': 4, 'img_size': 128, 'dtype': 'bfloat16', 'device': 'cuda', 'seed': 42, 'optimizer': {'_target_': 'torch.optim.Adam', 'lr': 0.0001}, 'scheduler': {'_target_': 'utils.CosineAnnealingWarmupRestarts', 'total_epochs': '${train.num_epochs}', 'warmup_epochs': None, 'min_lr': None, 'max_lr': '${train.optimizer.lr}'}}}


In [5]:
# wandb: authenticate with Weights & Biases before the sweep cells below call
# wandb.init / wandb.agent. wandb.login() is the last expression of the cell,
# so its return value is what the cell displays.
import wandb
wandb.login()


[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mermu2001[0m ([33mermuzzz2001[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

# Define a sweep

In [6]:
# Sweep definition: Bayesian search that maximizes the validation dice score.
# Every hyperparameter is a discrete choice, so the search space is written as
# (name, choices) pairs and expanded into the {"values": [...]} form.
sweep_configuration = dict(
    name="sweep-hyperparams",
    method="bayes",
    metric=dict(goal="maximize", name="epoch_val_dice_score"),
    parameters={
        param_name: {"values": choices}
        for param_name, choices in (
            ("learning_rate", [1e-4, 1e-5]),
            ("batch_size", [8, 16, 32]),
            ("epochs", [30, 120, 300]),
            ("img_size", [64, 128]),
        )
    },
)

# The sweep was registered once with the call below; its id is reused here so
# re-running the cell attaches to the same sweep instead of creating a new one.
# sweep_id = wandb.sweep(sweep=sweep_configuration, project="pedestrian-detection")
sweep_id = "bp8sw90v"
print(sweep_id)

bp8sw90v


# Sweep run

In [7]:
from train import run
def sweep_train():
    """Run one training job with the hyperparameters of the current sweep run.

    Starts a W&B run, turns the values in ``wandb.config`` (learning_rate,
    batch_size, epochs, img_size) and the run's name into Hydra overrides,
    composes the "config" config from the ``conf`` directory with those
    overrides, and passes the resulting config to ``run``.
    """
    from hydra import compose, initialize

    wandb.init()
    sweep_params = wandb.config

    # Map each sweep parameter onto the config key it overrides.
    hydra_overrides = [
        f"train.optimizer.lr={sweep_params.learning_rate}",
        f"train.batch_size={sweep_params.batch_size}",
        f"train.num_epochs={sweep_params.epochs}",
        f"train.img_size={sweep_params.img_size}",
        f"wandb.name={wandb.run.name}",
    ]

    with initialize(version_base=None, config_path="conf"):
        cfg = compose(config_name="config", overrides=hydra_overrides)
        run(cfg)

In [8]:
# Start a sweep agent in this kernel: it calls sweep_train once per run of the
# sweep identified by sweep_id in the "pedestrian-detection" project.
# NOTE(review): no `count=` is passed, and the recorded output ends with a
# manual Ctrl+C — consider bounding the number of runs so "Run All" terminates.
wandb.agent(sweep_id, function=sweep_train, project="pedestrian-detection")

[34m[1mwandb[0m: Agent Starting Run: uh2wowlw with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▃▆▇███████▇▇▆▆▆▆▆▆▆▅▅▄▄▄▄▃▃▃▃▃▃▂▂▂▁▁▁▁▁▁
epoch_train_loss,██▅▅▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,█▄▅▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▁▅▅▄▇██████████████████████████████████
epoch_val_iou,▁▅▄▅▄▇██████████████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.04848
epoch_val_dice_loss,0.26811
epoch_val_dice_score,0.73189
epoch_val_iou,0.7099


[34m[1mwandb[0m: Agent Starting Run: wee6zyys with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▃▃▄▄▅▆████████▇▇▇▆▆▆▆▆▅▅▅▅▄▄▄▃▃▂▂▂▂▁▁▁▁▁
epoch_train_loss,█▇▅▄▄▃▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,██▇▅▅▄▅▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▁▃▄▄▄▄▄▆▆▇▇▇███████████████████████████
epoch_val_iou,▁▂▂▄▄▄▆▇████████████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.05816
epoch_val_dice_loss,0.28033
epoch_val_dice_score,0.71967
epoch_val_iou,0.70569


[34m[1mwandb[0m: Agent Starting Run: t4jra9ff with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 1e-05




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▁▄▇█████████▇▇▇▇▇▆▆▆▆▅▅▅▅▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁
epoch_train_loss,█▇▇▆▄▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,████▃▂▁▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▁▆▆▇▇██████████████████████████████████
epoch_val_iou,▁▁▁▂▂▆▇▇▇▇▇▇████████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.65694
epoch_val_dice_loss,0.39666
epoch_val_dice_score,0.60334
epoch_val_iou,0.55268


[34m[1mwandb[0m: Agent Starting Run: b0qh00sl with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▂▂▄▄█████▇▇▇▆▆▅▅▅▅▅▅▄▄▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
epoch_train_loss,██▇▄▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,██▆▇▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▁▄▃▄▆▇▅▆███████████████████████████████
epoch_val_iou,▁▁▂▃▄▆▇▇▇█▇█▇███████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.0651
epoch_val_dice_loss,0.2897
epoch_val_dice_score,0.7103
epoch_val_iou,0.70401


[34m[1mwandb[0m: Agent Starting Run: q2k6tyga with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▂▃▅▅▇█████▇▇▇▆▆▆▅▅▅▅▅▅▅▄▄▃▃▃▃▃▃▃▂▂▂▁▁▁▁▁
epoch_train_loss,██▇▇▇▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,█▅▅▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▇▇█▇█▇██▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇
epoch_val_iou,▁▄▆▅█▇██████████████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.08961
epoch_val_dice_loss,0.36913
epoch_val_dice_score,0.63087
epoch_val_iou,0.67908


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tslw5gm2 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▃▄▅▆▆▇█████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▄▃▃▃▃▃▂▂▁▁▁▁▁
epoch_train_loss,█▇▇▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,█▇▆▅█▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▃▄▆▇▇██████████████████████████████████
epoch_val_iou,▁▄▄▄▅▇█▇████████████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.04948
epoch_val_dice_loss,0.29242
epoch_val_dice_score,0.70758
epoch_val_iou,0.69514


[34m[1mwandb[0m: Agent Starting Run: mmi5n2hz with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 1e-05




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▁▂▅██████▇▇▇▇▇▇▇▆▆▆▆▅▅▅▄▄▃▃▃▃▂▂▂▁▁▁▁▁▁▁▁
epoch_train_loss,██▇▇▇▇▆▆▅▅▃▃▃▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,████▄▄▃▄▃▃▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▁▁▁▁▂▅▅▇▇▇█████████████████████████████
epoch_val_iou,▁▁▁▂▂▄▄▃▇▇▇▆▇▇▇▇▇███████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.6645
epoch_val_dice_loss,0.40834
epoch_val_dice_score,0.59166
epoch_val_iou,0.53574


[34m[1mwandb[0m: Agent Starting Run: 2o81f1yq with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▂▃▇███████▇▇▇▇▇▇▇▆▅▅▅▅▅▄▄▄▄▃▃▃▂▂▂▁▁▁▁▁▁▁
epoch_train_loss,█████▇▇▇▆▅▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,█▅▂▃▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▃▄▂▃▅▆▇▇▇██████████████████████████████
epoch_val_iou,▁▁▂▂▄▇▇▇████████████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.04905
epoch_val_dice_loss,0.2818
epoch_val_dice_score,0.7182
epoch_val_iou,0.7053


[34m[1mwandb[0m: Agent Starting Run: yr4khg45 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▃▄███▇▇▇▇▇▆▆▆▆▆▆▆▅▅▅▅▄▄▄▄▃▃▃▃▂▁▁▁▁▁▁▁▁▁▁
epoch_train_loss,█▇▄▄▄▃▃▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,█▆▅▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▁▃▁▅▇██████████████████████████████████
epoch_val_iou,▁▁▄▄▄▄▅▅▆▇▇█████████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.05903
epoch_val_dice_loss,0.27113
epoch_val_dice_score,0.72887
epoch_val_iou,0.71052


[34m[1mwandb[0m: Agent Starting Run: sos17d6l with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▁▅█████▇▇▇▇▇▇▆▆▆▅▅▅▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁
epoch_train_loss,██▆▅▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,█▆▅▆▅▆▅▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▂▃▇▇███████████████████████████████████
epoch_val_iou,▁▁▄▆▇███████████████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.05252
epoch_val_dice_loss,0.29602
epoch_val_dice_score,0.70398
epoch_val_iou,0.69648


[34m[1mwandb[0m: Agent Starting Run: 7p3qhxcl with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▂▂▃▄▅▇██████████▇▇▇▇▇▆▆▆▅▅▅▃▃▃▂▂▂▂▁▁▁▁▁▁
epoch_train_loss,█▆▅▄▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,██▆▅▅▃▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▁▂▃▄▆▇▇▇███████████████████████████████
epoch_val_iou,▁▂▁▅▆███▇▇▇█▇█▇█████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.04366
epoch_val_dice_loss,0.27241
epoch_val_dice_score,0.72759
epoch_val_iou,0.69245


[34m[1mwandb[0m: Agent Starting Run: 7pihowel with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▂▂▅███▇▇▇▇▇▆▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁
epoch_train_loss,█▆▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,███▇▇▆▅▅▄▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▁▁▂▄▇▇█████████████████████████████████
epoch_val_iou,▁▁▃▅▄███████████████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.06424
epoch_val_dice_loss,0.28097
epoch_val_dice_score,0.71903
epoch_val_iou,0.70899


[34m[1mwandb[0m: Agent Starting Run: r5x9jhto with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


0,1
epoch_learning_rate,▂▄▄▅▇█████▇▇▇▇▇▇▇▇▆▆▅▅▅▅▅▄▄▄▃▃▃▃▂▂▁▁▁▁▁▁
epoch_train_loss,████▇▃▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_loss,███▇▆▇▅▄▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch_val_dice_score,▁▁▆▇▇███████████████████████████████████
epoch_val_iou,▁▂▅▅▇▇██████████████████████████████████

0,1
epoch_learning_rate,0.0
epoch_train_loss,0.06293
epoch_val_dice_loss,0.28143
epoch_val_dice_score,0.71857
epoch_val_iou,0.70577


[34m[1mwandb[0m: Agent Starting Run: y3ev5dmo with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	epochs: 300
[34m[1mwandb[0m: 	img_size: 128
[34m[1mwandb[0m: 	learning_rate: 0.0001




  Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
  0%|          | 0/9 [00:00<?, ?it/s][34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.
 11%|█         | 1/9 [00:00<00:05,  1.56it/s]

 56%|█████▌    | 5/9 [00:02<00:01,  2.59it/s]