In [1]:
# Standard-library modules needed to configure the interpreter and environment
import os
import sys

# Make the project checkouts importable (presumably where `ionpy` and `ese`
# live -- confirm against the directory layout).
sys.path.extend([
    '/storage/vbutoi/projects',
    '/storage/vbutoi/projects/ESE',
])

# DATAPATH is a colon-separated list of dataset roots; a single root is used here.
os.environ['DATAPATH'] = ':'.join(['/storage/vbutoi/datasets'])

# Parent directory for experiment logs; the ablation cell appends the experiment name.
log_root_dir = '/storage/vbutoi/scratch/ESE'

# Enables the %%yaml cell magic used below to bind each YAML block to a Python variable
%load_ext yamlmagic

In [2]:
from ionpy.util.config import check_missing

def validate_cfg(cfg):
    """Sanity-check a config and hand the same object back.

    This is the hook for verifying inter-related settings (or forcing them
    manually) before a config is used. At present it only calls
    ``check_missing`` -- presumably rejecting values that were never filled
    in; confirm against ionpy.

    Args:
        cfg: The config object to check.

    Returns:
        The ``cfg`` object that was passed in.
    """
    check_missing(cfg)
    return cfg

In [3]:
%%yaml default_cfg 

# Baseline settings shared by every run. Later cells layer the model, dataset,
# and callback sections on top, followed by the per-ablation overrides.
# `_class` / `_fn` values are dotted import paths (presumably resolved and
# called by the experiment code with the sibling keys -- TODO confirm).
experiment:
  seed: 42
    
dataloader:
  batch_size: 1 
  # '?' is a placeholder; the ablation options cell sets dataloader.num_workers.
  num_workers: '?' 
  pin_memory: False 

optim: 
  _class: torch.optim.Adam
  lr: 3.0e-4
  
train:
  epochs: 300
  eval_freq: 10
  
log:
  checkpoint_freq: 50 
  # '?' is a placeholder; the ablation options cell sets log.root.
  root: '?'
  metrics:
    # The metric name `dice_score` is what the ModelCheckpoint callback monitors.
    dice_score:
      _fn: ionpy.metrics.dice_score
      from_logits: True
      batch_reduction: 'mean' 

loss_func: 
  _class: ionpy.loss.SoftDiceLoss
  from_logits: True
  batch_reduction: 'mean' 

<IPython.core.display.Javascript object>

In [4]:
%%yaml model_cfg  

# Network architecture section. `_class` is a dotted path to the model class
# (presumably built with the remaining keys as keyword arguments -- TODO confirm).
model:
  _class: ese.experiment.models.UNet
  in_channels: 1
  out_channels: 1
  filters: [64, 64, 64, 64]
  convs_per_block: 2

<IPython.core.display.Javascript object>

In [5]:
%%yaml dataset_cfg 

# Dataset section: selects the WMH dataset class and its options.
data:
  _class: ese.experiment.datasets.WMH
  annotator: observer_o12
  axis: 0
  dataset: WMH
  # Default of 1; the first ablation option set overrides this with 24 / 32 / 48.
  slice_batch_size: 1
  slicing: dense
  task: Amsterdam 

<IPython.core.display.Javascript object>

In [6]:
%%yaml callbacks_cfg

# Callbacks attached to the experiment, keyed by trigger (presumably `step`
# fires every training step and `epoch` once per epoch -- TODO confirm).
# NOTE(review): the captured job log later in this notebook does not list
# WandbLogger, so that entry was presumably added after the logged run.
callbacks:
  # step:
  #   - ese.experiment.callbacks.ShowPredictions
  epoch:
    - ese.experiment.callbacks.WandbLogger
    - ionpy.callbacks.ETA
    - ionpy.callbacks.JobProgress
    - ionpy.callbacks.TerminateOnNaN
    - ionpy.callbacks.PrintLogged
    # Monitors the `dice_score` metric (defined under log.metrics) on the val phase.
    - ionpy.callbacks.ModelCheckpoint:
        monitor: dice_score
        phase: val

<IPython.core.display.Javascript object>

In [7]:
# Experiment name: used as the log sub-directory here and passed to the job runner later
exp_name = 'big_batch_size'

# Two ablations over the same values: one sweeps the dataset's slice batch
# size, the other the dataloader batch size. Both fill in the '?' placeholders
# (log.root, dataloader.num_workers) left open in the default config.
option_set = [
    {
        'log.root': [f'{log_root_dir}/{exp_name}'],
        'dataloader.num_workers': [4],
        swept_key: [24, 32, 48],
    }
    for swept_key in ('data.slice_batch_size', 'dataloader.batch_size')
]

In [8]:
from ionpy.util import dict_product, Config

# Layer the model, dataset, and callback sections onto the defaults
base_cfg = (
    Config(default_cfg)
    .update(model_cfg)
    .update(dataset_cfg)
    .update(callbacks_cfg)
)

# One validated config per override combination, in option-set order.
# dict_product presumably yields one dict per combination of the listed
# values -- confirm against ionpy. Relies on Config.update returning a new
# config rather than mutating base_cfg.
cfgs = [
    validate_cfg(base_cfg.update(cfg_update))
    for option_dict in option_set
    for cfg_update in dict_product(option_dict)
]

In [9]:
# Expect 6: two option sets, each sweeping three values
len(cfgs)

6

## Run the jobs

In [10]:
# Submit cell
from ese.experiment.experiment.ese_exp import CalibrationExperiment 
from ionpy.slite import SliteRunner

# GPU ids on this machine that the runner is allowed to use
available_gpus = ['0', '1', '2', '3']

# Runner that launches CalibrationExperiment jobs under this experiment name
srunner = SliteRunner(
    task_type=CalibrationExperiment,
    exp_name=exp_name,
    available_gpus=available_gpus,
)

## Debug Station

In [11]:
# srunner.run_exp(cfgs[3])

## Submit Configs as Long-Running Jobs

In [12]:
# Queue every config through the runner; it prints one line per submitted job
srunner.submit_exps(cfgs)

Submitted job 821436 with 2 configs.
Submitted job 821463 with 2 configs.
Submitted job 821526 with 1 configs.
Submitted job 821736 with 1 configs.


In [13]:
# Number of jobs tracked by the runner (configs can share a job, so this may be < len(cfgs))
len(srunner.jobs)

4

In [18]:
# Show the captured stdout of the third job in the runner's job list
print(srunner.jobs[2].stdout())

submitit INFO (2023-08-17 10:06:06,231) - Starting with JobEnvironment(job_id=821526, hostname=twix, local_rank=0(1), node=0(1), global_rank=0(1))
submitit INFO (2023-08-17 10:06:06,232) - Loading pickle: /storage/vbutoi/scratch/submitit/big_batch_size/821526_submitted.pkl
Running CalibrationExperiment("/storage/vbutoi/scratch/ESE/big_batch_size/20230817_100608-JL3F-3e33d936f6e5424c0531c7b14120edb9")
---
callbacks:
  epoch:
  - ionpy.callbacks.ETA
  - ionpy.callbacks.JobProgress
  - ionpy.callbacks.TerminateOnNaN
  - ionpy.callbacks.PrintLogged
  - ionpy.callbacks.ModelCheckpoint:
      monitor: dice_score
      phase: val
data:
  _class: ese.experiment.datasets.WMH
  annotator: observer_o12
  axis: 0
  dataset: WMH
  slice_batch_size: 48
  slicing: dense
  task: Amsterdam
dataloader:
  batch_size: 1
  num_workers: 4
  pin_memory: false
experiment:
  seed: 42
log:
  checkpoint_freq: 50
  metrics:
    dice_score:
      _fn: ionpy.metrics.dice_score
      batch_reduction: mean
      from