In [1]:
# Random Imports
import os
import sys
# Make the shared projects directory and the ESE checkout importable.
for project_dir in ('/storage/vbutoi/projects', '/storage/vbutoi/projects/ESE'):
    sys.path.append(project_dir)

# DATAPATH is written as a ':'-joined list of dataset roots (a single root for now).
dataset_roots = ['/storage/vbutoi/datasets']
os.environ['DATAPATH'] = ':'.join(dataset_roots)

# Base directory that the sweep cell combines with the experiment name for `log.root`.
log_root_dir = '/storage/vbutoi/scratch/ESE'

%load_ext autoreload
%autoreload 2
%load_ext yamlmagic

In [2]:
from ionpy.util.config import check_missing

def validate_cfg(cfg):
    """Sanity-check an assembled config and hand it back.

    This is the hook for cross-checking inter-related settings (or forcing
    them manually). At the moment the only check is ``check_missing(cfg)``.

    Args:
        cfg: The config object to validate.

    Returns:
        The same ``cfg`` object that was passed in.
    """
    # NOTE(review): presumably this rejects values left at the '?' placeholder
    # (e.g. log.root in the defaults) -- confirm against ionpy.util.config.
    check_missing(cfg)
    return cfg

In [3]:
%%yaml default_cfg 

# Baseline settings shared by every run; the model, dataset and callback
# sections are layered on top of these when the base config is assembled.
experiment:
  seed: 42
    
dataloader:
  batch_size: 4
  num_workers: 0
  pin_memory: False 

optim: 
  _class: torch.optim.Adam
  lr: 3.0e-4
  
train:
  epochs: 100 # NOTE(review): the earlier note said heavy augmentation needs many epochs and "only 1000" without it, which disagrees with the 100 set here -- confirm the intended value
  eval_freq: 10
  
log:
  checkpoint_freq: 100 
  # '?' is a placeholder; the real value is supplied through the 'log.root'
  # entry of the sweep options.
  root: '?'
  metrics:
    dice_score:
      _fn: ionpy.metrics.dice_score
      from_logits: True
      batch_reduction: 'mean' 

# The loss uses the same from_logits / batch_reduction settings as the dice_score metric above.
loss_func: 
  _class: ionpy.loss.SoftDiceLoss
  from_logits: True
  batch_reduction: 'mean' 

<IPython.core.display.Javascript object>

In [4]:
%%yaml model_cfg  

# UNet settings. `model.filters` is one of the axes swept by the ablation
# options in this notebook, so the list below is only the default.
model:
  _class: ese.experiment.models.UNet
  in_channels: 1
  out_channels: 1
  filters: [64, 64, 64, 64]
  convs_per_block: 2

<IPython.core.display.Javascript object>

In [5]:
%%yaml dataset_cfg 

# WMH dataset settings. `data.slice_batch_size` is swept over [1, 4, 8, 16]
# by the ablation options, so the value below is only the default.
data:
  _class: ese.experiment.datasets.WMH
  annotator: observer_o12
  axis: 0
  dataset: WMH
  slice_batch_size: 1
  slicing: dense
  task: Amsterdam 

<IPython.core.display.Javascript object>

In [6]:
%%yaml callbacks_cfg

# Callbacks grouped under `step` and `epoch` keys
# (presumably when each group fires -- confirm against ionpy).
callbacks:
  step:
    - ese.experiment.callbacks.ShowPredictions
  epoch:
    - ionpy.callbacks.ETA
    - ionpy.callbacks.JobProgress
    - ionpy.callbacks.TerminateOnNaN
    - ionpy.callbacks.PrintLogged
    # Checkpointing is configured to monitor the dice_score metric on the val phase.
    - ionpy.callbacks.ModelCheckpoint:
        monitor: dice_score
        phase: val

<IPython.core.display.Javascript object>

In [7]:
# Need to define the experiment name
exp_name = 'WMH_Runs'

# Create the ablation options.
# Each key is a dotted path into the config and each value is the list of
# settings to sweep for that path; the lists are combined with dict_product
# when the configs are assembled.
options = {
    # Single-valued: every run logs under the same experiment directory.
    'log.root': [f'{log_root_dir}/{exp_name}'],
    'data.slice_batch_size': [1, 4, 8, 16],
    # FIX: this entry used to be keyed 'log.root' a second time. In a dict
    # literal the last duplicate wins, so the log directory above was silently
    # replaced by the integers 1/4/8/16.
    # NOTE(review): the values look like batch sizes, so the key was assumed to
    # be 'dataloader.batch_size' -- confirm this is the intended sweep axis.
    'dataloader.batch_size': [1, 4, 8, 16],
    'model.filters': [[32, 32, 32, 32], [64, 64, 64, 64]],
}

In [8]:
from ionpy.util import dict_product, Config

# Assemble the base config by layering each YAML section onto the defaults, in order
base_cfg = Config(default_cfg)
for cfg_section in (model_cfg, dataset_cfg, callbacks_cfg):
    base_cfg = base_cfg.update(cfg_section)

# Build one validated config per option combination yielded by dict_product
cfgs = []
for cfg_update in dict_product(options):
    cfg = validate_cfg(base_cfg.update(cfg_update))
    cfgs.append(cfg)

In [9]:
# Sanity check: how many configs the sweep produced.
len(cfgs)

32

## Run the jobs

In [10]:
# Submit cell
from ese.experiment.experiment.ese_exp import CalibrationExperiment 
from ionpy.slite import SliteRunner

# GPU ids (as strings) handed to the runner: '0' through '3'
available_gpus = [str(gpu_id) for gpu_id in range(4)]

# Build the Slite runner for this experiment
srunner = SliteRunner(
    task_type=CalibrationExperiment,
    exp_name=exp_name,
    available_gpus=available_gpus,
)

## Debug Station

In [11]:
# srunner.run_exp(cfgs[0])

## Submit Config to Long Term Jobs

In [12]:
# Hand every assembled config to the runner for submission.
srunner.submit_exps(cfgs)

Submitted job 462178 with 8 configs.
Submitted job 462199 with 8 configs.
Submitted job 462218 with 8 configs.
Submitted job 462236 with 8 configs.


In [14]:
# Show the stderr of the first submitted job to inspect why it failed.
print(srunner.jobs[0].stderr())

submitit ERROR (2023-08-16 23:52:47,195) - Submitted job triggered an exception
Traceback (most recent call last):
  File "/local/vbutoi/envs/UniverSegTF/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/local/vbutoi/envs/UniverSegTF/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/local/vbutoi/envs/UniverSegTF/lib/python3.9/site-packages/submitit/core/_submit.py", line 11, in <module>
    submitit_main()
  File "/local/vbutoi/envs/UniverSegTF/lib/python3.9/site-packages/submitit/core/submission.py", line 72, in submitit_main
    process_job(args.folder)
  File "/local/vbutoi/envs/UniverSegTF/lib/python3.9/site-packages/submitit/core/submission.py", line 65, in process_job
    raise error
  File "/local/vbutoi/envs/UniverSegTF/lib/python3.9/site-packages/submitit/core/submission.py", line 51, in process_job
    delayed = utils.DelayedSubmission.load(paths.submitted_pickle)
  File "/local/