In [12]:
# Standard-library imports and environment setup
import os
import sys
# Make the local project checkouts importable. Each entry is only added when
# it is not already present, so re-running this cell does not keep growing
# sys.path with duplicate entries.
for project_dir in ('/storage/vbutoi/projects', '/storage/vbutoi/projects/ESE'):
    if project_dir not in sys.path:
        sys.path.append(project_dir)

# DATAPATH is written as a ':'-separated list of dataset roots (one entry here).
# NOTE(review): presumably consumed by the dataset-loading code -- confirm.
os.environ['DATAPATH'] = ':'.join((
       '/storage/vbutoi/datasets',
))
# Root directory under which each experiment's `log.root` is created.
log_root_dir = '/storage/vbutoi/scratch/ESE'

# autoreload mode 2: re-import edited modules automatically, so changes to the
# project code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# yamlmagic supplies the `%%yaml <name>` cell magic used by the config cells below.
%load_ext yamlmagic

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The yamlmagic extension is already loaded. To reload it, use:
  %reload_ext yamlmagic


In [13]:
from ionpy.util.config import check_missing

def validate_cfg(cfg):
    """Sanity-check an assembled experiment config and return it.

    This is the place to cross-check inter-related settings or to force
    values manually. At the moment it only delegates to ionpy's
    ``check_missing``.

    NOTE(review): ``check_missing`` presumably raises when a required value
    is still unset (e.g. the ``'?'`` placeholder used for ``log.root``) --
    confirm against ionpy.util.config.

    Args:
        cfg: The config object to validate.

    Returns:
        The same ``cfg`` object that was passed in.
    """
    # It's usually a good idea to do a sanity check of
    # inter-related settings or force them manually
    check_missing(cfg)
    return cfg

In [14]:
%%yaml default_cfg 

# Baseline settings shared by every run. The yamlmagic cell magic stores the
# parsed result in the Python variable `default_cfg`.
experiment:
  seed: 42
    
# Data-loading options.
dataloader:
  batch_size: 1
  num_workers: 0
  pin_memory: False 

# `_class` is a dotted import path to the optimizer class.
# NOTE(review): the remaining keys are presumably passed as constructor kwargs -- confirm in ionpy.
optim: 
  _class: torch.optim.Adam
  lr: 1.0e-5
  
train:
  epochs: 10000 # With heavy aug, you need a lot of epochs, without, only 1000
  # NOTE(review): presumably "evaluate every N epochs" -- confirm in the experiment class.
  eval_freq: 100
  
log:
  checkpoint_freq: 100 
  # '?' is a placeholder: the real directory is supplied per run through the
  # `log.root` option before the config is validated.
  root: '?'
  # Metrics to log; `_fn` is a dotted import path to the metric function.
  metrics:
    dice_score:
      _fn: ionpy.metrics.dice_score
      from_logits: True
      batch_reduction: 'mean' 

# Training loss; configured with the same `from_logits` / `batch_reduction`
# values as the dice_score metric above.
loss_func: 
  _class: ionpy.loss.SoftDiceLoss
  from_logits: True
  batch_reduction: 'mean' 

<IPython.core.display.Javascript object>

In [15]:
%%yaml model_cfg  

# Network definition: a UNet with 1 input channel and 1 output channel.
# NOTE(review): `filters` presumably lists the feature width of each level --
# confirm in ese.experiment.models.UNet.
model:
  _class: ese.experiment.models.UNet
  in_channels: 1
  out_channels: 1
  filters: [32, 64, 128, 256, 512]

<IPython.core.display.Javascript object>

In [16]:
%%yaml dataset_cfg 

# Dataset selection, handled by the Segment2D dataset class.
# NOTE(review): the exact meaning of `annotator`, `task` and `axis` is defined
# by ese.experiment.datasets.Segment2D -- confirm there.
data:
  _class: ese.experiment.datasets.Segment2D 
  annotator: observer_o12
  dataset: WMH
  task: Amsterdam 
  axis: 0

<IPython.core.display.Javascript object>

In [17]:
%%yaml callbacks_cfg

# Callbacks registered under `epoch`, given as dotted import paths.
# NOTE(review): presumably invoked once per epoch -- confirm in ionpy.
callbacks:
  epoch:
    - ionpy.callbacks.ETA
    - ionpy.callbacks.JobProgress
    - ionpy.callbacks.TerminateOnNaN
    # The only callback given options: it monitors `dice_score` on the `val` phase.
    - ionpy.callbacks.ModelCheckpoint:
        monitor: dice_score
        phase: val

<IPython.core.display.Javascript object>

In [18]:
# Experiment name: used as the log sub-directory here and passed to the
# Slite runner further down.
exp_name = 'debugging'

# Ablation options: dotted config key -> list of candidate values.
options = {'log.root': ['/'.join((log_root_dir, exp_name))]}

In [19]:
from ionpy.util import dict_product, Config

# Layer the model, dataset and callback sections on top of the defaults.
base_cfg = (
    Config(default_cfg)
    .update(model_cfg)
    .update(dataset_cfg)
    .update(callbacks_cfg)
)

# Build one validated config per combination produced from `options`.
cfgs = []
for cfg_update in dict_product(options):
    cfg = validate_cfg(base_cfg.update(cfg_update))
    cfgs.append(cfg)

In [20]:
# Sanity check: one config is appended per entry yielded by dict_product(options).
len(cfgs)

1

## Run the jobs

In [21]:
# Submit cell
from ese.experiment.experiment.ese_exp import CalibrationExperiment 
from ionpy.slite import SliteRunner

# GPU ids (as strings) the runner may use on this machine.
available_gpus = ['3']

# Slite runner that launches CalibrationExperiment jobs under `exp_name`.
srunner = SliteRunner(
    task_type=CalibrationExperiment,
    exp_name=exp_name,
    available_gpus=available_gpus,
)

## Debug Station

In [22]:
# Run the first config through the runner for debugging.
# NOTE(review): the output saved with this notebook ends in
# `ValueError: 'a' and 'p' must have same size`. That text matches the error
# numpy's random `choice` raises when the probability vector and the
# population differ in length -- confirm where it originates.
# NOTE(review): the config printed in the saved output shows `train.epochs: 10`
# and no `eval_freq`, while the default_cfg cell sets 10000 / 100, so the saved
# output is likely stale. Restart the kernel and re-run all cells.
srunner.run_exp(cfgs[0])

Running CalibrationExperiment("/storage/vbutoi/scratch/ESE/debugging/20230816_135041-FZC7-a4dc05ae0909a5b2163296e279e48753")
---
callbacks:
  epoch:
  - ionpy.callbacks.ETA
  - ionpy.callbacks.JobProgress
  - ionpy.callbacks.TerminateOnNaN
  - ionpy.callbacks.ModelCheckpoint:
      monitor: dice_score
      phase: val
data:
  _class: ese.experiment.datasets.Segment2D
  annotator: observer_o12
  axis: 0
  dataset: WMH
  task: Amsterdam
dataloader:
  batch_size: 1
  num_workers: 0
  pin_memory: false
experiment:
  seed: 42
log:
  checkpoint_freq: 100
  metrics:
    dice_score:
      _fn: ionpy.metrics.dice_score
      batch_reduction: mean
      from_logits: true
  root: /storage/vbutoi/scratch/ESE/debugging
loss_func:
  _class: ionpy.loss.SoftDiceLoss
  batch_reduction: mean
  from_logits: true
model:
  _class: ese.experiment.models.UNet
  filters:
  - 32
  - 64
  - 128
  - 256
  - 512
  in_channels: 1
  out_channels: 1
optim:
  _class: torch.optim.Adam
  lr: 1.0e-05
train:
  epochs: 10

ValueError: 'a' and 'p' must have same size

## Submit Config to Long Term Jobs

In [None]:
#srunner.submit_exps(cfgs)

In [None]:
# print(srunner.jobs[0].stderr())