In [1]:
# Random Imports
import os

# Regular schema dictates that we put DATAPATH
os.environ['DATAPATH'] = ':'.join((
       '/storage/vbutoi/datasets',
))
log_root_dir = '/storage/vbutoi/scratch/ESE'

%load_ext autoreload
%autoreload 2
%load_ext yamlmagic

In [2]:
from IonPy.util.config import check_missing

def validate_cfg(cfg):
    """Sanity-check an assembled config and hand it back unchanged.

    This is the hook for cross-checking inter-related settings (or forcing
    them manually) before a config is queued. At the moment the only check
    is ``check_missing(cfg)``.

    Args:
        cfg: The config object to validate.

    Returns:
        The same ``cfg`` object that was passed in.

    Note:
        ``check_missing`` is imported from ``IonPy.util.config``; presumably
        it raises when required values are still unset -- confirm there.
    """
    check_missing(cfg)
    return cfg

In [3]:
%%yaml default_cfg 

# Baseline configuration. The yamlmagic cell magic parses this cell into the
# Python variable `default_cfg`, which the assembly cell wraps in Config(...).
# `_class` / `_fn` values are dotted import paths -- presumably resolved by the
# experiment framework with the sibling keys passed as arguments (TODO confirm).

experiment:
  seed: 42
    
# NOTE(review): batch_size 1 / num_workers 0 look like debugging values --
# confirm before launching long runs.
dataloader:
  batch_size: 1
  num_workers: 0
  pin_memory: False 

# NOTE(review): optax.adam names its step-size argument `learning_rate`;
# confirm that the framework maps `lr` onto it.
optim: 
  _class: optax.adam
  lr: 1.0e-5
  
train:
  epochs: 10000 # Heavy augmentation needs many epochs; without it about 1000 is enough
  eval_freq: 100
  
log:
  checkpoint_freq: 100 
  # '?' is a placeholder: the `options` grid later in the notebook supplies
  # `log.root`. validate_cfg runs check_missing on the merged config, which
  # presumably rejects any '?' left unfilled (TODO confirm).
  root: '?'
  metrics:
    # `dice_score` is the metric name the ModelCheckpoint callback monitors.
    dice_score:
      _fn: IonPy.metrics.dice_score
      from_logits: True
      batch_reduction: 'mean' 

# Same from_logits / batch_reduction settings as the dice_score metric above.
loss_func: 
  _class: IonPy.loss.SoftDiceLoss
  from_logits: True
  batch_reduction: 'mean' 

<IPython.core.display.Javascript object>

In [4]:
%%yaml model_cfg  

# Model section, parsed into the Python variable `model_cfg` and merged onto
# the defaults via Config.update in the assembly cell. Only the class path is
# given; no constructor arguments are set here.
model:
  _class: ese.experiment.models.UNet

<IPython.core.display.Javascript object>

In [5]:
%%yaml dataset_cfg 

# Dataset section, parsed into the Python variable `dataset_cfg` and merged
# onto the defaults via Config.update in the assembly cell. Only the class
# path is given; no dataset arguments are set here.
data:
  _class: ese.experiment.datasets.ADE20kDataset

<IPython.core.display.Javascript object>

In [6]:
%%yaml callbacks_cfg

# Callback section, parsed into the Python variable `callbacks_cfg`.
# Entries under `epoch` are presumably invoked once per epoch (TODO confirm).
callbacks:
  epoch:
    - IonPy.callbacks.ETA
    - IonPy.callbacks.JobProgress
    - IonPy.callbacks.TerminateOnNaN
    # The only callback given arguments: it monitors `dice_score` (the metric
    # name defined under log.metrics in default_cfg) on the `val` phase.
    - IonPy.callbacks.ModelCheckpoint:
        monitor: dice_score
        phase: val

<IPython.core.display.Javascript object>

In [7]:
# Experiment name: used as the log sub-directory here and passed to the
# SliteRunner further down.
exp_name = 'debugging'

# Ablation grid: each dotted config key maps to the list of values to try.
# Only one value is listed, so the grid expands to a single configuration.
options = dict([
    ('log.root', [f'{log_root_dir}/{exp_name}']),
])

In [8]:
from IonPy.util import dict_product, Config
import copy

# Layer the model, dataset and callback sections on top of the defaults,
# in that order.
base_cfg = (
    Config(default_cfg)
    .update(model_cfg)
    .update(dataset_cfg)
    .update(callbacks_cfg)
)

# One validated config per combination produced by dict_product(options).
# NOTE(review): this relies on Config.update returning a fresh object; if it
# mutated base_cfg in place, every entry in cfgs would alias the same config
# -- confirm against IonPy.util.Config.
cfgs = []
for cfg_update in dict_product(options):
    cfg = validate_cfg(base_cfg.update(cfg_update))
    cfgs.append(cfg)

In [9]:
# Number of configs generated from the options grid (displayed as cell output).
len(cfgs)

1

## Run the jobs

In [10]:
# Submit cell
from ese.experiment.experiment.ese_exp import CalibrationExperiment 
from IonPy.slite import SliteRunner

# GPU ids (as strings) handed to the runner for this machine.
available_gpus = ['0', '1']

# Build the Slite runner for CalibrationExperiment tasks under this
# experiment name.
srunner = SliteRunner(
    task_type=CalibrationExperiment,
    exp_name=exp_name,
    available_gpus=available_gpus,
)

## Debug Station

In [11]:
# Debug path: run only the first generated config via run_exp.
# NOTE(review): the saved output of this cell is a ModuleNotFoundError for
# 'ese.experiment.models.nonlinearity'; that module has to be importable
# before this call can succeed.
srunner.run_exp(cfgs[0])

ModuleNotFoundError: No module named 'ese.experiment.models.nonlinearity'

## Submit Config to Long Term Jobs

In [None]:
# Uncomment to submit every config in `cfgs` through the runner:
#srunner.submit_exps(cfgs)

In [None]:
# Uncomment to print the stderr of the first entry in `srunner.jobs`:
# print(srunner.jobs[0].stderr())