In [1]:
# Random Imports
import os
import sys
import yaml
from pathlib import Path
# Make the local project checkouts and shared libraries importable.
# Entries are appended in this order, after whatever is already on sys.path.
sys.path.extend([
    '/storage/vbutoi/projects',
    '/storage/vbutoi/libraries',
    '/storage/vbutoi/projects/ESE',
    '/storage/vbutoi/projects/UniverSegDev',
])

# DATAPATH is built as a ':'-joined list of dataset roots (a single root here);
# WANDB_NOTEBOOK_NAME is set alongside it.
os.environ.update(
    DATAPATH=':'.join((
        '/storage/vbutoi/datasets',
    )),
    WANDB_NOTEBOOK_NAME='calibrate.ipynb',
)

# IonPy imports
from ionpy.util import Config

%load_ext yamlmagic
%load_ext autoreload
%autoreload 2

In [2]:
%%yaml default_cfg

# We can change the lr and weight decay mid run.
# Usually we are going to load the old optimizer state.
# optim:
#   weight_decay: 0.0 
#   lr: 1.0e-4

# Optionally we can change the loss function.
# loss_func: 
#   _class: '?'
#   from_logits: True
#   batch_reduction: 'mean' 

# Restart defaults captured into `default_cfg` by the %%yaml cell magic.
experiment:
  restart: True # Important, otherwise we don't load the model.

# The '?' values below are placeholders: the experiment_cfg cell provides concrete
# values for all three keys (presumably merged in by get_ese_restart_configs — TODO confirm).
train:
  epochs: '?' # How much longer to train for.
  load_chkpt: '?' # Which model do we load
  pretrained_dir: '?' # Which runs are we restarting

<IPython.core.display.Javascript object>

In [3]:
%%yaml callbacks_cfg

# Callbacks are grouped under the `step` and `epoch` keys
# (presumably the cadence at which the trainer invokes them — TODO confirm).
callbacks:
  step:
    - ese.callbacks.ShowPredictions
  epoch:
    - ese.callbacks.WandbLogger
    - ionpy.callbacks.ETA
    - ionpy.callbacks.JobProgress
    - ionpy.callbacks.TerminateOnNaN
    - ionpy.callbacks.PrintLogged
    # The only callback given arguments here: monitor `dice_score` for the `val` phase.
    - ionpy.callbacks.ModelCheckpoint:
        monitor: dice_score
        phase: val

<IPython.core.display.Javascript object>

In [4]:
%%yaml experiment_cfg 

# Experiment-specific settings captured into `experiment_cfg`.
# The name mirrors the pretrained run directory being restarted, with a
# "_RESTARTED" suffix; keep the two in sync when switching runs.
# name: "ISLES_MEGA_lowLR_RESTARTED" # We will treat this as a NEW EXPERIMENT.
name: "ISLES_MEGA_lowLR_wSVLS_RESTARTED" # We will treat this as a NEW EXPERIMENT.

train:
    epochs: 500
    load_chkpt: 'last'
    # Run directories to restart from; exactly one entry should be active
    # alongside its matching `name` above.
    pretrained_dir: 
        # - "/storage/vbutoi/scratch/ESE/training/09_05_24_ISLES_MEGA_lowLR"
        - "/storage/vbutoi/scratch/ESE/training/09_05_24_ISLES_MEGA_lowLR_wSVLS"

<IPython.core.display.Javascript object>

In [5]:
from ese.analysis.analysis_utils.submit_utils import get_ese_restart_configs

# Wrap the restart defaults in a Config and apply the callbacks config via .update().
base_cfg = Config(default_cfg).update([callbacks_cfg])

# Produce the updated base config together with the list of restart configs.
updated_base_cfg, restart_cfgs = get_ese_restart_configs(exp_cfg=experiment_cfg, base_cfg=base_cfg)

In [6]:
# Sanity check: how many restart configs get_ese_restart_configs returned.
len(restart_cfgs)

8

## Running Jobs

In [7]:
from ese.experiment import run_ese_exp, submit_ese_exps, CalibrationExperiment, PostHocExperiment




In [8]:
# Debug path: uncomment to run a single restart config (restart_cfgs[0]) instead of submitting the whole batch.
# run_ese_exp(
#     config=restart_cfgs[0], 
#     experiment_class=CalibrationExperiment,
#     # experiment_class=PostHocExperiment,
#     run_name='debug',
#     show_examples=True,
#     # track_wandb=False,
#     gpu='0',
#     # gpu='4',
# )

In [9]:
# Submit every restart config as a job in the "restart" group.
# Alternative GPU set kept for reference: ['4', '5', '6', '7'].
submit_ese_exps(
    group="restart",
    base_cfg=updated_base_cfg,
    exp_cfg=experiment_cfg,
    config_list=restart_cfgs,
    experiment_class=CalibrationExperiment,
    track_wandb=True,
    available_gpus=[str(gpu_idx) for gpu_idx in range(4)],  # ['0', '1', '2', '3']
)

Submitted job id: 2667548 on gpu: 0.
Submitted job id: 2667654 on gpu: 1.
Submitted job id: 2667810 on gpu: 2.
Submitted job id: 2667958 on gpu: 3.
Submitted job id: 2668155 on gpu: 0.
Submitted job id: 2668438 on gpu: 1.
Submitted job id: 2668629 on gpu: 2.
Submitted job id: 2668889 on gpu: 3.
