In [1]:
# Random Imports
import os
import sys
import yaml
from pathlib import Path
# Make the project checkouts and shared libraries importable from this notebook.
# The membership guard keeps the cell idempotent: re-running it does not keep
# appending duplicate entries to sys.path.
for extra_path in (
    '/storage/vbutoi/projects',
    '/storage/vbutoi/libraries',
    '/storage/vbutoi/projects/ESE',
    '/storage/vbutoi/projects/UniverSegDev',
):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

# Regular schema dictates that we put DATAPATH.
# It is a ':'-joined list of dataset roots (currently a single entry).
os.environ['DATAPATH'] = ':'.join((
       '/storage/vbutoi/datasets',
))
# Notebook name handed to Weights & Biases through its environment variable.
os.environ['WANDB_NOTEBOOK_NAME'] = 'calibrate.ipynb'

# IonPy imports
from ionpy.util import Config

# yamlmagic provides the %%yaml cell magic used by the config cells below
# (each `%%yaml <name>` cell binds its parsed content to the variable <name>).
%load_ext yamlmagic
# autoreload mode 2: re-import all modules before running each cell, so edits
# to the project source are picked up without restarting the kernel.
# NOTE: comments must stay on their own lines; text after a line magic is
# passed to the magic as part of its argument.
%load_ext autoreload
%autoreload 2

In [2]:
%%yaml default_cfg

# Baseline settings for the calibration runs. This is wrapped in a Config and
# combined with log_cfg and callbacks_cfg to form base_cfg further down.
# Values set to '?' are placeholders: experiment_cfg supplies
# train.base_pretrained_dir and loss_func._class.

data:
  iters_per_epoch: 100 
  train_splits: cal
  val_splits: val 

dataloader:
  batch_size: 8 # Often, we train with a small num of images total. 
  num_workers: 1
  pin_memory: True 

optim: # Unclear if we should tune this or not.
  _class: torch.optim.Adam
  weight_decay: 0.0 
  lr: 1.0e-4

train:
  # NOTE(review): the previous note here read "10 * 100 = 1000 iterations", which
  # does not match the values in this file. With data.iters_per_epoch: 100,
  # 1000 epochs would be 1000 * 100 = 100,000 iterations (assuming one epoch
  # runs iters_per_epoch steps). Confirm which schedule is intended.
  epochs: 1000
  eval_freq: 10 
  base_pretrained_dir: '?' # placeholder; set in experiment_cfg
  base_checkpoint: 'max-val-dice_score'
  base_pt_select_metric: 'val-dice_score'

loss_func: 
  _class: '?' # placeholder; set in experiment_cfg
  from_logits: True
  batch_reduction: 'mean' 

<IPython.core.display.Javascript object>

In [3]:
%%yaml log_cfg

# Logging section: checkpoint frequency, output root, and the metrics to track.
# It is merged into base_cfg together with default_cfg and callbacks_cfg.

log:
  checkpoint_freq: 50
  # Placeholder; not overridden in any cell of this notebook, so presumably
  # filled in by get_ese_calibration_configs (TODO confirm).
  root: '?'
  # Each metric entry names a function by dotted path under `_fn`; the sibling
  # keys look like keyword arguments for that function (TODO confirm).
  # The ModelCheckpoint callback in callbacks_cfg monitors `ece_loss` and
  # `area_estimation_error` by these key names.
  metrics:
    dice_score:
      _fn: ionpy.metrics.dice_score
      batch_reduction: 'mean' 
      ignore_empty_labels: False 
      from_logits: True
    ece_loss:
      _fn: ese.metrics.image_ece_loss
      num_prob_bins: 15
      from_logits: True
    area_estimation_error:
      _fn: ese.losses.area_estimation_error
      from_logits: True
      square_diff: False

<IPython.core.display.Javascript object>

In [4]:
%%yaml callbacks_cfg

# Callback classes, listed by dotted path and grouped under `step` and `epoch`
# (presumably the granularity at which each group is invoked; TODO confirm).

callbacks:
  step:
    - ese.callbacks.ShowPredictions
  epoch:
    - ese.callbacks.WandbLogger
    - ionpy.callbacks.ETA
    - ionpy.callbacks.JobProgress
    - ionpy.callbacks.TerminateOnNaN
    - ionpy.callbacks.PrintLogged
    # The only callback given options here. The monitored names match metric
    # keys defined under log.metrics in log_cfg.
    - ionpy.callbacks.ModelCheckpoint:
        monitor: 
          - ece_loss 
          - area_estimation_error
        phase: val

<IPython.core.display.Javascript object>

In [5]:
%%yaml experiment_cfg 

# Experiment-specific settings, passed as exp_cfg to get_ese_calibration_configs.
# This supplies the values left as '?' in default_cfg
# (train.base_pretrained_dir and loss_func._class).
# The list-valued entries look like sweep options (TODO confirm).

name: "08_25_24_HepaticVessels_LTS"

train: 
    # NOTE(review): this directory is spelled "HeptaticVessel" while the
    # experiment name above uses "HepaticVessels". Confirm the path matches
    # what is on disk before renaming anything.
    base_pretrained_dir: 
        - "/storage/vbutoi/scratch/ESE/training/08_24_24_HeptaticVessel_LowerLR"

model:
    - TempScaling

loss_func:
    # Only AreaEstimationError is active; the other two losses are disabled.
    _class: 
        # - ese.losses.PixelCELoss
        # - ese.losses.SoftDiceLoss
        - ese.losses.AreaEstimationError

<IPython.core.display.Javascript object>

In [6]:
from ese.analysis.analysis_utils.submit_utils import get_ese_calibration_configs

# Assemble the shared base config from the three YAML cells above: the defaults,
# updated with the logging and callback sections.
base_cfg = Config(default_cfg).update([log_cfg, callbacks_cfg])

# Build the list of per-run configs from the experiment description.
cal_cfgs = get_ese_calibration_configs(
    base_cfg=base_cfg,
    exp_cfg=experiment_cfg,
    calibration_model_cfgs=dict(),
)

No base config found. Using default base config.


In [7]:
# Sanity check before submitting: the number of run configs that were generated.
len(cal_cfgs)

8

## Running Jobs

In [8]:
from ese.experiment import run_ese_exp, submit_ese_exps, PostHocExperiment




In [9]:
# ####### Run individual jobs
# run_ese_exp(
#     config=cal_cfgs[-1], 
#     experiment_class=PostHocExperiment,
#     run_name='debug',
#     show_examples=True,
#     track_wandb=False,
#     gpu='0',
#     # gpu='4',
# )

In [10]:
#### Run Batch Jobs
# Submit every calibration config as a PostHocExperiment job with W&B tracking
# enabled, offering GPUs '0' through '3' to the submitter.
submit_ese_exps(
    experiment_class=PostHocExperiment,
    config_list=cal_cfgs,
    available_gpus=[str(gpu_idx) for gpu_idx in range(4)],  # ['0', '1', '2', '3']
    # available_gpus=[str(gpu_idx) for gpu_idx in range(4, 8)],  # ['4', '5', '6', '7']
    track_wandb=True,
)

Submitted job id: 3539743 on gpu: 0.
Submitted job id: 3540122 on gpu: 1.
Submitted job id: 3540437 on gpu: 2.
Submitted job id: 3540847 on gpu: 3.
Submitted job id: 3541266 on gpu: 0.
Submitted job id: 3541771 on gpu: 1.
Submitted job id: 3542425 on gpu: 2.
Submitted job id: 3543040 on gpu: 3.
