In [1]:
# Define useful paths
SCRATCH_ROOT = "/storage/vbutoi/scratch/ESE"
CONFIG_ROOT = "/storage/vbutoi/projects/ESE/configs"
import sys
sys.path.append('/storage/vbutoi/projects')
sys.path.append('/storage/vbutoi/projects/ESE')

# IonPy imports
from ionpy.util import Config

%load_ext yamlmagic
%load_ext autoreload
%autoreload 2

In [2]:
%%yaml base_default_cfg 

# Defaults for the base runs. This dict is the starting Config for `base_cfg`
# (built in the "Define the config combos" cell), which then passes the base
# data, loss, model and callbacks cfgs to Config.update.
# Same content as finetune_default_cfg except for optim.lr.
experiment:
  seed: 40
  seed_range: 1
  val_first: False 
  torch_compile: False 
  torch_mixed_precision: False
  # NOTE(review): absolute, user-specific paths -- adjust when running elsewhere.
  sys_paths:
    - "/storage/vbutoi/projects"
    - "/storage/vbutoi/libraries"
    - "/storage/vbutoi/projects/ESE"
  data_paths:
    - "/storage"
    - "/storage/vbutoi/datasets"

dataloader:
  batch_size: 8 
  num_workers: 3 
  pin_memory: True 

optim: 
  _class: torch.optim.Adam
  # finetune_default_cfg uses 1.0e-5 here. Keep the "1.0" mantissa: some YAML 1.1
  # loaders (e.g. PyYAML) read a dot-less "1e-4" as a string -- the loader used
  # by yamlmagic is not visible here, so confirm before reformatting.
  lr: 1.0e-4 
  
train:
  epochs: 500 
  eval_freq: 5 

# The callbacks cfgs define a `log` section as well (root, metrics).
log:
  checkpoint_freq: 5 

<IPython.core.display.Javascript object>

In [3]:
%%yaml finetune_default_cfg 

# Defaults for runs trained on top of a base model. This dict is the starting
# Config for `finetune_cfg` (built in the "Define the config combos" cell).
# Same content as base_default_cfg except for optim.lr (1.0e-5 here vs 1.0e-4).
experiment:
  seed: 40
  seed_range: 1
  val_first: False 
  torch_compile: False 
  torch_mixed_precision: False
  # NOTE(review): absolute, user-specific paths -- adjust when running elsewhere.
  sys_paths:
    - "/storage/vbutoi/projects"
    - "/storage/vbutoi/libraries"
    - "/storage/vbutoi/projects/ESE"
  data_paths:
    - "/storage"
    - "/storage/vbutoi/datasets"

dataloader:
  batch_size: 8 
  num_workers: 3 
  pin_memory: True 

optim: 
  _class: torch.optim.Adam
  # Ten times smaller than the base default (1.0e-4). Keep the "1.0" mantissa:
  # some YAML 1.1 loaders (e.g. PyYAML) read a dot-less "1e-5" as a string --
  # confirm which loader yamlmagic uses before reformatting.
  lr: 1.0e-5 
  
train:
  epochs: 500 
  eval_freq: 5 

# The callbacks cfgs define a `log` section as well (root, metrics).
log:
  checkpoint_freq: 5 

<IPython.core.display.Javascript object>

# Define the data.

In [4]:
%%yaml base_data_cfg 

# Dataset section for the base runs; passed to Config.update when `base_cfg`
# is built. The dataset class is named explicitly here, whereas
# finetune_data_cfg sets use_pt_data_cfg instead of a `_class`.
# Used for additional data experiment.
data:
  _class: 'ese.datasets.ISLES'
  # Keyword arguments for the train and val datasets; only the split differs.
  train_kwargs:
    split: 'train'
  val_kwargs: 
    split: 'val'

<IPython.core.display.Javascript object>

In [5]:
%%yaml finetune_data_cfg 

# Dataset section for the finetuning runs; passed to Config.update when
# `finetune_cfg` is built. No dataset `_class` is given here.
# Used for additional data experiment.
data:
  # Presumably tells the experiment to reuse the data config of the pretrained
  # base model (model.base_model_dir) -- TODO confirm against the experiment code.
  use_pt_data_cfg: True
  train_kwargs:
    split: 'train'
  val_kwargs: 
    split: 'val'

<IPython.core.display.Javascript object>

# Define the Loss Function config.

In [6]:
%%yaml base_loss_cfg

# Loss section for the base runs (soft Dice); passed to Config.update when
# `base_cfg` is built. The keys below `_class` are presumably forwarded as
# constructor arguments -- confirm against ese.losses.SoftDiceLoss.
loss_func: 
  _class: ese.losses.SoftDiceLoss
  from_logits: True
  batch_reduction: 'mean' 
  ignore_empty_labels: False 

<IPython.core.display.Javascript object>

In [7]:
%%yaml finetune_loss_cfg

# Loss section for the finetuning runs (pixel-wise CE instead of the soft Dice
# used by base_loss_cfg); passed to Config.update when `finetune_cfg` is built.
# Unlike base_loss_cfg there is no ignore_empty_labels key here.
loss_func: 
  _class: ese.losses.PixelCELoss
  from_logits: True
  batch_reduction: 'mean'

<IPython.core.display.Javascript object>

# Define the Model config.

## For base models.

In [8]:
%%yaml base_model_cfg  

# Model section for the base runs; passed to Config.update when `base_cfg`
# is built.
model:
  _class: ese.models.unet.UNet
  # Seven entries, all 64.
  filters: [64, 64, 64, 64, 64, 64, 64] 
  convs_per_block: 3 # Good default for UNets.
  dims: 2

<IPython.core.display.Javascript object>

## For downstream models (calibrators).

In [9]:
%%yaml finetune_model_cfg  

# Model section for the finetuning runs; passed to Config.update when
# `finetune_cfg` is built.
model:
  # '?' is a placeholder: both keys are given concrete values by the `model`
  # section of experiment_cfg further down (presumably a run with '?' left in
  # place is invalid -- confirm how ionpy treats it).
  _class: '?'
  base_model_dir: '?'
  dims: 2

<IPython.core.display.Javascript object>

# Define the Callbacks config.

In [10]:
%%yaml base_callbacks_cfg

# Logging metrics and callbacks for the base runs; passed to Config.update when
# `base_cfg` is built. base_default_cfg also has a `log` section
# (checkpoint_freq); presumably the two are merged rather than replaced --
# confirm against ionpy.util.Config.update.
log:
  # '?' placeholder; not set anywhere in this notebook's YAML, so presumably
  # filled in by get_training_configs (which receives scratch_root) -- confirm.
  root: '?'
  metrics:
    dice_score:
      _fn: ionpy.metrics.dice_score
      batch_reduction: 'mean' 
      ignore_empty_labels: False 
      from_logits: True

callbacks:
  step:
    - ionpy.callbacks.ShowPredictions:
        vis_type: 'segmentation'
  epoch:
    - ionpy.callbacks.ETA
    - ionpy.callbacks.JobProgress
    - ionpy.callbacks.TerminateOnNaN
    - ionpy.callbacks.PrintLogged
    - ionpy.callbacks.WandbLogger:
        entity: 'vbutoi'
        project: 'SemanticCalibration'
    # `monitor` names the dice_score metric defined under log.metrics above.
    - ionpy.callbacks.ModelCheckpoint:
        monitor: dice_score
        phase: val

<IPython.core.display.Javascript object>

In [11]:
%%yaml finetune_callbacks_cfg

# Logging metrics and callbacks for the finetuning runs; passed to Config.update
# when `finetune_cfg` is built. Same as base_callbacks_cfg except that an
# ece_loss metric is added and ModelCheckpoint monitors both metrics.
log:
  # '?' placeholder; not set anywhere in this notebook's YAML, so presumably
  # filled in by get_training_configs (which receives scratch_root) -- confirm.
  root: '?'
  metrics:
    ece_loss:
      _fn: ese.metrics.image_ece_loss
      num_prob_bins: 15
      from_logits: True
      lower_threshold: 0.01
    dice_score:
      _fn: ionpy.metrics.dice_score
      batch_reduction: 'mean' 
      ignore_empty_labels: False 
      from_logits: True

callbacks:
  step:
    - ionpy.callbacks.ShowPredictions:
        vis_type: 'segmentation'
  epoch:
    - ionpy.callbacks.ETA
    - ionpy.callbacks.JobProgress
    - ionpy.callbacks.TerminateOnNaN
    - ionpy.callbacks.PrintLogged
    - ionpy.callbacks.WandbLogger:
        entity: 'vbutoi'
        project: 'SemanticCalibration'
    # `monitor` lists both metrics defined under log.metrics above.
    - ionpy.callbacks.ModelCheckpoint:
        monitor: 
          - dice_score
          - ece_loss
        phase: val

<IPython.core.display.Javascript object>

# Define the config combos.

In [12]:
# Config for training standard segmentation models: start from the base
# defaults and hand the base data/loss/model/callbacks sections to update().
base_cfg = Config(base_default_cfg).update(
    [base_data_cfg, base_loss_cfg, base_model_cfg, base_callbacks_cfg]
)

# Config for training models on top of base ones: same recipe, using the
# finetune variants of every section.
finetune_cfg = Config(finetune_default_cfg).update(
    [finetune_data_cfg, finetune_loss_cfg, finetune_model_cfg, finetune_callbacks_cfg]
)

# Experimental Variations.

In [13]:
%%yaml experiment_cfg 

# Experiment-specific settings, passed as exp_cfg to get_training_configs (with
# finetune_cfg as the base) and again to submit_exps in the cells below.
group: "OCTA_ReducedCalibratorSuite_SoftDicePT"

# Supplies the two keys left as '?' in finetune_model_cfg.
model: 
    # NOTE(review): absolute path to one specific earlier run; update it when the
    # base model changes.
    base_model_dir: "/storage/vbutoi/scratch/ESE/training/older_runs/2024/08_August_2024/08_07_24_OCTA_FULLRES_SoftDice/20240807_144440-P7H9-459da42e97888bccb0aa79c297f388cd"
    # Four options; the saved output of len(train_cfgs) below is 4, so presumably
    # a list value yields one training config per entry -- confirm.
    _class: 
        - "ese.models.TS"
        - "ese.models.VS"
        - "ese.models.LTS"
        - "ese.models.IBTS"

<IPython.core.display.Javascript object>

In [14]:
from ionpy.experiment.generate_configs import get_training_configs

# Build the per-run training configs from experiment_cfg, using the finetuning
# config as the base. To generate configs for the standard segmentation models
# instead, pass base_cfg=base_cfg.
updated_base_cfg, train_cfgs = get_training_configs(
    base_cfg=finetune_cfg,
    exp_cfg=experiment_cfg,
    scratch_root=SCRATCH_ROOT,
    config_root=CONFIG_ROOT,
    add_date=True,
)

In [15]:
# Sanity check on the number of generated configs. The saved output is 4, which
# matches the four model._class options listed in experiment_cfg.
len(train_cfgs)

4

# Running Jobs

In [16]:
# # ####### FOR DEBUGGING
# # Uncomment the lines below to run only the first training config through
# # run_exp (wandb tracking off, gpu '0') instead of submitting the whole list.
# from ionpy.slite import run_exp
# from ese.experiment import CalibrationExperiment

# run_exp(
#     config=train_cfgs[0], 
#     experiment_class=CalibrationExperiment,
#     run_name='debug',
#     show_examples=True,
#     track_wandb=False,
#     gpu='0',
# )

In [17]:
%%yaml submit_cfg

# Submission settings, passed as submit_cfg to submit_exps in the next cell.
mode: "local"
group: "training"
add_date: True
track_wandb: True
jobs_per_gpu: 1
# Same value as SCRATCH_ROOT in the first cell; keep the two in sync by hand,
# since this YAML cannot reference the Python constant.
scratch_root: "/storage/vbutoi/scratch/ESE"

<IPython.core.display.Javascript object>

In [18]:
# FOR SUBMISSION
# NOTE: running this cell launches jobs. The saved output shows four job ids,
# one per entry of train_cfgs, on gpus 0-3. Presumably re-running it submits the
# same configs again -- confirm before re-executing.
from ionpy.slite import submit_exps
from ese.experiment import CalibrationExperiment

submit_exps(
    cfg_list=train_cfgs,
    submit_cfg=submit_cfg,
    exp_cfg=experiment_cfg,
    base_cfg=updated_base_cfg,
    experiment_class=CalibrationExperiment,
)

--> Launched job-id: 706 on gpu: 0.
--> Launched job-id: 707 on gpu: 1.
--> Launched job-id: 708 on gpu: 2.
--> Launched job-id: 709 on gpu: 3.
