In [17]:
import importlib

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

from datasets.hdf5 import get_train_loaders
from unet3d.config import load_config
from unet3d.losses import get_loss_criterion
from unet3d.metrics import get_evaluation_metric
from unet3d.model import get_model
from unet3d.trainer import UNet3DTrainer
from unet3d.utils import get_logger
from unet3d.utils import get_number_of_learnable_parameters

import argparse

import os
import yaml

In [18]:
# Set up logging, pick the training device and load the experiment config.

logger = get_logger('UNet3DTrainer')

# Fall back to the CPU when PyTorch cannot see a CUDA device.
DEFAULT_DEVICE = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# Path to the YAML experiment configuration.
config_file = 'resources/train_config_ce.yaml'
# safe_load parses plain-data YAML without constructing arbitrary Python
# objects and does not need an explicit Loader argument; the context manager
# guarantees the file handle is closed once the config has been parsed.
with open(config_file, 'r') as config_stream:
    config = yaml.safe_load(config_stream)

# An explicit 'device' entry in the config takes precedence over the default.
device = config.get('device', DEFAULT_DEVICE)
config['device'] = torch.device(device)

logger.info(config)


2019-07-11 16:45:14,410 [MainThread] INFO UNet3DTrainer - {'manual_seed': 0, 'model': {'name': 'UNet3D', 'in_channels': 1, 'out_channels': 2, 'layer_order': 'crg', 'f_maps': 32, 'num_groups': 8, 'final_sigmoid': False}, 'trainer': {'checkpoint_dir': '3dunet', 'resume': None, 'validate_after_iters': 20, 'log_after_iters': 20, 'epochs': 50, 'iters': 100000, 'eval_score_higher_is_better': True}, 'optimizer': {'learning_rate': 0.0002, 'weight_decay': 0.0001}, 'loss': {'name': 'CrossEntropyLoss', 'loss_weight': None, 'ignore_index': None}, 'eval_metric': {'name': 'MeanIoU', 'ignore_index': None}, 'lr_scheduler': {'name': 'MultiStepLR', 'milestones': [10, 30, 60], 'gamma': 0.2}, 'loaders': {'train_patch': [32, 64, 64], 'train_stride': [8, 16, 16], 'val_patch': [32, 64, 64], 'val_stride': [32, 64, 64], 'raw_internal_path': 'raw', 'label_internal_path': 'label', 'train_path': ['../suki_fractals/suki_files/h5/0.h5'], 'val_path': ['resources/random_label3D.h5'], 'num_workers': 8, 'transformer': 

  del sys.path[0]


In [19]:
def _create_trainer(config, model, optimizer, lr_scheduler, loss_criterion, eval_criterion, loaders, logger):
    assert 'trainer' in config, 'Could not find trainer configuration'
    trainer_config = config['trainer']

    resume = trainer_config.get('resume', None)
    pre_trained = trainer_config.get('pre_trained', None)

    return UNet3DTrainer(model, optimizer, lr_scheduler, loss_criterion, eval_criterion,
                         config['device'], loaders, trainer_config['checkpoint_dir'],
                         max_num_epochs=trainer_config['epochs'],
                         max_num_iterations=trainer_config['iters'],
                         validate_after_iters=trainer_config['validate_after_iters'],
                         log_after_iters=trainer_config['log_after_iters'],
                         eval_score_higher_is_better=trainer_config['eval_score_higher_is_better'],
                         logger=logger)


def _create_optimizer(config, model):
    assert 'optimizer' in config, 'Cannot find optimizer configuration'
    optimizer_config = config['optimizer']
    learning_rate = optimizer_config['learning_rate']
    weight_decay = optimizer_config['weight_decay']
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    return optimizer


def _create_lr_scheduler(config, optimizer):
    lr_config = config.get('lr_scheduler', None)
    if lr_config is None:
        # use ReduceLROnPlateau as a default scheduler
        return ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=20, verbose=True)
    else:
        class_name = lr_config.pop('name')
        m = importlib.import_module('torch.optim.lr_scheduler')
        clazz = getattr(m, class_name)
        # add optimizer to the config
        lr_config['optimizer'] = optimizer
        return clazz(**lr_config)

In [20]:

# Seed the RNG when the config provides 'manual_seed' (skipped otherwise)
manual_seed = config.get('manual_seed', None)
if manual_seed is not None:
    logger.info(f'Seed the RNG for all devices with {manual_seed}')
    torch.manual_seed(manual_seed)
    # cuDNN settings for reproducibility,
    # see https://pytorch.org/docs/stable/notes/randomness.html
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Create the model from the experiment configuration
model = get_model(config)

# Move the model to the device stored in config['device']
# (a torch.device set by the configuration cell)
logger.info(f"Sending the model to '{config['device']}'")
model = model.to(config['device'])
                
# Log the number of learnable parameters
logger.info(f'Number of learnable params {get_number_of_learnable_parameters(model)}')

# Create loss criterion
loss_criterion = get_loss_criterion(config)
# Create evaluation metric
eval_criterion = get_evaluation_metric(config)

# Create data loaders
loaders = get_train_loaders(config)

# Create the optimizer (Adam, see _create_optimizer)
optimizer = _create_optimizer(config, model)

# Create learning rate adjustment strategy
# (config['lr_scheduler'] if present, otherwise ReduceLROnPlateau)
lr_scheduler = _create_lr_scheduler(config, optimizer)

# Create model trainer
trainer = _create_trainer(config, model=model, optimizer=optimizer, lr_scheduler=lr_scheduler,
                          loss_criterion=loss_criterion, eval_criterion=eval_criterion, loaders=loaders,
                          logger=logger)
# Start training
trainer.fit()

2019-07-11 16:45:15,119 [MainThread] INFO UNet3DTrainer - Seed the RNG for all devices with 0
2019-07-11 16:45:15,119 [MainThread] INFO UNet3DTrainer - Seed the RNG for all devices with 0
2019-07-11 16:45:15,119 [MainThread] INFO UNet3DTrainer - Seed the RNG for all devices with 0
2019-07-11 16:45:15,119 [MainThread] INFO UNet3DTrainer - Seed the RNG for all devices with 0
2019-07-11 16:45:15,119 [MainThread] INFO UNet3DTrainer - Seed the RNG for all devices with 0
2019-07-11 16:45:15,171 [MainThread] INFO UNet3DTrainer - Sending the model to 'cpu'
2019-07-11 16:45:15,171 [MainThread] INFO UNet3DTrainer - Sending the model to 'cpu'
2019-07-11 16:45:15,171 [MainThread] INFO UNet3DTrainer - Sending the model to 'cpu'
2019-07-11 16:45:15,171 [MainThread] INFO UNet3DTrainer - Sending the model to 'cpu'
2019-07-11 16:45:15,171 [MainThread] INFO UNet3DTrainer - Sending the model to 'cpu'
2019-07-11 16:45:15,176 [MainThread] INFO UNet3DTrainer - Number of learnable params 4080914
2019-07-11 1

Traceback (most recent call last):
Traceback (most recent call last):
  File "/anaconda3/envs/lung/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/anaconda3/envs/lung/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/anaconda3/envs/lung/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/anaconda3/envs/lung/lib/python3.7/multiprocessing/connection.py", line 368, in _send
    n = write(self._handle, buf)
  File "/anaconda3/envs/lung/lib/python3.7/multiprocessing/queues.py", line 242, in _feed
    send_bytes(obj)
  File "/anaconda3/envs/lung/lib/python3.7/multiprocessing/connection.py", line 200, in send_bytes
    self._send_bytes(m[offset:offset + size])
  File "/anaconda3/envs/lung/lib/python3.7/multiprocessing/connection.py", line 404, in _send_bytes
    self._send(header + buf)
  File "/anaconda3/envs/lung/lib/py

KeyboardInterrupt: 