다음을 리뷰 :
https://github.com/Project-MONAI/tutorials/blob/master/3d_segmentation/ignite/unet_training_dict.py

In [7]:
import os
import sys
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
import nibabel as nib    # nifti 포맷 파일 생성때만 이용

import torch
from torch.utils.data import DataLoader
import monai
## decollate_batch : 배치 텐서를 리스트의 텐서로 변환
from monai.data import ImageDataset, create_test_image_3d, decollate_batch
from monai.transforms import (
    Activations,
    AsChannelFirstd,
    AsDiscrete,
    Compose,
    LoadImaged,
    RandCropByPosNegLabeld,  # randomly crop patch samples from big image based on pos / neg ratio.
    RandRotate90d,
    ScaleIntensityd,
    EnsureTyped,
    EnsureType,
)
from monai.data import Dataset   # dict에선 ImageDataset대신 이용
from monai.data import list_data_collate


# Events : process point를 지정 Events.EPOCH_COMPETED
from ignite.engine import (
    Events,  
    _prepare_batch,                    ### 새롭게 들어옴
    create_supervised_evaluator, 
    create_supervised_trainer
)
# ModelCheckpoint : training 동안 모델 계속 저장
from ignite.handlers import EarlyStopping, ModelCheckpoint
from monai.handlers import (
    MeanDice,          # val_metric {}를 정의시 넣음
    StatsHandler,      # 각 epoch마다 loss, metric 출력
    TensorBoardImageHandler,
    TensorBoardStatsHandler,    # 각 epoch마다 loss, metric plot
    stopping_fn_from_metric,    # ignite EarlyStopping 과 연결, metric기준 stopping
)


In [8]:
tempdir = './dataset'
monai.config.print_config()
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '5,6,7,8'

## 디렉토리에 40개 랜덤이미지, 마스크 생성
print(f"generating synthetic data to {tempdir} (this may take a while)")
for i in range(40):
    # np image 생성
    im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1)  
#     print(type(im), type(seg))  # np.array, (128, 128, 128, 1)
#     print(im.shape, seg.shape)  # (128, 128, 128) (128, 128, 128) 3d 라서 img, seg가 같은 dim인 듯?

    n = nib.Nifti1Image(im, np.eye(4))
    nib.save(n, os.path.join(tempdir, f"img{i:d}.nii.gz"))

    n = nib.Nifti1Image(seg, np.eye(4))
    nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz"))

## 파일이름들 가져오기
images = sorted(glob(os.path.join(tempdir, "img*.nii.gz")))    # 40개 nifti file 리스트
segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz")))

train_files = [{"img": img, "seg": seg}for img, seg in zip(images[:20], segs[:20])]
val_files = [{"img": img, "seg": seg}for img, seg in zip(images[-20:], segs[-20:])]

MONAI version: 0.9.dev2152
Numpy version: 1.21.2
Pytorch version: 1.10.0a0+0aef44c
MONAI flags: HAS_EXT = False, USE_COMPILED = False
MONAI rev id: c5bd8aff8ba461d7b349eb92427d452481a7eb72

Optional dependencies:
Pytorch Ignite version: 0.4.6
Nibabel version: 3.2.1
scikit-image version: 0.18.3
Pillow version: 8.4.0
Tensorboard version: 2.6.0
gdown version: 4.2.0
TorchVision version: 0.11.0a0
tqdm version: 4.62.3
lmdb version: 1.2.1
psutil version: 5.8.0
pandas version: 1.3.4
einops version: 0.3.2
transformers version: 4.12.5
mlflow version: 1.21.0

For details about installing the optional dependencies, please visit:
    https://docs.monai.io/en/latest/installation.html#installing-the-recommended-dependencies

generating synthetic data to ./dataset (this may take a while)


In [9]:
## transform 정의
train_transforms = Compose(
    [
        LoadImaged(keys=["img", "seg"]),   # list에선 ImageDataset을 대신썻기 때문에 LoadImage가 포함되어있었음
        AsChannelFirstd(keys=["img", "seg"], channel_dim=-1),
        ScaleIntensityd(keys="img"),   # scaling은 img에만.
        # aug
        RandCropByPosNegLabeld(
            keys=["img", "seg"],
            label_key="seg",
            spatial_size=[96, 96, 96],
            pos=1,   # foreground voxel as a center rather than a background voxel. ``pos / (pos + neg)``
            neg=1,
            num_samples=4    # 1개 이미지당 4개 결과생성. 즉 4배로 뻥튀기
        ),
        RandRotate90d(keys=["img", "seg"], prob=0.5, spatial_axes=[0, 2]),
        EnsureTyped(keys=["img", "seg"])
    ]
)
val_transforms = Compose(
    [
        LoadImaged(keys=["img", "seg"]),  
        AsChannelFirstd(keys=["img", "seg"], channel_dim=-1),
        ScaleIntensityd(keys="img"), 
        EnsureTyped(keys=["img", "seg"])
    ]
)

# 잘 되는지 프로세스 검증
check_ds = monai.data.Dataset(data=train_files, transform=train_transforms)
# use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training
# RandCrop에서 samples 4개 만들면, 이게 리스트로 만들어짐. 이걸 풀어서 tensor 하나에 8개(2x4)를 넣어주는게 list_data_collate임
check_loader = DataLoader(check_ds, batch_size=2, num_workers=4, collate_fn=list_data_collate)
# check_loader = DataLoader(check_ds, batch_size=2, num_workers=4)
check_data = monai.utils.misc.first(check_loader)
print(check_data["img"].shape, check_data["seg"].shape)


train_ds = monai.data.Dataset(data=train_files, transform=train_transforms)
train_loader = DataLoader(
    train_ds,
    batch_size=2,
    shuffle=True,
    num_workers=4,
    collate_fn=list_data_collate,
    pin_memory=torch.cuda.is_available(),
)
# create a validation data loader
val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)
val_loader = DataLoader(
    val_ds, 
    batch_size=5, num_workers=8, 
    collate_fn=list_data_collate,
    pin_memory=torch.cuda.is_available(),
)

torch.Size([8, 1, 96, 96, 96]) torch.Size([8, 1, 96, 96, 96])


In [10]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

net = monai.networks.nets.UNet(
    spatial_dims=3,
    in_channels=1,
    out_channels=1,
    channels=(16, 32, 64, 128, 256),
    strides=(2, 2, 2, 2),
    num_res_units=2,
).to(device)
loss = monai.losses.DiceLoss(sigmoid=True)
opt = torch.optim.Adam(net.parameters(), 1e-3)

### Ignite -training 관련 정의
dict form일때 prepare_batch 만 달라짐, trainer 만들때 넣어줌

In [11]:
##### !! prepare_batch : dictform + ignite 일때 꼭 해줘야 하는듯
# Ignite trainer expects batch=(img, seg) and returns output=loss at every iteration,
# user can add output_transform to return other values, like: y_pred, y, etc.
def prepare_batch(batch, device=None, non_blocking=False):
    return _prepare_batch((batch["img"], batch["seg"]), device, non_blocking)

trainer = create_supervised_trainer(
    net, opt, loss, device, False, prepare_batch=prepare_batch
)

###### !!Check Point!! : 모델 저장
checkpoint_handler = ModelCheckpoint(
    dirname="./runs_dict/",
    filename_prefix="net",
    n_saved=5,    # 딱 10개만 저장, 더 업데이트 되면 덮어쓰기
    require_empty=False  # True: 기존 모델이 dir에 있다면 덮어쓰지 않고 오류
)
trainer.add_event_handler(
    event_name=Events.EPOCH_COMPLETED,
    handler=checkpoint_handler,
    to_save={"net": net, "opt": opt}    # opt는 딱히 뭔진 모르겠음
)

###### !! StatsHandler!! : 각 iter와 각 epoch 마다 loss와 metrics를 출력
# StatsHandler prints loss at every iteration and print metrics at every epoch,
# we don't set metrics for trainer here, so just print loss, user can also customize print functions
# and can use output_transform to convert engine.state.output if it's not a loss value
# trainer에 metrics만 설정 해뒀다면 loss뿐 아니라 프린트를 커스터 마이징 가능(여기엔 안되어 있음)
train_stats_handler = StatsHandler(name="trainer", output_transform=lambda x: x)
train_stats_handler.attach(trainer)

###### !! TensorBoardStatsHandler!! : 각 iter, epoch마다 loss와 metric을 plot. statshandler와 같음
# TensorBoardStatsHandler plots loss at every iteration and plots metrics at every epoch, same as StatsHandler
train_tensorboard_stats_handler = TensorBoardStatsHandler(output_transform=lambda x:x)
train_tensorboard_stats_handler.attach(trainer)

### Ignite - valid 관련 정의
달라진점. 
* dict-form이라 달라진게 아니라 validation도 iter마다 볼수있게 코드 수정
* prepare_batch를 evaluator 만들때 넣어줌

In [12]:
metric_name = "Mean_Dice"
val_metrics = {metric_name: MeanDice()}
validation_every_n_iters = 1    # listform일때, validation_every_n_epochs = 1
post_pred = Compose([EnsureType(), Activations(sigmoid=True), AsDiscrete(threshold=0.5)])
post_label = Compose([EnsureType(), AsDiscrete(threshold=0.5)])   # 정답(label)에 대한 post-proc

## post-processing과정이 까다롭다..
evaluator = create_supervised_evaluator(
    net,
    val_metrics,
    device,
    True,
    # 순서 바꿔도 되야하지 않나
    output_transform=lambda x, y, y_pred: ([post_pred(i) for i in decollate_batch(y_pred)], [post_label(i) for i in decollate_batch(y)]),
    prepare_batch=prepare_batch,
)

###### !! evaluator proc 정의 !!
@trainer.on(Events.ITERATION_COMPLETED(every=validation_every_n_iters))  ## 각 iter끝날때 마다로 변경
def run_validation(engine):
    evaluator.run(val_loader)
    
###### !! EarlyStopping !!
early_stopper = EarlyStopping(
    patience=4,
    score_function=stopping_fn_from_metric(metric_name),
    trainer=trainer
)
evaluator.add_event_handler(
    event_name=Events.EPOCH_COMPLETED, 
    handler=early_stopper
)

###### !! StatsHandler!! : 각 iter와 각 epoch 마다 loss와 metrics를 출력 for validation
val_stats_handler = StatsHandler(
    name="evaluator",
    output_transform=lambda x: None,   # no need to print loss value, so disable per iteration output
    global_epoch_transform=lambda x: trainer.state.epoch,    # trainer에서 global epoch number 가져오기
)
val_stats_handler.attach(evaluator)

###### !! TensorBoardStatsHandler!! : 각 iter, epoch마다 loss와 metric을 plot. statshandler와 같음
val_tensorboard_stats_handler = TensorBoardStatsHandler(
    output_transform=lambda x: None,  # no need to plot loss value, so disable per iteration output
    global_epoch_transform=lambda x: trainer.state.iteration,   # ?? epoch?? iteration????????????????????
)
val_tensorboard_stats_handler.attach(evaluator)

###### !! TensorBoardImageHandler!! : 마지막 배치(?)에서 첫 번째 이미지와 해당 레이블 및 모델 출력을 그리는 핸들러 추가.
# 매 validation spoch에서 그림그리기
# add handler to draw the first image and the corresponding label and model output in the last batch
# here we draw the 3D output as GIF format along Depth axis, at every validation epoch

# batch_transform : ignite.engine.state.batch 에서 이미지와 레이블 가져올 수 있음
# output_transform : ignite.engine.state.output 에서 prediction 결과 이미지 가져옴, output[index] index는 몇번째 element인지
val_tensorboard_image_handler = TensorBoardImageHandler(
    batch_transform=lambda batch: (batch["img"], batch["seg"]),
    output_transform=lambda output: output[0],
    global_iter_transform=lambda x: trainer.state.epoch,
)
evaluator.add_event_handler(
    event_name=Events.ITERATION_COMPLETED(every=2), handler=val_tensorboard_image_handler
)

train_epochs = 5
state = trainer.run(train_loader, train_epochs)
print(state)

2022-01-20 16:20:40,403 ignite.handlers.early_stopping.EarlyStopping INFO: EarlyStopping: Stop training


State:
	iteration: 38
	epoch: 4
	epoch_length: 10
	max_epochs: 5
	output: 0.5253188610076904
	batch: <class 'dict'>
	metrics: <class 'dict'>
	dataloader: <class 'torch.utils.data.dataloader.DataLoader'>
	seed: <class 'NoneType'>
	times: <class 'dict'>



In [None]:
state

In [None]:
state.metrics

In [13]:
engine.state.output

NameError: name 'engine' is not defined

-----------------------

In [None]:
import logging
import os
import sys
import tempfile
from glob import glob

import nibabel as nib
import numpy as np
import torch
from ignite.engine import (
    Events,
    _prepare_batch,
    create_supervised_evaluator,
    create_supervised_trainer,
)
from ignite.handlers import EarlyStopping, ModelCheckpoint
from torch.utils.data import DataLoader

import monai
from monai.data import create_test_image_3d, list_data_collate, decollate_batch
from monai.handlers import (
    MeanDice,
    StatsHandler,
    TensorBoardImageHandler,
    TensorBoardStatsHandler,
    stopping_fn_from_metric,
)
from monai.transforms import (
    Activations,
    AsChannelFirstd,
    AsDiscrete,
    Compose,
    LoadImaged,
    RandCropByPosNegLabeld,
    RandRotate90d,
    ScaleIntensityd,
    EnsureTyped,
    EnsureType,
)

tempdir = './dataset'
monai.config.print_config()
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = '5,6,7,8'

# create a temporary directory and 40 random image, mask pairs
print(f"generating synthetic data to {tempdir} (this may take a while)")
for i in range(40):
    im, seg = create_test_image_3d(128, 128, 128, num_seg_classes=1, channel_dim=-1)

    n = nib.Nifti1Image(im, np.eye(4))
    nib.save(n, os.path.join(tempdir, f"img{i:d}.nii.gz"))

    n = nib.Nifti1Image(seg, np.eye(4))
    nib.save(n, os.path.join(tempdir, f"seg{i:d}.nii.gz"))

images = sorted(glob(os.path.join(tempdir, "img*.nii.gz")))
segs = sorted(glob(os.path.join(tempdir, "seg*.nii.gz")))
train_files = [{"img": img, "seg": seg} for img, seg in zip(images[:20], segs[:20])]
val_files = [{"img": img, "seg": seg} for img, seg in zip(images[-20:], segs[-20:])]

# define transforms for image and segmentation
train_transforms = Compose(
    [
        LoadImaged(keys=["img", "seg"]),
        AsChannelFirstd(keys=["img", "seg"], channel_dim=-1),
        ScaleIntensityd(keys="img"),
        RandCropByPosNegLabeld(
            keys=["img", "seg"],
            label_key="seg",
            spatial_size=[96, 96, 96],
            pos=1,
            neg=1,
            num_samples=4,
        ),
        RandRotate90d(keys=["img", "seg"], prob=0.5, spatial_axes=[0, 2]),
        EnsureTyped(keys=["img", "seg"]),
    ]
)
val_transforms = Compose(
    [
        LoadImaged(keys=["img", "seg"]),
        AsChannelFirstd(keys=["img", "seg"], channel_dim=-1),
        ScaleIntensityd(keys="img"),
        EnsureTyped(keys=["img", "seg"]),
    ]
)

# define dataset, data loader
check_ds = monai.data.Dataset(data=train_files, transform=train_transforms)
# use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training
check_loader = DataLoader(
    check_ds,
    batch_size=2,
    num_workers=4,
    collate_fn=list_data_collate,
    pin_memory=torch.cuda.is_available(),
)
check_data = monai.utils.misc.first(check_loader)
print(check_data["img"].shape, check_data["seg"].shape)

In [None]:
# create a training data loader
train_ds = monai.data.Dataset(data=train_files, transform=train_transforms)
# use batch_size=2 to load images and use RandCropByPosNegLabeld to generate 2 x 4 images for network training
train_loader = DataLoader(
    train_ds,
    batch_size=2,
    shuffle=True,
    num_workers=4,
    collate_fn=list_data_collate,
    pin_memory=torch.cuda.is_available(),
)
# create a validation data loader
val_ds = monai.data.Dataset(data=val_files, transform=val_transforms)
val_loader = DataLoader(
    val_ds,
    batch_size=5,
    num_workers=8,
    collate_fn=list_data_collate,
    pin_memory=torch.cuda.is_available(),
)

# create UNet, DiceLoss and Adam optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = monai.networks.nets.UNet(
    spatial_dims=3,
    in_channels=1,
    out_channels=1,
    channels=(16, 32, 64, 128, 256),
    strides=(2, 2, 2, 2),
    num_res_units=2,
).to(device)
loss = monai.losses.DiceLoss(sigmoid=True)
lr = 1e-3
opt = torch.optim.Adam(net.parameters(), lr)

# Ignite trainer expects batch=(img, seg) and returns output=loss at every iteration,
# user can add output_transform to return other values, like: y_pred, y, etc.
def prepare_batch(batch, device=None, non_blocking=False):
    return _prepare_batch((batch["img"], batch["seg"]), device, non_blocking)

trainer = create_supervised_trainer(
    net, opt, loss, device, False, prepare_batch=prepare_batch
)

# adding checkpoint handler to save models (network params and optimizer stats) during training
checkpoint_handler = ModelCheckpoint(
    "./runs_dict/", "net", n_saved=10, require_empty=False
)
trainer.add_event_handler(
    event_name=Events.EPOCH_COMPLETED,
    handler=checkpoint_handler,
    to_save={"net": net, "opt": opt},
)

# StatsHandler prints loss at every iteration and print metrics at every epoch,
# we don't set metrics for trainer here, so just print loss, user can also customize print functions
# and can use output_transform to convert engine.state.output if it's not loss value
train_stats_handler = StatsHandler(name="trainer", output_transform=lambda x: x)
train_stats_handler.attach(trainer)

# TensorBoardStatsHandler plots loss at every iteration and plots metrics at every epoch, same as StatsHandler
train_tensorboard_stats_handler = TensorBoardStatsHandler(output_transform=lambda x: x)
train_tensorboard_stats_handler.attach(trainer)

validation_every_n_iters = 5
# set parameters for validation
metric_name = "Mean_Dice"
# add evaluation metric to the evaluator engine
val_metrics = {metric_name: MeanDice()}

post_pred = Compose([EnsureType(), Activations(sigmoid=True), AsDiscrete(threshold=0.5)])
post_label = Compose([EnsureType(), AsDiscrete(threshold=0.5)])

# Ignite evaluator expects batch=(img, seg) and returns output=(y_pred, y) at every iteration,
# user can add output_transform to return other values
evaluator = create_supervised_evaluator(
    net,
    val_metrics,
    device,
    True,
    output_transform=lambda x, y, y_pred: ([post_pred(i) for i in decollate_batch(y_pred)], [post_label(i) for i in decollate_batch(y)]),
    prepare_batch=prepare_batch,
)


# validation은 training의 5 iter마다 진행
@trainer.on(Events.ITERATION_COMPLETED(every=validation_every_n_iters))
def run_validation(engine):
    evaluator.run(val_loader)

# add early stopping handler to evaluator
early_stopper = EarlyStopping(
    patience=4, score_function=stopping_fn_from_metric(metric_name), trainer=trainer
)
evaluator.add_event_handler(
    event_name=Events.EPOCH_COMPLETED, handler=early_stopper
)

# add stats event handler to print validation stats via evaluator
val_stats_handler = StatsHandler(
    name="evaluator",
    output_transform=lambda x: None,  # no need to print loss value, so disable per iteration output
    global_epoch_transform=lambda x: trainer.state.iteration,
)  # fetch global epoch number from trainer
val_stats_handler.attach(evaluator)

# add handler to record metrics to TensorBoard at every validation epoch
val_tensorboard_stats_handler = TensorBoardStatsHandler(
    output_transform=lambda x: None,  # no need to plot loss value, so disable per iteration output
    global_epoch_transform=lambda x: trainer.state.iteration,
)  # fetch global iteration number from trainer
val_tensorboard_stats_handler.attach(evaluator)

# add handler to draw the first image and the corresponding label and model output in the last batch
# here we draw the 3D output as GIF format along the depth axis, every 2 validation iterations.
val_tensorboard_image_handler = TensorBoardImageHandler(
    batch_transform=lambda batch: (batch["img"], batch["seg"]),
    output_transform=lambda output: output[0],
    global_iter_transform=lambda x: trainer.state.epoch,
)
# 그림 그리는 이벤트는 iter 2마다 일어나지만 그림은 tensorboard의 그림은 epoch마다 확인가능
evaluator.add_event_handler(
    event_name=Events.ITERATION_COMPLETED(every=2),
    handler=val_tensorboard_image_handler,
)

train_epochs = 5
state = trainer.run(train_loader, train_epochs)
print(state)