Misc updates to transforms, loss and UNETR #181

Merged 7 commits on Dec 10, 2023
Changes from 3 commits
13 changes: 13 additions & 0 deletions scripts/load_mae_vit.py
@@ -0,0 +1,13 @@
from collections import OrderedDict

import torch
from torch_em.model import UNETR

checkpoint = "imagenet.pth"
encoder_state = torch.load(checkpoint, map_location="cpu")["model"]
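# Keep only the ViT encoder parameters: drop the mask token and the decoder state.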
encoder_state = OrderedDict({
    k: v for k, v in encoder_state.items()
    if (k != "mask_token" and not k.startswith("decoder"))
})

unetr_model = UNETR(backbone="mae", encoder="vit_l", encoder_checkpoint=encoder_state)
26 changes: 26 additions & 0 deletions torch_em/loss/dice.py
@@ -84,6 +84,32 @@ def forward(self, input_, target):
)


class BCEDiceLoss(nn.Module):

    def __init__(self, alpha=1., beta=1., channelwise=True, eps=1e-7):
        super().__init__()
        self.alpha = alpha
        self.beta = beta
        self.channelwise = channelwise
        self.eps = eps

        # all torch_em classes should store init kwargs to easily recreate the init call
        self.init_kwargs = {"alpha": alpha, "beta": beta, "channelwise": channelwise, "eps": self.eps}

    def forward(self, input_, target):
        # Note: the input is expected to contain probabilities, since the BCE term
        # uses binary_cross_entropy; use BCEDiceLossWithLogits below for logits.
        loss_dice = dice_score(
            input_,
            target,
            invert=True,
            channelwise=self.channelwise,
            eps=self.eps
        )
        loss_bce = nn.functional.binary_cross_entropy(
            input_, target
        )
        return self.alpha * loss_dice + self.beta * loss_bce


# TODO think about how to handle combined losses like this for mixed precision training
class BCEDiceLossWithLogits(nn.Module):

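For illustration, a minimal usage sketch of the new loss (assuming BCEDiceLoss is importable from torch_em.loss.dice; the shapes are arbitrary):

import torch
from torch_em.loss.dice import BCEDiceLoss

loss_fn = BCEDiceLoss(alpha=1.0, beta=1.0)

# Predictions must be probabilities in [0, 1], since the loss uses
# binary_cross_entropy rather than the logits variant.
pred = torch.sigmoid(torch.randn(4, 1, 64, 64, requires_grad=True))
target = (torch.rand(4, 1, 64, 64) > 0.5).float()

loss = loss_fn(pred, target)  # alpha * dice loss + beta * bce loss
loss.backward()

Storing self.init_kwargs keeps the class consistent with the other torch_em losses, which record their init call for easy re-creation.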
4 changes: 3 additions & 1 deletion torch_em/loss/distance_based.py
@@ -16,7 +16,7 @@ class DistanceLoss(nn.Module):
"""
def __init__(
self,
mask_distances_in_bg: bool,
mask_distances_in_bg: bool = True,
foreground_loss: nn.Module = DiceLoss(),
distance_loss: nn.Module = nn.MSELoss(reduction="mean")
) -> None:
@@ -26,6 +26,8 @@ def __init__(
        self.distance_loss = distance_loss
        self.mask_distances_in_bg = mask_distances_in_bg

        self.init_kwargs = {"mask_distances_in_bg": mask_distances_in_bg}

    def forward(self, input_, target):
        assert input_.shape == target.shape, input_.shape
        assert input_.shape[1] == 3, input_.shape
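With the new default, the loss can be constructed without arguments. A minimal sketch (the random tensors are placeholders for real predictions and distance targets; the three-channel shape follows the assertions in forward):

import torch
from torch_em.loss.distance_based import DistanceLoss

loss_fn = DistanceLoss()  # mask_distances_in_bg now defaults to True

# forward asserts matching shapes and exactly three channels.
pred = torch.rand(2, 3, 64, 64, requires_grad=True)
target = torch.rand(2, 3, 64, 64)
loss = loss_fn(pred, target)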
61 changes: 33 additions & 28 deletions torch_em/model/unetr.py
@@ -1,9 +1,10 @@
+from collections import OrderedDict
+from typing import Optional, Tuple, Union

import torch
import torch.nn as nn
import torch.nn.functional as F

-from typing import Tuple

from .unet import Decoder, ConvBlock2d, Upsampler2d
from .vit import get_vision_transformer

@@ -22,37 +23,41 @@ class UNETR(nn.Module):

    def _load_encoder_from_checkpoint(self, backbone, encoder, checkpoint):

-        if backbone == "sam":
-            # If we have a SAM encoder, then we first try to load the full SAM Model
-            # (using micro_sam) and otherwise fall back on directly loading the encoder state
-            # from the checkpoint
-            try:
-                _, model = get_sam_model(
-                    model_type=encoder,
-                    checkpoint_path=checkpoint,
-                    return_sam=True
-                )
-                encoder_state = model.image_encoder.state_dict()
-            except Exception:
-                # If we have a MAE encoder, then we directly load the encoder state
-                # from the checkpoint.
-                encoder_state = torch.load(checkpoint)
-
-        elif backbone == "mae":
-            encoder_state = torch.load(checkpoint)
+        if isinstance(checkpoint, str):
+            if backbone == "sam":
+                # If we have a SAM encoder, then we first try to load the full SAM Model
+                # (using micro_sam) and otherwise fall back on directly loading the encoder state
+                # from the checkpoint.
+                try:
+                    _, model = get_sam_model(
+                        model_type=encoder,
+                        checkpoint_path=checkpoint,
+                        return_sam=True
+                    )
+                    encoder_state = model.image_encoder.state_dict()
+                except Exception:
+                    # If we have a MAE encoder, then we directly load the encoder state
+                    # from the checkpoint.
+                    encoder_state = torch.load(checkpoint)
+
+            elif backbone == "mae":
+                encoder_state = torch.load(checkpoint)
+        else:
+            encoder_state = checkpoint

        self.encoder.load_state_dict(encoder_state)

    def __init__(
        self,
-        backbone="sam",
-        encoder="vit_b",
-        decoder=None,
-        out_channels=1,
-        use_sam_stats=False,
-        use_mae_stats=False,
-        encoder_checkpoint_path=None,
-        final_activation=None,
+        backbone: str = "sam",
+        encoder: str = "vit_b",
+        decoder: Optional[nn.Module] = None,
+        out_channels: int = 1,
+        use_sam_stats: bool = False,
+        use_mae_stats: bool = False,
+        encoder_checkpoint: Optional[Union[str, OrderedDict]] = None,
+        final_activation: Optional[Union[str, nn.Module]] = None,
    ) -> None:
        super().__init__()

@@ -61,8 +66,8 @@ def __init__(

print(f"Using {encoder} from {backbone.upper()}")
self.encoder = get_vision_transformer(backbone=backbone, model=encoder)
if encoder_checkpoint_path is not None:
self._load_encoder_from_checkpoint(backbone, encoder, encoder_checkpoint_path)
if encoder_checkpoint is not None:
self._load_encoder_from_checkpoint(backbone, encoder, encoder_checkpoint)

        # parameters for the decoder network
        depth = 3
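The renamed encoder_checkpoint argument now accepts either a checkpoint path or an already loaded state dict, matching the isinstance(checkpoint, str) branch above. A sketch of both call styles (the file names are placeholders):

import torch
from torch_em.model import UNETR

# 1) Pass a checkpoint path; for the "sam" backbone the full SAM model is
#    loaded via micro_sam, with a fallback to torch.load.
model = UNETR(backbone="sam", encoder="vit_b", encoder_checkpoint="sam_vit_b.pth")

# 2) Pass a pre-loaded state dict directly, as done in scripts/load_mae_vit.py
#    (which also filters out the mask token and decoder parameters first).
encoder_state = torch.load("imagenet.pth", map_location="cpu")["model"]
model = UNETR(backbone="mae", encoder="vit_l", encoder_checkpoint=encoder_state)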
31 changes: 26 additions & 5 deletions torch_em/transform/label.py
@@ -1,3 +1,5 @@
from typing import Optional

import numpy as np
import skimage.measure
import skimage.segmentation
@@ -192,16 +194,25 @@ def __call__(self, labels):


class DistanceTransform:
-    """Compute distances to foreground.
+    """Compute distances to foreground in the labels.

    Args:
        distances: Whether to compute the absolute distances.
        directed_distances: Whether to compute the directed distances (vector distances).
        normalize: Whether to normalize the computed distances.
        max_distance: Maximal distance at which to threshold the distances.
        foreground_id: Label id to which the distance is computed.
        invert: Whether to invert the distances.
        func: Normalization function for the distances.
    """
    eps = 1e-7

    def __init__(
        self,
-        distances=True,
-        directed_distances=False,
-        normalize=True,
-        max_distance=None,
+        distances: bool = True,
+        directed_distances: bool = False,
+        normalize: bool = True,
+        max_distance: Optional[float] = None,
        foreground_id=1,
        invert=False,
        func=None
@@ -272,6 +283,16 @@ def __call__(self, labels):

class PerObjectDistanceTransform:
    """Compute normalized distances per object in a segmentation.

    Args:
        distances: Whether to compute the undirected distances.
        boundary_distances: Whether to compute the distances to the object boundaries.
        directed_distances: Whether to compute the directed distances (vector distances).
        foreground: Whether to return a foreground channel.
        apply_label: Whether to apply connected components to the labels before computing distances.
        correct_centers: Whether to correct centers that are not in the objects.
        min_size: Minimal size of objects for distance calculation.
        distance_fill_value: Fill value for the distances outside of objects.
    """
    eps = 1e-7

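To illustrate the documented options, a small sketch applying both transforms to a toy segmentation (the exact layout of the returned channels is an assumption based on the argument descriptions):

import numpy as np
from torch_em.transform.label import DistanceTransform, PerObjectDistanceTransform

# Toy segmentation with two objects.
labels = np.zeros((64, 64), dtype="uint32")
labels[8:24, 8:24] = 1
labels[36:52, 30:50] = 2

# Normalized distances to the foreground.
dist_trafo = DistanceTransform(distances=True, normalize=True)
distance_target = dist_trafo(labels)

# Per-object distances plus a foreground channel.
per_object_trafo = PerObjectDistanceTransform(
    distances=True, boundary_distances=True, foreground=True
)
per_object_target = per_object_trafo(labels)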