In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import monai
from monai.networks.nets import UNet
from monai.transforms import (
    LoadImaged,
    EnsureChannelFirstd,
    ScaleIntensityd,
    Orientationd,
    Spacingd,
    CropForegroundd,
    ToTensord,
    Compose,
)
from monai.data import Dataset, DataLoader, PersistentDataset
from monai.losses import DiceFocalLoss
from monai.metrics import DiceMetric
from monai.inferers import SlidingWindowInferer
import pandas as pd
import numpy as np
import yaml
from tqdm import tqdm
import munch

# Add MetaTensor to safe globals for pickle
from monai.data.meta_tensor import MetaTensor

torch.serialization.add_safe_globals([MetaTensor])


def load_config(fn: str = "config.yaml"):
    "Load config from YAML and return a serialized dictionary object"
    with open(fn, "r") as stream:
        config = yaml.safe_load(stream)
    config = munch.munchify(config)

    if not config.overwrite:
        i = 1
        while os.path.exists(config.run_id + f"_{i}"):
            i += 1
        config.run_id += f"_{i}"

    config.out_dir = os.path.join(config.run_id, config.out_dir)
    config.log_dir = os.path.join(config.run_id, config.log_dir)

    if not isinstance(config.data.image_cols, (tuple, list)):
        config.data.image_cols = [config.data.image_cols]
    if not isinstance(config.data.label_cols, (tuple, list)):
        config.data.label_cols = [config.data.label_cols]

    config.transforms.mode = ("bilinear",) * len(config.data.image_cols) + (
        "nearest",
    ) * len(config.data.label_cols)
    return config


# Simple 3D UNet Trainer
class SimpleSegmentationTrainer:
    def __init__(self, config_file="tumor.yaml"):
        self.config = load_config(config_file)
        self.device = torch.device(
            self.config.device if torch.cuda.is_available() else "cpu"
        )

        # Create output directories
        self.setup_directories()

        # Set random seed
        torch.manual_seed(self.config.seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(self.config.seed)

        # Initialize the model
        self.create_model()

        # Setup data loaders
        self.setup_data()

        # Setup loss function and optimizer
        self.setup_training()

    def setup_directories(self):
        """Create necessary directories for outputs"""
        os.makedirs(self.config.out_dir, exist_ok=True)
        os.makedirs(self.config.model_dir, exist_ok=True)
        os.makedirs(self.config.log_dir, exist_ok=True)

    def create_model(self):
        """Initialize a simple UNet model"""
        self.model = UNet(
            spatial_dims=self.config.ndim,
            in_channels=self.config.model.in_channels,
            out_channels=self.config.model.out_channels,
            channels=self.config.model.channels,
            strides=self.config.model.strides,
            dropout=self.config.model.dropout,
            num_res_units=self.config.model.num_res_units,
        )

        self.model = self.model.to(self.device)
        print(f"Model created: {type(self.model).__name__}")

    def setup_data(self):
        """Setup data loaders for training and validation"""
        # Read CSV files
        train_df = pd.read_csv(self.config.data.train_csv)
        valid_df = pd.read_csv(self.config.data.valid_csv)

        if self.config.debug:
            train_df = train_df.sample(4)
            valid_df = valid_df.sample(2)

        # Process data paths
        data_dir = self.config.data.data_dir
        image_cols = (
            self.config.data.image_cols
            if isinstance(self.config.data.image_cols, list)
            else [self.config.data.image_cols]
        )
        label_cols = (
            self.config.data.label_cols
            if isinstance(self.config.data.label_cols, list)
            else [self.config.data.label_cols]
        )

        # Create data dictionaries
        train_files = []
        for _, row in train_df.iterrows():
            data_dict = {}
            for col in image_cols:
                data_dict[col] = os.path.join(data_dir, row[col])
            for col in label_cols:
                data_dict[col] = os.path.join(data_dir, row[col])
            train_files.append(data_dict)

        val_files = []
        for _, row in valid_df.iterrows():
            data_dict = {}
            for col in image_cols:
                data_dict[col] = os.path.join(data_dir, row[col])
            for col in label_cols:
                data_dict[col] = os.path.join(data_dir, row[col])
            val_files.append(data_dict)

        # Create transforms
        train_transforms = self.get_transforms(image_cols, label_cols)
        val_transforms = self.get_transforms(image_cols, label_cols)

        # Create datasets and data loaders
        if self.config.data.dataset_type == "persistent":
            # Create cache directory
            os.makedirs(self.config.data.cache_dir, exist_ok=True)
            train_ds = Dataset(data=train_files, transform=train_transforms)
            val_ds = Dataset(data=val_files, transform=val_transforms)
        else:
            train_ds = Dataset(data=train_files, transform=train_transforms)
            val_ds = Dataset(data=val_files, transform=val_transforms)

        self.train_loader = DataLoader(
            train_ds,
            batch_size=self.config.data.batch_size,
            shuffle=True,
            num_workers=4,
            persistent_workers=False,  # Make sure this is False
        )

        self.val_loader = DataLoader(
            val_ds,
            batch_size=1,  # Always use batch size 1 for validation
            num_workers=4,
            persistent_workers=False,  # Make sure this is False
        )

        print(
            f"Data loaders created - Train: {len(self.train_loader)} batches, Val: {len(self.val_loader)} batches"
        )

    def get_transforms(self, image_cols, label_cols):
        """Create transforms with proper mode handling"""
        all_keys = [*image_cols, *label_cols]

        # Generate modes with bilinear for images and nearest for labels
        modes = ["bilinear"] * len(image_cols) + ["nearest"] * len(label_cols)

        # Basic transforms
        transforms_list = [
            LoadImaged(keys=all_keys),
            EnsureChannelFirstd(keys=all_keys),
            Spacingd(
                keys=all_keys,
                pixdim=self.config.transforms.spacing,
                mode=modes,
            ),
            Orientationd(keys=all_keys, axcodes=self.config.transforms.orientation),
            ScaleIntensityd(keys=image_cols),
            CropForegroundd(keys=all_keys, source_key=image_cols[0]),
            ToTensord(keys=all_keys),
        ]

        return Compose(transforms_list)

    def setup_training(self):
        """Setup loss function, optimizer and learning rate scheduler"""
        # Loss function
        loss_params = self.config.loss.DiceFocalLoss
        print(f"loss_params: This is the loss params {loss_params}")

        # Configure loss function
        self.loss_function = DiceFocalLoss(
            include_background=loss_params.include_background,
            to_onehot_y=loss_params.to_onehot_y,
            softmax=loss_params.softmax,
        )

        # Print the optimizer structure to debug
        print(f"Optimizer structure: {self.config.optimizer}")

        # Optimizer - use Novograd with hardcoded parameters from tumor.yaml
        from monai.optimizers import Novograd

        self.optimizer = Novograd(
            self.model.parameters(),
            lr=0.001,  # Use hardcoded value from your YAML
            weight_decay=0.01,  # Use hardcoded value from your YAML
            amsgrad=True,  # Use hardcoded value from your YAML
        )

        # Learning rate scheduler - assume OneCycleLR
        scheduler_name = "OneCycleLR"
        scheduler_params = self.config.lr_scheduler.OneCycleLR

        self.scheduler = optim.lr_scheduler.OneCycleLR(
            self.optimizer,
            max_lr=scheduler_params.max_lr,
            steps_per_epoch=len(self.train_loader),
            epochs=self.config.training.max_epochs,
        )

        # Metrics
        self.metric = DiceMetric(include_background=False, reduction="mean")

        # Inferer for sliding window inference
        self.inferer = SlidingWindowInferer(
            roi_size=(64, 64, 64), sw_batch_size=4, overlap=0.5
        )

        print(
            f"Training setup complete - Loss: {type(self.loss_function).__name__}, Optimizer: Novograd"
        )

    def validate(self, epoch):
        """Run validation"""
        self.model.eval()
        metric_values = []

        with torch.no_grad():
            for batch_data in tqdm(self.val_loader, desc="Validation"):
                # Process inputs
                inputs = torch.cat(
                    [batch_data[key] for key in self.config.data.image_cols], dim=1
                ).to(self.device)
                label_key = self.config.data.label_cols
                if isinstance(label_key, list):
                    label_key = label_key[0]
                labels = batch_data[label_key].to(self.device)

                # Use sliding window inference for validation
                outputs = self.inferer(inputs, self.model)

                # Calculate metrics
                self.metric(y_pred=outputs, y=labels)
                metric_values.append(self.metric.aggregate().item())
                self.metric.reset()

        # Calculate mean Dice score
        mean_metric = np.mean(metric_values)
        print(f"Epoch {epoch} - Validation Dice: {mean_metric:.4f}")

        return mean_metric

    def train_epoch(self, epoch):
        """Run one epoch of training"""
        self.model.train()
        epoch_loss = 0
        progress = tqdm(
            self.train_loader, desc=f"Epoch {epoch}/{self.config.training.max_epochs}"
        )

        for batch_data in progress:
            # Process inputs
            inputs = torch.cat(
                [batch_data[key] for key in self.config.data.image_cols], dim=1
            ).to(self.device)
            label_key = self.config.data.label_cols
            if isinstance(label_key, list):
                label_key = label_key[0]
            labels = batch_data[label_key].to(self.device)

            # Forward pass
            self.optimizer.zero_grad()
            outputs = self.model(inputs)

            # Calculate loss
            loss = self.loss_function(outputs, labels)

            # Backward pass
            loss.backward()
            self.optimizer.step()

            # Update learning rate if using OneCycleLR
            if hasattr(self, "scheduler"):
                self.scheduler.step()

            # Update progress bar
            epoch_loss += loss.item()
            progress.set_postfix({"loss": loss.item()})

        # Return average loss for the epoch
        return epoch_loss / len(self.train_loader)

    def train(self):
        """Main training loop"""
        best_metric = -1
        best_epoch = -1
        patience_counter = 0
        train_losses = []
        val_metrics = []

        print(f"Starting training for {self.config.training.max_epochs} epochs")

        for epoch in range(1, self.config.training.max_epochs + 1):
            # Train for one epoch
            train_loss = self.train_epoch(epoch)
            train_losses.append(train_loss)

            # Validate
            val_metric = self.validate(epoch)
            val_metrics.append(val_metric)

            # Save best model
            if val_metric > best_metric:
                best_metric = val_metric
                best_epoch = epoch
                patience_counter = 0

                # Save model
                model_path = os.path.join(
                    self.config.model_dir, f"{self.config.run_id}_best_model.pth"
                )
                torch.save(
                    {
                        "epoch": epoch,
                        "model_state_dict": self.model.state_dict(),
                        "optimizer_state_dict": self.optimizer.state_dict(),
                        "val_metric": val_metric,
                    },
                    model_path,
                )

                print(
                    f"Saved new best model at epoch {epoch} with Dice: {val_metric:.4f}"
                )
            else:
                patience_counter += 1

            # Early stopping
            if patience_counter >= self.config.training.early_stopping_patience:
                print(f"Early stopping triggered after {epoch} epochs")
                break

            # Save training progress
            self.save_metrics(train_losses, val_metrics)

        print(
            f"Training completed. Best model at epoch {best_epoch} with Dice: {best_metric:.4f}"
        )
        return best_metric, best_epoch

    def save_metrics(self, train_losses, val_metrics):
        """Save metrics to files"""
        # Save training loss
        with open(os.path.join(self.config.log_dir, "train_logs.csv"), "w") as f:
            f.write("epoch,loss\n")
            for i, loss in enumerate(train_losses):
                f.write(f"{i+1},{loss}\n")

        # Save validation metrics
        with open(os.path.join(self.config.log_dir, "metric_logs.csv"), "w") as f:
            f.write("epoch,dice\n")
            for i, metric in enumerate(val_metrics):
                f.write(f"{i+1},{metric}\n")


trainer = SimpleSegmentationTrainer("tumor.yaml")
best_metric, best_epoch = trainer.train()

monai.transforms.croppad.dictionary CropForegroundd.__init__:allow_smaller: Current default value of argument `allow_smaller=True` has been deprecated since version 1.2. It will be changed to `allow_smaller=False` in version 1.5.


Model created: UNet
Data loaders created - Train: 119 batches, Val: 20 batches
loss_params: This is the loss params Munch({'include_background': False, 'softmax': True, 'to_onehot_y': True})
Optimizer structure: Munch({'Novograd': Munch({'lr': 0.001, 'weight_decay': 0.01, 'amsgrad': True})})
Training setup complete - Loss: DiceFocalLoss, Optimizer: Novograd
Starting training for 1000 epochs


Epoch 1/1000:   0%|          | 0/119 [00:29<?, ?it/s]


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 31 but got size 32 for tensor number 1 in the list.

In [37]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import monai
from monai.networks.nets import UNet
from monai.transforms import (
    LoadImaged,
    EnsureChannelFirstd,
    ScaleIntensityd,
    Orientationd,
    Spacingd,
    CropForegroundd,
    ToTensord,
    Compose,
    RandCropByPosNegLabeld,
    SpatialPadd,
    ResizeWithPadOrCropd,
)
from monai.data import Dataset, DataLoader
from monai.losses import DiceFocalLoss
from monai.metrics import DiceMetric
from monai.inferers import SlidingWindowInferer
import pandas as pd
import numpy as np
import yaml
from tqdm import tqdm
import munch

# Add MetaTensor to safe globals for pickle
from monai.data.meta_tensor import MetaTensor

torch.serialization.add_safe_globals([MetaTensor])


def load_config(fn: str = "config.yaml"):
    "Load config from YAML and return a serialized dictionary object"
    with open(fn, "r") as stream:
        config = yaml.safe_load(stream)
    config = munch.munchify(config)

    if not config.overwrite:
        i = 1
        while os.path.exists(config.run_id + f"_{i}"):
            i += 1
        config.run_id += f"_{i}"

    config.out_dir = os.path.join(config.run_id, config.out_dir)
    config.log_dir = os.path.join(config.run_id, config.log_dir)

    if not isinstance(config.data.image_cols, (tuple, list)):
        config.data.image_cols = [config.data.image_cols]
    if not isinstance(config.data.label_cols, (tuple, list)):
        config.data.label_cols = [config.data.label_cols]

    config.transforms.mode = ("bilinear",) * len(config.data.image_cols) + (
        "nearest",
    ) * len(config.data.label_cols)
    return config


# Simple 3D UNet Trainer
class SimpleSegmentationTrainer:
    def __init__(self, config_file="tumor.yaml"):
        self.config = load_config(config_file)
        self.device = torch.device(
            self.config.device if torch.cuda.is_available() else "cpu"
        )

        # Create output directories
        self.setup_directories()

        # Set random seed
        torch.manual_seed(self.config.seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed(self.config.seed)

        # Initialize the model
        self.create_model()

        # Setup data loaders
        self.setup_data()

        # Setup loss function and optimizer
        self.setup_training()

    def setup_directories(self):
        """Create necessary directories for outputs"""
        os.makedirs(self.config.out_dir, exist_ok=True)
        os.makedirs(self.config.model_dir, exist_ok=True)
        os.makedirs(self.config.log_dir, exist_ok=True)

    def create_model(self):
        """Initialize a simple UNet model"""
        self.model = UNet(
            spatial_dims=self.config.ndim,
            in_channels=self.config.model.in_channels,
            out_channels=self.config.model.out_channels,
            channels=self.config.model.channels,
            strides=self.config.model.strides,
            dropout=self.config.model.dropout,
            num_res_units=self.config.model.num_res_units,
        )

        self.model = self.model.to(self.device)
        print(f"Model created: {type(self.model).__name__}")

    def setup_data(self):
        """Setup data loaders for training and validation"""
        # Read CSV files
        train_df = pd.read_csv(self.config.data.train_csv)
        valid_df = pd.read_csv(self.config.data.valid_csv)

        if self.config.debug:
            train_df = train_df.sample(4)
            valid_df = valid_df.sample(2)

        # Process data paths
        data_dir = self.config.data.data_dir
        image_cols = (
            self.config.data.image_cols
            if isinstance(self.config.data.image_cols, list)
            else [self.config.data.image_cols]
        )
        label_cols = (
            self.config.data.label_cols
            if isinstance(self.config.data.label_cols, list)
            else [self.config.data.label_cols]
        )

        # Create data dictionaries
        train_files = []
        for _, row in train_df.iterrows():
            data_dict = {}
            for col in image_cols:
                data_dict[col] = os.path.join(data_dir, row[col])
            for col in label_cols:
                data_dict[col] = os.path.join(data_dir, row[col])
            train_files.append(data_dict)

        val_files = []
        for _, row in valid_df.iterrows():
            data_dict = {}
            for col in image_cols:
                data_dict[col] = os.path.join(data_dir, row[col])
            for col in label_cols:
                data_dict[col] = os.path.join(data_dir, row[col])
            val_files.append(data_dict)

        # Create transforms
        train_transforms = self.get_transforms(image_cols, label_cols)
        val_transforms = self.get_transforms(image_cols, label_cols)

        # Create datasets and data loaders
        train_ds = Dataset(data=train_files, transform=train_transforms)
        val_ds = Dataset(data=val_files, transform=val_transforms)

        self.train_loader = DataLoader(
            train_ds,
            batch_size=self.config.data.batch_size,
            shuffle=True,
            num_workers=4,
            persistent_workers=False,  # Make sure this is False
        )

        self.val_loader = DataLoader(
            val_ds,
            batch_size=1,  # Always use batch size 1 for validation
            num_workers=4,
            persistent_workers=False,  # Make sure this is False
        )

        print(
            f"Data loaders created - Train: {len(self.train_loader)} batches, Val: {len(self.val_loader)} batches"
        )

    def get_transforms(self, image_cols, label_cols):
        """Create transforms with proper mode handling and ensuring consistent sizes"""
        all_keys = [*image_cols, *label_cols]

        # Generate modes with bilinear for images and nearest for labels
        modes = ["bilinear"] * len(image_cols) + ["nearest"] * len(label_cols)

        # Define a fixed size to enforce consistent dimensions
        # Using a multiple of 8 or 16 is good for UNet architectures
        # This helps avoid size mismatch issues in skip connections
        roi_size = (64, 64, 64)  # Fixed size that works well with UNet

        # Basic transforms
        transforms_list = [
            LoadImaged(keys=all_keys),
            EnsureChannelFirstd(keys=all_keys),
            Spacingd(
                keys=all_keys,
                pixdim=self.config.transforms.spacing,
                mode=modes,
            ),
            Orientationd(keys=all_keys, axcodes=self.config.transforms.orientation),
            ScaleIntensityd(keys=image_cols),
            CropForegroundd(keys=all_keys, source_key=image_cols[0]),
            # Add padding to ensure dimensions are multiples of 8
            SpatialPadd(keys=all_keys, spatial_size=roi_size),
            # Crop or pad to the exact ROI size
            ResizeWithPadOrCropd(keys=all_keys, spatial_size=roi_size),
            ToTensord(keys=all_keys),
        ]

        return Compose(transforms_list)

    def setup_training(self):
        """Setup loss function, optimizer and learning rate scheduler"""
        # Loss function
        loss_params = self.config.loss.DiceFocalLoss
        print(f"loss_params: This is the loss params {loss_params}")

        # Configure loss function
        self.loss_function = DiceFocalLoss(
            include_background=loss_params.include_background,
            to_onehot_y=loss_params.to_onehot_y,
            softmax=loss_params.softmax,
        )

        # Print the optimizer structure to debug
        print(f"Optimizer structure: {self.config.optimizer}")

        # Optimizer - use Novograd with hardcoded parameters from tumor.yaml
        from monai.optimizers import Novograd

        self.optimizer = Novograd(
            self.model.parameters(),
            lr=0.001,  # Use hardcoded value from your YAML
            weight_decay=0.01,  # Use hardcoded value from your YAML
            amsgrad=True,  # Use hardcoded value from your YAML
        )

        # Learning rate scheduler - assume OneCycleLR
        scheduler_params = self.config.lr_scheduler.OneCycleLR

        self.scheduler = optim.lr_scheduler.OneCycleLR(
            self.optimizer,
            max_lr=scheduler_params.max_lr,
            steps_per_epoch=len(self.train_loader),
            epochs=self.config.training.max_epochs,
        )

        # Metrics
        self.metric = DiceMetric(include_background=False, reduction="mean")

        # Inferer for sliding window inference
        self.inferer = SlidingWindowInferer(
            roi_size=(64, 64, 64), sw_batch_size=4, overlap=0.5
        )

        print(
            f"Training setup complete - Loss: {type(self.loss_function).__name__}, Optimizer: Novograd"
        )

    def validate(self, epoch):
        """Run validation"""
        self.model.eval()
        metric_values = []

        with torch.no_grad():
            for batch_data in tqdm(self.val_loader, desc="Validation"):
                # Process inputs
                inputs = torch.cat(
                    [batch_data[key] for key in self.config.data.image_cols], dim=1
                ).to(self.device)
                label_key = self.config.data.label_cols
                if isinstance(label_key, list):
                    label_key = label_key[0]
                labels = batch_data[label_key].to(self.device)

                # Use sliding window inference for validation
                outputs = self.inferer(inputs, self.model)

                # Calculate metrics
                self.metric(y_pred=outputs, y=labels)
                metric_values.append(self.metric.aggregate().item())
                self.metric.reset()

        # Calculate mean Dice score
        mean_metric = np.mean(metric_values)
        print(f"Epoch {epoch} - Validation Dice: {mean_metric:.4f}")

        return mean_metric

    def train_epoch(self, epoch):
        """Run one epoch of training"""
        self.model.train()
        epoch_loss = 0
        progress = tqdm(
            self.train_loader, desc=f"Epoch {epoch}/{self.config.training.max_epochs}"
        )

        for batch_data in progress:
            # Process inputs
            inputs = torch.cat(
                [batch_data[key] for key in self.config.data.image_cols], dim=1
            ).to(self.device)
            label_key = self.config.data.label_cols
            if isinstance(label_key, list):
                label_key = label_key[0]
            labels = batch_data[label_key].to(self.device)

            # Forward pass
            self.optimizer.zero_grad()
            outputs = self.model(inputs)

            # Calculate loss
            loss = self.loss_function(outputs, labels)

            # Backward pass
            loss.backward()
            self.optimizer.step()

            # Update learning rate if using OneCycleLR
            if hasattr(self, "scheduler"):
                self.scheduler.step()

            # Update progress bar
            epoch_loss += loss.item()
            progress.set_postfix({"loss": loss.item()})

        # Return average loss for the epoch
        return epoch_loss / len(self.train_loader)

    def train(self):
        """Main training loop"""
        best_metric = -1
        best_epoch = -1
        patience_counter = 0
        train_losses = []
        val_metrics = []

        print(f"Starting training for {self.config.training.max_epochs} epochs")

        for epoch in range(1, self.config.training.max_epochs + 1):
            # Train for one epoch
            train_loss = self.train_epoch(epoch)
            train_losses.append(train_loss)

            # Validate
            val_metric = self.validate(epoch)
            val_metrics.append(val_metric)

            # Save best model
            if val_metric > best_metric:
                best_metric = val_metric
                best_epoch = epoch
                patience_counter = 0

                # Save model
                model_path = os.path.join(
                    self.config.model_dir, f"{self.config.run_id}_best_model.pth"
                )
                torch.save(
                    {
                        "epoch": epoch,
                        "model_state_dict": self.model.state_dict(),
                        "optimizer_state_dict": self.optimizer.state_dict(),
                        "val_metric": val_metric,
                    },
                    model_path,
                )

                print(
                    f"Saved new best model at epoch {epoch} with Dice: {val_metric:.4f}"
                )
            else:
                patience_counter += 1

            # Early stopping
            if patience_counter >= self.config.training.early_stopping_patience:
                print(f"Early stopping triggered after {epoch} epochs")
                break

            # Save training progress
            self.save_metrics(train_losses, val_metrics)

        print(
            f"Training completed. Best model at epoch {best_epoch} with Dice: {best_metric:.4f}"
        )
        return best_metric, best_epoch

    def save_metrics(self, train_losses, val_metrics):
        """Save metrics to files"""
        # Save training loss
        with open(os.path.join(self.config.log_dir, "train_logs.csv"), "w") as f:
            f.write("epoch,loss\n")
            for i, loss in enumerate(train_losses):
                f.write(f"{i+1},{loss}\n")

        # Save validation metrics
        with open(os.path.join(self.config.log_dir, "metric_logs.csv"), "w") as f:
            f.write("epoch,dice\n")
            for i, metric in enumerate(val_metrics):
                f.write(f"{i+1},{metric}\n")


trainer = SimpleSegmentationTrainer("tumor.yaml")
best_metric, best_epoch = trainer.train()

monai.transforms.croppad.dictionary CropForegroundd.__init__:allow_smaller: Current default value of argument `allow_smaller=True` has been deprecated since version 1.2. It will be changed to `allow_smaller=False` in version 1.5.


Model created: UNet
Data loaders created - Train: 119 batches, Val: 20 batches
loss_params: This is the loss params Munch({'include_background': False, 'softmax': True, 'to_onehot_y': True})
Optimizer structure: Munch({'Novograd': Munch({'lr': 0.001, 'weight_decay': 0.01, 'amsgrad': True})})
Training setup complete - Loss: DiceFocalLoss, Optimizer: Novograd
Starting training for 1000 epochs


Epoch 1/1000: 100%|██████████| 119/119 [02:32<00:00,  1.29s/it, loss=1.35] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.03s/it]


Epoch 1 - Validation Dice: 0.0483
Saved new best model at epoch 1 with Dice: 0.0483


Epoch 2/1000: 100%|██████████| 119/119 [02:33<00:00,  1.29s/it, loss=1.21] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.03s/it]


Epoch 2 - Validation Dice: 0.0483


Epoch 3/1000: 100%|██████████| 119/119 [03:27<00:00,  1.75s/it, loss=1.04] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.03s/it]


Epoch 3 - Validation Dice: 0.0483


Epoch 4/1000: 100%|██████████| 119/119 [02:30<00:00,  1.26s/it, loss=1.13] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.01s/it]


Epoch 4 - Validation Dice: 0.0483


Epoch 5/1000: 100%|██████████| 119/119 [02:31<00:00,  1.27s/it, loss=1.13]
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.03s/it]


Epoch 5 - Validation Dice: 0.0483


Epoch 6/1000: 100%|██████████| 119/119 [02:33<00:00,  1.29s/it, loss=1.11] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.01s/it]


Epoch 6 - Validation Dice: 0.0483


Epoch 7/1000: 100%|██████████| 119/119 [02:36<00:00,  1.31s/it, loss=0.77] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.00s/it]


Epoch 7 - Validation Dice: 0.0483


Epoch 8/1000: 100%|██████████| 119/119 [02:34<00:00,  1.30s/it, loss=1.09] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.01s/it]


Epoch 8 - Validation Dice: 0.0483


Epoch 9/1000: 100%|██████████| 119/119 [02:31<00:00,  1.27s/it, loss=1.09] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.01s/it]


Epoch 9 - Validation Dice: 0.0483


Epoch 10/1000: 100%|██████████| 119/119 [02:32<00:00,  1.29s/it, loss=1.08] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.02s/it]


Epoch 10 - Validation Dice: 0.0483


Epoch 11/1000: 100%|██████████| 119/119 [02:30<00:00,  1.27s/it, loss=1.07] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.01s/it]


Epoch 11 - Validation Dice: 0.0483


Epoch 12/1000: 100%|██████████| 119/119 [02:35<00:00,  1.30s/it, loss=0.979]
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.01s/it]


Epoch 12 - Validation Dice: 0.0483


Epoch 13/1000: 100%|██████████| 119/119 [02:32<00:00,  1.28s/it, loss=1.06] 
Validation: 100%|██████████| 20/20 [00:41<00:00,  2.06s/it]


Epoch 13 - Validation Dice: 0.0483


Epoch 14/1000: 100%|██████████| 119/119 [02:34<00:00,  1.30s/it, loss=1.06] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.03s/it]


Epoch 14 - Validation Dice: 0.0483


Epoch 15/1000: 100%|██████████| 119/119 [02:31<00:00,  1.28s/it, loss=1.02] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.00s/it]


Epoch 15 - Validation Dice: 0.0483


Epoch 16/1000: 100%|██████████| 119/119 [02:33<00:00,  1.29s/it, loss=1.05] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.02s/it]


Epoch 16 - Validation Dice: 0.0483


Epoch 17/1000: 100%|██████████| 119/119 [02:32<00:00,  1.29s/it, loss=1.05] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.01s/it]


Epoch 17 - Validation Dice: 0.0483


Epoch 18/1000: 100%|██████████| 119/119 [02:33<00:00,  1.29s/it, loss=1.01] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.02s/it]


Epoch 18 - Validation Dice: 0.0483


Epoch 19/1000: 100%|██████████| 119/119 [02:32<00:00,  1.28s/it, loss=1.04] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.00s/it]


Epoch 19 - Validation Dice: 0.0483


Epoch 20/1000: 100%|██████████| 119/119 [02:33<00:00,  1.29s/it, loss=1.04] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.02s/it]


Epoch 20 - Validation Dice: 0.0483


Epoch 21/1000: 100%|██████████| 119/119 [02:32<00:00,  1.28s/it, loss=0.741]
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.03s/it]


Epoch 21 - Validation Dice: 0.0483


Epoch 22/1000: 100%|██████████| 119/119 [02:33<00:00,  1.29s/it, loss=0.952]
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.03s/it]


Epoch 22 - Validation Dice: 0.0483


Epoch 23/1000: 100%|██████████| 119/119 [02:34<00:00,  1.29s/it, loss=0.893]
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.02s/it]


Epoch 23 - Validation Dice: 0.0483


Epoch 24/1000: 100%|██████████| 119/119 [02:30<00:00,  1.27s/it, loss=1.03] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.01s/it]


Epoch 24 - Validation Dice: 0.0483


Epoch 25/1000: 100%|██████████| 119/119 [02:31<00:00,  1.27s/it, loss=1.02] 
Validation: 100%|██████████| 20/20 [00:40<00:00,  2.01s/it]


Epoch 25 - Validation Dice: 0.0483


Epoch 26/1000:  56%|█████▋    | 67/119 [01:33<01:12,  1.40s/it, loss=0.807]


KeyboardInterrupt: 