In [1]:
# Enable IPython's autoreload extension (mode 2) so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import configparser
import logging
import os
import sys

import numpy as np
import torch
import wandb
from architectures import TempRedUNet, UNetConvLSTM
from custom_losses import FocalLoss, LovaszSoftmax3d, SumFocalLovasz, mySoftDiceLoss, LovaszSoftmax
from datasets import SparkDataset, SparkDatasetLSTM
from new_unet import UNet
from torch import nn, optim
# from torch.cuda.amp import GradScaler
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from training_inference_tools import (
    compute_class_weights,
    random_flip,
    random_flip_noise,
    sampler,
    test_function,
    training_step,
    weights_init,
    myTrainingManager
)

import unet

# Logger for this notebook; its level and handlers are set in the
# "configure logger" cell below.
logger = logging.getLogger(__name__)
# Select PyTorch's "high" float32 matmul precision setting
# (see torch.set_float32_matmul_precision for what each level permits).
torch.set_float32_matmul_precision('high')



Please cite the following paper when using nnUNet:

Isensee, F., Jaeger, P.F., Kohl, S.A.A. et al. "nnU-Net: a self-configuring method for deep learning-based biomedical image segmentation." Nat Methods (2020). https://doi.org/10.1038/s41592-020-01008-z


If you have questions or suggestions, feel free to open an issue at https://github.com/MIC-DKFZ/nnUNet



In [3]:
############################# fixed parameters #############################

# --- Dataset parameters ---
# using 3D data
ndims = 3
# i.e., BG, sparks, waves, puffs
num_classes = 4
# label ignored during training
ignore_index = 4

# --- General params ---
# change this when publishing finished project on github
logfile = None
# wandb project that receives the logged runs (alternative: "sparks2")
wandb_project_name = "TEST"
# directory where output, saved params and testing results are saved
output_relative_path = "runs"

In [4]:
############################# configure logger #############################

# Verbosity is hard-coded here because the config parser `c` is only created
# in a later cell.
# TODO: read it with c.getboolean("general", "debug_mode", fallback=False)
debug_mode = True
if debug_mode:
    verbosity = 3
else:
    verbosity = 2

# verbosity -> logging level, from least (0) to most (3) verbose
level_map = {
    0: logging.ERROR,
    1: logging.WARNING,
    2: logging.INFO,
    3: logging.DEBUG,
}
log_level = level_map[verbosity]

# all records are written to stdout so that they show up as cell output
log_handlers = (logging.StreamHandler(sys.stdout),)

logging.basicConfig(
    handlers=log_handlers,
    level=log_level,
    style="{",
    format="[{asctime}] [{levelname:^8s}] [{name:^12s}] <{lineno:^4d}> -- {message:s}",
    datefmt="%H:%M:%S",
)

In [5]:
############################# load config file #############################

# Alternative config (also try it with a fresh training run to check whether
# it takes a long time):
# CONFIG_FILE = os.path.join("config_files", "config.ini")
CONFIG_FILE = os.path.join("config_files", "config_openai_unet.ini")

# The parser is created even when the file is missing, so that later lookups
# which provide a `fallback` value can still be attempted.
c = configparser.ConfigParser()
if not os.path.isfile(CONFIG_FILE):
    logger.warning(
        f"No config file found at {CONFIG_FILE}, trying to use fallback values."
    )
else:
    logger.info(f"Loading {CONFIG_FILE}")
    c.read(CONFIG_FILE)

[14:43:31] [  INFO  ] [  __main__  ] < 7  > -- Loading config_files\config_openai_unet.ini


In [6]:
############################## set parameters ##############################

# All run settings are gathered in `params`. Keys are inserted in the order in
# which they are later logged by the "print parameters" cell.
# Lookups without a `fallback` are required: configparser raises if the
# section or option is missing.
params = {}

# training params
params["run_name"] = c.get("training", "run_name", fallback="TEST")  # Run name
params["load_run_name"] = c.get("training", "load_run_name", fallback=None)
params["load_epoch"] = c.getint("training", "load_epoch", fallback=0)
params["train_epochs"] = c.getint("training", "train_epochs", fallback=5000)
params["criterion"] = c.get("training", "criterion", fallback="nll_loss")
params["lr_start"] = c.getfloat("training", "lr_start", fallback=1e-4)
params["ignore_frames_loss"] = c.getint("training", "ignore_frames_loss", fallback=0)
# loss-specific settings are only stored when the selected criterion uses them
if params["criterion"] in ("focal_loss", "sum_losses"):
    params["gamma"] = c.getfloat("training", "gamma", fallback=2.0)
if params["criterion"] == "sum_losses":
    params["w"] = c.getfloat("training", "w", fallback=0.5)
params["cuda"] = c.getboolean("training", "cuda")  # required
params["scheduler"] = c.get("training", "scheduler", fallback=None)
if params["scheduler"] == "step":
    params["scheduler_step_size"] = c.getint("training", "step_size")
    # NOTE(review): this reads the same "gamma" option as the focal-loss gamma
    # above -- confirm that sharing one config key for both is intended.
    params["scheduler_gamma"] = c.getfloat("training", "gamma")
params["optimizer"] = c.get("training", "optimizer", fallback="adam")

# dataset params
params["relative_path"] = c.get("dataset", "relative_path")  # required
params["dataset_size"] = c.get("dataset", "dataset_size", fallback="full")
params["batch_size"] = c.getint("dataset", "batch_size", fallback=1)
# hard-coded to 0; config lookup kept for reference:
# c.getint("dataset", "num_workers", fallback=1)
params["num_workers"] = 0
params["data_duration"] = c.getint("dataset", "data_duration")  # required
params["data_step"] = c.getint("dataset", "data_step", fallback=1)
# the step used at test time comes from the [testing] section (required)
params["testing_data_step"] = c.getint("testing", "data_step")
params["data_smoothing"] = c.get("dataset", "data_smoothing", fallback="2d")
params["norm_video"] = c.get("dataset", "norm_video", fallback="chunk")
params["remove_background"] = c.get(
    "dataset", "remove_background", fallback="average"
)
params["only_sparks"] = c.getboolean("dataset", "only_sparks", fallback=False)
params["noise_data_augmentation"] = c.getboolean(
    "dataset", "noise_data_augmentation", fallback=False
)
params["sparks_type"] = c.get("dataset", "sparks_type", fallback="peaks")
params["inference"] = c.get("dataset", "inference", fallback="overlap")

# UNet params
params["nn_architecture"] = c.get(
    "network", "nn_architecture", fallback="pablos_unet"
)
# architecture-specific options are only read for the architecture using them
if params["nn_architecture"] == "unet_lstm":
    params["bidirectional"] = c.getboolean("network", "bidirectional")
params["unet_steps"] = c.getint("network", "unet_steps")  # required
params["first_layer_channels"] = c.getint("network", "first_layer_channels")  # required
params["num_channels"] = c.getint("network", "num_channels", fallback=1)
# `getboolean` returns its fallback unchanged, so use a real bool here
# (the previous fallback was the integer 1, which is equally truthy).
params["dilation"] = c.getboolean("network", "dilation", fallback=True)
params["border_mode"] = c.get("network", "border_mode")  # required
params["batch_normalization"] = c.get(
    "network", "batch_normalization", fallback="none"
)
params["temporal_reduction"] = c.getboolean(
    "network", "temporal_reduction", fallback=False
)
params["initialize_weights"] = c.getboolean(
    "network", "initialize_weights", fallback=False
)
if params["nn_architecture"] == "github_unet":
    params["attention"] = c.getboolean("network", "attention")
    params["up_mode"] = c.get("network", "up_mode")

In [7]:
############################# configure wandb ##############################

wandb_log = c.getboolean("general", "wandb_enable", fallback=False)
if wandb_log:
    # Only resume the wandb run when continuing this very run, i.e. a
    # checkpoint epoch is given and no other run is named as starting point.
    resuming_same_run = params["load_epoch"] > 0 and params["load_run_name"] is None
    resume = "must" if resuming_same_run else None

    wandb.init(
        project=wandb_project_name,
        # name=params["run_name"],
        id=params["run_name"],  # the run name doubles as the wandb run id
        resume=resume,
        notes=c.get("general", "wandb_notes", fallback=None),
        allow_val_change=True,
    )
    logging.getLogger("wandb").setLevel(logging.DEBUG)
    # wandb.save(CONFIG_FILE)

In [8]:
############################# print parameters #############################

logger.info("Command parameters:")
for param_name, param_value in params.items():
    logger.info(f"{param_name:>18s}: {param_value}")
    if wandb_log:
        # mirror the parameter into the wandb run config
        wandb.config[param_name] = param_value
# TODO: add every parameter that needs to be printed

[14:43:31] [  INFO  ] [  __main__  ] < 3  > -- Command parameters:
[14:43:31] [  INFO  ] [  __main__  ] < 5  > --           run_name: final_model_shallow
[14:43:31] [  INFO  ] [  __main__  ] < 5  > --      load_run_name: None
[14:43:31] [  INFO  ] [  __main__  ] < 5  > --         load_epoch: 0
[14:43:31] [  INFO  ] [  __main__  ] < 5  > --       train_epochs: 100000
[14:43:31] [  INFO  ] [  __main__  ] < 5  > --          criterion: lovasz_softmax
[14:43:31] [  INFO  ] [  __main__  ] < 5  > --           lr_start: 0.0001
[14:43:31] [  INFO  ] [  __main__  ] < 5  > -- ignore_frames_loss: 6
[14:43:31] [  INFO  ] [  __main__  ] < 5  > --               cuda: True
[14:43:31] [  INFO  ] [  __main__  ] < 5  > --          scheduler: None
[14:43:31] [  INFO  ] [  __main__  ] < 5  > --          optimizer: adam
[14:43:31] [  INFO  ] [  __main__  ] < 5  > --      relative_path: ../data/sparks_dataset
[14:43:31] [  INFO  ] [  __main__  ] < 5  > --       dataset_size: minimal
[14:43:31] [  INFO  ] [  

In [9]:
############################ configure datasets ############################

# select samples that are used for training and testing
if params["dataset_size"] == "full":
    train_sample_ids = ["01","02","03","04","06","07","08","09",
                        "11","12","13","14","16","17","18","19",
                        "21","22","23","24","27","28","29","30",
                        "33","35","36","38","39","41","42","43",
                        "44","46"]
    test_sample_ids = ["05", "10", "15", "20", "25", "32", "34", "40", "45"]
elif params["dataset_size"] == "minimal":
    # single training / testing video, handy for quick smoke tests
    train_sample_ids = ["01"]
    test_sample_ids = ["34"]
else:
    # Raise instead of calling exit(): inside a notebook exit() asks the
    # kernel to shut down, which discards all state. An exception stops
    # the cell and keeps the kernel alive.
    invalid_size_msg = f"{params['dataset_size']} is not a valid dataset size."
    logger.error(invalid_size_msg)
    raise ValueError(invalid_size_msg)

# detect CUDA devices
# CUDA is used only when it is both requested in the config and actually
# available. Previously a missing GPU produced `torch.device("cpu")` with
# `pin_memory=True`; a `torch.device` never compares equal to the string
# "cpu", so string comparisons against `device` treated that case as a GPU run.
use_cuda = params["cuda"] and torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
    pin_memory = True
else:
    # a plain "cpu" string keeps string comparisons against `device` working
    device = "cpu"
    pin_memory = False
n_gpus = torch.cuda.device_count()
logger.info(f"Using torch device {device}, with {n_gpus} GPUs")

# set if temporal reduction is used
if params["temporal_reduction"]:
    logger.info(f"Using temporal reduction with {params['num_channels']} channels")

# normalize whole videos or chunks individually
# (purely informational: one message per known normalisation mode, nothing
# is logged for any other value)
norm_video_messages = {
    "chunk": "Normalizing each chunk using min and max",
    "movie": "Normalizing whole video using min and max",
    "abs_max": "Normalizing whole video using 16-bit absolute max",
}
if params["norm_video"] in norm_video_messages:
    logger.info(norm_video_messages[params["norm_video"]])

# initialize training dataset
dataset_path = os.path.realpath(params["relative_path"])
assert os.path.isdir(dataset_path), f'"{dataset_path}" is not a directory'
logger.info(f"Using {dataset_path} as dataset root path")

# keyword arguments shared by both dataset classes
train_dataset_kwargs = dict(
    base_path=dataset_path,
    sample_ids=train_sample_ids,
    testing=False,
    duration=params["data_duration"],
    smoothing=params["data_smoothing"],
    remove_background=params["remove_background"],
    temporal_reduction=params["temporal_reduction"],
    num_channels=params["num_channels"],
    normalize_video=params["norm_video"],
    only_sparks=params["only_sparks"],
    sparks_type=params["sparks_type"],
    ignore_index=ignore_index,
    inference=None,
)

if params["nn_architecture"] in ['pablos_unet', 'github_unet', 'openai_unet']:
    # only the non-LSTM dataset receives a `step` argument
    dataset = SparkDataset(step=params["data_step"], **train_dataset_kwargs)
elif params["nn_architecture"] == 'unet_lstm':
    dataset = SparkDatasetLSTM(**train_dataset_kwargs)
else:
    # Raise instead of calling exit(), which would shut the notebook kernel
    # down; an exception stops the cell and keeps the kernel state.
    invalid_arch_msg = f"{params['nn_architecture']} is not a valid nn architecture."
    logger.error(invalid_arch_msg)
    raise ValueError(invalid_arch_msg)

# transforms are applied when getting a sample from the dataset
if params["noise_data_augmentation"]:
    train_transform = random_flip_noise
else:
    train_transform = random_flip
dataset = unet.TransformedDataset(dataset, train_transform)

logger.info(f"Samples in training dataset: {len(dataset)}")

# initialize testing dataset
# NOTE(review): `pattern_test_filenames` is not used anywhere in the visible
# notebook; kept in case other code relies on it.
pattern_test_filenames = os.path.join(
    dataset_path, "videos_test", "[0-9][0-9]_video.tif"
)

# keyword arguments shared by both dataset classes; one dataset is built per
# test sample, so `sample_ids` is supplied separately for each of them
test_dataset_kwargs = dict(
    base_path=dataset_path,
    testing=True,
    duration=params["data_duration"],
    smoothing=params["data_smoothing"],
    remove_background=params["remove_background"],
    temporal_reduction=params["temporal_reduction"],
    num_channels=params["num_channels"],
    normalize_video=params["norm_video"],
    only_sparks=params["only_sparks"],
    sparks_type=params["sparks_type"],
    ignore_index=ignore_index,
    inference=params["inference"],
)

if params["nn_architecture"] in ['pablos_unet', 'github_unet', 'openai_unet']:
    # only the non-LSTM dataset receives `step` and `ignore_frames`
    testing_datasets = [
        SparkDataset(
            sample_ids=[sample_id],
            step=params["testing_data_step"],
            ignore_frames=params["ignore_frames_loss"],
            **test_dataset_kwargs,
        )
        for sample_id in test_sample_ids
    ]
elif params["nn_architecture"] == 'unet_lstm':
    testing_datasets = [
        SparkDatasetLSTM(sample_ids=[sample_id], **test_dataset_kwargs)
        for sample_id in test_sample_ids
    ]
else:
    # Raise instead of calling exit(), which would shut the notebook kernel
    # down; an exception stops the cell and keeps the kernel state.
    invalid_arch_msg = f"{params['nn_architecture']} is not a valid nn architecture."
    logger.error(invalid_arch_msg)
    raise ValueError(invalid_arch_msg)

for i, tds in enumerate(testing_datasets):
    logger.info(f"Testing dataset {i} contains {len(tds)} samples")

# initialize data loaders
# The training loader reshuffles the samples; `pin_memory` was chosen in the
# device-detection step above.
loader_options = {
    "batch_size": params["batch_size"],
    "num_workers": params["num_workers"],
    "pin_memory": pin_memory,
    "shuffle": True,
}
dataset_loader = DataLoader(dataset, **loader_options)

[14:43:31] [  INFO  ] [  __main__  ] < 27 > -- Using torch device cuda, with 1 GPUs
[14:43:31] [  INFO  ] [  __main__  ] < 39 > -- Normalizing whole video using 16-bit absolute max
[14:43:31] [  INFO  ] [  __main__  ] < 44 > -- Using C:\Users\dotti\sparks_project\data\sparks_dataset as dataset root path
[14:43:32] [ DEBUG  ] [  datasets  ] <293 > -- Added padding of 12 frames to video with unsuitable duration
[14:43:32] [  INFO  ] [  __main__  ] < 88 > -- Samples in training dataset: 15
[14:43:32] [ DEBUG  ] [  datasets  ] <189 > -- Computing spark peaks...
[14:43:35] [ DEBUG  ] [  datasets  ] <196 > -- Sample 34 contains 16 sparks.
[14:43:35] [ DEBUG  ] [  datasets  ] <293 > -- Added padding of 24 frames to video with unsuitable duration
[14:43:35] [  INFO  ] [  __main__  ] <140 > -- Testing dataset 0 contains 28 samples


In [10]:
# Draw one (input, target) batch from the training loader and move both
# tensors to the selected device.
first_batch = next(iter(dataset_loader))
x, y = (tensor.to(device, non_blocking=True) for tensor in first_batch)

In [11]:
import unet_openai

In [12]:
# Build the exploratory 3D `unet_openai.UNetModel` (1 input channel, 4 output
# classes, no attention, no dropout) and move it straight to the device.
model = unet_openai.UNetModel(
        # image_size=x.shape[1:],
        dims=3,
        in_channels=1,
        out_channels=4,
        model_channels=8,
        num_res_blocks=2,
        attention_resolutions=[],
        dropout=0.0,
    ).to(device, non_blocking=True)

In [13]:
model_parameters = filter(lambda p: p.requires_grad, model.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])

In [14]:
params

2481860

In [15]:
# Forward pass with mixed precision
# `torch.autocast(device_type="cuda")` replaces the deprecated
# `torch.cuda.amp.autocast()` spelling.
# NOTE: this rebinds `y` (the target batch loaded above) to the network output.
with torch.autocast(device_type="cuda"):  # autocast as a context manager
    y = model(x[:,None])

In [16]:
# Display the shape of the network output computed in the previous cell.
y.shape

torch.Size([2, 4, 64, 64, 512])

In [None]:
# Maps the config's batch-normalisation names to booleans. It is not consulted
# in this cell, where batch normalisation is hard-coded off.
batch_norm = {"batch": True, "none": False}

# Stand-alone baseline U-Net with hard-coded settings (independent of `params`).
unet_config = unet.UNetConfig(
    ndims=3,
    num_input_channels=1,
    num_classes=4,
    steps=4,
    first_layer_channels=8,
    border_mode='same',
    dilation=False,
    batch_normalization=False,
)

network = unet.UNetClassifier(unet_config)

In [None]:
model_parameters = filter(lambda p: p.requires_grad, network.parameters())
params = sum([np.prod(p.size()) for p in model_parameters])

In [None]:
params

1411796

In [None]:
############################## configure UNet ##############################

# Build `network` for the architecture selected in the config. Every accepted
# architecture has its own branch and unknown names raise, so the cell can
# never silently reuse a `network` left over from an earlier cell.
if params["nn_architecture"] == "pablos_unet":

    batch_norm = {"batch": True, "none": False}

    unet_config = unet.UNetConfig(
        steps=params["unet_steps"],
        first_layer_channels=params["first_layer_channels"],
        num_classes=num_classes,
        ndims=ndims,
        dilation=params["dilation"],
        border_mode=params["border_mode"],
        batch_normalization=batch_norm[params["batch_normalization"]],
        num_input_channels=params["num_channels"],
    )

    if not params["temporal_reduction"]:
        network = unet.UNetClassifier(unet_config)
    else:
        assert (
            params["data_duration"] % params["num_channels"] == 0
        ), "using temporal reduction chunks_duration must be a multiple of num_channels"
        network = TempRedUNet(unet_config)

elif params["nn_architecture"] == "github_unet":
    network = UNet(
        in_channels=params["num_channels"],
        out_channels=num_classes,
        n_blocks=params["unet_steps"] + 1,
        start_filts=params["first_layer_channels"],
        up_mode=params["up_mode"],
        # up_mode = 'transpose', # try out the different options
        # up_mode='resizeconv_nearest',  # Enable to avoid checkerboard artifacts
        merge_mode="concat",  # default, reportedly works better
        # planar_blocks=(0,), # maybe find out what this is and test it ??
        activation="relu",
        normalization=params[
            "batch_normalization"
        ],  # probably 'none' in Pablo's implementation
        attention=params["attention"],  # maybe worth testing with 'True' ??
        # full_norm=False,  # Uncomment to restore old sparse normalization scheme
        dim=ndims,
        conv_mode=params["border_mode"],  # 'valid' apparently has some advantages...
    )

elif params["nn_architecture"] == "unet_lstm":

    batch_norm = {"batch": True, "none": False}
    ndims = 2 # convolutions applied to single frames

    unet_config = unet.UNetConfig(
        steps=params["unet_steps"],
        first_layer_channels=params["first_layer_channels"],
        num_classes=num_classes,
        ndims=ndims,
        dilation=params["dilation"],
        border_mode=params["border_mode"],
        batch_normalization=batch_norm[params["batch_normalization"]],
        #num_input_channels=params["data_duration"], # frames seen as modalities
        num_input_channels=params["num_channels"]
    )

    network = UNetConvLSTM(unet_config, bidirectional=params["bidirectional"])

elif params["nn_architecture"] == "openai_unet":
    # 'openai_unet' is accepted by the dataset and criterion cells, so it
    # needs a network as well.
    # NOTE(review): the fixed settings (2 res blocks, no attention, no dropout)
    # mirror the exploratory `unet_openai.UNetModel` cell above, and
    # `first_layer_channels` is used as `model_channels` -- confirm.
    import unet_openai

    network = unet_openai.UNetModel(
        in_channels=params["num_channels"],
        model_channels=params["first_layer_channels"],
        out_channels=num_classes,
        num_res_blocks=2,
        attention_resolutions=[],
        dropout=0.0,
        dims=ndims,
    )

else:
    invalid_arch_msg = f"{params['nn_architecture']} is not a valid nn architecture."
    logger.error(invalid_arch_msg)
    raise ValueError(invalid_arch_msg)

# Compare the device *type*: `device` may be a string or a `torch.device`, and
# `torch.device("cpu") != "cpu"` is True, which used to wrap CPU runs in
# DataParallel as well.
if torch.device(device).type != "cpu":
    network = nn.DataParallel(network).to(device)
    torch.backends.cudnn.benchmark = True

if wandb_log:
    wandb.watch(network)

if params["initialize_weights"]:
    logger.info("Initializing UNet weights...")
    network.apply(weights_init)

#torch.compile(network, mode="default", backend="inductor")
# does not work on windows

In [None]:
########################### initialize training ############################

# Optimizer selected by name; both options start from the configured
# learning rate.
if params["optimizer"] == "adam":
    optimizer = optim.Adam(network.parameters(), lr=params["lr_start"])
elif params["optimizer"] == "adadelta":
    optimizer = optim.Adadelta(network.parameters(), lr=params["lr_start"])
else:
    # Raise instead of calling exit(), which would shut the notebook kernel
    # down; an exception stops the cell and keeps the kernel state.
    invalid_optimizer_msg = f"{params['optimizer']} is not a valid optimizer."
    logger.error(invalid_optimizer_msg)
    raise ValueError(invalid_optimizer_msg)

# Only the "step" scheduler is handled; any other value disables scheduling.
if params["scheduler"] == "step":
    scheduler = optim.lr_scheduler.StepLR(
        optimizer,
        step_size=params["scheduler_step_size"],
        gamma=params["scheduler_gamma"],
    )
else:
    scheduler = None

# put the network in training mode
network.train()

# everything produced by this run is stored below <output_relative_path>/<run_name>
output_path = os.path.join(output_relative_path, params["run_name"])
logger.info(f"Output directory: {output_path}")

summary_writer = SummaryWriter(os.path.join(output_path, "summary"), purge_step=0)

# Directory of the run whose checkpoint is loaded, if one was configured.
# `is not None` is the idiomatic identity test for None.
if params["load_run_name"] is not None:
    load_path = os.path.join(output_relative_path, params["load_run_name"])
    logger.info(f"Model loaded from directory: {load_path}")
else:
    load_path = None

# class weights
# (only computed for the criteria that take per-class weights)
if params["criterion"] in ["nll_loss", "focal_loss", "sum_losses"]:
    class_weights = compute_class_weights(dataset)
    logger.info(
        "Using class weights: {}".format(
            ", ".join(str(w.item()) for w in class_weights)
        )
    )

# Select the loss. Unknown criterion or architecture names raise instead of
# leaving `criterion` undefined (or stale from a previous run of the cell).
if params["criterion"] == "nll_loss":
    criterion = nn.NLLLoss(
        ignore_index=ignore_index, weight=class_weights.to(device)
    )
elif params["criterion"] == "focal_loss":
    # NOTE(review): unlike NLLLoss above, the weights are passed without
    # `.to(device)` -- confirm FocalLoss moves them itself.
    criterion = FocalLoss(
        reduction="mean",
        ignore_index=ignore_index,
        alpha=class_weights,
        gamma=params["gamma"],
    )
elif params["criterion"] == "lovasz_softmax":
    # the 3D variant is used for the non-LSTM architectures
    if params["nn_architecture"] in ['pablos_unet', 'github_unet', 'openai_unet']:
        criterion = LovaszSoftmax3d(
            classes="present", per_image=False, ignore=ignore_index
        )
    elif params["nn_architecture"] == 'unet_lstm':
        criterion = LovaszSoftmax(
            classes="present", per_image=False, ignore=ignore_index
        )
    else:
        invalid_arch_msg = (
            f"{params['nn_architecture']} is not a valid nn architecture."
        )
        logger.error(invalid_arch_msg)
        raise ValueError(invalid_arch_msg)
elif params["criterion"] == "sum_losses":
    criterion = SumFocalLovasz(
        classes="present",
        per_image=False,
        ignore=ignore_index,
        alpha=class_weights,
        gamma=params["gamma"],
        reduction="mean",
        w=params["w"],
    )
elif params["criterion"] == "dice_loss":
    softmax = nn.Softmax(dim=1)
    criterion = mySoftDiceLoss(apply_nonlin=softmax,
                               batch_dice=True,
                               do_bg=False)
else:
    invalid_criterion_msg = f"{params['criterion']} is not a valid criterion."
    logger.error(invalid_criterion_msg)
    raise ValueError(invalid_criterion_msg)

# directory where predicted class movies are saved
preds_output_dir = os.path.join(output_path, "predictions")
os.makedirs(preds_output_dir, exist_ok=True)

# generate dict of managed objects
# (the scheduler is only included when one is configured)
managed_objects = {"network": network, "optimizer": optimizer}
if scheduler is not None:
    managed_objects["scheduler"] = scheduler

# `plot_every` used to be read from the "test_every" option. It now has its
# own "plot_every" option and falls back to `test_every`, so configs without
# that option behave exactly as before.
test_every = c.getint("training", "test_every", fallback=1000)
plot_every = c.getint("training", "plot_every", fallback=test_every)

# The lambdas bind the objects built above; the single argument passed to
# them by the training manager is ignored.
trainer = myTrainingManager(
    # training items
    training_step=lambda _: training_step(
        sampler=sampler,
        network=network,
        optimizer=optimizer,
        # scaler=GradScaler(),
        scheduler=scheduler,
        device=device,
        criterion=criterion,
        dataset_loader=dataset_loader,
        ignore_frames=params["ignore_frames_loss"],
    ),
    save_every=c.getint("training", "save_every", fallback=5000),
    load_path=load_path,
    save_path=output_path,
    managed_objects=unet.managed_objects(managed_objects),
    # testing items
    test_function=lambda _: test_function(
        network=network,
        device=device,
        criterion=criterion,
        testing_datasets=testing_datasets,
        ignore_frames=params["ignore_frames_loss"],
        training_name=params["run_name"],
        output_dir=preds_output_dir,
        training_mode=True,
        debug=debug_mode,
    ),
    test_every=test_every,
    plot_every=plot_every,
    summary_writer=summary_writer,
)

[10:52:31] [  INFO  ] [  __main__  ] < 23 > -- Output directory: runs\unet_lstm_bidirectional


In [None]:
############################ init random seeds #############################
import random

# Seed Python's, PyTorch's and NumPy's global random number generators with
# the same fixed value.
random.seed(0)
torch.manual_seed(0)
np.random.seed(0)

In [None]:
#for load_epoch in [10000,20000,30000,40000,50000,60000,70000,80000,90000,100000]:
# for load_epoch in [100000]:
#     trainer.load(load_epoch)
#     logger.info("Starting final validation")
#     trainer.run_validation(wandb_log=wandb_log)
# if wandb_log:
#     wandb.finish()

In [None]:
############################## load model ##############################
# Restore the trainer state from the configured epoch; an epoch of 0 means
# nothing is loaded.
if params["load_epoch"] != 0:
    trainer.load(params["load_epoch"])


In [None]:
############################## start training ##############################

# Training only runs when it is enabled in the [general] config section.
run_training = c.getboolean("general", "training", fallback=False)
if run_training:
    # logger.info("Validate network before training")
    # trainer.run_validation(wandb_log=wandb_log)
    logger.info("Starting training")
    trainer.train(
        params["train_epochs"],
        wandb_log=wandb_log,
        print_every=c.getint("training", "print_every", fallback=100),
    )


[17:14:02] [  INFO  ] [  __main__  ] < 6  > -- Starting training
[17:14:03] [ DEBUG  ] [training_inference_tools] <156 > -- Time to load data: 0.07s
[17:14:05] [ DEBUG  ] [training_inference_tools] <165 > -- Time to run sample in UNet: 2.90s
[17:14:05] [ DEBUG  ] [training_inference_tools] <185 > -- Time to compute loss: 0.01s
[17:14:16] [ DEBUG  ] [training_inference_tools] <191 > -- Time to update parameters: 10.09s
Runtime for 1 training step: 13.074729919433594
[17:14:16] [  INFO  ] [training_inference_tools] <102 > -- Iteration 0...
[17:14:16] [  INFO  ] [training_inference_tools] <103 > -- 	Training loss: 0.7409
[17:14:16] [  INFO  ] [training_inference_tools] <104 > -- 	Time elapsed: 8.20s
[17:14:16] [ DEBUG  ] [training_inference_tools] <156 > -- Time to load data: 0.05s
[17:14:17] [ DEBUG  ] [training_inference_tools] <165 > -- Time to run sample in UNet: 1.12s
[17:14:17] [ DEBUG  ] [training_inference_tools] <185 > -- Time to compute loss: 0.04s
[17:14:36] [ DEBUG  ] [trainin

In [None]:
############################## run final validation ##############################

# The final validation only runs when it is enabled in the [general] config
# section.
run_testing = c.getboolean("general", "testing", fallback=False)
if run_testing:
    logger.info("Starting final validation")
    trainer.run_validation(wandb_log=wandb_log)

[12:05:56] [  INFO  ] [  __main__  ] < 4  > -- Starting final validation
[12:05:56] [  INFO  ] [training_inference_tools] < 46 > -- Validating network at iteration 0...
[12:05:56] [ DEBUG  ] [training_inference_tools] <590 > -- Testing function: running sample 34 in UNet
[12:16:13] [ DEBUG  ] [in_out_tools] <278 > -- Writing videos on directory c:\Users\dotti\sparks_project\sparks\runs\unet_lstm_bidirectional\predictions ..
[12:16:13] [ DEBUG  ] [training_inference_tools] <620 > -- Time to run sample 34 in UNet: 617.48 s
[12:16:13] [ DEBUG  ] [training_inference_tools] <627 > -- Testing function: re-organising annotations
[12:16:16] [ DEBUG  ] [training_inference_tools] <656 > -- Time to re-organise annotations: 3.08 s
[12:16:16] [ DEBUG  ] [training_inference_tools] <662 > -- Testing function: getting processed output (segmentation and instances)
[12:16:17] [ DEBUG  ] [data_processing_tools] <459 > -- Events detection threshold: 0.766
[12:16:26] [ DEBUG  ] [data_processing_tools] <525

In [None]:
# Close the wandb run, but only if wandb logging was enabled for this session.
if wandb_log:
    wandb.finish()


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
U-Net training loss,█▇▃▁▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
average/correctly_classified,▁
average/detected,▁
average/precision,▁
average/recall,▁
puffs/correctly_classified,▁
puffs/detected,▁
puffs/precision,▁
puffs/recall,▁
segmentation/average_IoU,▁

0,1
U-Net training loss,0.66843
average/correctly_classified,0.0
average/detected,0.0
average/precision,0.0
average/recall,0.0
puffs/correctly_classified,0.0
puffs/detected,0.0
puffs/precision,0.0
puffs/recall,0.0
segmentation/average_IoU,0.0


[15:13:18] [ DEBUG  ] [urllib3.connectionpool] <1003> -- Starting new HTTPS connection (1): o151352.ingest.sentry.io:443
[15:13:18] [ DEBUG  ] [urllib3.connectionpool] <456 > -- https://o151352.ingest.sentry.io:443 "POST /api/4504800232407040/envelope/ HTTP/1.1" 200 2


# Visualize UNet architecture

In [None]:
# # get number of trainable parameters
# num_params = sum(p.numel() for p in network.parameters() if p.requires_grad)
# logger.debug(f"Number of trainable parameters: {num_params}")
# # get dummy unet input
# batch = next(iter(dataset_loader))
# x = batch[0].to(device)
# yhat = network(x[:,None]) # Give dummy batch to forward()
# from torchviz import make_dot
# make_dot(yhat, params=dict(list(network.named_parameters()))).render("unet_model", format="png")
# a = [0,1,2,3,4,5,6,7,8,9,10,11,12,13]

# len(a[0:4])