# DaCapo

## Imports

In [4]:
from pathlib import PosixPath
from dacapo.experiments.datasplits.datasets.arrays import (
    BinarizeArrayConfig,
    IntensitiesArrayConfig,
    MissingAnnotationsMaskConfig,
    ResampledArrayConfig,
    ZarrArrayConfig,
)
from dacapo.experiments.tasks import DistanceTaskConfig
from dacapo.experiments.architectures import CNNectomeUNetConfig
from dacapo.experiments.trainers import GunpowderTrainerConfig
from dacapo.experiments.trainers.gp_augments import (
    ElasticAugmentConfig,
    GammaAugmentConfig,
    IntensityAugmentConfig,
    IntensityScaleShiftAugmentConfig,
)
from dacapo.experiments.datasplits import TrainValidateDataSplitConfig
from dacapo.experiments.datasplits.datasets import RawGTDatasetConfig
from dacapo.experiments.starts import StartConfig
from dacapo.experiments import RunConfig

## Config Store

In [5]:
# Create the config store handle. The cells below save every config they build
# through its store_*_config methods (task, architecture, trainer, datasplit, run).
# NOTE(review): which storage backend is used is decided inside
# create_config_store — presumably from the local DaCapo settings; confirm.
from dacapo.store.create_store import create_config_store
config_store = create_config_store()

## Task

In [6]:
# Channel names handed to the distance task. The same names are used as the
# grouping keys of the ground-truth/mask arrays in the datasplit cell, so the
# two lists have to stay in sync.
organelle_channels = [
    "ecs",
    "plasma_membrane",
    "mito",
    "mito_membrane",
    "vesicle",
    "vesicle_membrane",
    "mvb",
    "mvb_membrane",
    "er",
    "er_membrane",
    "eres",
    "nucleus",
    "microtubules",
    "microtubules_out",
]

# NOTE(review): the distance parameters are presumably expressed in world
# units (nm) — confirm against DistanceTaskConfig before changing them.
task_config = DistanceTaskConfig(
    name="example_distances_4nm_many",
    channels=organelle_channels,
    mask_distances=True,
    clip_distance=40.0,
    tol_distance=40.0,
    scale_factor=80.0,
)

# Save the task config in the config store.
config_store.store_task_config(task_config)

## Architecture

In [7]:
# U-Net settings, collected as keyword arguments first and unpacked into the
# config below.
unet_settings = dict(
    name="example_upsample-unet",
    # Feature maps: 1 input channel, 12 at the first level, multiplied by 6
    # per level, 72 on the output side.
    fmaps_in=1,
    num_fmaps=12,
    fmap_inc_factor=6,
    fmaps_out=72,
    # Spatial shapes (in voxels) for training and the extra extent used at
    # evaluation time.
    input_shape=(216, 216, 216),
    eval_shape_increase=(72, 72, 72),
    # Three downsampling steps, followed by a single 2x upsampling step.
    # NOTE(review): the extra upsampling presumably yields output at twice
    # the raw resolution (the "4nm" in the config names) — confirm.
    downsample_factors=[(2, 2, 2), (3, 3, 3), (3, 3, 3)],
    upsample_factors=[(2, 2, 2)],
    constant_upsample=True,
    # Kernel sizes are left unset (None).
    kernel_size_down=None,
    kernel_size_up=None,
    padding="valid",
)
architecture_config = CNNectomeUNetConfig(**unet_settings)

# Save the architecture config in the config store.
config_store.store_architecture_config(architecture_config)

## Trainer

In [8]:
# Augmentations, in the order in which they are handed to the trainer.
elastic_augment = ElasticAugmentConfig(
    control_point_spacing=[100, 100, 100],
    control_point_displacement_sigma=[10.0, 10.0, 10.0],
    # Upper bound of the rotation interval is pi / 2.
    rotation_interval=(0.0, 1.5707963267948966),
    subsample=8,
    uniform_3d_rotation=True,
)
intensity_augment = IntensityAugmentConfig(
    scale=(0.25, 1.75),
    shift=(-0.5, 0.35),
    clip=True,
)
gamma_augment = GammaAugmentConfig(gamma_range=(0.5, 2.0))
# NOTE(review): scale 2 / shift -1 presumably maps [0, 1] intensities onto
# [-1, 1] — confirm against IntensityScaleShiftAugmentConfig.
rescale_augment = IntensityScaleShiftAugmentConfig(scale=2.0, shift=-1.0)

trainer_config = GunpowderTrainerConfig(
    name="example_default",
    batch_size=2,
    learning_rate=0.0001,
    num_data_fetchers=20,
    augments=[
        elastic_augment,
        intensity_augment,
        gamma_augment,
        rescale_augment,
    ],
    snapshot_interval=10000,
    min_masked=0.05,
    clip_raw=True,
)

# Save the trainer config in the config store.
config_store.store_trainer_config(trainer_config)

## Datasplit

In [10]:
# All crops of this datasplit come from the same sample and share the same raw
# volume, ground-truth container and label grouping table. They are defined
# once here so that the gt and mask configs of a crop cannot drift apart.
_SAMPLE = "jrc_mus-liver-zon-1"
_RAW_CONTAINER = "/nrs/cellmap/data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5"
_GT_CONTAINER = (
    "/nrs/cellmap/ackermand/data/tmp_data/jrc_mus-liver-zon-1/jrc_mus-liver-zon-1.n5"
)

# (channel name, label ids merged into that channel). The channel names match
# the channels of the distance task configured earlier in this notebook.
_LABEL_GROUPINGS = (
    ("ecs", (1,)),
    ("plasma_membrane", (2,)),
    ("mito", (3, 4, 5)),
    ("mito_membrane", (3,)),
    ("vesicle", (8, 9)),
    ("vesicle_membrane", (8,)),
    ("mvb", (10, 11)),
    ("mvb_membrane", (10,)),
    ("er", (16, 17, 18, 19, 20, 21, 22, 23)),
    ("er_membrane", (16, 18, 20)),
    ("eres", (18, 19)),
    ("nucleus", (20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 37)),
    ("microtubules", (30, 31, 36)),
    ("microtubules_out", (30,)),
)


def _groupings():
    """Return a fresh ``[(name, [ids])]`` list so configs never share lists."""
    return [(name, list(ids)) for name, ids in _LABEL_GROUPINGS]


def _raw_array():
    """Build the raw-intensity array config shared by every crop."""
    return IntensitiesArrayConfig(
        name=f"{_SAMPLE}_raw",
        source_array_config=ZarrArrayConfig(
            name=f"{_SAMPLE}_raw_uint8",
            file_name=PosixPath(_RAW_CONTAINER),
            dataset="em/fibsem-uint8/s0",
            snap_to_grid=(8, 8, 8),
            axes=None,
        ),
        # uint8 value range of the source data.
        min=0.0,
        max=255.0,
    )


def _label_array(crop: int, resample: bool):
    """Build the label array config of one crop.

    With ``resample=True`` the zarr array is wrapped in a
    ``ResampledArrayConfig`` that upsamples by 2 along every axis.
    """
    labels = ZarrArrayConfig(
        name=f"{_SAMPLE}_{crop}_gt",
        file_name=PosixPath(_GT_CONTAINER),
        # The double slash is kept exactly as in the original dataset path.
        dataset=f"volumes/groundtruth/crop{crop}/labels//all",
        snap_to_grid=(8, 8, 8),
        axes=None,
    )
    if not resample:
        return labels
    return ResampledArrayConfig(
        name=f"{_SAMPLE}_{crop}_gt_resampled_4nm",
        source_array_config=labels,
        upsample=(2, 2, 2),
        # NOTE(review): zeros presumably mean "no downsampling" — confirm
        # against ResampledArrayConfig.
        downsample=(0, 0, 0),
        # NOTE(review): passed as a bool in the original config; presumably
        # equivalent to interpolation order 0 — confirm.
        interp_order=False,
    )


def _crop_dataset(crop: int, resample_gt: bool = False):
    """Build the raw / ground-truth / mask dataset config of one crop.

    The gt and mask configs each get their own label array config and their
    own copy of the grouping table, both derived from the same definitions.
    """
    dataset_name = f"{_SAMPLE}_{crop}_many_4nm"
    return RawGTDatasetConfig(
        name=dataset_name,
        weight=1,
        raw_config=_raw_array(),
        gt_config=BinarizeArrayConfig(
            name=f"{dataset_name}_gt",
            source_array_config=_label_array(crop, resample_gt),
            groupings=_groupings(),
            background=0,
        ),
        mask_config=MissingAnnotationsMaskConfig(
            name=f"{dataset_name}_mask",
            source_array_config=_label_array(crop, resample_gt),
            groupings=_groupings(),
        ),
        sample_points=None,
    )


datasplit_config = TrainValidateDataSplitConfig(
    name="example_jrc_mus-liver-zon-1_many_4nm",
    train_configs=[
        _crop_dataset(266),
        _crop_dataset(267),
        _crop_dataset(268),
    ],
    validate_configs=[
        # Crop 270 is the only crop whose labels are resampled (2x upsampling).
        _crop_dataset(270, resample_gt=True),
        _crop_dataset(272),
        _crop_dataset(279),
    ],
)
# Save the datasplit config in the config store.
config_store.store_datasplit_config(datasplit_config)

## Run

In [12]:
# Starting point for the runs below. With a start config set, the run names
# are tagged "finetuned"; set start_config to None to tag them "scratch".
# NOTE(review): the two positional arguments are presumably the name of the
# run to start from and the checkpoint criterion — confirm against StartConfig.
start_config = StartConfig(
    "setup04",
    "best",
)
iterations = 200000
validation_interval = 5000
repetitions = 3

# Everything in the run name except the repetition suffix is the same for all
# repetitions, so it is built once outside the loop.
run_name_prefix = "_".join(
    [
        "example",
        "scratch" if start_config is None else "finetuned",
        task_config.name,
        architecture_config.name,
        trainer_config.name,
        datasplit_config.name,
    ]
)

# One run config per repetition; each is saved in the config store and also
# collected in run_configs (the original created this list but never filled it).
run_configs = []
for i in range(repetitions):
    run_config = RunConfig(
        name=f"{run_name_prefix}__{i}",
        task_config=task_config,
        architecture_config=architecture_config,
        trainer_config=trainer_config,
        datasplit_config=datasplit_config,
        num_iterations=iterations,
        validation_interval=validation_interval,
        repetition=i,
        start_config=start_config,
    )
    config_store.store_run_config(run_config)
    run_configs.append(run_config)
    print(run_config.name)

example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__0
example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__1
example_finetuned_example_distances_4nm_many_example_upsample-unet_example_default_example_jrc_mus-liver-zon-1_many_4nm__2
