In [3]:
import os

from azureml.core import Environment, Experiment, ScriptRunConfig, Workspace
from azureml.core.runconfig import DockerConfiguration
from dotenv import load_dotenv

load_dotenv()
AZURE_SUBSCRIPTION_ID = os.getenv("AZURE_SUBSCRIPTION_ID")
AZURE_RESOURCE_GROUP = os.getenv("AZURE_RESOURCE_GROUP")
AZURE_WORKSPACE_NAME = os.getenv("AZURE_WORKSPACE_NAME")


In [4]:
experiment_name = "oc-p8-experiment-1"
model_choices = ["unet_xception", "deeplab_v3plus"]
augment_choices = [False, True]  # [False, True]
resize_choices = [
    64,
    80,
    128,
    160,
    256,
    320,
    512,
    640,
]  # [64, 80, 128, 160, 256, 320, 512, 640, 800, 1024]
batch_size_per_resize = {
    64: 128,
    80: 128,
    128: 64,
    160: 64,
    256: 32,
    320: 32,
    512: 16,
    640: 8,
    800: 4,
    1024: 2,
}

source_directory = "./train"
train_script = "train.py"
compute_target = "ocp8-cluster-gpu"


# connect to your workspace
ws = Workspace(
    subscription_id=AZURE_SUBSCRIPTION_ID,
    resource_group=AZURE_RESOURCE_GROUP,
    workspace_name=AZURE_WORKSPACE_NAME,
)


try:
    env = Environment.get(workspace=ws, name=experiment_name)
except:
    env = Environment(name=experiment_name)
    env.docker.base_image = None
    env.docker.base_dockerfile = "./train/Dockerfile"
    env.python.user_managed_dependencies = True
    env.register(workspace=ws)


for resize in resize_choices:
    for model in model_choices:
        for augment in augment_choices:
            batch_size = batch_size_per_resize[resize]
            run_config = ScriptRunConfig(
                source_directory=source_directory,
                script=train_script,
                arguments=[
                    "--experiment",
                    experiment_name,
                    "--model",
                    model,
                    "--resize",
                    resize,
                    "--batch",
                    batch_size,
                    "--augment" if augment else "--no-augment",
                ],
                compute_target=compute_target,
                environment=env,
                docker_runtime_config=DockerConfiguration(
                    use_docker=True,
                    arguments=[
                        # see : https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.filedataset?view=azure-ml-py#azureml-data-filedataset-mount
                        "--cap-add",
                        "SYS_ADMIN",
                        "--device",
                        "/dev/fuse",
                    ],
                ),
            )

            # create an experiment
            exp = Experiment(workspace=ws, name=experiment_name)

            # submit the run configuration to start the job
            run = exp.submit(run_config)
            run.tag("model", model)
            run.tag("augment", augment)
            run.tag("resize", resize)

            print(f"Submitted Run : {run.display_name}")
            print(f"Tags : {run.get_tags()}")


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 80)


Submitted Run : salmon_king_616ps0p3
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '80'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 80)


Submitted Run : silly_beard_qx97y99s
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '80'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 80)


Submitted Run : silly_map_lfxrvyd5
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'False', 'resize': '80'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 80)


Submitted Run : jovial_crayon_7z6yn44s
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'True', 'resize': '80'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 128)


Submitted Run : joyful_leather_qn8sjf0l
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '128'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 128)


Submitted Run : olive_rocket_3b93c6xn
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '128'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 128)


Submitted Run : neat_fennel_hrqp42s8
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'False', 'resize': '128'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 128)


Submitted Run : jolly_hair_25x2fg9f
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'True', 'resize': '128'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 160)


Submitted Run : busy_quill_sdfsmr9s
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '160'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 160)


Submitted Run : sweet_boot_lg12z83p
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '160'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 160)


Submitted Run : quirky_pea_vvwlfqx8
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'False', 'resize': '160'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 160)


Submitted Run : frank_jelly_2msyczjf
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'True', 'resize': '160'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 256)


Submitted Run : ashy_cushion_wnr1qsln
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '256'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 256)


Submitted Run : eager_salt_c1dyym4z
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '256'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 256)


Submitted Run : happy_oven_jphzq6wk
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'False', 'resize': '256'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 256)


Submitted Run : epic_zoo_ysxzkc9y
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'True', 'resize': '256'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 320)


Submitted Run : ivory_rose_8w96wchr
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '320'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 320)


Submitted Run : joyful_cart_5vb9vybx
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '320'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 320)


Submitted Run : modest_boot_tjtqn5qj
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'False', 'resize': '320'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 320)


Submitted Run : goofy_sugar_y7xf8tfl
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'True', 'resize': '320'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 512)


Submitted Run : great_machine_t8nj6jds
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '512'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 512)


Submitted Run : stoic_fennel_6sjdqbzg
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '512'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 512)


Submitted Run : maroon_night_k4mv2sgf
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'False', 'resize': '512'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 512)


Submitted Run : purple_brush_h814l12y
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'True', 'resize': '512'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 640)


Submitted Run : loyal_salt_2xpt0ry1
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '640'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 640)


Submitted Run : boring_celery_gc1l6ljn
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '640'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 640)


Submitted Run : joyful_gold_y5kmw9rh
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'False', 'resize': '640'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 640)


Submitted Run : modest_music_d5730djv
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'deeplab_v3plus', 'augment': 'True', 'resize': '640'}
