In [1]:
import os

from azureml.core import Environment, Experiment, ScriptRunConfig, Workspace
from azureml.core.runconfig import DockerConfiguration
from dotenv import load_dotenv

load_dotenv()
AZURE_SUBSCRIPTION_ID = os.getenv("AZURE_SUBSCRIPTION_ID")
AZURE_RESOURCE_GROUP = os.getenv("AZURE_RESOURCE_GROUP")
AZURE_WORKSPACE_NAME = os.getenv("AZURE_WORKSPACE_NAME")


In [2]:
experiment_name = "oc-p8-experiment-1"
model_choices = ["unet_xception"]  # ["unet_xception"]
augment_choices = [False, True]  # [False, True]
resize_choices = [
    64,
    80,
    128,
    160,
    256,
    320,
    512,
]  # [64, 80, 128, 160, 256, 320, 512, 640, 800, 1024]

source_directory = "./train"
train_script = "train.py"
compute_target = "ocp8-cluster-gpu"


# connect to your workspace
ws = Workspace(
    subscription_id=AZURE_SUBSCRIPTION_ID,
    resource_group=AZURE_RESOURCE_GROUP,
    workspace_name=AZURE_WORKSPACE_NAME,
)


try:
    env = Environment.get(workspace=ws, name=experiment_name)
except:
    env = Environment(name=experiment_name)
    env.docker.base_image = None
    env.docker.base_dockerfile = "./train/Dockerfile"
    env.python.user_managed_dependencies = True
    env.register(workspace=ws)


for model in model_choices:
    for augment in augment_choices:
        for resize in resize_choices:
            run_config = ScriptRunConfig(
                source_directory=source_directory,
                script=train_script,
                arguments=[
                    "--experiment",
                    experiment_name,
                    "--model",
                    model,
                    "--resize",
                    resize,
                    "--augment" if augment else "--no-augment",
                ],
                compute_target=compute_target,
                environment=env,
                docker_runtime_config=DockerConfiguration(
                    use_docker=True,
                    arguments=[
                        # see : https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.filedataset?view=azure-ml-py#azureml-data-filedataset-mount
                        "--cap-add",
                        "SYS_ADMIN",
                        "--device",
                        "/dev/fuse",
                    ],
                ),
            )

            # create an experiment
            exp = Experiment(workspace=ws, name=experiment_name)

            # submit the run configuration to start the job
            run = exp.submit(run_config)
            run.tag("model", model)
            run.tag("augment", augment)
            run.tag("resize", resize)

            print(f"Submitted Run : {run.display_name}")
            print(f"Tags : {run.get_tags()}")


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 64)


Submitted Run : clever_soccer_bb5kkkw3
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'False', 'resize': '64'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 80)


Submitted Run : olden_carnival_nj4xmtxn
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'False', 'resize': '80'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 128)


Submitted Run : jolly_lunch_9p9zwt20
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'False', 'resize': '128'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 160)


Submitted Run : blue_vulture_c16pxqwj
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'False', 'resize': '160'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 256)


Submitted Run : funny_shirt_swcgbd7d
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'False', 'resize': '256'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 320)


Submitted Run : red_quince_kn4j0c37
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'False', 'resize': '320'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 512)


Submitted Run : plucky_diamond_whgsn2q9
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"resizing","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'False', 'resize': '512'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 64)


Submitted Run : ashy_battery_hd0gsc9y
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"resizing","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'True', 'resize': '64'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 80)


Submitted Run : lime_grass_yrs16jhr
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"resizing","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'True', 'resize': '80'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 128)


Submitted Run : wheat_gold_zgd4x9k6
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"resizing","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'True', 'resize': '128'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 160)


Submitted Run : quirky_date_l269d3p9
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"resizing","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'True', 'resize': '160'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 256)


Submitted Run : lime_chicken_rtg90k0s
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"resizing","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'True', 'resize': '256'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 320)


Submitted Run : good_camera_xl16rpyn
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"resizing","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'True', 'resize': '320'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 512)


Submitted Run : blue_beet_0z1grs3s
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"resizing","PreparingNodeCount":0,"RunningNodeCount":0,"CurrentNodeCount":0}', 'model': 'unet_xception', 'augment': 'True', 'resize': '512'}
