In [9]:
import os

from azureml.core import Environment, Experiment, ScriptRunConfig, Workspace
from azureml.core.runconfig import DockerConfiguration
from dotenv import load_dotenv

# Load variables from a .env file into the process environment, then read the
# coordinates of the Azure ML workspace from it.
load_dotenv()
AZURE_SUBSCRIPTION_ID = os.getenv("AZURE_SUBSCRIPTION_ID")
AZURE_RESOURCE_GROUP = os.getenv("AZURE_RESOURCE_GROUP")
AZURE_WORKSPACE_NAME = os.getenv("AZURE_WORKSPACE_NAME")

# os.getenv returns None for unset variables: fail here with an explicit
# message rather than later, when the values are handed to the Azure ML SDK.
_azure_settings = {
    "AZURE_SUBSCRIPTION_ID": AZURE_SUBSCRIPTION_ID,
    "AZURE_RESOURCE_GROUP": AZURE_RESOURCE_GROUP,
    "AZURE_WORKSPACE_NAME": AZURE_WORKSPACE_NAME,
}
_missing_settings = [name for name, value in _azure_settings.items() if not value]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in the environment or in a .env file."
    )


In [10]:
# Name shared by the Azure ML experiment and its registered environment.
experiment_name = "oc-p8-experiment-1"

# Hyper-parameter grid: one run is submitted per (resize, model, augment) combination.
# Full set of model choices: ["unet_xception"]
model_choices = ["unet_xception"]
# Full set of augment choices: [False, True]
augment_choices = [False, True]
# Full set of resize choices: [64, 80, 128, 160, 256, 320, 512, 640, 800, 1024]
resize_choices = [64, 80, 128, 160, 256, 320, 512, 640]

# Batch size passed to the training script for each resize value.
# 800 and 1024 are listed here although they are not part of the current sweep.
batch_size_per_resize = {
    64: 128, 80: 128,
    128: 64, 160: 64,
    256: 32, 320: 32,
    512: 16,
    640: 8,
    800: 4,
    1024: 2,
}

# Location of the training code, its entry point, and the compute target name.
source_directory = "./train"
train_script = "train.py"
compute_target = "ocp8-cluster-gpu"


# Open a handle on the Azure ML workspace identified by the settings
# read from the environment.
workspace_coordinates = {
    "subscription_id": AZURE_SUBSCRIPTION_ID,
    "resource_group": AZURE_RESOURCE_GROUP,
    "workspace_name": AZURE_WORKSPACE_NAME,
}
ws = Workspace(**workspace_coordinates)


# Reuse the environment already registered under the experiment name;
# otherwise define it from the project's Dockerfile and register it.
try:
    env = Environment.get(workspace=ws, name=experiment_name)
except Exception as get_error:  # not a bare except: Ctrl-C / SystemExit must propagate
    # Report why the lookup failed so that an unrelated problem (e.g. an
    # authentication error) is not silently mistaken for "not registered yet".
    print(f"Could not get environment '{experiment_name}' ({get_error}); registering it.")
    env = Environment(name=experiment_name)
    # Use the custom Dockerfile instead of a base image.
    env.docker.base_image = None
    env.docker.base_dockerfile = "./train/Dockerfile"
    # Python packages are expected to come from the Docker image itself.
    env.python.user_managed_dependencies = True
    env.register(workspace=ws)

# Submit one training run per (resize, model, augment) combination.

# create the experiment once: it does not depend on the swept parameters
exp = Experiment(workspace=ws, name=experiment_name)

for resize in resize_choices:
    # the batch size only depends on the resize value
    batch_size = batch_size_per_resize[resize]
    for model in model_choices:
        for augment in augment_choices:
            run_config = ScriptRunConfig(
                source_directory=source_directory,
                script=train_script,
                arguments=[
                    "--experiment",
                    experiment_name,
                    "--model",
                    model,
                    "--resize",
                    resize,
                    "--batch",
                    batch_size,
                    "--augment" if augment else "--no-augment",
                ],
                compute_target=compute_target,
                environment=env,
                docker_runtime_config=DockerConfiguration(
                    use_docker=True,
                    arguments=[
                        # extra container flags needed to mount datasets
                        # see : https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data.filedataset?view=azure-ml-py#azureml-data-filedataset-mount
                        "--cap-add",
                        "SYS_ADMIN",
                        "--device",
                        "/dev/fuse",
                    ],
                ),
            )

            # submit the run configuration to start the job
            run = exp.submit(run_config)

            # Tag values are stored as strings: convert explicitly to avoid the
            # "Converting non-string tag to string" warning on every run.
            run.tag("model", model)
            run.tag("augment", str(augment))
            run.tag("resize", str(resize))

            print(f"Submitted Run : {run.display_name}")
            print(f"Tags : {run.get_tags()}")


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 64)


Submitted Run : amiable_star_7mw8w411
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '64'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 64)


Submitted Run : joyful_date_bjmn9z4b
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '64'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 80)


Submitted Run : bubbly_holiday_x9800pyr
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '80'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 80)


Submitted Run : plucky_grass_7lb87dx5
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '80'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 128)


Submitted Run : bright_hook_rhcwk1qg
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '128'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 128)


Submitted Run : tender_gyro_6qh065b8
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '128'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 160)


Submitted Run : funny_giraffe_pv0g8zbz
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '160'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 160)


Submitted Run : bubbly_berry_q9sfxjwx
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '160'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 256)


Submitted Run : gentle_sock_dccs8h6v
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '256'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 256)


Submitted Run : cyan_rocket_dhc2qsf7
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '256'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 320)


Submitted Run : crimson_lemon_3pwmbb9h
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '320'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 320)


Submitted Run : polite_puppy_qcw88ph4
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '320'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 512)


Submitted Run : good_energy_2235n7k0
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '512'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 512)


Submitted Run : jolly_frame_fnwd2xc8
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '512'}


Converting non-string tag to string: (augment: False)
Converting non-string tag to string: (resize: 640)


Submitted Run : kind_rhythm_vmfxrfjn
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'False', 'resize': '640'}


Converting non-string tag to string: (augment: True)
Converting non-string tag to string: (resize: 640)


Submitted Run : coral_cassava_xfj767r4
Tags : {'_aml_system_ComputeTargetStatus': '{"AllocationState":"steady","PreparingNodeCount":0,"RunningNodeCount":1,"CurrentNodeCount":1}', 'model': 'unet_xception', 'augment': 'True', 'resize': '640'}
