In [19]:
import azureml.core
from azureml.core import Workspace, Experiment, RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DataReferenceConfiguration
from azureml.core.script_run_config import ScriptRunConfig
from azureml.widgets import RunDetails
import dotenv
%load_ext dotenv
%dotenv
import os

The dotenv extension is already loaded. To reload it, use:
  %reload_ext dotenv


In [2]:
# Obtain the workspace handle via Workspace.from_config(). `ws` is what the
# later cells use to look up the default datastore, the compute target and
# the experiment.
ws = Workspace.from_config()

If you run your code in unattended mode, i.e., where you can't give a user input, then we recommend to use ServicePrincipalAuthentication or MsiAuthentication.
Please refer to aka.ms/aml-notebook-auth for different authentication mechanisms in azureml-sdk.


In [3]:
# Use the workspace's default datastore for the upload and for the data
# reference handed to the run.
blob_datastore = ws.get_default_datastore()

In [17]:
# Upload the local placeholder file under 'bsdata' on the datastore,
# overwriting any existing copy. 'bsdata' is the same path the
# DataReferenceConfiguration points at.
# NOTE(review): presumably this only exists so the 'bsdata' folder is present
# before it is mounted -- confirm.
blob_datastore.upload_files(['./dir_placeholder.txt'], target_path='bsdata', overwrite=True)

Uploading ./dir_placeholder.txt
Uploaded ./dir_placeholder.txt, 1 files out of an estimated total of 1


$AZUREML_DATAREFERENCE_6e9a622820104c71b39326acc1ea6dca

In [4]:
# ComputeTarget is needed by the create branch below; without this import that
# branch fails with NameError exactly when the cluster does not exist yet.
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

# Name of the AmlCompute cluster to reuse, or to create when it is missing.
aml_compute_target = "cpucluster"
try:
    # The except branch treats ComputeTargetException from this lookup as
    # "no cluster with that name in the workspace".
    aml_compute = AmlCompute(ws, aml_compute_target)
    print("found existing compute target.")
except ComputeTargetException:
    print("creating new compute target")

    # NOTE(review): min_nodes=2 presumably keeps two nodes provisioned even
    # when idle -- confirm this is intended rather than scaling from 0.
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D2_V2",
        min_nodes=2,
        max_nodes=4,
    )
    aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config)
    # Block until provisioning finishes (or the 20 minute timeout elapses).
    aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

print("Aml Compute attached")

found existing compute target.
Aml Compute attached


In [5]:
# Describe how the 'bsdata' path of the datastore is exposed to the run.
dataref_config = DataReferenceConfiguration(
    datastore_name=blob_datastore.name,
    path_on_datastore='bsdata',
    mode='mount',  # the reference is mounted (mode is 'mount', not 'download')
    overwrite=True,
)

In [13]:
# Run configuration for a PySpark job; settings are grouped by concern below.
run_config = RunConfiguration(framework="PySpark")

# --- where it runs -----------------------------------------------------------
run_config.target = aml_compute
run_config.node_count = 2

# --- Docker ------------------------------------------------------------------
# Execute inside a container based on the MMLSpark image.
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = "microsoft/mmlspark:0.12"

# --- Python ------------------------------------------------------------------
# Dependencies are not user-managed: a conda environment is built from the
# dependencies declared here (only pandas is requested).
run_config.environment.python.user_managed_dependencies = False
run_config.environment.python.conda_dependencies = CondaDependencies.create(conda_packages=['pandas'])

# --- Spark -------------------------------------------------------------------
# No extra Spark repositories or packages.
run_config.environment.spark.repositories = []
run_config.environment.spark.packages = []

# Prepare the environment automatically at execution time if it has not been
# prepared already.
run_config.auto_prepare_environment = True

# --- data --------------------------------------------------------------------
# Register the data reference under the datastore's name.
run_config.data_references = {blob_datastore.name: dataref_config}

In [36]:
# Bundle the script in the current directory with its run configuration.
# The script receives the datastore mount reference (rendered as a string)
# through its --data_dir argument.
script_run_config = ScriptRunConfig(
    source_directory=".",
    script="test_table.py",
    run_config=run_config,
    arguments=[
        "--data_dir",
        str(blob_datastore.as_mount()),
    ],
)

# Submit under the 'script_run' experiment; the trailing expression displays
# the run as the cell output.
run = Experiment(ws, 'script_run').submit(script_run_config)
run

Experiment,Id,Type,Status,Details Page,Docs Page
script_run,script_run_1558623665_faf9b914,azureml.scriptrun,Queued,Link to Azure Portal,Link to Documentation


In [37]:
# Display the notebook widget for the run submitted above.
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…