In [1]:
# Connect to workspace
import azureml.core
from azureml.core import Workspace

# Attach to the Azure ML workspace described by the saved config file,
# then report the SDK version and the workspace name.
ws = Workspace.from_config()
ready_message = 'Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name)
print(ready_message)

Ready to use Azure ML 1.13.0 to work with seed


In [2]:
import os, shutil

# Folder holding everything that is sent along with the experiment
training_folder = 'house-price-training-AML'
os.makedirs(training_folder, exist_ok=True)  # no error if the folder already exists

# Place a copy of the training data inside the experiment folder;
# shutil.copy returns the destination path, which the cell displays.
source_csv = 'train.csv'
shutil.copy(source_csv, os.path.join(training_folder, source_csv))

'house-price-training-AML/train.csv'

In [3]:
# Create and register a dataset in the datastore
from azureml.core import Dataset

# Upload train.csv to the workspace's default datastore and register it as the
# 'house-price' tabular dataset, unless a dataset with that name already exists.
default_ds = ws.get_default_datastore()  # default datastore of the workspace

if 'house-price' not in ws.datasets:
    # `files` must be a sequence of local paths. The previous `('train.csv')`
    # was a plain string (parentheses alone do not make a tuple).
    default_ds.upload_files(files=['train.csv'],
                            target_path='house-price-data/',  # folder path inside the datastore
                            overwrite=True,
                            show_progress=True)

    # Create a tabular dataset from the CSV files under that datastore path
    tab_data_set = Dataset.Tabular.from_delimited_files(path=(default_ds, 'house-price-data/*.csv'))

    # Register the tabular dataset; registration is best-effort, so a failure
    # is reported but does not stop the notebook.
    try:
        tab_data_set = tab_data_set.register(workspace=ws,
                                             name='house-price',
                                             description='house price dataset',
                                             tags={'format': 'csv'},
                                             create_new_version=True)
        print('House price dataset was created')
    except Exception as ex:
        print(ex)
else:
    print('House price dataset was registered already')
        


House price dataset was registered already


In [4]:
# Creating a python Environment for the house training experiment

from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Environment used by the house-price training experiment
house_price_env = Environment("house-price-experiment-env")
house_price_env.docker.enabled = True  # run inside a Docker container
house_price_env.python.user_managed_dependencies = False  # let Azure ML manage the dependencies

# Package set for the environment: scikit-learn from conda, Azure ML packages from pip
house_price_packages = CondaDependencies.create(
    conda_packages=['scikit-learn'],
    pip_packages=['azureml-defaults', 'azureml-dataprep[pandas]'],
)

# Attach the dependencies to the environment
house_price_env.python.conda_dependencies = house_price_packages

print("house_price_env", ' defined.')

# Register the environment in the workspace; the returned definition is the cell output
house_price_env.register(workspace=ws)

house_price_env  defined.


{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/intelmpi2018.3-ubuntu16.04:20200821.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": true,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "house-price-experiment-env",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                

In [5]:
# Creating an cluster
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "cluster-ml"

# Reuse the compute target with this name when it exists; otherwise provision it.
try:
    training_cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # Lookup failed, so try to create the cluster. Creation is best-effort:
    # any error is printed and `training_cluster` is then left undefined.
    try:
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_DS11_V2',
            max_nodes=2,
        )
        training_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        training_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)
else:
    print('Found existing cluster, use it.')

Found existing cluster, use it.


In [6]:
from azureml.train.estimator import Estimator
from azureml.core import Experiment


# Registered dataset that is handed to the training run
house_price_ds = ws.datasets.get('house-price')
training_input = house_price_ds.as_named_input('house_price')

# Estimator describing what to run and where.
# NOTE(review): the entry script is a notebook (.ipynb) and the recorded run
# output mentions a papermill handler - confirm this Estimator setup is the one
# that produced that run.
# NOTE(review): compute_target is 'local' although a cluster named
# 'cluster-ml' is prepared earlier in the notebook - confirm this is intended.
estimator = Estimator(
    source_directory=training_folder,
    entry_script='house-price-training-Forest.ipynb',
    inputs=[training_input],
    compute_target='local',
    environment_definition=house_price_env,
)

# Experiment under which the run is recorded
experiment_name = 'house-price-training-AML'
experiment = Experiment(workspace=ws, name=experiment_name)

# Submit the estimator and block until the run finishes (streamed logs disabled);
# the returned run details are the cell output.
run = experiment.submit(config=estimator)
run.wait_for_completion(show_output=False)



{'runId': 'house-price-training-AML_1599986771_73e96ca8',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2020-09-13T08:46:20.021298Z',
 'endTimeUtc': '2020-09-13T08:47:25.893949Z',
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '39b60269-2577-4dd5-a489-b0ba89197dc3'},
 'inputDatasets': [{'dataset': {'id': '9e5e5d0a-1248-492b-997d-25323880b8de'}, 'consumptionDetails': {'type': 'RunInput', 'inputName': 'house_price', 'mechanism': 'Direct'}}],
 'outputDatasets': [],
 'runDefinition': {'script': 'papermill_notebook_run_handler.py',
  'scriptType': None,
  'useAbsolutePath': False,
  'arguments': ['-i',
   'house-price-training-Forest.ipynb',
   '-o',
   './outputs/house-price-training-Forest.output.ipynb',
   '-e',
   '{"history": true, "timeout": null}',
   '-p',
   '{"engine_name": "azureml_engine"}',
   '-n',
   '{}'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReference

In [7]:
from azureml.widgets import RunDetails

# Display the run-details widget for the submitted run
run_widget = RunDetails(run)
run_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [8]:
# Print every metric logged by the run, one "name value" pair per line
metrics = run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, metric_value)
print('\n')
# List the files recorded for the run (logs and outputs)
for file_name in run.get_file_names():
    print(file_name)

max tree depth 10.0
Num of trees 100
Agorithm Random Forest
Root Mean square error 27177.315460774666
Mean square error 738606475.6544616
R^2 0.9748302662681689


azureml-logs/60_control_log.txt
azureml-logs/70_driver_log.txt
logs/azureml/8_azureml.log
logs/azureml/dataprep/backgroundProcess.log
logs/azureml/dataprep/backgroundProcess_Telemetry.log
logs/azureml/dataprep/engine_spans_l_4925e709-3d74-49d3-b0d2-640da1dfd32f.jsonl
logs/azureml/dataprep/python_span_l_4925e709-3d74-49d3-b0d2-640da1dfd32f.jsonl
outputs/house-price-training-Forest.output.ipynb
outputs/house_price_Forest_model.pkl


In [17]:
# Register model
from azureml.core import Model

# Register the model file produced by the run, tagging it with the algorithm
# and recording the logged RMSE as a property. The returned object is kept in
# `model` because the deployment cell below refers to that name; previously the
# result was discarded, leaving `model` undefined.
model = run.register_model(model_path='outputs/house_price_Forest_model.pkl',
                           model_name='house_price_Forest_model',
                           tags={'Model': 'Random Forest'},
                           properties={'RMSE': metrics.get('Root Mean square error')})
model  # display the registered model as the cell output

Model(workspace=Workspace.create(name='seed', subscription_id='347c4df8-4298-408d-b3b2-98cb005c49a0', resource_group='mlseed'), name=house_price_Forest_model, id=house_price_Forest_model:2, version=2, tags={'Model': 'Random Forest'}, properties={'RMSE': '27177.315460774666'})

In [None]:
service_name = 'house-price-service'

# Fetch the registered model by name so this cell does not rely on a `model`
# variable left over from another cell (none was defined before, which raised
# NameError on a fresh kernel). With only a name given, the latest version is used.
model = Model(ws, name='house_price_Forest_model')

# NOTE(review): no inference_config/deployment_config is passed - confirm the
# registered model supports deployment without an entry script and environment.
service = Model.deploy(ws, service_name, [model], overwrite=True)
service.wait_for_deployment(show_output=True)