In [None]:
# Report the version of the Azure ML core SDK loaded in this kernel
import azureml.core

print(f"SDK version: {azureml.core.VERSION}")

In [None]:
from azureml.telemetry import set_diagnostics_collection

# Opt in to sending diagnostics for this session
set_diagnostics_collection(
    send_diagnostics=True,
)

In [None]:
from azureml.core.workspace import Workspace

# Load the workspace from the local config file and print one identifying
# field per line.
ws = Workspace.from_config()
print('\n'.join((
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
)))

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the cluster to reuse, or to create when it does not exist yet
cluster_name = "compute-cluster"

try:
    # Looking the target up raises ComputeTargetException when it is missing
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )

    # Provision the cluster in the workspace
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing compute target')

# Block until provisioning settles (20 minute timeout). With min_node_count=None
# the cluster's own scale settings decide how many nodes to wait for.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=None,
    timeout_in_minutes=20,
)

# get_status() gives a detailed status for the current cluster
print(compute_target.get_status().serialize())

In [None]:

import os

# Local folder that later serves as the run's source_directory;
# creating it is a no-op when it already exists.
project_folder = './capstone-project'
os.makedirs(
    project_folder,
    exist_ok=True,
)

In [None]:
import shutil

# Place the training script inside the project folder
shutil.copy(src='train.py', dst=project_folder)

# Create an experiment

In [None]:
from azureml.core import Experiment

# Experiment in the workspace under which the runs below are submitted
experiment_name = 'House_Price_Predication'
experiment = Experiment(
    workspace=ws,
    name=experiment_name,
)

# Dataset

In [None]:
# from azureml.core.authentication import ServicePrincipalAuthentication
# svc_pr_password = os.environ.get("AZUREML_PASSWORD")

# svc_pr = ServicePrincipalAuthentication(
#     tenant_id="",
#     service_principal_id="",
#     service_principal_password="")


# ws = Workspace(subscription_id="",
#                resource_group="azureML",
#                workspace_name="creditcard",
#                auth=svc_pr)

# print("Found workspace {} at location {}".format(ws.name, ws.location))


# # ws = Workspace.from_config()
# # ws.auth=svc_pr
# experiment_name = 'House_Price_Predication'
# experiment=Experiment(ws, experiment_name)

# datastore=ws.get_default_datastore()
# dataset=Dataset.Tabular.from_delimited_files(datastore.path('UI/02-09-2021_034445_UTC/BankChurners.csv'))


# os.makedirs('data',exist_ok=True)
# local_path='data/prepared_data.csv'
# workspace=Workspace(ws.subscription_id,ws.resource_group,ws._workspace_name)
# # x_df=dataset.to_pandas_dataframe().head()
# # cat_cols=[col for col in x_df if x_df[col].dtype =='O']
# # num_cols=[col for col in x_df if x_df[col].dtype !='O']
# # pd.get_dummies(x_df).columns

## Create an environment
Define a conda environment YAML file with your training script dependencies and create an Azure ML environment.

In [None]:
%%writefile conda_dependencies.yml

# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

  # Packages installed with pip. The azureml-* packages are all pinned to
  # release 1.21.0; inference-schema is left unpinned.
- pip:
  - azureml-train-automl-runtime==1.21.0
  - inference-schema
  - azureml-interpret==1.21.0
  - azureml-defaults==1.21.0
  # Packages resolved by conda from the channels listed at the bottom.
- numpy>=1.16.0,<1.19.0
- pandas==0.25.1
- scikit-learn==0.22.1
- xgboost<=1.3.3
- psutil>=5.2.2,<6.0.0
channels:
- anaconda
- conda-forge

In [None]:
from azureml.core import Environment

# Build the Azure ML environment from the conda specification file
env = Environment.from_conda_specification(
    name='capstone-project-env',
    file_path='./conda_dependencies.yml',
)

In [None]:
from azureml.core import ScriptRunConfig

# Run configuration for a single training run: train.py from the project
# folder, executed on the compute target inside the environment `env`.
src = ScriptRunConfig(
    source_directory=project_folder,
    script='train.py',
    # Flag/value pairs handed to train.py
    arguments=[
        '--max_depth', '5',
        '--learning_rate', 0.1,
        '--colsample_bytree', 0.3,
        '--alpha', 10,
        '--n_estimators', 10,
    ],
    compute_target=compute_target,
    environment=env,
)

In [None]:
# Submit the single training run configured in `src`
run = experiment.submit(config=src)

# Monitor The Run

In [None]:
from azureml.widgets import RunDetails

# Show the monitoring widget for the submitted run
RunDetails(run).show()
# Alternative, left disabled:
# run.wait_for_completion(show_output=True)

# Tune model hyperparameters

In [None]:
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice,uniform, randint
from azureml.train.hyperdrive.policy import BanditPolicy, MedianStoppingPolicy
from azureml.train.hyperdrive.parameter_expressions import uniform, randint, choice
from sklearn.metrics import mean_squared_error,accuracy_score

# Early-termination policy handed to HyperDriveConfig below
early_termination_policy = BanditPolicy(slack_factor=0.01)
# Alternative policy kept for experimentation; it is not passed to the config below
another_early_termination_policy = MedianStoppingPolicy(evaluation_interval=1, delay_evaluation=5)

## Hyper Parameter Optimization
# Random sampling over discrete choices; each key is the command-line flag
# that is passed to the training script for the sampled value.
hyperparameter_grid = RandomParameterSampling({
    '--max_depth': choice(2, 3, 5, 10),
    '--learning_rate': choice(0.05, 0.1, 0.15, 0.20),
    '--colsample_bytree': choice(0.3, 0.5, 0.7, 0.9),
    '--alpha': choice(10, 20, 30, 40),
    '--n_estimators': choice(100, 500, 900, 1100)
    }
)

# The primary metric is an error measure, so lower is better: the goal must be
# MINIMIZE. With MAXIMIZE, HyperDrive would report the worst model as the best
# run and the early-termination policy would stop the good runs.
# NOTE(review): assumes train.py logs its metric under exactly
# 'mean_squared_error' -- confirm against the training script.
hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=hyperparameter_grid,
                                     primary_metric_name='mean_squared_error',
                                     primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
                                     policy=early_termination_policy,
                                     max_total_runs=10,
                                     max_concurrent_runs=5)

In [None]:
# Launch the hyperparameter sweep under the experiment
hyperdrive_run = experiment.submit(config=hyperdrive_config)

# Monitor HyperDrive runs

In [None]:
# Show the monitoring widget for the HyperDrive run
RunDetails(hyperdrive_run).show()

In [None]:
# Block until the HyperDrive run finishes, streaming its output
hyperdrive_run.wait_for_completion(
    show_output=True,
)

In [None]:
# Fail here if the sweep did not end in the "Completed" state
assert hyperdrive_run.get_status() == "Completed"

In [None]:
# Display the identifier of the HyperDrive run
hyperdrive_run.id

# Find and register best model

In [None]:
# Select the child run that HyperDrive ranks best on the primary metric
best_run = hyperdrive_run.get_best_run_by_primary_metric()

# Metrics logged by that run
best_run_metrics = best_run.get_metrics()

# Arguments recorded in the run definition of that run
parameter_values = best_run.get_details()['runDefinition']['arguments']

# Display the run
best_run

In [None]:
# Register the contents of the best run's outputs folder as a model
best_hyperdrive_model = best_run.register_model(
    model_name="best_hyperdrive_model.pkl",
    model_path='./outputs/',
)

In [None]:
# Display the registered model
best_hyperdrive_model

# Retrieve the Best Model

In [None]:
# #best_run.download_file("./best_hyperdrive_model.pkl", "./best_hyperdrive_model.pkl")
# import joblib

# joblib.dump(best_run, 'best_hyperdrive_model.pkl')

In [None]:
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
#from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

In [59]:
# Register the local model file with the workspace for later deployment
from azureml.core.model import Model

description = 'AutoML Model trained on the House Price Predication'
tags = {'area': 'data science beginners', 'type': 'Regression'}

automl_model = Model.register(
    workspace=ws,
    model_path='best_hyperdrive_model.pkl',
    model_name='House_Price_MLmodel',
    tags=tags,
    description=description,
)

# Display the registered model
automl_model

Registering model House_Price_MLmodel


Model(workspace=Workspace.create(name='quick-starts-ws-139482', subscription_id='3d1a56d2-7c81-4118-9790-f85d1acf0c77', resource_group='aml-quickstarts-139482'), name=House_Price_MLmodel, id=House_Price_MLmodel:3, version=3, tags={'area': 'data science beginners', 'type': 'Regression'}, properties={})

In [60]:
# Fetch the environment named 'AzureML-AutoML' from the workspace
env = Environment.get(
    workspace=ws,
    name='AzureML-AutoML',
)

In [61]:
# Write the environment definition to ./environ, replacing any previous copy
env.save_to_directory(
    path='./environ',
    overwrite=True,
)

In [62]:

# Check environment dependencies: serialise the conda specification once and
# reuse the same string for display and for the file.
conda_spec = env.python.conda_dependencies.serialize_to_string()
print("packages", conda_spec)

# Store the environment details in a file. The context manager closes the
# file even if the write fails.
with open("env.yml", "w") as f:
    f.write(conda_spec)

packages channels:
- anaconda
- conda-forge
- pytorch
dependencies:
- python=3.6.2
- pip=20.2.4
- pip:
  - azureml-core==1.23.0
  - azureml-pipeline-core==1.23.0
  - azureml-telemetry==1.23.0
  - azureml-defaults==1.23.0
  - azureml-interpret==1.23.0
  - azureml-automl-core==1.23.0
  - azureml-automl-runtime==1.23.0
  - azureml-train-automl-client==1.23.0
  - azureml-train-automl-runtime==1.23.0
  - azureml-dataset-runtime==1.23.0
  - azureml-mlflow==1.23.0
  - inference-schema
  - py-cpuinfo==5.0.0
  - boto3==1.15.18
  - botocore==1.18.18
- numpy~=1.18.0
- scikit-learn==0.22.1
- pandas~=0.25.0
- py-xgboost<=0.90
- fbprophet==0.5
- holidays==0.9.11
- setuptools-git
- psutil>5.0.0,<6.0.0
name: azureml_661474bbe74e96b5d8added5888dfc85



In [63]:
# Deployment settings for the ACI web service: 1 CPU core and 4 GB of memory,
# with key authentication and Application Insights turned on.
aci_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=4,
    auth_enabled=True,
    enable_app_insights=True,
    description='house price regression Model',
    tags={"data": "house price regression"},
)

In [64]:
# Pair the scoring script with the environment it runs in
inference_config = InferenceConfig(
    entry_script='score.py',
    environment=env,
)

# Display the configuration
inference_config

InferenceConfig(entry_script=score.py, runtime=None, conda_file=None, extra_docker_file_steps=None, source_directory=None, enable_gpu=None, base_image=None, base_image_registry=<azureml.core.container_registry.ContainerRegistry object at 0x7fb0ac67c358>)

In [None]:
service_name = 'house-price-ml-service'

# Look up the registered model by name and deploy it as a web service,
# replacing any existing service with the same name.
model = Model(workspace=ws, name='House_Price_MLmodel')
service = Model.deploy(
    workspace=ws,
    name=service_name,
    models=[model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True,
)

# Block until the deployment finishes, streaming progress
service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running...........................................

In [None]:
# Display the logs of the deployed service
service.get_logs()

In [None]:
# print service state
print(service.state)
# print scoring URI
print('scoring URI: ' + service.scoring_uri)
# print Swagger URI
print('Swagger URI: ' + service.swagger_uri)
# retrieve authentication keys
primary, secondary = service.get_keys()
# Do not echo the key itself: cell outputs are saved with the notebook, so a
# printed key would be shared with everyone who receives the file. The value
# stays available in the `primary` variable.
print('Primary Authentication Key: retrieved (value hidden)')

In [None]:
# Store the endpoint address and key in variables.
# Both are read from the deployed `service` object so that no endpoint
# address or secret is hard-coded in the notebook.
# NOTE(review): a literal key was previously stored in this cell; regenerate
# the service keys if that key was ever valid.
scoring_uri = service.scoring_uri

# get_keys() returns (primary, secondary); the primary key is used here
key, _ = service.get_keys()

# Consume the Endpoint

In [None]:
# Send a test request to the deployed endpoint
import json

import pandas as pd  # needed for pd.read_json below; not imported in any earlier visible cell
import requests

# scoring_uri and key come from the cell above
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {key}',
}

# Convert to JSON string
# NOTE(review): `dataset` is not defined in any executed cell visible in this
# notebook (only in commented-out code) -- confirm where it should come from.
input_data = dataset.to_json()

# Make the request and display the response
resp = requests.post(scoring_uri, data=input_data, headers=headers)
print(resp.text)

# Stop here on an HTTP error instead of trying to parse an error body
resp.raise_for_status()

# Load the returned prediction and read it into a pandas dataframe
pred = json.loads(resp.text)
pred = pd.read_json(pred)

# Test the Model

# Load Test Data

In [None]:
# Dataset is not imported in any earlier visible cell, so import it here
from azureml.core import Dataset

# NOTE(review): this URL is the bank-marketing sample data, which looks
# inconsistent with a house-price model -- confirm the intended test set.
dataset_test = Dataset.Tabular.from_delimited_files(path='https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/bankmarketing_train.csv')
df_test = dataset_test.to_pandas_dataframe()

# Keep only rows that have a label; Series.notna() needs no pandas import
df_test = df_test[df_test['y'].notna()]

# Split into label column and feature frame
y_test = df_test['y']
X_test = df_test.drop(['y'], axis=1)

# Testing Our Best Fitted Model