In [1]:
# Report which Azure ML core SDK release this notebook is running against.
import azureml.core

sdk_version = azureml.core.VERSION
print("SDK version:", sdk_version)

SDK version: 1.22.0


In [2]:
from azureml.telemetry import set_diagnostics_collection

# Opt in to diagnostics collection for this session (send_diagnostics=True);
# the SDK confirms with a "Turning diagnostics collection on." message.
set_diagnostics_collection(send_diagnostics=True)

Turning diagnostics collection on. 


In [3]:
from azureml.core.workspace import Workspace

# Load the workspace described by the local config file and print a
# one-field-per-line summary of where this notebook is connected.
ws = Workspace.from_config()

workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

Workspace name: quick-starts-ws-139721
Azure region: southcentralus
Subscription id: 9b72f9e6-56c5-4c16-991b-19c652994860
Resource group: aml-quickstarts-139721


In [4]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the AmlCompute cluster to reuse, or to create when it does not exist.
cluster_name = "compute-cluster"

try:
    # Looking up the target raises ComputeTargetException when it is missing.
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=4,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing compute target')

# Wait for provisioning with a 20 minute timeout. With min_node_count=None
# the cluster's own scale settings decide how many nodes to wait for.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=None,
    timeout_in_minutes=20,
)

# Print the detailed status of the cluster as a plain dictionary.
cluster_status = compute_target.get_status()
print(cluster_status.serialize())

Found existing compute target
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 0, 'targetNodeCount': 0, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-03-02T17:33:18.607000+00:00', 'errors': None, 'creationTime': '2021-03-02T17:11:16.631284+00:00', 'modifiedTime': '2021-03-02T17:11:31.985785+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 0, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


In [5]:

import os

# Local folder used as the source directory for the training run;
# creating it is a no-op when it already exists.
project_folder = './capstone-project'
os.makedirs(name=project_folder, exist_ok=True)

In [6]:
import shutil

# Stage the training script inside the project folder. The destination path
# returned by shutil.copy is displayed as the cell output.
shutil.copy(src='train.py', dst=project_folder)

'./capstone-project/train.py'

# Create an experiment

In [7]:
from azureml.core import Experiment

# Experiment in the workspace under which the runs of this notebook are submitted.
experiment_name = 'House_Price_Predication'
experiment = Experiment(workspace=ws, name=experiment_name)

# Dataset

In [8]:
import azureml.core
from azureml.core import Workspace, Datastore,Dataset
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core.run import Run

# Read the workspace from the local config file.
ws = Workspace.from_config()

# Fetch the workspace's default datastore once and reuse the handle for both
# its name and the upload below (avoids a second service lookup).
default_ds = ws.get_default_datastore()
default_dsname = default_ds.name
print('default Datastore = ', default_dsname)

# Upload the training CSV into the 'capstoneproject/' folder of the default
# datastore, replacing any previous copy.
default_ds.upload_files(files=['./house-price-train-data.csv'],
                        target_path='capstoneproject/',
                        overwrite=True,
                        show_progress=True)

# Download-mode data reference to the uploaded folder. The variable name is
# kept as-is in case other cells refer to it.
flower_data_ref = default_ds.path('capstoneproject').as_download('ex_capstoneproject')
print('reference_path = ',flower_data_ref)

# Create a tabular dataset from every CSV file in the uploaded folder.
tab_dataset = Dataset.Tabular.from_delimited_files(path=(default_ds,'/capstoneproject/*.csv'))

# Register the tabular dataset in the workspace; create_new_version=True adds
# a new version when the name is already registered.
tab_dataset = tab_dataset.register(workspace=ws,
                                   name='house price train data',
                                   description='Kaggle house price Dataset in tabular format',
                                   tags={'format':'CSV'},
                                   create_new_version=True)

# Fetch the registered dataset by name, materialise it as a pandas DataFrame
# and display its (rows, columns) shape as the cell output.
dataset = Dataset.get_by_name(ws, name='house price train data')
ds = dataset.to_pandas_dataframe()
ds.shape

## Create an environment
Define a conda environment YAML file with your training script dependencies and create an Azure ML environment.

In [9]:
%%writefile conda_dependencies.yml

# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

  # Packages installed through pip; the azureml-* packages are all pinned to 1.21.0.
- pip:
  - azureml-train-automl-runtime==1.21.0
  - inference-schema
  - azureml-interpret==1.21.0
  - azureml-defaults==1.21.0
  # Packages resolved by conda from the channels listed below.
- numpy>=1.16.0,<1.19.0
- pandas==0.25.1
- scikit-learn==0.22.1
- xgboost<=1.3.3
- psutil>=5.2.2,<6.0.0
channels:
- anaconda
- conda-forge

Overwriting conda_dependencies.yml


In [10]:
from azureml.core import Environment

# Build the Azure ML environment from the conda specification file
# conda_dependencies.yml in the working directory.
env = Environment.from_conda_specification(
    name='capstone-project-env',
    file_path='./conda_dependencies.yml',
)

In [11]:
from azureml.core import ScriptRunConfig

# Command-line arguments handed to train.py for the single baseline run,
# written as flag/value pairs.
training_arguments = [
    '--max_depth', '5',
    '--learning_rate', 0.1,
    '--colsample_bytree', 0.3,
    '--alpha', 10,
    '--n_estimators', 10,
]

# Run train.py from the project folder on the compute cluster, inside the
# environment built from the conda specification.
src = ScriptRunConfig(
    source_directory=project_folder,
    script='train.py',
    arguments=training_arguments,
    compute_target=compute_target,
    environment=env,
)

In [12]:
# Submit the baseline script run configuration to the experiment.
run = experiment.submit(config=src)

# Monitor The Run

In [13]:
from azureml.widgets import RunDetails

# Show the monitoring widget for the baseline run.
# Blocking alternative without the widget: run.wait_for_completion(show_output=True)
run_widget = RunDetails(run)
run_widget.show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': True, 'log_level': 'INFO', 's…

# Tune model hyperparameters

In [None]:
from sklearn.metrics import mean_squared_error,accuracy_score
from azureml.train.hyperdrive.parameter_expressions import choice, randint, uniform
from azureml.train.hyperdrive.policy import BanditPolicy, MedianStoppingPolicy
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling

# Early-termination policies. Only the Bandit policy is passed to the
# HyperDrive configuration below; the median-stopping policy is constructed
# but not used in this cell.
early_termination_policy = BanditPolicy(slack_factor=0.01)
another_early_termination_policy = MedianStoppingPolicy(evaluation_interval=1, delay_evaluation=5)

# Discrete candidate values for each train.py command-line flag.
search_space = {
    '--max_depth': choice(2, 3, 5, 10),
    '--learning_rate': choice(0.05, 0.1, 0.15, 0.20),
    '--colsample_bytree': choice(0.3, 0.5, 0.7, 0.9),
    '--alpha': choice(10, 20, 30, 40),
    '--n_estimators': choice(100, 500, 900, 1100),
}

# Random sampling over the candidate values above.
hyperparameter_grid = RandomParameterSampling(search_space)

# Minimise the 'mean_squared_error' metric over at most 10 runs,
# 5 of them concurrently, reusing the baseline script run configuration.
hyperdrive_config = HyperDriveConfig(
    run_config=src,
    hyperparameter_sampling=hyperparameter_grid,
    primary_metric_name='mean_squared_error',
    primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
    policy=early_termination_policy,
    max_total_runs=10,
    max_concurrent_runs=5,
)

In [None]:
# Launch the hyperparameter sweep under the same experiment.
hyperdrive_run = experiment.submit(config=hyperdrive_config)

# Monitor HyperDrive runs

In [None]:
# Show the monitoring widget for the HyperDrive run.
hyperdrive_widget = RunDetails(hyperdrive_run)
hyperdrive_widget.show()

In [None]:
# Wait for the HyperDrive run to complete, with output shown in the cell
# (show_output=True).
hyperdrive_run.wait_for_completion(show_output=True)

In [None]:
# Stop the notebook here unless the sweep finished successfully; the message
# reports the actual status so a failure is diagnosable from the traceback.
final_status = hyperdrive_run.get_status()
assert final_status == "Completed", f"HyperDrive run ended with status {final_status!r}"

In [None]:
# Display the id of the HyperDrive run as the cell output.
hyperdrive_run.id

# Find and register best model

In [None]:
# Pick the child run with the best value of the primary metric.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
if best_run is None:
    # No child run reported the primary metric; fail with a clear message
    # instead of an AttributeError on the lines below.
    raise RuntimeError(
        "HyperDrive found no best run: no child run logged the primary metric."
    )

# Metrics logged by the best run and the command-line arguments it ran with.
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']

# Display the best run as the cell output.
best_run

# Retrieve the Best Model

In [None]:
import joblib

# Path of the model file inside the run's outputs; the same relative path is
# used as the local download destination.
model_artifact_path = 'outputs/house_price_model.pkl'

# Register the best run's model file in the workspace.
best_hyperdrive_model = best_run.register_model(
    model_name="House_Price_Model",
    model_path=model_artifact_path,
)

# Download a local copy of the same file.
best_run.download_file(model_artifact_path, model_artifact_path)

# Model Deployment

In [None]:
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
#from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import Model

In [None]:
# Settings for the Azure Container Instances deployment: 1 CPU core, 4 GB of
# memory, Application Insights enabled and key-based authentication enabled.
aci_settings = dict(
    cpu_cores=1,
    memory_gb=4,
    enable_app_insights=True,
    auth_enabled=True,
    tags={"data":"house price regression"},
    description='house price regression Model',
)
aci_config = AciWebservice.deploy_configuration(**aci_settings)

In [None]:
# Pair the scoring script score.py with the environment built earlier from
# the conda specification, then display the resulting configuration.
inference_config = InferenceConfig(environment=env, entry_script='score.py')
inference_config

In [None]:
# Deploy the registered model as a web service named "house-price-ml-service",
# replacing an existing service of that name (overwrite=True).
service = Model.deploy(
    ws,
    "house-price-ml-service",
    [best_hyperdrive_model],
    inference_config=inference_config,
    deployment_config=aci_config,
    overwrite=True,
)

# Wait for the deployment to finish, then report the service state.
service.wait_for_deployment(show_output=True)
deployment_state = service.state
print(deployment_state)

In [None]:
# Fetch the service logs and print them one line per output row.
logs = service.get_logs()
print(*logs.split('\n'), sep='\n')

In [None]:
# Print the service state and its endpoints. f-strings are used so that a
# missing (None) URI is printed rather than raising a TypeError.
print(service.state)
print(f'scoring URI: {service.scoring_uri}')
print(f'Swagger URI: {service.swagger_uri}')

# Retrieve the authentication keys. Only the last four characters are shown so
# the secret does not end up in the saved notebook output; the full values
# remain available in `primary` and `secondary`.
primary, secondary = service.get_keys()
print('Primary Authentication Key: ' + '*' * 8 + primary[-4:])

In [None]:
# Read the endpoint URI and the primary authentication key from the deployed
# service instead of hardcoding them: a pasted key is a committed secret, and
# a pasted URI goes stale as soon as the service is redeployed.
scoring_uri = service.scoring_uri
key = service.get_keys()[0]

# Consume and Test the Endpoint

In [None]:
from azureml.data.dataset_factory import TabularDatasetFactory
from azureml.core import Workspace, Dataset

# Hosted CSV with the held-out test rows.
test_data_url = 'https://raw.githubusercontent.com/ddgope/Udacity-Capstone-House-Price-Predication-Using-Azure-ML/master/testdata.csv'

# Load the CSV as a tabular dataset, convert it to pandas, and drop the
# 'Column1' and 'SalePrice' columns so they are not sent to the service.
dataset_test = Dataset.Tabular.from_delimited_files(path=test_data_url)
df_Test = dataset_test.to_pandas_dataframe().drop(['Column1','SalePrice'], axis=1)

In [None]:
# Imported here because this cell is the first to use json; without it a
# fresh "Restart & Run All" fails on this cell with a NameError.
import json

# Serialise the test frame with the "table" orient, then keep only its "data"
# records and wrap them as {"data": [...]} for the scoring request.
table_json = df_Test.to_json(orient="table", index=False)
input_data = json.dumps({"data": json.loads(table_json)["data"]}, indent=4)

# Score the payload through the SDK's service object.
pred = service.run(input_data)

In [None]:
# Display the current value of `pred` as the cell output.
pred

In [None]:
#df_Test.to_json('./testdata.json')

In [None]:
# Call the deployed endpoint over HTTP with the requests library.
import json

import pandas as pd
import requests

# `scoring_uri` and `key` are expected to have been set by an earlier cell.
headers = {
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {key}',
}

# Serialise the test frame with the "table" orient, then keep only its "data"
# records and wrap them as {"data": [...]} for the scoring request.
table_json = df_Test.to_json(orient="table", index=False)
input_data = json.dumps({"data": json.loads(table_json)["data"]}, indent=4)

# Make the request and display the raw response before validating it, so the
# body is visible even when the service answers with an error status.
resp = requests.post(scoring_uri, data=input_data, headers=headers)
print(resp.text)
resp.raise_for_status()

# Decode the response body. NOTE(review): score.py is not visible here, so
# both a plain JSON value and a JSON document wrapped in a JSON string
# (double-encoded) are accepted — confirm which one the service returns.
payload = resp.json()
if isinstance(payload, str):
    payload = json.loads(payload)
pred = pd.DataFrame(payload)

In [None]:
# Display the Python type of `pred` as the cell output.
type(pred)

# Create the Submission File

In [None]:
# Imported here so the cell does not depend on pandas having been imported
# by some other cell.
import pandas as pd

# Build the submission file: the Id column of the sample submission next to
# the predictions held in `pred`.
pred = pd.DataFrame(pred)
sub_df = pd.read_csv('https://raw.githubusercontent.com/ddgope/Udacity-Capstone-House-Price-Predication-Using-Azure-ML/master/sample_submission.csv')

# Rows are aligned by index; `pred` is expected to contribute exactly one
# column, otherwise the two-name column assignment below raises.
datasets = pd.concat([sub_df['Id'], pred], axis=1)
datasets.columns = ['Id','SalePrice']
datasets.to_csv('sample_submission.csv', index=False)