In [None]:
# Check core SDK version number
import azureml.core

print("SDK version:", azureml.core.VERSION)

In [None]:
from azureml.telemetry import set_diagnostics_collection

set_diagnostics_collection(send_diagnostics=True)

In [None]:
from azureml.core.workspace import Workspace

ws = Workspace.from_config()
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# choose a name for your cluster
cluster_name = "compute-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_D2_V2', 
                                                           max_nodes=4)

    # create the cluster
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

# can poll for a minimum number of nodes and for a specific timeout. 
# if no min node count is provided it uses the scale settings for the cluster
compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

# use get_status() to get a detailed status for the current cluster. 
print(compute_target.get_status().serialize())

In [None]:

import os

project_folder = './capstone-project'
os.makedirs(project_folder, exist_ok=True)

In [None]:
import shutil

shutil.copy('train.py', project_folder)

# Create an experiment

In [None]:
from azureml.core import Experiment

experiment_name = 'House_Price_Predication'
experiment = Experiment(ws, name=experiment_name)

# Dataset

In [None]:
# from azureml.core.authentication import ServicePrincipalAuthentication
# svc_pr_password = os.environ.get("AZUREML_PASSWORD")

# svc_pr = ServicePrincipalAuthentication(
#     tenant_id="",
#     service_principal_id="",
#     service_principal_password="")


# ws = Workspace(subscription_id="",
#                resource_group="azureML",
#                workspace_name="creditcard",
#                auth=svc_pr)

# print("Found workspace {} at location {}".format(ws.name, ws.location))


# # ws = Workspace.from_config()
# # ws.auth=svc_pr
# experiment_name = 'House_Price_Predication'
# experiment=Experiment(ws, experiment_name)

# datastore=ws.get_default_datastore()
# dataset=Dataset.Tabular.from_delimited_files(datastore.path('UI/02-09-2021_034445_UTC/BankChurners.csv'))


# os.makedirs('data',exist_ok=True)
# local_path='data/prepared_data.csv'
# workspace=Workspace(ws.subscription_id,ws.resource_group,ws._workspace_name)
# # x_df=dataset.to_pandas_dataframe().head()
# # cat_cols=[col for col in x_df if x_df[col].dtype =='O']
# # num_cols=[col for col in x_df if x_df[col].dtype !='O']
# # pd.get_dummies(x_df).columns

## Create an environment
Define a conda environment YAML file with your training script dependencies and create an Azure ML environment.

In [None]:
%%writefile conda_dependencies.yml

# Conda environment specification. The dependencies defined in this file will
# be automatically provisioned for runs with userManagedDependencies=False.

# Details about the Conda environment file format:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#create-env-file-manually

name: project_environment
dependencies:
  # The python interpreter version.
  # Currently Azure ML only supports 3.5.2 and later.
- python=3.6.2

- pip:
  - azureml-train-automl-runtime==1.21.0
  - inference-schema
  - azureml-interpret==1.21.0
  - azureml-defaults==1.21.0
- numpy>=1.16.0,<1.19.0
- pandas==0.25.1
- scikit-learn==0.22.1
- xgboost<=1.3.3
- psutil>=5.2.2,<6.0.0
channels:
- anaconda
- conda-forge

In [None]:
from azureml.core import Environment

env = Environment.from_conda_specification(name = 'capstone-project-env', file_path = './conda_dependencies.yml')

In [None]:
from azureml.core import ScriptRunConfig

src = ScriptRunConfig(source_directory=project_folder,
                      script='train.py',
                      arguments=['--max_depth', '5'
                                 ,'--learning_rate',  0.1
                                 ,'--colsample_bytree',0.3
                                 ,'--alpha',10
                                 ,'--n_estimators',10],
                      compute_target=compute_target,
                      environment=env)

In [None]:
run = experiment.submit(src)

# Monitor The Run

In [None]:
from azureml.widgets import RunDetails

RunDetails(run).show()
#run.wait_for_completion(show_output=True)

# Tune model hyperparameters

In [None]:
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice,uniform, randint
from azureml.train.hyperdrive.policy import BanditPolicy, MedianStoppingPolicy
from azureml.train.hyperdrive.parameter_expressions import uniform, randint, choice
from sklearn.metrics import mean_squared_error,accuracy_score

early_termination_policy = BanditPolicy(slack_factor=0.01)
another_early_termination_policy = MedianStoppingPolicy(evaluation_interval=1, delay_evaluation=5)

## Hyper Parameter Optimization
hyperparameter_grid = RandomParameterSampling({
    '--max_depth':choice(2, 3, 5, 10),
    '--learning_rate':choice(0.05,0.1,0.15,0.20),
    '--colsample_bytree':choice(0.3,0.5,0.7,0.9),
    '--alpha':choice(10,20,30,40),
    '--n_estimators':choice(100, 500, 900, 1100)
    }
)
                
hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=hyperparameter_grid, 
                                     primary_metric_name='mean_squared_error',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     policy=early_termination_policy,
                                     max_total_runs=10,
                                     max_concurrent_runs=5)

In [None]:
# start the HyperDrive run
hyperdrive_run = experiment.submit(hyperdrive_config)

# Monitor HyperDrive runs

In [None]:
RunDetails(hyperdrive_run).show()

In [None]:
hyperdrive_run.wait_for_completion(show_output=True)

In [None]:
assert(hyperdrive_run.get_status() == "Completed")

In [None]:
hyperdrive_run.id

# Find and register best model

In [69]:
# Get your best run and save the model from that run.
best_run = hyperdrive_run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()['runDefinition']['arguments']
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
House_Price_Predication,HD_460a2398-27d5-4818-ad7d-b6eeaea9b611_2,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


# Retrieve the Best Model

In [None]:
import joblib
best_hyperdrive_model = best_run.register_model(
    model_name="House_Price_Model",
    model_path='outputs/house_price_model.pkl',
   
)
best_run.download_file("outputs/house_price_model.pkl", "outputs/house_price_model.pkl")

# Model Deployment

In [70]:
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
#from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.model import Model

In [75]:
aci_config = AciWebservice.deploy_configuration(
            cpu_cores=1,
            memory_gb=4, 
            enable_app_insights=True,
            auth_enabled=True,
            tags={"data":"house price regression"},
            description='house price regression Model',
            )

In [76]:
inference_config = InferenceConfig(entry_script='score.py', environment=env)
inference_config

InferenceConfig(entry_script=score.py, runtime=None, conda_file=None, extra_docker_file_steps=None, source_directory=None, enable_gpu=None, base_image=None, base_image_registry=<azureml.core.container_registry.ContainerRegistry object at 0x7fb0ac45b7b8>)

In [None]:
#service_name = 'house-price-ml-service'

#model = Model(ws,name='House_Price_Model')

service = Model.deploy(workspace=ws,
                       name="house-price-ml-service",
                       models=[best_hyperdrive_model] #[model],
                       inference_config=inference_config,
                       deployment_config=aci_config,
                       overwrite=True)

service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running..................................................................................................................................................................................................

In [None]:
logs = service.get_logs()
for line in logs.split('\n'):
     print(line)

In [None]:
# print service state
print(service.state)
# print scoring URI
print('scoring URI: ' + service.scoring_uri)
# print Swagger URI
print('Swagger URI: ' + service.swagger_uri)
# retrieve authentication keys
primary, secondary = service.get_keys()
# print primary authenticaton key
print('Primary Authentication Key: ' + primary)

In [None]:
#Store the uri's in variables:
scoring_uri = 'http://c21f86a8-ecdb-43a3-a25e-2887c610f5b2.southcentralus.azurecontainer.io/score'

key = 'ZwRGMEfMrOaSy8dMLO4uhGnLAfK5PVkC'

# Consume the Endpoint and Testing

In [None]:
#connect to dataset
#https://medium.com/analytics-vidhya/deploy-your-ml-models-using-5-easy-steps-with-azure-machine-learning-workspace-c1ca5b6aa284
dataset = Dataset.get_by_name(ws, name='<Name of dataset in AMLW>')
dataset = dataset.to_pandas_dataframe()

#package and run input data to model
#input data
input_data = dataset.to_json()

#run model
pred = service.run(input_data)
#Convert returned json back to a pandas dataframe
pred = pd.read_json(pred)

In [None]:
#let's test requests:
import json
import requests

scoring_uri = scoring_uri
key = key

headers = {'Content-Type':'application/json'}
headers['Authorization'] = f'Bearer {key}'

# Convert to JSON string
input_data = dataset.to_json()

# Make the request and display the response
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.text)

#load the returned prediction and read it into a pandas dataframe
pred = json.loads(resp.text)
pred = pd.read_json(pred)

# Test the Model

# Load Test Data

In [None]:
dataset_test = Dataset.Tabular.from_delimited_files(path='https://raw.githubusercontent.com/ddgope/Udacity-Capstone-House-Price-Predication-Using-Azure-ML/master/testdata.csv')
df_Test = dataset_test.to_pandas_dataframe()
df_Test

In [None]:
df_Test.drop(['SalePrice'],axis=1,inplace=True)

In [None]:
df_Test.shape

In [None]:
df_Test.head()

In [None]:
df_Test.drop(['SalePrice'],axis=1).head()

In [None]:
y_pred=regressor.predict(df_Test.drop(['SalePrice'],axis=1))

In [None]:
y_pred

# Testing Our Best Fitted Model

In [None]:
##Create Sample Submission file and Submit using ANN
pred=pd.DataFrame(ann_pred)
sub_df=pd.read_csv('sample_submission.csv')
datasets=pd.concat([sub_df['Id'],pred],axis=1)
datasets.columns=['Id','SalePrice']
datasets.to_csv('sample_submission.csv',index=False)