In [1]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.38.0 to work with aml_ws


In [2]:
# Get the default datastore
from azureml.core import Workspace, Datastore, Dataset
from azureml.data.datapath import DataPath

default_ds = ws.get_default_datastore()

# Enumerate all datastores, indicating which is the default
for ds_name in ws.datastores:
    print(ds_name, "- Default =", ds_name == default_ds.name)

sekuritdatastore - Default = True
azureml_globaldatasets - Default = False
workspacefilestore - Default = False
workspaceartifactstore - Default = False
workspaceblobstore - Default = False


In [3]:
from azureml.core import Dataset

dataset = Dataset.get_by_name(ws, name='sekuritdataset')
data = dataset.to_pandas_dataframe()

In [4]:
data.head()

Unnamed: 0,Model,Length,Type,Style,OEM,Engine Disp,Age of OEM,Year,Mileage,Oil Price,Petrol,Automatic,Price,Sales,Column17
0,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Jan-14,16,102.1,True,False,5.0,1207,
1,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Feb-14,16,104.83,True,False,5.0,882,
2,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Mar-14,16,104.04,True,False,5.0,839,
3,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Apr-14,16,104.87,True,False,5.0,547,
4,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,May-14,16,105.71,True,False,5.0,571,


In [4]:
import os
# Create a folder for the pipeline step files
experiment_folder = 'sekurit_hyper'
os.makedirs(experiment_folder, exist_ok=True)

print(experiment_folder)

sekurit_hyper


In [5]:
# Import libraries
import os
import argparse
import pandas as pd
from azureml.core import Run
from sklearn.preprocessing import MinMaxScaler
from azureml.core import Dataset
import pandas as pd


# Get the experiment run context
run = Run.get_context()

# Drop unnecessary columns and other cleaning steps
data=data.drop(['Year','Column17'],axis=1)

data['Sales']=pd.to_numeric(data['Sales'],errors='coerce')
data= data[data['Sales'].notna()]


# Label Encoding
list_of_columns = ['Model','Type','Style','OEM']
data[list_of_columns] = data[list_of_columns].apply(lambda col:pd.Categorical(col).codes)

# Boolean Encoding
data[["Petrol", "Automatic"]] *= 1

# Normalize the numeric columns
scaler = MinMaxScaler()
num_cols = ['Length','Engine Disp','Age of OEM','Mileage','Oil Price','Price']
data[num_cols] = scaler.fit_transform(data[num_cols])

# Log processed rows
row_count = (len(data))
run.log('processed_rows', row_count)


if not os.path.isdir("data"):
    os.mkdir("data")
# Save the train data to a csv to be uploaded to the datastore
pd.DataFrame(data).to_csv("data/training_data.csv", index=False)

target_path = default_ds

Dataset.Tabular.register_pandas_dataframe(dataframe=data, target=target_path, name="prepped_data")
# End the run
run.complete()

Attempted to log scalar metric processed_rows:
1151
Validating arguments.
Arguments validated.
Successfully obtained datastore reference and path.
Uploading file to managed-dataset/42bda498-9964-4205-b826-82e92eeb2ea7/
Successfully uploaded file to datastore.
Creating and registering a new dataset.
Successfully created and registered a new dataset.


In [12]:
%%writefile $experiment_folder/sekurit_training.py

# Import libraries
from azureml.core import Run, Model
from sklearn.ensemble import GradientBoostingRegressor
import math
import argparse
import os
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
from azureml.core import Dataset
from azureml.data.dataset_factory import TabularDatasetFactory

# Get the experiment run context
run = Run.get_context()

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument("--training-data", type=str, dest='training_data', help='training data')
parser.add_argument('--n_estimators', type=int, default=100, help="The number of boosting stages to perform")
parser.add_argument('--learning_rate', type=float, default=0.1, help="Learning rate shrinks the contribution of each tree by learning_rate")
args = parser.parse_args()

# Log Hyperparameter values
run.log('learning_rate',  np.float(args.learning_rate))
run.log('n_estimators',  np.int(args.n_estimators))


# load the prepared data file in the training folder-
print("Loading Data...")
data = run.input_datasets['prepped_data'].to_pandas_dataframe() # Get the training data from the estimator input

sales_mean = data['Sales'].mean()

# Separate features and labels
X, y = data[['Model','Length','Type','Style','OEM','Engine Disp','Age of OEM','Mileage','Oil Price','Petrol','Automatic','Price']].values, data['Sales'].values


# Split data into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=0)


model = GradientBoostingRegressor(n_estimators=args.n_estimators, learning_rate=args.learning_rate,
                                    max_depth=1, random_state=0, loss='huber').fit(X_train, y_train)
 
    
    
# calculate accuracy
# MSE 
mse = mean_squared_error(y_test, model.predict(X_test))

# normalized_root_mean_squared_error => to be comparabe with Azure results
metric = math.sqrt(mse)/sales_mean

run.log("normalized_root_mean_squared_error", np.float(metric))
# Metric reported is 'r2_score' => metric to optimize
run.log("r2_score", np.float(model.score(X_test, y_test)))


# Save the trained model in the outputs folder
os.makedirs('outputs', exist_ok=True)
joblib.dump(value=model, filename='outputs/sekurit_hyper_model.pkl')

run.complete()


Overwriting sekurit_hyper/sekurit_training.py


In [7]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "computecluster"

try:
    # Check for existing compute target
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If it doesn't already exist, create it
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        pipeline_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)

Found existing cluster, use it.


In [8]:
%%writefile $experiment_folder/experiment_env.yml

name: hyperdrive_environment
dependencies:
- python=3.6.2
- scikit-learn
- pandas
- numpy
- pip
- pip:
  - azureml-defaults


Overwriting sekurit_hyper/experiment_env.yml


In [9]:
from azureml.core import Environment
from azureml.core.runconfig import RunConfiguration

# Create a Python environment for the experiment (from a .yml file)
experiment_env = Environment.from_conda_specification("experiment_env", experiment_folder + "/experiment_env.yml")

# Register the environment 
experiment_env.register(workspace=ws)
registered_env = Environment.get(ws, 'experiment_env')

# Create a new runconfig object for the pipeline
hyper_run_config = RunConfiguration()

# Use the compute you created above. 
hyper_run_config.target = pipeline_cluster

# Assign the environment to the run configuration
hyper_run_config.environment = registered_env

print ("Run configuration created.")

Run configuration created.


In [13]:
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform # supported by RandomParameterSampling
from azureml.train.hyperdrive.parameter_expressions import choice # supported by RandomParameterSampling
import os
import shutil

from azureml.core import ScriptRunConfig
from azureml.core import Environment


prepped_ds = Dataset.get_by_name(ws, name='prepped_data')

# Parameter sampler for the HyperDrive
ps = RandomParameterSampling(
    {
        '--learning_rate': choice(0.01, 0.1, 0.3),# Contribution of each tree 
        '--n_estimators': choice(10, 100, 150), # Number of learners
    }
)


# This policy compares the value (Y + Y * 0.2) to "best current score", and if smaller, cancels the run.
policy = BanditPolicy(slack_factor=0.01, delay_evaluation = 50)



#ScriptRunConfig
script_config = ScriptRunConfig(source_directory=experiment_folder,
                                script='sekurit_training.py',
                                arguments = ['--training-data', prepped_ds.as_named_input('prepped_data')],
                                compute_target = pipeline_cluster,
                                environment=experiment_env)


# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(hyperparameter_sampling=ps,
                                     policy=policy,
                                     run_config=script_config,
                                     #The name of the primary metric reported by the experiment runs.
                                     primary_metric_name='r2_score',
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs = 10,
                                     max_duration_minutes=15,
                                     max_concurrent_runs=2) # Number of nodes to change


In [14]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline
from azureml.widgets import RunDetails


experiment = Experiment(workspace=ws, name='sekurit-hyperdrive')
run = experiment.submit(config=hyperdrive_config)

RunDetails(run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

In [16]:
# Get the best run
best_run = run.get_best_run_by_primary_metric()

best_run.get_details()

{'runId': 'HD_b172eab7-107d-4e9c-87fd-fa7f67fec910_5',
 'target': 'computecluster',
 'status': 'Completed',
 'startTimeUtc': '2022-03-03T17:20:22.054195Z',
 'endTimeUtc': '2022-03-03T17:20:25.672136Z',
 'services': {},
 'properties': {'_azureml.ComputeTargetType': 'amlcompute',
  'ContentSnapshotId': '707a65cd-4227-4d6a-86ef-3a32f070361e',
  'ProcessInfoFile': 'azureml-logs/process_info.json',
  'ProcessStatusFile': 'azureml-logs/process_status.json'},
 'inputDatasets': [{'dataset': {'id': '47ba822f-99a2-40c6-a69e-6b12ffddd6b6'}, 'consumptionDetails': {'type': 'RunInput', 'inputName': 'prepped_data', 'mechanism': 'Direct'}}],
 'outputDatasets': [],
 'runDefinition': {'script': 'sekurit_training.py',
  'command': '',
  'useAbsolutePath': False,
  'arguments': ['--training-data',
   'DatasetConsumptionConfig:prepped_data',
   '--learning_rate',
   '0.3',
   '--n_estimators',
   '150'],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'com

In [23]:
# Get the best run, and its metrics and arguments
best_run = run.get_best_run_by_primary_metric()
best_run_metrics = best_run.get_metrics()
script_arguments = best_run.get_details() ['runDefinition']['arguments']

# Register model
best_run.register_model(model_path='outputs/sekurit_hyper_model.pkl', model_name='sekurit_hyper_model', ## Note, use the model path given earlier in training script
                        tags={'Training context':'Hyperdrive'},
                        properties={'r2': best_run_metrics['r2_score']})

Model(workspace=Workspace.create(name='aml_ws', subscription_id='8f35cf98-68ff-457e-b1b3-e05921a0fd46', resource_group='rg-lr-dp100'), name=sekurit_hyper_model, id=sekurit_hyper_model:1, version=1, tags={'Training context': 'Hyperdrive'}, properties={'r2': '0.015308570482805206'})

In [24]:
from azureml.core import Model

for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name in model.tags:
        tag = model.tags[tag_name]
        print ('\t',tag_name, ':', tag)
    for prop_name in model.properties:
        prop = model.properties[prop_name]
        print ('\t',prop_name, ':', prop)
    print('\n')

sekurit_hyper_model version: 1
	 Training context : Hyperdrive
	 r2 : 0.015308570482805206


sekurit_automl_model version: 4


sekurit_automl_model version: 3


sekurit_automl_model version: 2


sekurit_automl_model version: 1


sekurit_model version: 7
	 Training context : Pipeline
	 Accuracy : 0.5057803468208093


sekurit_model_initial version: 2


AutoML0b24ea5a20 version: 1


amlstudio-test-deploy-v2 version: 1
	 CreatedByAMLStudio : true


amlstudio-test-endpoint-lr version: 1
	 CreatedByAMLStudio : true


diabetes_model version: 11
	 Training context : Inline Training
	 AUC : 0.8832778417290374
	 Accuracy : 0.8991111111111111


diabetes_mitigated_19 version: 1


diabetes_mitigated_18 version: 1


diabetes_mitigated_17 version: 1


diabetes_mitigated_16 version: 1


diabetes_mitigated_15 version: 1


diabetes_mitigated_14 version: 1


diabetes_mitigated_13 version: 1


diabetes_mitigated_12 version: 1


diabetes_mitigated_11 version: 1


diabetes_mitigated_10 version: 1


diabetes

In [23]:
# Publish the pipeline from the run

published_pipeline = pipeline_run.publish_pipeline(
    name="azml-sekurit-pipeline", description="Trains and registers a logistic regression on sekurit data", version="1.0")

published_pipeline

Name,Id,Status,Endpoint
azml-sekurit-pipeline,4378c267-0738-4788-a7a3-79e6886e88d1,Active,REST Endpoint


In [24]:
rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)

https://southeastasia.api.azureml.ms/pipelines/v1.0/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourceGroups/rg-lr-dp100/providers/Microsoft.MachineLearningServices/workspaces/aml_ws/PipelineRuns/PipelineSubmit/4378c267-0738-4788-a7a3-79e6886e88d1


In [None]:
## Rerun pipeline from published endpoint - Triggers a fresh pipeline but it will be faster!

In [6]:
from azureml.core.authentication import InteractiveLoginAuthentication

interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()
print("Authentication header ready.")

Authentication header ready.


In [None]:
import requests

experiment_name = 'azml-sekurit-pipeline'

rest_endpoint = published_pipeline.endpoint
response = requests.post(rest_endpoint, 
                         headers=auth_header, 
                         json={"ExperimentName": experiment_name})
run_id = response.json()["Id"]
run_id

In [None]:
from azureml.pipeline.core.run import PipelineRun

published_pipeline_run = PipelineRun(ws.experiments[experiment_name], run_id)
published_pipeline_run.wait_for_completion(show_output=True)

## Lets deploy for real time inferencing

In [25]:
model = ws.models['sekurit_hyper_model']
print(model.name, 'version', model.version)

sekurit_hyper_model version 1


In [29]:
import os
# Create a folder for the inference config files
deployment_folder = 'deployment_hyper'
os.makedirs(deployment_folder, exist_ok=True)

print(deployment_folder)

deployment_hyper


In [35]:
%%writefile $deployment_folder/score.py

import json
import numpy as np
import pandas as pd
import joblib
from azureml.core.model import Model
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.schema_decorators import input_schema, output_schema


input_sample = [{
    "Model": "Grand Punto",
    "Length": 4000,
    "Type":"Compact-Regular",
    "Style":"Hatchback",
    "OEM":"Fiat",
    "Engine Disp":1.4,
    "Age of OEM":8,
    "Mileage":16,
    "Oil Price":102.1,
    "Petrol":"Y", 
    "Automatic":"N",
    "Price":5
  }]

output_sample = [[1207]]



def init():
    global model
    model_path = Model.get_model_path("sekurit_hyper_model")
    # deserialize the model file back into a sklearn model
    model = joblib.load(model_path)



# Inference_schema generates a schema for your web service
# It then creates an OpenAPI (Swagger) specification for the web service
# at http://<scoring_base_url>/swagger.json

@input_schema('data', StandardPythonParameterType(input_sample))
@output_schema(StandardPythonParameterType(output_sample))

def run(data):
    try:
        df = pd.DataFrame(data)
        
        pred = model.predict(df)
        result = {"predict":pred.tolist()}
        return result
    except Exception as e:
        error = str(e)
        return error

 

Overwriting deployment_hyper/score.py


In [36]:
import os
# Create a folder for the inference config files
config_folder = 'config_hyperml'
os.makedirs(config_folder, exist_ok=True)

print(config_folder)

config_hyperml


In [37]:
%%writefile $config_folder/inference_env.yml

name: experiment_env
dependencies:
- python=3.6.2
- scikit-learn
- ipykernel
- matplotlib
- pandas
- pip
- pip:
  - azureml-defaults
  - azureml-sdk[automl]
  - pyarrow

Overwriting config_hyperml/inference_env.yml


In [38]:
# Define deployment setup
from azureml.core import Environment
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
import json
import numpy as np
import pandas as pd
import joblib
from azureml.core.model import Model
    
# Conda enviroment (if we want to use additional Python packages)
env = Environment.from_conda_specification("inference-env", config_folder + "/inference_env.yml")

inference_config = InferenceConfig(entry_script= deployment_folder +'/score.py', environment=env)

# Define how our webservice should look like (resources, security, etc.)
deployment_config = AciWebservice.deploy_configuration(cpu_cores=1, memory_gb=1)

In [40]:
service = Model.deploy(ws, "sekurit-hyper-service", [model], inference_config, deployment_config)
service.wait_for_deployment(show_output=True)
print(service.state)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2022-03-03 17:51:47+00:00 Creating Container Registry if not exists.
2022-03-03 17:51:47+00:00 Registering the environment.
2022-03-03 17:51:48+00:00 Use the existing image.
2022-03-03 17:51:48+00:00 Generating deployment configuration.
2022-03-03 17:51:49+00:00 Submitting deployment to compute.
2022-03-03 17:51:55+00:00 Checking the status of deployment sekurit-hyper-service.
Succeeded
ACI service creation operation finished, operation "Succeeded"
Healthy


In [42]:
import requests
import json

url = service.scoring_uri

# test_data = {
#   'data': [{
#     "Model": "Grand Punto",
#     "Length": 4000,
#     "Type":"Compact-Regular",
#     "Style":"Hatchback",
#     "OEM":"Fiat",
    
#     "Engine Disp":1.4,
#     "Age of OEM":8,  
#     "Mileage":16,
#     "Oil Price":102.1,
#     "Petrol":"Y", 
#     "Automatic":"N",
#     "Price":5
#   }]
# }


test_data = {
  'data': [{
    "Model": 0,
    "Length": 4000,
    "Type":0,
    "Style":0,
    "OEM":0,
    "Engine Disp":1.4,
    "Age of OEM":8,  
    "Mileage":16,
    "Oil Price":102.1,
    "Petrol":0, 
    "Automatic":0,
    "Price":5
  }]
}


headers = {'Content-Type':'application/json'}
resp = requests.post(url, json=test_data, headers=headers)

print("Prediction:", resp.text)

Prediction: {"predict": [-537.4727213239458]}


## Batch processing

In [2]:
import azureml.core
from azureml.core import Workspace

# Load the workspace from the saved config file
ws = Workspace.from_config()
print('Ready to use Azure ML {} to work with {}'.format(azureml.core.VERSION, ws.name))

Ready to use Azure ML 1.38.0 to work with aml_ws


In [3]:
from azureml.core import Datastore, Dataset
import pandas as pd
import os

default_ds = ws.get_default_datastore()

# Enumerate all datastores, indicating which is the default
for ds_name in ws.datastores:
    print(ds_name, "- Default =", ds_name == default_ds.name)

sekuritdatastore - Default = True
azureml_globaldatasets - Default = False
workspaceworkingdirectory - Default = False
workspacefilestore - Default = False
workspaceartifactstore - Default = False
workspaceblobstore - Default = False


In [9]:
# Load the data
sekurit = Dataset.get_by_name(ws, name='sekuritdataset')
data = sekurit.to_pandas_dataframe()
# Get a 100-item sample of the feature columns 
sample = data[['Model','Length','Type','Style','OEM','Engine Disp','Age of OEM','Year','Mileage','Oil Price','Petrol','Automatic','Price']].sample(n=100).values

# Create a folder
batch_folder = './batch-data'
os.makedirs(batch_folder, exist_ok=True)
print("Folder created!")

# Save each sample as a separate file
print("Saving files...")
for i in range(100):
    fname = str(i+1) + '.csv'
    sample[i].tofile(os.path.join(batch_folder, fname), sep=",")
print("files saved!")

# Upload the files to the default datastore
print("Uploading files to datastore...")
default_ds = ws.get_default_datastore()
default_ds.upload(src_dir="batch-data", target_path="batch-data", overwrite=True, show_progress=True)

# Register a dataset for the input data
batch_data_set = Dataset.File.from_files(path=(default_ds, 'batch-data/'), validate=False)
try:
    batch_data_set = batch_data_set.register(workspace=ws, 
                                             name='batch-data',
                                             description='batch data',
                                             create_new_version=True)
except Exception as ex:
    print(ex)

print("Done!")

Folder created!
Saving files...
files saved!
Uploading files to datastore...


"Datastore.upload" is deprecated after version 1.0.69. Please use "Dataset.File.upload_directory" to upload your files             from a local directory and create FileDataset in single method call. See Dataset API change notice at https://aka.ms/dataset-deprecation.


Uploading an estimated of 100 files
Uploading batch-data/1.csv
Uploaded batch-data/1.csv, 1 files out of an estimated total of 100
Uploading batch-data/10.csv
Uploaded batch-data/10.csv, 2 files out of an estimated total of 100
Uploading batch-data/100.csv
Uploaded batch-data/100.csv, 3 files out of an estimated total of 100
Uploading batch-data/11.csv
Uploaded batch-data/11.csv, 4 files out of an estimated total of 100
Uploading batch-data/12.csv
Uploaded batch-data/12.csv, 5 files out of an estimated total of 100
Uploading batch-data/13.csv
Uploaded batch-data/13.csv, 6 files out of an estimated total of 100
Uploading batch-data/14.csv
Uploaded batch-data/14.csv, 7 files out of an estimated total of 100
Uploading batch-data/15.csv
Uploaded batch-data/15.csv, 8 files out of an estimated total of 100
Uploading batch-data/16.csv
Uploaded batch-data/16.csv, 9 files out of an estimated total of 100
Uploading batch-data/17.csv
Uploaded batch-data/17.csv, 10 files out of an estimated total 

Done!


In [10]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

cluster_name = "computeclusterlr"

try:
    # Check for existing compute target
    pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    # If it doesn't already exist, create it
    try:
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS11_V2', max_nodes=2)
        pipeline_cluster = ComputeTarget.create(ws, cluster_name, compute_config)
        pipeline_cluster.wait_for_completion(show_output=True)
    except Exception as ex:
        print(ex)

Found existing cluster, use it.


In [11]:
import os
# Create a folder for the experiment files
experiment_folder = 'batch_pipeline'
os.makedirs(experiment_folder, exist_ok=True)

print(experiment_folder)

batch_pipeline


In [18]:
%%writefile $experiment_folder/batch_sekurit.py
import os
import numpy as np
from azureml.core import Model
import joblib


def init():
    # Runs when the pipeline step is initialized
    global model

    # load the model
    model_path = Model.get_model_path('sekurit_model')
    model = joblib.load(model_path)


def run(mini_batch):
    # This runs for each batch
    resultList = []

    # process each file in the batch
    for f in mini_batch:
        # Read the comma-delimited data into an array
        data = np.genfromtxt(f, delimiter=',')
        # Reshape into a 2-dimensional array for prediction (model expects multiple items)
        prediction = model.predict(data.reshape(1, -1))
        # Append prediction to results
        resultList.append("{}: {}".format(os.path.basename(f), prediction[0]))
    return resultList

Writing batch_pipeline/batch_sekurit.py


In [15]:
%%writefile $experiment_folder/batch_env.yml

name: experiment_env
dependencies:
- python=3.6.2
- scikit-learn
- ipykernel
- matplotlib
- pandas
- pip
- pip:
  - azureml-defaults
  - pyarrow

Writing batch_pipeline/batch_env.yml


In [16]:
from azureml.core import Environment
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

# Create an Environment for the experiment
batch_env = Environment.from_conda_specification("experiment_env", experiment_folder + "/batch_env.yml")
batch_env.docker.base_image = DEFAULT_CPU_IMAGE
print('Configuration ready.')

Configuration ready.


In [19]:
from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep
from azureml.data import OutputFileDatasetConfig

output_dir = OutputFileDatasetConfig(name='inferences')

parallel_run_config = ParallelRunConfig(
    source_directory=experiment_folder,
    entry_script="batch_sekurit.py",
    mini_batch_size="5",
    error_threshold=10,
    output_action="append_row",
    environment=batch_env,
    compute_target=pipeline_cluster,
    node_count=2)

parallelrun_step = ParallelRunStep(
    name='batch-score-sekurit',
    parallel_run_config=parallel_run_config,
    inputs=[batch_data_set.as_named_input('sekurit_batch')],
    output=output_dir,
    arguments=[],
    allow_reuse=True
)

print('Steps defined')

Steps defined


In [20]:
from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

pipeline = Pipeline(workspace=ws, steps=[parallelrun_step])
pipeline_run = Experiment(ws, 'sekurit-batch').submit(pipeline)
pipeline_run.wait_for_completion(show_output=True)

Created step batch-score-sekurit [a68d958d][169b673a-52c7-4dbe-b358-e72e5ea0d9a3], (This step will run and generate new outputs)
Submitted PipelineRun 8601ef27-fb7d-4aa7-8249-1d3e4e271614
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/8601ef27-fb7d-4aa7-8249-1d3e4e271614?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/workspaces/aml_ws&tid=e339bd4b-2e3b-4035-a452-2112d502f2ff
PipelineRunId: 8601ef27-fb7d-4aa7-8249-1d3e4e271614
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/8601ef27-fb7d-4aa7-8249-1d3e4e271614?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/workspaces/aml_ws&tid=e339bd4b-2e3b-4035-a452-2112d502f2ff
PipelineRun Status: Running


StepRunId: 5e5e0bc7-d9a7-4195-92f0-6be91ba6ce95
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/5e5e0bc7-d9a7-4195-92f0-6be91ba6ce95?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/worksp


Streaming azureml-logs/65_job_prep-tvmps_70e4add24c65bf530f9c4ed9ed5a8cf9b15facfc5b2124ce2243c82c68897141_d.txt
[2022-02-28T19:41:05.120873] Entering job preparation.
[2022-02-28T19:41:05.837097] Starting job preparation.
[2022-02-28T19:41:05.837145] Extracting the control code.
[2022-02-28T19:41:05.837499] Starting extract_project.
[2022-02-28T19:41:05.837549] Starting to extract zip file.
[2022-02-28T19:41:05.856837] Finished extracting zip file.
[2022-02-28T19:41:05.861179] Using urllib.request Python 3.0 or later
[2022-02-28T19:41:05.861347] Start fetching snapshots.
[2022-02-28T19:41:05.861516] Start fetching snapshot.
Starting the daemon thread to refresh tokens in background for process with pid = 57
[2022-02-28T19:41:06.221422] Finished fetching snapshot.
[2022-02-28T19:41:06.221468] Start fetching snapshot.
[2022-02-28T19:41:13.573813] Finished fetching snapshot.
[2022-02-28T19:41:13.573862] Finished fetching snapshots.
[2022-02-28T19:41:13.573877] Finished extract_project.
[



[2022-02-28T19:43:33.301595] The experiment failed. Finalizing run...
Cleaning up all outstanding Run operations, waiting 900.0 seconds
3 items cleaning up...
Cleanup took 0.24070072174072266 seconds
azureml_common.parallel_run.exception_info.Exception: Run failed. Below is the error detail:
EntryScriptException: Entry script error. The number of failed items is 100, which exceeds error threshold 10.
The run() function in the entry script had raised exception for 60 times. Please check logs at logs/user/error/* for details.
  * Error 'Input contains NaN, infinity or a value too large for dtype('float64').' occurred 60 times.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "driver/amlbi_main.py", line 174, in <module>
    main()
  File "driver/amlbi_main.py", line 123, in main
    boot(driver_dir)
  File "driver/amlbi_main.py", line 58, in boot
    booter.start()
  File "driver/azureml_user/parallel_run/boot.py", line 37

ActivityFailedException: ActivityFailedException:
	Message: Activity Failed:
{
    "error": {
        "code": "UserError",
        "message": "User program failed with Exception: Run failed, please check logs for details. You can check logs/readme.txt for the layout of logs.",
        "messageParameters": {},
        "detailsUri": "https://aka.ms/azureml-run-troubleshooting",
        "details": []
    },
    "time": "0001-01-01T00:00:00.000Z"
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Activity Failed:\n{\n    \"error\": {\n        \"code\": \"UserError\",\n        \"message\": \"User program failed with Exception: Run failed, please check logs for details. You can check logs/readme.txt for the layout of logs.\",\n        \"messageParameters\": {},\n        \"detailsUri\": \"https://aka.ms/azureml-run-troubleshooting\",\n        \"details\": []\n    },\n    \"time\": \"0001-01-01T00:00:00.000Z\"\n}"
    }
}

In [22]:
pipeline_run.get_details()

{'runId': '8601ef27-fb7d-4aa7-8249-1d3e4e271614',
 'status': 'Failed',
 'startTimeUtc': '2022-02-28T19:36:18.677084Z',
 'endTimeUtc': '2022-02-28T19:43:57.832064Z',
 'services': {},
 'properties': {'azureml.runsource': 'azureml.PipelineRun',
  'runSource': 'SDK',
  'runType': 'SDK',
  'azureml.parameters': '{}',
  'azureml.continue_on_step_failure': 'False',
  'azureml.pipelineComponent': 'pipelinerun'},
 'inputDatasets': [],
 'outputDatasets': [],
 'logFiles': {'logs/azureml/executionlogs.txt': 'https://amlws8080781874.blob.core.windows.net/azureml/ExperimentRun/dcid.8601ef27-fb7d-4aa7-8249-1d3e4e271614/logs/azureml/executionlogs.txt?sv=2019-07-07&sr=b&sig=S2w%2Bi4l6xg%2BioAkTV3AMrWAbUXIdUJ6EHxs%2FJfca4n0%3D&skoid=4d39dcde-4abc-4c80-95b6-29e56284a6f3&sktid=e339bd4b-2e3b-4035-a452-2112d502f2ff&skt=2022-02-28T14%3A18%3A45Z&ske=2022-03-01T22%3A28%3A45Z&sks=b&skv=2019-07-07&st=2022-02-28T19%3A45%3A13Z&se=2022-03-01T03%3A55%3A13Z&sp=r',
  'logs/azureml/stderrlogs.txt': 'https://amlws808078

In [None]:
## AUTOML

In [5]:
from azureml.core import Datastore
from azureml.core.compute import AmlCompute, ComputeTarget

from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

compute_name = 'computeclusterlr'
training_cluster = ComputeTarget(workspace=ws, name=compute_name)

aml_run_config = RunConfiguration()
# Use just-specified compute target ("cpu-cluster")
aml_run_config.target = training_cluster

# Specify CondaDependencies obj, add necessary packages
aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['pandas','scikit-learn'], 
    pip_packages=['azureml-sdk[automl]', 'pyarrow'])

In [6]:
from azureml.core import Dataset

dataset = Dataset.get_by_name(ws, name='sekuritdataset')
data = dataset.to_pandas_dataframe()

In [7]:
data.head()

Unnamed: 0,Model,Length,Type,Style,OEM,Engine Disp,Age of OEM,Year,Mileage,Oil Price,Petrol,Automatic,Price,Sales,Column17
0,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Jan-14,16,102.1,True,False,5.0,1207,
1,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Feb-14,16,104.83,True,False,5.0,882,
2,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Mar-14,16,104.04,True,False,5.0,839,
3,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,Apr-14,16,104.87,True,False,5.0,547,
4,Grand Punto,4000,Compact-Regular,Hatchback,Fiat,1.4,8,May-14,16,105.71,True,False,5.0,571,


In [8]:
import os
# Create a folder for the pipeline step files
experiment_folder = 'automl_sekurit_pipeline-v4'
os.makedirs(experiment_folder, exist_ok=True)

print(experiment_folder)

automl_sekurit_pipeline-v4


In [26]:
%%writefile $experiment_folder/dataprep.py

# Import libraries
import os
import argparse
import pandas as pd
from azureml.core import Run
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Get parameters
parser = argparse.ArgumentParser()
parser.add_argument("--input-data", type=str, dest='raw_dataset_id', help='raw dataset')
parser.add_argument('--prepped-data', type=str, dest='prepped_data', default='prepped_data', help='Folder for results')
args = parser.parse_args()
save_folder = args.prepped_data

run = Run.get_context()

# load the data (passed as an input dataset)
print("Loading Data...")
data= run.input_datasets['raw_data'].to_pandas_dataframe()

# Drop unnecessary columns and other cleaning steps
data=data.drop(['Column17','Year'],axis=1)

data['Sales']=pd.to_numeric(data['Sales'],errors='coerce')
data= data[data['Sales'].notna()]
data = data.drop(data[data.Sales < 0].index)

# Label Encoding
list_of_columns = ['Model','Type','Style','OEM']
data[list_of_columns] = data[list_of_columns].apply(lambda col:pd.Categorical(col).codes)

# Boolean Encoding
data[["Petrol", "Automatic"]] *= 1

# Normalize the numeric columns
scaler = MinMaxScaler()
num_cols = ['Length','Engine Disp','Age of OEM','Mileage','Oil Price','Price']
data[num_cols] = scaler.fit_transform(data[num_cols])

# Log processed rows
row_count = (len(data))
run.log('processed_rows', row_count)


# Save the prepped data  **** AutoML is automatically taking csv files as strings. Best to convert to Parquet
print("Saving Data...")
os.makedirs(save_folder, exist_ok=True)
save_path = os.path.join(save_folder,'prepped_data.parquet')
data.to_parquet(save_path)



Overwriting sekurit_pipeline/dataprep.py


In [27]:
from azureml.data import OutputFileDatasetConfig
from azureml.pipeline.steps import PythonScriptStep


# Get the training dataset
sekurit_ds = ws.datasets.get("sekuritdataset")

# Create an OutputFileDatasetConfig (temporary Data Reference) for data passed from step 1 to step 2
prepped_data = OutputFileDatasetConfig("prepped_data").read_parquet_files()

# Step 1, Run the data prep script
prep_step = PythonScriptStep(name = "Prepare Data",
                                source_directory = experiment_folder,
                                script_name = "dataprep.py",
                                arguments = ['--input-data', sekurit_ds.as_named_input('raw_data'),
                                             '--prepped-data', prepped_data],
                                compute_target = training_cluster,
                                runconfig = aml_run_config,
                                allow_reuse = True)

NameError: name 'training_cluster' is not defined

In [36]:
#prepped_data = prepped_data.read_delimited_files()   ### You don't need this if reading from Parquet. For CSV you need this

In [11]:
import azureml.train.automl.utilities as automl_utils

for metric in automl_utils.get_primary_metrics('regression'):
    print(metric)

spearman_correlation
r2_score
normalized_mean_absolute_error
normalized_root_mean_squared_error


In [12]:
from azureml.pipeline.core import TrainingOutput, PipelineData

metrics_data = PipelineData(name='metrics_data',
                            datastore=default_ds,
                            pipeline_output_name='metrics_output',
                            training_output=TrainingOutput(type='Metrics'))

model_data = PipelineData(name='best_model_data',
                          datastore=default_ds,
                          pipeline_output_name='model_output',
                          training_output=TrainingOutput(type='Model'))

In [13]:
from azureml.train.automl import AutoMLConfig
from azureml.pipeline.steps import AutoMLStep


import logging

automl_settings = {
    "iteration_timeout_minutes": 10,
    "experiment_timeout_hours": 0.25,
    "enable_early_stopping": True,
    "primary_metric": 'normalized_root_mean_squared_error',
    "featurization": 'auto',
    "verbosity": logging.INFO,
    "n_cross_validations": 2
}

automl_config = AutoMLConfig(name='Automated ML Experiment',
                             task='regression',
                             debug_log='automated_ml_errors.log',
                             compute_target=training_cluster,
                             training_data = prepped_data,
                             label_column_name="Sales",
                             **automl_settings)

train_step = AutoMLStep(name='AutoML_Regression',
    automl_config=automl_config,
    passthru_automl_config=False,
    outputs=[metrics_data,model_data],
    enable_default_model_output=False,
    enable_default_metrics_output=False,
    allow_reuse=True)


print("Ready for Auto ML run.")

Ready for Auto ML run.


In [10]:
%%writefile $experiment_folder/register_model.py
from azureml.core.model import Model, Dataset
from azureml.core.run import Run, _OfflineRun
from azureml.core import Workspace
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--model_name", required=True)
parser.add_argument("--model_path", required=True)
args = parser.parse_args()

print(f"model_name : {args.model_name}")
print(f"model_path: {args.model_path}")

run = Run.get_context()
ws = Workspace.from_config() if type(run) == _OfflineRun else run.experiment.workspace

model = Model.register(workspace=ws,
                       model_path=args.model_path,
                       model_name=args.model_name)

print("Registered version {0} of model {1}".format(model.version, model.name))

Writing $experiment_folder/register_model.py


FileNotFoundError: [Errno 2] No such file or directory: '$experiment_folder/register_model.py'

In [15]:
from azureml.pipeline.core.graph import PipelineParameter

# The model name with which to register the trained model in the workspace.
model_name = PipelineParameter("model_name", default_value="sekurit_model_v4")

register_step = PythonScriptStep(script_name="register_model.py",
                                 source_directory = experiment_folder,
                                       name="register_model",
                                       allow_reuse=False,
                                       arguments=["--model_name", model_name, "--model_path", model_data],
                                       inputs=[model_data],
                                       compute_target=training_cluster,
                                       runconfig=aml_run_config)

In [16]:
from azureml.pipeline.core import Pipeline
from azureml.core import Experiment
from azureml.widgets import RunDetails

experiment = Experiment(workspace=ws, name='automl_sekurit_pipeline_v4')

# Construct the pipeline
pipeline_steps = [prep_step, train_step, register_step]
pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
print("Pipeline is built.")

# Create an experiment and run the pipeline
pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
print("Pipeline submitted for execution.")

RunDetails(pipeline_run).show()
pipeline_run.wait_for_completion(show_output=True)


Pipeline is built.
Created step Prepare Data [fbd7120a][67ff3b5f-27f4-4b77-952c-dd8baf0ccd93], (This step will run and generate new outputs)Created step AutoML_Regression [431cd644][47a24634-fc8a-4fed-af9b-dc7bb0e6459d], (This step will run and generate new outputs)
Created step register_model [c9e4e482][42c9db6c-a00e-49c2-944a-f0c7973db919], (This step will run and generate new outputs)

Submitted PipelineRun 9c29cc9e-7050-4749-af14-ed125a9373c1
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/9c29cc9e-7050-4749-af14-ed125a9373c1?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/workspaces/aml_ws&tid=e339bd4b-2e3b-4035-a452-2112d502f2ff
Pipeline submitted for execution.


_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

PipelineRunId: 9c29cc9e-7050-4749-af14-ed125a9373c1
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/9c29cc9e-7050-4749-af14-ed125a9373c1?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/workspaces/aml_ws&tid=e339bd4b-2e3b-4035-a452-2112d502f2ff
PipelineRun Status: Running


StepRunId: ae1c9977-3df9-42d6-8f38-63934b887388
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/ae1c9977-3df9-42d6-8f38-63934b887388?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/workspaces/aml_ws&tid=e339bd4b-2e3b-4035-a452-2112d502f2ff
StepRun( Prepare Data ) Status: NotStarted
StepRun( Prepare Data ) Status: Running

StepRun(Prepare Data) Execution Summary
StepRun( Prepare Data ) Status: Finished
{'runId': 'ae1c9977-3df9-42d6-8f38-63934b887388', 'target': 'computeclusterlr', 'status': 'Completed', 'startTimeUtc': '2022-02-23T19:30:56.078463Z', 'endTimeUtc': '2022-02-23T19:32:40.865188Z', 'services': {}, 'pro




StepRunId: 60c326d6-9daa-4c00-96bd-4dbd0a9368a6
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/60c326d6-9daa-4c00-96bd-4dbd0a9368a6?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/workspaces/aml_ws&tid=e339bd4b-2e3b-4035-a452-2112d502f2ff
StepRun( AutoML_Regression ) Status: Running

StepRun(AutoML_Regression) Execution Summary
StepRun( AutoML_Regression ) Status: Finished

Experiment timeout reached, hence experiment stopped. Current experiment timeout: 0 hour(s) 15 minute(s)






StepRunId: 80e3180f-800c-442c-b07e-2a38912cabb1
Link to Azure Machine Learning Portal: https://ml.azure.com/runs/80e3180f-800c-442c-b07e-2a38912cabb1?wsid=/subscriptions/8f35cf98-68ff-457e-b1b3-e05921a0fd46/resourcegroups/rg-lr-dp100/workspaces/aml_ws&tid=e339bd4b-2e3b-4035-a452-2112d502f2ff
StepRun( register_model ) Status: Running

StepRun(register_model) Execution Summary
StepRun( register_model ) Status: Finished
{'runId': '80e3180f-800c-442c-b07e-2a38912cabb1', 'target': 'computeclusterlr', 'status': 'Completed', 'startTimeUtc': '2022-02-23T19:58:33.284063Z', 'endTimeUtc': '2022-02-23T19:58:43.309692Z', 'services': {}, 'properties': {'ContentSnapshotId': '83941250-e2e4-47b2-894e-5dc296355ad5', 'StepType': 'PythonScriptStep', 'ComputeTargetType': 'AmlCompute', 'azureml.moduleid': '42c9db6c-a00e-49c2-944a-f0c7973db919', 'azureml.moduleName': 'register_model', 'azureml.runsource': 'azureml.StepRun', 'azureml.nodeid': 'c9e4e482', 'azureml.pipelinerunid': '9c29cc9e-7050-4749-af14-ed

'Finished'

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [18]:
## Get the run ID from pipeline run

from azureml.train.automl.run import AutoMLRun
#from azureml.widgets import RunDetails

# workaround to get the automl run as its the last step in the pipeline 
# and get_steps() returns the steps from latest to first

for step in pipeline_run.get_steps():
    automl_step_run_id = step.id
    print(step.name)
    print(automl_step_run_id)
    break

automl_run = AutoMLRun(experiment = experiment, run_id=automl_step_run_id)
RunDetails(automl_run).show()

register_model
80e3180f-800c-442c-b07e-2a38912cabb1


_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [19]:
for run in pipeline_run.get_children():
    print('Run ID', run.id)
    for metric in run.get_metrics():
        print('\t', run.get_metrics(metric))

Run ID 80e3180f-800c-442c-b07e-2a38912cabb1
Run ID 60c326d6-9daa-4c00-96bd-4dbd0a9368a6
	 {'experiment_status': ['DatasetEvaluation', 'FeaturesGeneration', 'DatasetFeaturization', 'DatasetFeaturizationCompleted', 'DatasetCrossValidationSplit', 'ModelSelection', 'BestRunExplainModel', 'ModelExplanationDataSetSetup', 'PickSurrogateModel', 'EngineeredFeatureExplanations', 'EngineeredFeatureExplanations', 'RawFeaturesExplanations', 'RawFeaturesExplanations', 'BestRunExplainModel']}
	 {'experiment_status_description': ['Gathering dataset statistics.', 'Generating features for the dataset.', 'Beginning to fit featurizers and featurize the dataset.', 'Completed fit featurizers and featurizing the dataset.', 'Generating individually featurized CV splits.', 'Beginning model selection.', 'Best run model explanations started', 'Model explanations data setup completed', 'Choosing LightGBM as the surrogate model for explanations', 'Computation of engineered features started', 'Computation of engine

In [11]:
from azureml.core import Model


# List registered models
for model in Model.list(ws):
    print(model.name, 'version:', model.version)
    for tag_name in model.tags:
        tag = model.tags[tag_name]
        print ('\t',tag_name, ':', tag)
    for prop_name in model.properties:
        prop = model.properties[prop_name]
        print ('\t',prop_name, ':', prop)
    print('\n')

sekurit_model_v4 version: 2


sekurit_model_v2 version: 2


sekurit_model_v2 version: 1


sekurit_model_initial version: 2


sekurit_model version: 5
	 Training context : Auto ML in pipeline
	 AUC : 0.014550774687038625


AutoML0b24ea5a20 version: 1


amlstudio-test-deploy-v2 version: 1
	 CreatedByAMLStudio : true


amlstudio-test-endpoint-lr version: 1
	 CreatedByAMLStudio : true


diabetes_model version: 11
	 Training context : Inline Training
	 AUC : 0.8832778417290374
	 Accuracy : 0.8991111111111111


diabetes_mitigated_20 version: 1


diabetes_mitigated_19 version: 1


diabetes_mitigated_18 version: 1


diabetes_mitigated_17 version: 1


diabetes_mitigated_16 version: 1


diabetes_mitigated_15 version: 1


diabetes_mitigated_14 version: 1


diabetes_mitigated_13 version: 1


diabetes_mitigated_12 version: 1


diabetes_mitigated_11 version: 1


diabetes_mitigated_10 version: 1


diabetes_mitigated_9 version: 1


diabetes_mitigated_8 version: 1


diabetes_mitigated_7 version: 1


di

In [24]:
# Publish the pipeline from the run

published_pipeline = pipeline_run.publish_pipeline(
    name="automl-sekurit-pipeline-v4", description="Trains sekurit model in pipeline", version="1.0")

published_pipeline

Name,Id,Status,Endpoint
automl-sekurit-pipeline-v4,55ec1f8e-8c06-49b9-b8a1-a8ff704bf5fb,Active,REST Endpoint


In [37]:
model = ws.models['sekurit_model']
print(model.name, 'version', model.version)

sekurit_model version 7


In [31]:
import json
import numpy as np
import pandas as pd
import joblib
from azureml.core.model import Model
from inference_schema.parameter_types.standard_py_parameter_type import StandardPythonParameterType
from inference_schema.schema_decorators import input_schema, output_schema

In [38]:
import json
import joblib
import numpy as np
import pandas as pd
from azureml.core import Model

 # Get the path to the deployed model file and load it
model_path = Model.get_model_path("sekurit_model")
model = joblib.load(model_path)


raw_data = '{"data":[["Grand Punto", 4000, "Compact-Regular", "Hatchback","Fiat", 1.4, 8, 16, 105.71, "Y", "N", 5]]}'

data = json.loads(raw_data)["data"]
data = np.array(data)

ModelNotFoundException: ModelNotFoundException:
	Message: Model sekurit_model.pkl not found in cache at azureml-models or in current working directory /mnt/batch/tasks/shared/LS_root/mounts/clusters/aml-comp-lr/code. For more info, set logging level to DEBUG.
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Model sekurit_model.pkl not found in cache at azureml-models or in current working directory /mnt/batch/tasks/shared/LS_root/mounts/clusters/aml-comp-lr/code. For more info, set logging level to DEBUG."
    }
}

In [19]:
request_headers = {}

result = model.predict(data)
print("Test result: ", {"result": result.tolist()})

AttributeError: 'Model' object has no attribute 'predict'