# Titanic Survival Prediction Project using Azure AutoML

In [28]:
#Import libraries needed
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from azureml.data.dataset_factory import TabularDatasetFactory
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.20.0


In [29]:
#Testing the authentication using the Workspace method "from_config"
Workspace.from_config()

Workspace.create(name='quick-starts-ws-139405', subscription_id='976ee174-3882-4721-b90a-b5fef6b72f24', resource_group='aml-quickstarts-139405')

In [30]:
#create workspace
ws = Workspace.from_config()

In [31]:
# Create Experiment

experiment_name = 'Titanic-AutoML-Experiment'
project_folder = './Titanic-project'

experiment = Experiment(ws, experiment_name)
experiment

Name,Workspace,Report Page,Docs Page
Titanic-AutoML-Experiment,quick-starts-ws-139405,Link to Azure Machine Learning studio,Link to Documentation


## Dataset Overview:

As you probably have guessed from the project title we will be working with the "Titanic Dataset" which is already a classical dataset to learn Machine Learning.

The main task for this project will be to build a predictive model that answers the question: “what sorts of people were more likely to survive?” To answer the above stated question we are going to give the model different input variables such as age, type of cabin the passanger had, etc

In [32]:
#Getting the dataset
key = "titanic_dataset"
description_text = "Titanic Dataset for Model Deployment"

dataset = TabularDatasetFactory.from_delimited_files('https://www.openml.org/data/get_csv/16826755/phpMYEkMl')

#Register Dataset and it will store into the Datasets->Registered Dataset
dataset = dataset.register(workspace=ws,
                           name=key,
                           description=description_text)

#Create a df out of the registered dataset
dataset = dataset.to_pandas_dataframe()
dataset.describe()

Unnamed: 0,pclass,survived,sibsp,parch,fare
count,1309.0,1309.0,1309.0,1309.0,1308.0
mean,2.294882,0.381971,0.498854,0.385027,33.295479
std,0.837836,0.486055,1.041658,0.86556,51.758668
min,1.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,0.0,0.0,7.8958
50%,3.0,0.0,0.0,0.0,14.4542
75%,3.0,1.0,1.0,0.0,31.275
max,3.0,1.0,8.0,9.0,512.3292


In [33]:
###Be sure to have the compute target setup
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.core.compute_target import ComputeTargetException


# Choose a name for your CPU cluster
amlcompute_cluster_name = "compute-cluster"

# Verify that cluster does not exist already
try:
    compute_target = ComputeTarget(workspace=ws, name=amlcompute_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_DS12_V2',# for GPU, use "STANDARD_NC6"
                                                           #vm_priority = 'lowpriority', # optional
                                                           #min_nodes=1,
                                                           max_nodes=5)
    compute_target = ComputeTarget.create(ws, amlcompute_cluster_name, compute_config)

compute_target.wait_for_completion(show_output=True)


CreatingAmlCompute is getting created. Consider calling wait_for_completion() first

AmlCompute is getting created. Consider calling wait_for_completion() first


Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


### Clean Data:

In [34]:
#Import your clean data function from the train.py file
from train import clean_data

In [35]:
#apply the function
x, y = clean_data(dataset)

In [36]:
#Scale the features
from sklearn.preprocessing import StandardScaler

In [37]:
# create scaler
variables = x.columns.tolist()

scaler = StandardScaler()
scaler.fit(x[variables]) 

x = scaler.transform(x[variables])

In [38]:
x = pd.DataFrame(x,columns=variables)
x

Unnamed: 0,pclass,age,sibsp,parch,fare,age_NA,fare_NA,sex_male,cabin_Missing,cabin_Rare,embarked_Q,embarked_Rare,embarked_S,title_Mr,title_Mrs,title_Rare
0,-1.546098,-0.039005,-0.479087,-0.445000,3.442584,-0.501432,-0.02765,-1.344995,-1.853992,2.347858,-0.32204,-0.039118,0.657394,-1.172894,-0.425920,-0.274947
1,-1.546098,-2.215952,0.481288,1.866526,2.286639,-0.501432,-0.02765,0.743497,-1.853992,-0.425920,-0.32204,-0.039118,0.657394,-1.172894,-0.425920,3.637068
2,-1.546098,-2.131977,0.481288,1.866526,2.286639,-0.501432,-0.02765,-1.344995,-1.853992,-0.425920,-0.32204,-0.039118,0.657394,-1.172894,-0.425920,-0.274947
3,-1.546098,0.038512,0.481288,1.866526,2.286639,-0.501432,-0.02765,0.743497,-1.853992,-0.425920,-0.32204,-0.039118,0.657394,0.852592,-0.425920,-0.274947
4,-1.546098,-0.349075,0.481288,1.866526,2.286639,-0.501432,-0.02765,-1.344995,-1.853992,-0.425920,-0.32204,-0.039118,0.657394,-1.172894,2.347858,-0.274947
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1304,0.841916,-1.163009,0.481288,-0.445000,-0.364003,-0.501432,-0.02765,-1.344995,0.539377,-0.425920,-0.32204,-0.039118,-1.521159,-1.172894,-0.425920,-0.274947
1305,0.841916,-0.116523,0.481288,-0.445000,-0.364003,1.994288,-0.02765,-1.344995,0.539377,-0.425920,-0.32204,-0.039118,-1.521159,-1.172894,-0.425920,-0.274947
1306,0.841916,-0.232799,-0.479087,-0.445000,-0.503774,-0.501432,-0.02765,0.743497,0.539377,-0.425920,-0.32204,-0.039118,-1.521159,0.852592,-0.425920,-0.274947
1307,0.841916,-0.194040,-0.479087,-0.445000,-0.503774,-0.501432,-0.02765,0.743497,0.539377,-0.425920,-0.32204,-0.039118,-1.521159,0.852592,-0.425920,-0.274947


In [39]:
#Split the data
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x,y,test_size = 0.3,random_state=0)

In [40]:
#bring them together them again
dataset = pd.concat([x_train,y_train],axis=1)

In [41]:
dataset.head(10)

Unnamed: 0,pclass,age,sibsp,parch,fare,age_NA,fare_NA,sex_male,cabin_Missing,cabin_Rare,embarked_Q,embarked_Rare,embarked_S,title_Mr,title_Mrs,title_Rare,survived
501,-0.352091,-1.279285,-0.479087,0.710763,-0.266447,-0.501432,-0.02765,-1.344995,0.539377,-0.42592,-0.32204,-0.039118,0.657394,-1.172894,-0.42592,-0.274947,1
588,-0.352091,-1.976943,0.481288,0.710763,-0.198777,-0.501432,-0.02765,-1.344995,0.539377,-0.42592,-0.32204,-0.039118,0.657394,-1.172894,-0.42592,-0.274947,1
402,-0.352091,0.038512,0.481288,-0.445,-0.375525,-0.501432,-0.02765,-1.344995,0.539377,-0.42592,-0.32204,-0.039118,-1.521159,-1.172894,-0.42592,-0.274947,1
1193,0.841916,-0.116523,-0.479087,-0.445,-0.494107,1.994288,-0.02765,0.743497,0.539377,-0.42592,3.105202,-0.039118,-1.521159,0.852592,-0.42592,-0.274947,0
686,0.841916,-0.581628,-0.479087,-0.445,-0.494107,-0.501432,-0.02765,-1.344995,0.539377,-0.42592,3.105202,-0.039118,-1.521159,-1.172894,-0.42592,-0.274947,1
971,0.841916,-0.116523,-0.479087,-0.445,-0.491126,1.994288,-0.02765,0.743497,0.539377,-0.42592,3.105202,-0.039118,-1.521159,0.852592,-0.42592,-0.274947,0
117,-1.546098,0.038512,-0.479087,-0.445,0.457218,-0.501432,-0.02765,-1.344995,-1.853992,2.347858,-0.32204,-0.039118,-1.521159,-1.172894,-0.42592,-0.274947,1
540,-0.352091,-2.131977,0.481288,0.710763,-0.140774,-0.501432,-0.02765,-1.344995,0.539377,-0.42592,-0.32204,-0.039118,0.657394,-1.172894,-0.42592,-0.274947,1
294,-1.546098,1.511344,0.481288,0.710763,1.500379,-0.501432,-0.02765,0.743497,-1.853992,-0.42592,-0.32204,-0.039118,-1.521159,0.852592,-0.42592,-0.274947,0
261,-1.546098,0.426099,-0.479087,-0.445,-0.135216,-0.501432,-0.02765,0.743497,-1.853992,2.347858,-0.32204,-0.039118,0.657394,0.852592,-0.42592,-0.274947,1


In [42]:
#To train the model we need a TabularDataset and not a dataframe, therefore the current df will be converterd 
#into a TabularDataset:

#Convert the dataframe into a csv
local_path = 'prepared.csv'

#Save it locally
dataset.to_csv(local_path,index=None)

#Generate the a datastore object which is the the default datastore
datastore = ws.get_default_datastore()

In [43]:
#Upload the dataframe which was previosly converted into a csv
datastore.upload(src_dir='.', target_path='data')

Uploading an estimated of 13 files
Target already exists. Skipping upload for data/.amlignore
Target already exists. Skipping upload for data/.amlignore.amltmp
Target already exists. Skipping upload for data/AzureAutoML.ipynb
Target already exists. Skipping upload for data/prepared.csv
Target already exists. Skipping upload for data/pre_preprocess.py
Target already exists. Skipping upload for data/score.py
Target already exists. Skipping upload for data/train.py
Target already exists. Skipping upload for data/.ipynb_checkpoints/AzureAutoML-checkpoint.ipynb
Target already exists. Skipping upload for data/__pycache__/pre_preprocess.cpython-36.pyc
Target already exists. Skipping upload for data/__pycache__/train.cpython-36.pyc
Uploading ./azureml_automl.log
Uploaded ./azureml_automl.log, 1 files out of an estimated total of 10
Uploading ./automl.log
Uploaded ./automl.log, 2 files out of an estimated total of 11
Uploading ./automl_errors.log
Uploaded ./automl_errors.log, 3 files out of an 

$AZUREML_DATAREFERENCE_5510bbb0779349feab64b05b1f556fb8

In [44]:
#For the sake of checking; check the path
datastore.path()

$AZUREML_DATAREFERENCE_workspaceblobstore

In [45]:
#Now the uploaded file will be transformed into a Tabular dataset and store in a varible named 'training_dataset'
training_dataset = Dataset.Tabular.from_delimited_files("https://raw.githubusercontent.com/ddgope/Titanic-Survival-Prediction/master/prepared.csv")

In [46]:
#let's visualize the data:
training_dataset.to_pandas_dataframe().head()

Unnamed: 0,pclass,age,sibsp,parch,fare,age_NA,fare_NA,sex_male,cabin_Missing,cabin_Rare,embarked_Q,embarked_Rare,embarked_S,title_Mr,title_Mrs,title_Rare,survived
0,-0.352091,-0.116523,-0.479087,0.710763,-0.005435,-0.501432,-0.02765,0.743497,0.539377,-0.42592,-0.32204,-0.039118,0.657394,-1.172894,-0.42592,3.637068,0
1,-0.352091,0.038512,-0.479087,-0.445,-0.404686,-0.501432,-0.02765,-1.344995,0.539377,-0.42592,3.105202,-0.039118,-1.521159,-1.172894,-0.42592,-0.274947,1
2,-1.546098,0.581134,0.481288,-0.445,0.383183,-0.501432,-0.02765,0.743497,-1.853992,-0.42592,-0.32204,-0.039118,0.657394,0.852592,-0.42592,-0.274947,0
3,0.841916,-0.116523,-0.479087,-0.445,-0.493624,1.994288,-0.02765,-1.344995,0.539377,-0.42592,3.105202,-0.039118,-1.521159,-1.172894,-0.42592,-0.274947,1
4,0.841916,-0.19404,-0.479087,-0.445,-0.475981,-0.501432,-0.02765,0.743497,0.539377,-0.42592,-0.32204,-0.039118,0.657394,0.852592,-0.42592,-0.274947,0


## AutoML Configuration

Below we will chose the automl settings and cofiguration

In [47]:
#Create the automl settings which will be used as argurments in the automl config
automl_settings = {
    "experiment_timeout_minutes": 60,
    "max_concurrent_iterations": 4,
    "primary_metric" : 'accuracy'
}

#Create the automl_config
automl_config = AutoMLConfig(compute_target=compute_target,
                             task = "classification",
                             training_data=training_dataset,
                             label_column_name="survived",   
                             path = project_folder,
                             enable_early_stopping= True,
                             featurization= 'auto',
                             debug_log = "automl_errors.log",
                             **automl_settings
                            )

In [48]:
#Submitt the experiment
automl_run = experiment.submit(automl_config,show_output=True)

Running on remote.
No run_configuration provided, running on compute-cluster with default configuration
Running on remote compute: compute-cluster
Parent Run ID: AutoML_2367dc6d-0c89-4491-80d3-45662e0cbc66

Current status: FeaturesGeneration. Generating features for the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
DATA GUARDRAILS: 

TYPE:         Cross validation
STATUS:       DONE
DESCRIPTION:  Each iteration of the trained model was validated through cross-validation.
              
DETAILS:      
+---------------------------------+
|Number of folds                  |
|10                               |
+---------------------------------+

****************************************************************************************************

TYPE:         Class balancing detection
STATUS:       PASSED
DESCRIPTION:  Your inputs were analyzed, and all classes are b

In [49]:
#Additional Run Details
from azureml.widgets import RunDetails

RunDetails(automl_run).show()

# wait for completion
automl_run.wait_for_completion()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

{'runId': 'AutoML_2367dc6d-0c89-4491-80d3-45662e0cbc66',
 'target': 'compute-cluster',
 'status': 'Completed',
 'startTimeUtc': '2021-02-24T02:36:07.753471Z',
 'endTimeUtc': '2021-02-24T03:35:27.114463Z',
 'properties': {'num_iterations': '1000',
  'training_type': 'TrainFull',
  'acquisition_function': 'EI',
  'primary_metric': 'accuracy',
  'train_split': '0',
  'acquisition_parameter': '0',
  'num_cross_validation': None,
  'target': 'compute-cluster',
  'DataPrepJsonString': '{\\"training_data\\": \\"{\\\\\\"blocks\\\\\\": [{\\\\\\"id\\\\\\": \\\\\\"2e0b6cb7-2f40-4d67-814f-e25b228af6cb\\\\\\", \\\\\\"type\\\\\\": \\\\\\"Microsoft.DPrep.GetFilesBlock\\\\\\", \\\\\\"arguments\\\\\\": {\\\\\\"isArchive\\\\\\": false, \\\\\\"path\\\\\\": {\\\\\\"target\\\\\\": 4, \\\\\\"resourceDetails\\\\\\": [{\\\\\\"path\\\\\\": \\\\\\"https://raw.githubusercontent.com/ddgope/Titanic-Survival-Prediction/master/prepared.csv\\\\\\"}]}}, \\\\\\"localData\\\\\\": {}, \\\\\\"isEnabled\\\\\\": true, \\\\\

In [50]:
#Get generic outputs from the automl_run
automl_run.get_output()

Package:azureml-automl-runtime, training version:1.22.0, current version:1.20.0
Package:azureml-core, training version:1.22.0, current version:1.20.0
Package:azureml-dataprep, training version:2.9.1, current version:2.7.3
Package:azureml-dataprep-native, training version:29.0.0, current version:27.0.0
Package:azureml-dataprep-rslex, training version:1.7.0, current version:1.5.0
Package:azureml-dataset-runtime, training version:1.22.0, current version:1.20.0
Package:azureml-defaults, training version:1.22.0, current version:1.20.0
Package:azureml-interpret, training version:1.22.0, current version:1.20.0
Package:azureml-pipeline-core, training version:1.22.0, current version:1.20.0
Package:azureml-telemetry, training version:1.22.0, current version:1.20.0
Package:azureml-train-automl-client, training version:1.22.0, current version:1.20.0
Package:azureml-train-automl-runtime, training version:1.22.0, current version:1.20.0


(Run(Experiment: Titanic-AutoML-Experiment,
 Id: AutoML_2367dc6d-0c89-4491-80d3-45662e0cbc66_36,
 Type: azureml.scriptrun,
 Status: Completed),
 Pipeline(memory=None,
          steps=[('datatransformer',
                  DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                  feature_sweeping_config=None,
                                  feature_sweeping_timeout=None,
                                  featurization_config=None, force_text_dnn=None,
                                  is_cross_validation=None,
                                  is_onnx_compatible=None, logger=None,
                                  observer=None, task=None, working_dir=None)),
                 ('prefittedsoftvotingclassifier',...
                                                                                                 min_child_samples=29,
                                                                                                 min_child_weight=4,
  

In [51]:
#Get the best model outputs
best_automl_run, best_model = automl_run.get_output()


# Retrieve the best automl run model
print('Best AutoML run: ', best_automl_run)
print('Best AutoML model :', best_model)

# get best model and display properties
model_name = best_automl_run.properties['model_name']
print('Best_model name: ', model_name)

# display all the properties of the best model
best_automl_run.get_properties()

Package:azureml-automl-runtime, training version:1.22.0, current version:1.20.0
Package:azureml-core, training version:1.22.0, current version:1.20.0
Package:azureml-dataprep, training version:2.9.1, current version:2.7.3
Package:azureml-dataprep-native, training version:29.0.0, current version:27.0.0
Package:azureml-dataprep-rslex, training version:1.7.0, current version:1.5.0
Package:azureml-dataset-runtime, training version:1.22.0, current version:1.20.0
Package:azureml-defaults, training version:1.22.0, current version:1.20.0
Package:azureml-interpret, training version:1.22.0, current version:1.20.0
Package:azureml-pipeline-core, training version:1.22.0, current version:1.20.0
Package:azureml-telemetry, training version:1.22.0, current version:1.20.0
Package:azureml-train-automl-client, training version:1.22.0, current version:1.20.0
Package:azureml-train-automl-runtime, training version:1.22.0, current version:1.20.0


Best AutoML run:  Run(Experiment: Titanic-AutoML-Experiment,
Id: AutoML_2367dc6d-0c89-4491-80d3-45662e0cbc66_36,
Type: azureml.scriptrun,
Status: Completed)
Best AutoML model : Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                                min_child_samples=29,
                                                                                                m

{'runTemplate': 'automl_child',
 'pipeline_id': '__AutoML_Ensemble__',
 'pipeline_spec': '{"pipeline_id":"__AutoML_Ensemble__","objects":[{"module":"azureml.train.automl.ensemble","class_name":"Ensemble","spec_class":"sklearn","param_args":[],"param_kwargs":{"automl_settings":"{\'task_type\':\'classification\',\'primary_metric\':\'accuracy\',\'verbosity\':20,\'ensemble_iterations\':15,\'is_timeseries\':False,\'name\':\'Titanic-AutoML-Experiment\',\'compute_target\':\'compute-cluster\',\'subscription_id\':\'976ee174-3882-4721-b90a-b5fef6b72f24\',\'region\':\'southcentralus\',\'spark_service\':None}","ensemble_run_id":"AutoML_2367dc6d-0c89-4491-80d3-45662e0cbc66_36","experiment_name":"Titanic-AutoML-Experiment","workspace_name":"quick-starts-ws-139405","subscription_id":"976ee174-3882-4721-b90a-b5fef6b72f24","resource_group_name":"aml-quickstarts-139405"}}]}',
 'training_percent': '100',
 'predicted_cost': None,
 'iteration': '36',
 '_aml_system_scenario_identification': 'Remote.Child',


In [53]:
#Get the best model id
print(best_automl_run.id)

AutoML_2367dc6d-0c89-4491-80d3-45662e0cbc66_36


# Save the best AutoML model

In [54]:
import joblib

joblib.dump(best_model, 'best_automl_model.pkl')

['best_automl_model.pkl']

# Azure ML Studio

I will be using the azurenl sdk for Python for depoyment.
Ref: https://docs.microsoft.com/en-us/python/api/overview/azure/ml/?view=azure-ml-py

# Install Required Libraries

In [None]:
#pip install azureml , azureml-core

# Import the libraries

In [55]:
# #Prepare deploying of the model as a web service
# from azureml.core import Workspace
# from azureml.core import Workspace
# from azureml.core.model import Model

from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
#from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

# Create Workspace

In [None]:
# In my case Workspace has alreday been created in top otherwise. 
# Need to create using below command

# ws = Workspace.create(
#                name='myworkspace',            
#                subscription_id='<azure-subscription-id>',           
#                resource_group='myresourcegroup',                 
#                create_resource_group=True,                 
#                location='eastus2'                
#                )

#Once it is created, save the details of the workspace in a config.json 
#file to use the workspace later.
# ws.write_config()

# Create a folder named ‘.azureml’ and save the json file inside it. 
# Make sure you name the folder and json file correctly.
# Python will look for the config.json file inside this folder 
# while loading up your workspace

# You won’t need to create a workspace every time, in the future, 
# you can use the following command to load the workspace
# #ws = Workspace.from_config()

# Register the Model
1. workspace: The workspace object we created
2. model_path: the path to the pickle file
3. model_name: Name of the model on Azure MLS
4. tags: Although not necessary, you can add tags to your model
5. description: A description of your model

In [56]:
# register best AutoML model for future deployment
from azureml.core.model import Model
description = 'AutoML Model trained on the titanic dataset'
tags = {'area': 'data science beginners', 'type': 'classification'}

automl_model = Model.register(workspace =ws,
                              model_name = 'best-titanicMLmodel',
                              model_path = 'best_automl_model.pkl',
                             description = description, tags = tags)

print('AutoML RunID: ', automl_run.id, sep='\t')

Registering model best-titanicMLmodel
AutoML RunID: 	AutoML_2367dc6d-0c89-4491-80d3-45662e0cbc66


# Create an Environment
Environment was alreday created otherwise create using below command

In [None]:
## to install required packages
# env = Environment('env')
# cd = CondaDependencies.create(pip_packages=['pandas==1.1.5', 'azureml-defaults','joblib==0.17.0'], conda_packages = ['scikit-learn==0.23.2'])
# env.python.conda_dependencies = cd
## Register environment to re-use later
# env.register(workspace = ws)
# print("Registered Environment")

In [57]:
#get’s the environment whuich just created or previously created
env = Environment.get(workspace=ws, name='AzureML-AutoML')

In [None]:
#Confirm that all the required libraries have been installed by creating a .yml 
#based on the created environment. Ensure all the libraries are mentioned inside the .yml file

In [58]:
#This will create a new folder called environ with a .yml and a .json file inside it
env.save_to_directory('./environ', overwrite=True)

In [68]:
#Chekc environment dependencies
print("packages", env.python.conda_dependencies.serialize_to_string())

#get the environment Details and stored them into a file:
f = open("env.yml", "w")
f.write(env.python.conda_dependencies.serialize_to_string())
f.close()

packages channels:
- anaconda
- conda-forge
- pytorch
dependencies:
- python=3.6.2
- pip=20.2.4
- pip:
  - azureml-core==1.22.0
  - azureml-pipeline-core==1.22.0
  - azureml-telemetry==1.22.0
  - azureml-defaults==1.22.0
  - azureml-interpret==1.22.0
  - azureml-automl-core==1.22.0
  - azureml-automl-runtime==1.22.0
  - azureml-train-automl-client==1.22.0
  - azureml-train-automl-runtime==1.22.0
  - azureml-dataset-runtime==1.22.0
  - inference-schema
  - py-cpuinfo==5.0.0
  - boto3==1.15.18
  - botocore==1.18.18
- numpy~=1.18.0
- scikit-learn==0.22.1
- pandas~=0.25.0
- py-xgboost<=0.90
- fbprophet==0.5
- holidays==0.9.11
- setuptools-git
- psutil>5.0.0,<6.0.0
name: azureml_09ff55f546b313bb1ab136a466214499



# Config Objects
Create a container instance and set the number of cpu_cores and memory_gb based on your requirements.

In [60]:
aci_config = AciWebservice.deploy_configuration(
            cpu_cores=1,
            memory_gb=4, 
            enable_app_insights=True,
            auth_enabled=True,
            tags={"data":"titanic classifier"},
            description='titanic classification Model',
            )

In [61]:
#Create and InferenceConfig instance to link the environment and entry script. 
inference_config = InferenceConfig(entry_script='score.py', environment=env)

#In my case Entry Script is already create 
#and stored in the path otherwise you can create using below command
#The entry script will two functions, an init function and a run function.
#     Create a global variable called model
#     Load the model using the name of the model you registered earlier
#     Load the model from the path
# def init():
#     global modelmodel_path = Model.get_model_path("knn")
#     print("Model Path is  ", model_path)
#     model = joblib.load(model_path)

# #Save this file.
# def run(data):
#     try:
#        print(data)
#        result = model.predict(data['data']) 
#        return {'data' : result.tolist() , 'message' : "Successfully  classified Titanic"}
#     except Exception as e:
#            error = str(e)
#            return {'data' : error , 'message' : 'Failed to classify Titanic'}

# Deploy the Model

In [62]:
#deploy the model by combining our config objects, workspace and model together. 
service_name = 'titanic-ml-service'

model = Model(ws,name='best-titanicMLmodel')
service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aci_config,
                       overwrite=True)

service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running...........................................
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [63]:
#deployment is successful, the rest endpoint will be printed out.
# If it is unsuccessful, see the deployment logs
service.get_logs()

'2021-02-24T03:40:53,553384900+00:00 - gunicorn/run \n2021-02-24T03:40:53,561901200+00:00 - iot-server/run \n2021-02-24T03:40:53,564371500+00:00 - rsyslog/run \n2021-02-24T03:40:53,605945100+00:00 - nginx/run \n/usr/sbin/nginx: /azureml-envs/azureml_09ff55f546b313bb1ab136a466214499/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_09ff55f546b313bb1ab136a466214499/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_09ff55f546b313bb1ab136a466214499/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_09ff55f546b313bb1ab136a466214499/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)\n/usr/sbin/nginx: /azureml-envs/azureml_09ff55f546b313bb1ab136a466214499/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)

In [64]:
# print service state
print(service.state)
# print scoring URI
print('scoring URI: ' + service.scoring_uri)
# print Swagger URI
print('Swagger URI: ' + service.swagger_uri)
# retrieve authentication keys
primary, secondary = service.get_keys()
# print primary authenticaton key
print('Primary Authentication Key: ' + primary)

Healthy
scoring URI: http://c21f86a8-ecdb-43a3-a25e-2887c610f5b2.southcentralus.azurecontainer.io/score
Swagger URI: http://c21f86a8-ecdb-43a3-a25e-2887c610f5b2.southcentralus.azurecontainer.io/swagger.json
Primary Authentication Key: ZwRGMEfMrOaSy8dMLO4uhGnLAfK5PVkC


In [65]:
#Store the uri's in variables:
scoring_uri = 'http://c21f86a8-ecdb-43a3-a25e-2887c610f5b2.southcentralus.azurecontainer.io/score'

key = 'ZwRGMEfMrOaSy8dMLO4uhGnLAfK5PVkC'

# Consume the Endpoint

In [66]:
#let's test requests:
import json
import requests

scoring_uri = scoring_uri
key = key

headers = {'Content-Type':'application/json'}
headers['Authorization'] = f'Bearer {key}'


test_data = json.dumps({'data':[{
    'pclass': 0.8419164182590155,
    'age': -0.34907541344456255,
    'sibsp': -0.47908676070718687,
    'parch': -0.444999501816175,
    'fare': -0.4902404567566683,
    'age_NA': -0.5014319838391105,
    'fare_NA': -0.027650063180466557,
    'sex_male': 0.743496915331831,
    'cabin_Missing': 0.5393765119990418,
    'cabin_Rare': -0.42592011250734235,
    'embarked_Q': -0.32204029159373954,
    'embarked_Rare': -0.03911805059269843,
    'embarked_S': 0.6573935670276714,
    'title_Mr': 0.8525918887485938,
    'title_Mrs': -0.42592011250734235,
    'title_Rare': -0.27494677157229536
    }
    ]
        })

test_data2 = json.dumps({'data':[{
    'pclass': -15460978645168200,
    'age': 0.8912042887450313,
    'sibsp': -0.47908676070718687,
    'parch': -0.444999501816175,
    'fare': 19569900306355100,
    'age_NA': -0.5014319838391105,
    'fare_NA': -0.027650063180466557,
    'sex_male': -13449954927569300,
    'cabin_Missing': -18539924853119600,
    'cabin_Rare': 23478581326275300,
    'embarked_Q': -0.32204029159373954,
    'embarked_Rare': -0.03911805059269843,
    'embarked_S': -15211587854766800,
    'title_Mr': -11728941046668400,
    'title_Mrs': -0.42592011250734235,
    'title_Rare': -0.27494677157229536

    }
    ]
        })


response1 = requests.post(scoring_uri, data=test_data, headers=headers)
response2 = requests.post(scoring_uri, data=test_data2, headers=headers)

print("Classification Prediction:",response1.text)
print("Classification Prediction:",response2.text)

Classification Prediction: [0]
Classification Prediction: [1]


In [None]:
#Delete Service:
service.delete()