# Automated ML

Import Dependencies.

In [1]:
import logging
import os
import csv

from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets
import pkg_resources

import azureml.core
from azureml.core.experiment import Experiment
from azureml.core.workspace import Workspace
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

# Report which azureml-core SDK version this notebook is running against
print(f"SDK version: {azureml.core.VERSION}")

SDK version: 1.20.0


# Create Experiment

Create experiment to run AutoML

In [2]:

from azureml.core import Workspace, Experiment

# Resolve the workspace through Workspace.from_config()
ws = Workspace.from_config()

# Experiment that the AutoML pipeline is later submitted to
experiment = Experiment(workspace=ws, name="creditcardfraud-automl")

# One line per workspace attribute
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

# NOTE(review): `run` is not referenced again in the visible cells - confirm this
# interactive run is still needed.
run = experiment.start_logging(snapshot_directory=None)

Performing interactive authentication. Please follow the instructions on the terminal.
To sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code A6EEVNATF to authenticate.
You have logged in. Now let us find all the subscriptions to which you have access...
Interactive authentication successfully completed.
Workspace name: quick-starts-ws-137398
Azure region: southcentralus
Subscription id: cdbe0b43-92a0-4715-838a-f2648cc7ad21
Resource group: aml-quickstarts-137398


## Dataset

### Overview
The dataset contains transactions made by credit cards in September 2013 by European cardholders.

This dataset presents transactions that occurred in two days, where we have 492 frauds out of 284,807 transactions. The dataset is highly unbalanced: the positive class (frauds) accounts for 0.172% of all transactions.

It contains only numerical input variables which are the result of a PCA transformation. Unfortunately, due to confidentiality issues, we cannot provide the original features and more background information about the data. Features V1, V2, … V28 are the principal components obtained with PCA; the only features which have not been transformed with PCA are 'Time' and 'Amount'. Feature 'Time' contains the seconds elapsed between each transaction and the first transaction in the dataset. The feature 'Amount' is the transaction amount; this feature can be used for example-dependent cost-sensitive learning. Feature 'Class' is the response variable and it takes value 1 in case of fraud and 0 otherwise.

The dataset was downloaded from [kaggle](https://www.kaggle.com/mlg-ulb/creditcardfraud) and a copy of the dataset was saved into the [git repository](https://github.com/heber-augusto/Nanodegree_Azure_ML_Engineer_CapstoneProject/tree/master/data).


In [3]:
# Load the registered dataset from the Workspace. If it has not been registered yet,
# stop with an explicit error instead of continuing to an undefined `dataset`.
key = "creditcard"
description_text = "Creditcard dataset"
label = "Class"

registered_datasets = ws.datasets
found = key in registered_datasets

if not found:
    # Previously this branch only printed a hint and the cell then failed with a
    # NameError on `dataset`; raise a message that says what to do instead.
    raise RuntimeError(
        "Dataset '" + key + "' is not registered in the workspace. "
        "Download the data from https://www.kaggle.com/mlg-ulb/creditcardfraud/download, "
        "register it as a tabular dataset named '" + key + "' and re-run this cell."
    )

dataset = registered_datasets[key]

# Materialize the dataset locally and show summary statistics for every column
df = dataset.to_pandas_dataframe()
df.describe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
count,284806.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,...,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0,284807.0
mean,94813.841366,3.918649e-15,5.682686e-16,-8.761736e-15,2.811118e-15,-1.552103e-15,2.04013e-15,-1.698953e-15,-1.958151e-16,-3.14764e-15,...,1.471982e-16,8.042109e-16,5.28245e-16,4.458267e-15,1.426896e-15,1.70164e-15,-3.671606e-16,-1.218152e-16,88.349619,0.001727
std,47488.22833,1.958696,1.651309,1.516255,1.415869,1.380247,1.332271,1.237094,1.194353,1.098632,...,0.734524,0.7257016,0.6244603,0.6056471,0.5212781,0.482227,0.4036325,0.3300833,250.120109,0.041527
min,0.0,-56.40751,-72.71573,-48.32559,-5.683171,-113.7433,-26.16051,-43.55724,-73.21672,-13.43407,...,-34.83038,-10.93314,-44.80774,-2.836627,-10.2954,-2.604551,-22.56568,-15.43008,0.0,0.0
25%,54201.25,-0.9203734,-0.5985499,-0.8903648,-0.8486401,-0.6915971,-0.7682956,-0.5540759,-0.2086297,-0.6430976,...,-0.2283949,-0.5423504,-0.1618463,-0.3545861,-0.3171451,-0.3269839,-0.07083953,-0.05295979,5.6,0.0
50%,84691.5,0.0181088,0.06548556,0.1798463,-0.01984653,-0.05433583,-0.2741871,0.04010308,0.02235804,-0.05142873,...,-0.02945017,0.006781943,-0.01119293,0.04097606,0.0165935,-0.05213911,0.001342146,0.01124383,22.0,0.0
75%,139320.75,1.315642,0.8037239,1.027196,0.7433413,0.6119264,0.3985649,0.5704361,0.3273459,0.597139,...,0.1863772,0.5285536,0.1476421,0.4395266,0.3507156,0.2409522,0.09104512,0.07827995,77.165,0.0
max,172792.0,2.45493,22.05773,9.382558,16.87534,34.80167,73.30163,120.5895,20.00721,15.59499,...,27.20284,10.50309,22.52841,4.584549,7.519589,3.517346,31.6122,33.84781,25691.16,1.0


In [4]:
# Preview the first five records of the dataset as a pandas DataFrame
first_records = dataset.take(5)
first_records.to_pandas_dataframe()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


### Create cluster to run AutoML

In [5]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the cluster to reuse or create
cluster_name = "cpu-cluster-aml"

try:
    # Reuse the cluster when one with this name can be looked up in the workspace
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    print('Found existing compute target')
except ComputeTargetException:
    # Lookup failed: provision a new cluster that scales between 1 and 6 nodes
    print('Creating a new compute target...')
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        min_nodes=1,
        max_nodes=6,
    )
    compute_target = ComputeTarget.create(ws, cluster_name, compute_config)

# Wait (up to 20 minutes) for provisioning; with min_node_count=None the
# cluster's own scale settings decide how many nodes to wait for.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=None,
    timeout_in_minutes=20,
)

# Print the detailed status of the cluster
cluster_status = compute_target.get_status()
print(cluster_status.serialize())

Creating a new compute target...
Creating
Succeeded.......................
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 1, 'targetNodeCount': 1, 'nodeStateCounts': {'preparingNodeCount': 1, 'runningNodeCount': 0, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-02-06T03:05:24.245000+00:00', 'errors': None, 'creationTime': '2021-02-06T03:03:19.449953+00:00', 'modifiedTime': '2021-02-06T03:03:35.918188+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 1, 'maxNodeCount': 6, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_D2_V2'}


In [6]:
import os

# Local working folder; it is passed to AutoMLConfig as `path`
project_folder = './fraud-detection-automl'
os.makedirs(name=project_folder, exist_ok=True)

### Split dataset into train and test

In [7]:
from sklearn.model_selection import train_test_split
import pandas as pd
from azureml.core.dataset import Dataset
from train import clean_data

# clean_data (from the project's train module) separates the features from the label
x, y = clean_data(dataset.to_pandas_dataframe())

# Hold out 33% of the rows; the fixed random_state keeps the split repeatable
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

# Put the label back next to the features so each split is a single table
training_data = pd.concat([x_train, y_train], axis=1)
testing_data = pd.concat([x_test, y_test], axis=1)

# Local staging folder for the CSV files
os.makedirs('data', exist_ok=True)

# Write both splits to CSV so they can be uploaded to the datastore
training_data.to_csv("data/train_data.csv", index=False)
testing_data.to_csv("data/test_data.csv", index=False)

# Upload the staging folder to the workspace's default datastore
ds = ws.get_default_datastore()
ds.upload(src_dir='./data', target_path='fraud-detection', overwrite=True, show_progress=True)

# Expose the uploaded files as tabular datasets for access during training on remote compute
train_data = Dataset.Tabular.from_delimited_files(path=ds.path('fraud-detection/train_data.csv'))
test_data = Dataset.Tabular.from_delimited_files(path=ds.path('fraud-detection/test_data.csv'))


Uploading an estimated of 2 files
Uploading ./data/test_data.csv
Uploaded ./data/test_data.csv, 1 files out of an estimated total of 2
Uploading ./data/train_data.csv
Uploaded ./data/train_data.csv, 2 files out of an estimated total of 2
Uploaded 2 files


## AutoML Configuration

The experiment timeout was set to 1 hour to avoid losing work inside the Udacity workspace (which has a time limit).

The max concurrent iterations was set to 5 because it has to be less than the maximum number of nodes in the cluster (which is 6).

AUC_weighted was set as the primary metric so the results can be compared with the HyperDrive run. This metric is more suitable for imbalanced datasets (which are common in fraud detection).


In [8]:
# Run-level AutoML settings, expanded into the AutoMLConfig call below
automl_settings = dict(
    experiment_timeout_hours=1,
    max_concurrent_iterations=5,
    primary_metric='AUC_weighted',
)

# NOTE(review): validation_data is built from the same split (x_test / y_test) that
# the confusion-matrix cell scores later - confirm that this is intended.
automl_config = AutoMLConfig(
    task='classification',
    label_column_name=label,
    training_data=train_data,
    validation_data=test_data,
    compute_target=compute_target,
    path=project_folder,
    featurization='auto',
    enable_early_stopping=True,
    enable_onnx_compatible_models=True,
    debug_log="automl_errors.log",
    **automl_settings,
)




In [9]:
from azureml.pipeline.core import PipelineData, TrainingOutput

ds = ws.get_default_datastore()

# Names under which the outputs are retrieved from the finished pipeline run
metrics_output_name = 'metrics_output'
best_model_output_name = 'best_model_output'

# Pipeline output bound to the 'Metrics' training output
metrics_training_output = TrainingOutput(type='Metrics')
metrics_data = PipelineData(
    name='metrics_data',
    datastore=ds,
    pipeline_output_name=metrics_output_name,
    training_output=metrics_training_output,
)

# Pipeline output bound to the 'Model' training output
model_training_output = TrainingOutput(type='Model')
model_data = PipelineData(
    name='model_data',
    datastore=ds,
    pipeline_output_name=best_model_output_name,
    training_output=model_training_output,
)

In [10]:
# Wrap the AutoML configuration as a pipeline step that writes the metrics and model outputs
step_outputs = [metrics_data, model_data]
automl_step = AutoMLStep(
    name='automl_module',
    automl_config=automl_config,
    outputs=step_outputs,
    allow_reuse=True,
)

In [11]:
from azureml.pipeline.core import Pipeline

# Single-step pipeline containing only the AutoML step
pipeline_steps = [automl_step]
pipeline = Pipeline(
    workspace=ws,
    description="pipeline_with_automlstep",
    steps=pipeline_steps,
)

In [12]:
# Submit the pipeline to the experiment; later cells use the returned run handle
# to wait for completion and to fetch the metrics and model outputs.
pipeline_run = experiment.submit(pipeline)

Created step automl_module [70c599a3][68aa2e17-56fd-43fa-ab6b-8d0e5b8c26b9], (This step will run and generate new outputs)
Submitted PipelineRun ee8c0cf1-5e98-437e-a191-d00c46e30290
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/creditcardfraud-automl/runs/ee8c0cf1-5e98-437e-a191-d00c46e30290?wsid=/subscriptions/cdbe0b43-92a0-4715-838a-f2648cc7ad21/resourcegroups/aml-quickstarts-137398/workspaces/quick-starts-ws-137398


## Run Details

In [13]:
from azureml.widgets import RunDetails

# Show the run-details widget for the submitted pipeline run
run_details_widget = RunDetails(pipeline_run)
run_details_widget.show()

_PipelineWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', …

In [14]:
# Wait for the pipeline run to finish before its outputs are read in the next cells
pipeline_run.wait_for_completion()

PipelineRunId: ee8c0cf1-5e98-437e-a191-d00c46e30290
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/creditcardfraud-automl/runs/ee8c0cf1-5e98-437e-a191-d00c46e30290?wsid=/subscriptions/cdbe0b43-92a0-4715-838a-f2648cc7ad21/resourcegroups/aml-quickstarts-137398/workspaces/quick-starts-ws-137398
PipelineRun Status: NotStarted
PipelineRun Status: Running


StepRunId: d3617007-abc9-42fc-bbe2-f90b28e122ec
Link to Azure Machine Learning Portal: https://ml.azure.com/experiments/creditcardfraud-automl/runs/d3617007-abc9-42fc-bbe2-f90b28e122ec?wsid=/subscriptions/cdbe0b43-92a0-4715-838a-f2648cc7ad21/resourcegroups/aml-quickstarts-137398/workspaces/quick-starts-ws-137398
StepRun( automl_module ) Status: NotStarted
StepRun( automl_module ) Status: Running

StepRun(automl_module) Execution Summary
StepRun( automl_module ) Status: Finished



PipelineRun Execution Summary
PipelineRun Status: Finished
{'runId': 'ee8c0cf1-5e98-437e-a191-d00c46e30290', 'status': 'Completed', 'st

'Finished'

## Best Model

Get the best model from the automl experiments and display all the properties of the model.



In [15]:
# Look up the pipeline output named by metrics_output_name and download it
# into the current directory.
metrics_output = pipeline_run.get_pipeline_output(metrics_output_name)
num_file_downloaded = metrics_output.download('.', show_progress=True)

Downloading azureml/d3617007-abc9-42fc-bbe2-f90b28e122ec/metrics_data
Downloaded azureml/d3617007-abc9-42fc-bbe2-f90b28e122ec/metrics_data, 1 files out of an estimated total of 1


In [16]:

import json

# The download above placed the metrics file at the same relative path it has on the datastore
with open(metrics_output._path_on_datastore) as metrics_file:
    deserialized_metrics_output = json.load(metrics_file)

# Tabulate the parsed metrics and display them
df = pd.DataFrame(deserialized_metrics_output)
df

Unnamed: 0,d3617007-abc9-42fc-bbe2-f90b28e122ec_9,d3617007-abc9-42fc-bbe2-f90b28e122ec_0,d3617007-abc9-42fc-bbe2-f90b28e122ec_12,d3617007-abc9-42fc-bbe2-f90b28e122ec_3,d3617007-abc9-42fc-bbe2-f90b28e122ec_32,d3617007-abc9-42fc-bbe2-f90b28e122ec_28,d3617007-abc9-42fc-bbe2-f90b28e122ec_4,d3617007-abc9-42fc-bbe2-f90b28e122ec_19,d3617007-abc9-42fc-bbe2-f90b28e122ec_21,d3617007-abc9-42fc-bbe2-f90b28e122ec_35,...,d3617007-abc9-42fc-bbe2-f90b28e122ec_6,d3617007-abc9-42fc-bbe2-f90b28e122ec_14,d3617007-abc9-42fc-bbe2-f90b28e122ec_34,d3617007-abc9-42fc-bbe2-f90b28e122ec_17,d3617007-abc9-42fc-bbe2-f90b28e122ec_18,d3617007-abc9-42fc-bbe2-f90b28e122ec_42,d3617007-abc9-42fc-bbe2-f90b28e122ec_29,d3617007-abc9-42fc-bbe2-f90b28e122ec_31,d3617007-abc9-42fc-bbe2-f90b28e122ec_20,d3617007-abc9-42fc-bbe2-f90b28e122ec_27
balanced_accuracy,[0.5],[0.6861589178347757],[0.85475345000844],[0.5],[0.9031991628050569],[0.5],[0.9485047757447242],[0.9286117353746148],[0.7934791136157284],[0.8611944054530434],...,[0.5],[0.5],[0.9064249692566697],[0.8806078601851441],[0.5],[0.9064196405273586],[0.8096028171444818],[0.8805119430575423],[0.8934887397073874],[0.8934844423450397]
precision_score_micro,[0.998350818206967],[0.9970953120677548],[0.9993509671653225],[0.998350818206967],[0.9996276041112506],[0.998350818206967],[0.9807737322579959],[0.9796352648266763],[0.9991807290447513],[0.9993509671653225],...,[0.998350818206967],[0.998350818206967],[0.9996382439937863],[0.9995318451684293],[0.998350818206967],[0.9996276041112506],[0.9992232885748942],[0.9993403272827868],[0.9673781201455536],[0.9995212052858936]
accuracy,[0.998350818206967],[0.9970953120677548],[0.9993509671653225],[0.998350818206967],[0.9996276041112506],[0.998350818206967],[0.9807737322579959],[0.9796352648266763],[0.9991807290447513],[0.9993509671653225],...,[0.998350818206967],[0.998350818206967],[0.9996382439937863],[0.9995318451684293],[0.998350818206967],[0.9996276041112506],[0.9992232885748942],[0.9993403272827868],[0.9673781201455536],[0.9995212052858936]
norm_macro_recall,[0.0],[0.37231783566955134],[0.7095069000168801],[0.0],[0.8063983256101137],[0.0],[0.8970095514894485],[0.8572234707492297],[0.5869582272314569],[0.7223888109060868],...,[0.0],[0.0],[0.8128499385133394],[0.7612157203702883],[0.0],[0.8128392810547171],[0.6192056342889636],[0.7610238861150846],[0.7869774794147748],[0.7869688846900793]
AUC_macro,[0.976392250858699],[0.6329690201429405],[0.9516750946537031],[0.9527657652175617],[0.982266109178444],[0.9637780484543075],[0.9768256312567447],[0.9639708109397782],[0.9833432172667331],[0.974908491966167],...,[0.9695606823661345],[0.9630134273664972],[0.9832975277102518],[0.9255436593106137],[0.9672367719451684],[0.9903830531281188],[0.975148181648475],[0.9800255847764736],[0.9491234240283062],[0.9855967197029938]
f1_score_weighted,[0.9975269078217406],[0.9973896494492679],[0.9993175262879587],[0.9975269078217406],[0.9996112957598924],[0.9975269078217406],[0.9888695692369674],[0.9882708374338963],[0.9991001785269745],[0.9993225440386695],...,[0.9975269078217406],[0.9975269078217406],[0.999623088549539],[0.9995068028593841],[0.9975269078217406],[0.9996127051425352],[0.9991574344810088],[0.9993270663717244],[0.9819001670722481],[0.9995038364165512]
recall_score_weighted,[0.998350818206967],[0.9970953120677548],[0.9993509671653225],[0.998350818206967],[0.9996276041112506],[0.998350818206967],[0.9807737322579959],[0.9796352648266763],[0.9991807290447513],[0.9993509671653225],...,[0.998350818206967],[0.998350818206967],[0.9996382439937863],[0.9995318451684293],[0.998350818206967],[0.9996276041112506],[0.9992232885748942],[0.9993403272827868],[0.9673781201455536],[0.9995212052858936]
average_precision_score_weighted,[0.9993201736425504],[0.997318922034557],[0.9994279910576342],[0.9993579109791756],[0.9997386087230509],[0.999416202923216],[0.9994525630290533],[0.9992707653335888],[0.9995997798084264],[0.9995667647484479],...,[0.9994128092481166],[0.9993884540154188],[0.999751882514823],[0.9994741657493283],[0.9993634419362656],[0.9997654554629227],[0.9995717795555169],[0.9996439613400923],[0.999006415749044],[0.9997305633576558]
precision_score_macro,[0.4991754091034835],[0.6234143016344996],[0.9362682177779131],[0.4991754091034835],[0.9806094114715833],[0.4991754091034835],[0.5366029399221588],[0.5333777324184616],[0.9371591465882704],[0.9305401564426028],...,[0.4991754091034835],[0.4991754091034835],[0.9807615369005543],[0.9718029000330275],[0.4991754091034835],[0.9771182319928244],[0.9360493920413389],[0.9123902748147498],[0.5199090417824983],[0.9550480718769023]
matthews_correlation,[0.0],[0.30314796939853006],[0.7868104101643498],[0.0],[0.8804119770688572],[0.0],[0.25625450912236],[0.23921611830933437],[0.7163716321846753],[0.7886918177081493],...,[0.0],[0.0],[0.8840667233972719],[0.8475184769920149],[0.0],[0.8807047640056742],[0.7348526932389569],[0.7922611306073126],[0.17701958942192783],[0.8462962526271011]


In [17]:
# Retrieve best model from Pipeline Run: look up the output named by
# best_model_output_name and download it into the current directory.
best_model_output = pipeline_run.get_pipeline_output(best_model_output_name)
num_file_downloaded = best_model_output.download('.', show_progress=True)

Downloading azureml/d3617007-abc9-42fc-bbe2-f90b28e122ec/model_data
Downloaded azureml/d3617007-abc9-42fc-bbe2-f90b28e122ec/model_data, 1 files out of an estimated total of 1


In [18]:
import pickle

# NOTE: pickle.load can execute code stored in the file. The file read here is the
# model output downloaded from this notebook's own pipeline run.
with open(best_model_output._path_on_datastore, "rb") as model_file:
    best_model = pickle.load(model_file)

# Display the fitted pipeline
best_model

Pipeline(memory=None,
         steps=[('datatransformer',
                 DataTransformer(enable_dnn=None, enable_feature_sweeping=None,
                                 feature_sweeping_config=None,
                                 feature_sweeping_timeout=None,
                                 featurization_config=None, force_text_dnn=None,
                                 is_cross_validation=None,
                                 is_onnx_compatible=None, logger=None,
                                 observer=None, task=None, working_dir=None)),
                ('prefittedsoftvotingclassifier',...
                                                                                                  min_samples_split=0.15052631578947367,
                                                                                                  min_weight_fraction_leaf=0.0,
                                                                                                  n_estimators=25,
            

In [19]:

from sklearn.metrics import confusion_matrix
import pandas as pd

# Score the held-out features with the best model and compare against the true labels
ypred = best_model.predict(x_test)
cm = confusion_matrix(y_test, ypred)

# Visualize the confusion matrix as a shaded table
cm_table = pd.DataFrame(cm)
cm_table.style.background_gradient(cmap='Blues', low=0, high=0.9)

Unnamed: 0,0,1
0,93825,6
1,29,126


## Model Deployment

Register the model

In [20]:
import joblib
from azureml.core.model import Model

# Serialize the best model to a local file so it can be registered
best_model_path = "best_model.pkl"
joblib.dump(best_model, best_model_path)

# Register the serialized model in the workspace; score.py loads it by this name
model = Model.register(
    workspace=ws,
    model_path=best_model_path,
    model_name="fraud_detection_model",
    description="AutoML to detect frauds",
    tags={'area': "fraud", 'type': "classification"},
)

Registering model fraud_detection_model


## Model Deployment

Create an inference config

In [21]:
%%writefile score.py
import pickle
import json
import pandas as pd
import  numpy as  np
from sklearn.externals import joblib
from azureml.core.model import Model

def init():
    global model
    
    model_path = Model.get_model_path('fraud_detection_model')
    model = joblib.load(model_path)

def run(data):
    try:
        data = pd.DataFrame(json.loads(data)['data'])
        result = model.predict(data)
        return result.tolist()
    except Exception as e:
        error = str(e)
        return error

Overwriting score.py


## Model Deployment

Deploy the model as a web service.

In [22]:
from azureml.core.webservice import AciWebservice

# Azure Container Instances deployment settings for the scoring service
service_tags = {"data": "fraud",  "method" : "automl"}
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=4,
    tags=service_tags,
    description='Detect fraud model',
    enable_app_insights=True,
)

In [23]:
from azureml.core.webservice import Webservice
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core import Workspace
from azureml.core.model import Model

ws = Workspace.from_config()

# Fetch the registered model by name
model = Model(
    ws,
    'fraud_detection_model')

# "AzureML-*" environments are curated by Azure ML and should not be modified in
# place; clone the curated environment under a custom name before adding packages.
curated_env = Environment.get(
    workspace=ws,
    name="AzureML-AutoML")
fraud_env = curated_env.clone("fraud-detection-env")

# Extra pip packages needed by score.py
for pip_package in ["scikit-learn"]:
    fraud_env.python.conda_dependencies.add_pip_package(pip_package)

# Pair the scoring script with the environment it runs in
inference_config = InferenceConfig(
    entry_script="score.py",
    environment=fraud_env)

# Deploy the model as a web service using the ACI configuration
service = Model.deploy(workspace=ws,
                       name='fraud-detection-svc',
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aciconfig)

service.wait_for_deployment(show_output=True)

Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running................................................
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [24]:
# Show the scoring endpoint URL of the deployed service
print(service.scoring_uri)

http://72c58822-db1e-49d5-833f-abdc00352324.southcentralus.azurecontainer.io/score


### Send a request to the web service you deployed to test it.

In [25]:
import requests
import json

# URL of the deployed web service
scoring_uri = service.scoring_uri

# Read the held-out split into its own name: `test_data` is the TabularDataset
# passed to AutoMLConfig and must not be overwritten by a pandas DataFrame.
test_df = pd.read_csv('./data/test_data.csv')

# Take five fraud and five non-fraud rows so both classes are exercised
samples_with_class_1 = test_df[test_df.Class == 1].head()
samples_with_class_0 = test_df[test_df.Class == 0].head()
test_sample = pd.concat([samples_with_class_1, samples_with_class_0]).reset_index(drop=True)

# Separate the expected labels from the features sent to the service
output_sample = test_sample.pop('Class')

# One {column: value} dict per row, under the "data" key that score.py reads
data = {"data": test_sample.to_dict(orient="records")}

# Convert to JSON string and keep a copy of the payload on disk
input_data = json.dumps(data)
with open("data.json", "w") as _f:
    _f.write(input_data)

# Set the content type
headers = {'Content-Type': 'application/json'}

# Make the request and display the response
resp = requests.post(scoring_uri, input_data, headers=headers)
print(resp.json())

[1, 1, 1, 1, 1, 0, 0, 0, 0, 0]


In [26]:
# Print the logs of the deployed service
print(service.get_logs())

2021-02-06T04:34:29,984098400+00:00 - iot-server/run 
2021-02-06T04:34:29,991844200+00:00 - gunicorn/run 
2021-02-06T04:34:29,990223400+00:00 - rsyslog/run 
2021-02-06T04:34:30,042177100+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_7ade26eb614f97df8030bc480da59236/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_7ade26eb614f97df8030bc480da59236/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_7ade26eb614f97df8030bc480da59236/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_7ade26eb614f97df8030bc480da59236/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_7ade26eb614f97df8030bc480da59236/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
rsyslogd

### Delete the service

In [27]:

# Delete the deployed web service now that it has been tested
service.delete()

In [28]:

# Delete the compute cluster that was looked up or created earlier in the notebook
compute_target.delete()