# Hyperparameter Tuning using HyperDrive

In [2]:
import os
import sys
import json
import azureml
import logging
import requests
import pandas as pd
import numpy as np
from io import BytesIO
import joblib

from azureml.core.workspace import Workspace
from azureml.core.experiment import Experiment
from azureml.core import Environment

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.estimator import Estimator
from azureml.core import ScriptRunConfig
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

from azureml.core.dataset import Dataset
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling, BayesianParameterSampling
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, quniform, choice

from azureml.core.runconfig import RunConfiguration
from azureml.core.runconfig import EnvironmentDefinition
from azureml.core.runconfig import CondaDependencies
from azureml.core.model import InferenceConfig, Model

from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.model import Model, InferenceConfig

from azureml.train.automl import constants

from train import *

import warnings
warnings.filterwarnings("ignore")

pd.set_option('display.max_rows', None)

# Check system and core SDK version number
print("System version: {}".format(sys.version))
print("SDK version:", azureml.core.VERSION)

System version: 3.6.13 |Anaconda, Inc.| (default, Feb 23 2021, 12:58:59) 
[GCC Clang 10.0.0 ]
SDK version: 1.23.0


In [2]:
!az login

[33mTo sign in, use a web browser to open the page https://microsoft.com/devicelogin and enter the code RT6WDC3HC to authenticate.[0m
[
  {
    "cloudName": "AzureCloud",
    "homeTenantId": "660b3398-b80e-49d2-bc5b-ac1dc93b5254",
    "id": "2c48c51c-bd47-40d4-abbe-fb8eabd19c8c",
    "isDefault": true,
    "managedByTenants": [],
    "name": "Udacity CloudLabs Sub - 03",
    "state": "Enabled",
    "tenantId": "660b3398-b80e-49d2-bc5b-ac1dc93b5254",
    "user": {
      "name": "odl_user_142985@udacitylabs.onmicrosoft.com",
      "type": "user"
    }
  }
]
[0m

In [3]:
interactive_auth = InteractiveLoginAuthentication(tenant_id="660b3398-b80e-49d2-bc5b-ac1dc93b5254")
ws = Workspace.get(subscription_id="2c48c51c-bd47-40d4-abbe-fb8eabd19c8c",
                   resource_group="aml-quickstarts-142985",
                   name="quick-starts-ws-142985",
                   auth=interactive_auth)

experiment_name = 'online_news_project'
experiment=Experiment(ws, experiment_name)
experiment
# ws = Workspace.from_config()
# print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

Name,Workspace,Report Page,Docs Page
online_news_project,quick-starts-ws-142985,Link to Azure Machine Learning studio,Link to Documentation


In [4]:
dic_data = {'Workspace name': ws.name,
            'Azure region': ws.location,
            'Subscription id': ws.subscription_id,
            'Resource group': ws.resource_group,
            'Experiment Name': experiment.name}

az_data = pd.DataFrame.from_dict(data = dic_data, orient='index')
az_data.rename(columns={0:''}, inplace = True)
az_data

Unnamed: 0,Unnamed: 1
Workspace name,quick-starts-ws-142985
Azure region,southcentralus
Subscription id,2c48c51c-bd47-40d4-abbe-fb8eabd19c8c
Resource group,aml-quickstarts-142985
Experiment Name,online_news_project


## Create or Attach an AmlCompute cluster

In [5]:
# Define CPU cluster name
compute_target_name = "cpu-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=compute_target_name)
    print("Found existing cpu-cluster. Use it.")
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_DS12_V2",
                                                           min_nodes=1, 
                                                           max_nodes=4) 
    compute_target = ComputeTarget.create(ws, compute_target_name, compute_config)

compute_target.wait_for_completion(show_output=True)
print(compute_target.get_status().serialize())

Found existing cpu-cluster. Use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 1, 'targetNodeCount': 1, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 0, 'idleNodeCount': 1, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-04-18T02:36:11.900000+00:00', 'errors': None, 'creationTime': '2021-04-18T00:58:49.540122+00:00', 'modifiedTime': '2021-04-18T00:59:05.307569+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 1, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_DS12_V2'}


## Dataset

The dataset used in this project is a dataset made available on UCI Machine Learning Repository called [Online News Popularity Data Set](https://archive.ics.uci.edu/ml/datasets/Online+News+Popularity#).

The dataset summarizes heterogeneous set of features about the articles published by Mashable between 2013 and 2015.

- Number of Instances: 39797
- Number of Attributes: 61 
    - 58 predictive attributes 
    - 2 non-predictive (`url` and `timedelta`) 
    - 1 target column
    
We will also apply the `Boruta` step for feature selection before exporting the data to our defined Datastore

In [6]:
DATA_LOC = "https://raw.githubusercontent.com/franckess/AzureML_Capstone/main/data/OnlineNewsPopularity.csv"
BORUTA_LOC = "https://github.com/franckess/AzureML_Capstone/releases/download/1.1/boruta_model_final.pkl"

# Loading data
df = pd.read_csv(DATA_LOC)

# Removing space character in the feature names
df.columns=df.columns.str.replace(' ','')

# Drop URL column
df = df.drop(['url'], axis=1)

# Perform Data pre-processing
df = corr_drop_cols(df)
df = create_label(df)
df = scaling_num(df)
df = feature_selection(df, BORUTA_LOC)
    
# Split train data into train & test
X_train, X_test, y_train, y_test = split_train_test(df)

m, k = X_train.shape
print("{} x {} table of data:".format(m, k))
X_train.info()

31715 x 47 table of data:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 31715 entries, 38512 to 35050
Data columns (total 47 columns):
n_tokens_title                   31715 non-null float64
n_tokens_content                 31715 non-null float64
n_unique_tokens                  31715 non-null float64
num_hrefs                        31715 non-null float64
num_self_hrefs                   31715 non-null float64
num_imgs                         31715 non-null float64
num_videos                       31715 non-null float64
average_token_length             31715 non-null float64
num_keywords                     31715 non-null float64
data_channel_is_entertainment    31715 non-null int64
data_channel_is_bus              31715 non-null int64
data_channel_is_socmed           31715 non-null int64
data_channel_is_tech             31715 non-null int64
data_channel_is_world            31715 non-null int64
kw_min_min                       31715 non-null float64
kw_max_min                     

## Upload data to Azure Datatore

In [7]:
datastore = ws.get_default_datastore()
datastore.upload_files(files = ['./data/OnlineNewsPopularity.csv'], 
                       target_path='data/', 
                       overwrite=True, 
                       show_progress=True)

Uploading an estimated of 1 files
Uploading ./data/OnlineNewsPopularity.csv
Uploaded ./data/OnlineNewsPopularity.csv, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_6019f73113234bf6afe18b0d215cae14

In [8]:
print(
    "Datastore type: " + datastore.datastore_type,
    "Account name: " + datastore.account_name,
    "Container name: " + datastore.container_name,
    sep="\n",
)

Datastore type: AzureBlob
Account name: mlstrg142985
Container name: azureml-blobstore-4d8774c1-27e9-47c1-b92b-547a17c80a82


In [9]:
# Get data reference object for the data path
ds_data = datastore.path('data/')
print(ds_data)

$AZUREML_DATAREFERENCE_07dc60bf93cd42b5913bc6b44e871567


## HyperDrive Configuration

### Create an environment

Define a conda environment YAML file with your training script dependencies and create an Azure ML environment.

In [10]:
%%writefile hyperdrive_dependencies.yml

name: project_environment
dependencies:
- python=3.6.2
- pip:
  - azureml-train-automl-runtime==1.24.0
  - Werkzeug==0.16.1
  - inference-schema
  - azureml-interpret==1.24.0
  - azureml-defaults==1.24.0
  - pingouin
  - lightgbm
  - joblib  
  - Boruta
- numpy>=1.16.0,<1.19.0
- pandas==0.25.1
- scikit-learn==0.22.1
- py-xgboost<=0.90
- fbprophet==0.5
- holidays==0.9.11
- psutil>=5.2.2,<6.0.0
channels:
- anaconda
- conda-forge

Overwriting hyperdrive_dependencies.yml


In [11]:
udacity_env = Environment.from_conda_specification(name = 'udacity-env', file_path = './hyperdrive_dependencies.yml')
udacity_env.register(ws)

# Specify an Ubuntu base image
udacity_env.docker.enabled = True
udacity_env.python.user_managed_dependencies = False
udacity_env.docker.base_image = 'mcr.microsoft.com/azureml/base:openmpi3.1.2-ubuntu18.04'

'enabled' is deprecated. Please use the azureml.core.runconfig.DockerConfiguration object with the 'use_docker' param instead.


Build the image just to confirm it works appropriately or identify any errors prior to deployment

In [12]:
# build = udacity_env.build(workspace=ws)
# build.wait_for_completion(show_output=True)

In [13]:
details = udacity_env.get_image_details(ws)
print(details.dockerfile)

FROM mcr.microsoft.com/azureml/intelmpi2018.3-ubuntu16.04:20210301.v1@sha256:000d6c43f606ceaa67983790ca95c70fd741c364d8c2e3217a11d775b99741df
USER root
RUN mkdir -p $HOME/.cache
WORKDIR /
COPY azureml-environment-setup/99brokenproxy /etc/apt/apt.conf.d/
RUN if dpkg --compare-versions `conda --version | grep -oE '[^ ]+$'` lt 4.4.11; then conda install conda==4.4.11; fi
COPY azureml-environment-setup/mutated_conda_dependencies.yml azureml-environment-setup/mutated_conda_dependencies.yml
RUN ldconfig /usr/local/cuda/lib64/stubs && conda env create -p /azureml-envs/azureml_d32c6a42b04d5ead1baaef3bc63324e7 -f azureml-environment-setup/mutated_conda_dependencies.yml && rm -rf "$HOME/.cache/pip" && conda clean -aqy && CONDA_ROOT_DIR=$(conda info --root) && rm -rf "$CONDA_ROOT_DIR/pkgs" && find "$CONDA_ROOT_DIR" -type d -name __pycache__ -exec rm -rf {} + && ldconfig
# AzureML Conda environment name: azureml_d32c6a42b04d5ead1baaef3bc63324e7
ENV PATH /azureml-envs/azureml_d32c6a42b04d5ead1baaef

### Tune hyperparameters using `HyperDrive`

In the following block, I tune my set of hyperparameters for the `LightGBM` model. The ranges of parameters for the `LightGBM` used were chosen considering the parameters tuning guides for different scenarios provided here.

The code below does a parallel search of the hyperparameter space using a Bayesian sampling method which does not support termination policy. Therefore, `policy=None`.

> __Note that when using Bayesian sampling, the number of concurrent runs has an impact on the effectiveness of the tuning process. Typically, a smaller number of concurrent runs leads to better sampling convergence. That is because some runs start without fully benefiting from runs that are still running.__

In order to compare the performance of HyperDrive with the one of AutoML we chose as objective metric of `LightGBM` __Accuracy__ score.

In [14]:
# Create a SKLearn estimator for use with train.py
src = ScriptRunConfig(source_directory='.',
                      script='train.py',
                      compute_target=compute_target,
                      environment=udacity_env)

# Specify hyperparameter space
param_sampling = BayesianParameterSampling(
    {
        "--num-leaves": quniform(8, 128, 1),
        "--min-data-in-leaf": quniform(20, 500, 10),
        "--learning-rate": choice(
            1e-4, 1e-3, 5e-3, 1e-2, 1.5e-2, 2e-2, 3e-2, 5e-2, 1e-1
        ),
        "--feature-fraction": uniform(0.1, 1),
        "--bagging-fraction": uniform(0.1, 1),
        "--bagging-freq": quniform(1, 30, 1),
        "--max-depth": quniform(5, 50, 5)
    }
)

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=param_sampling,
                                     policy=None ,
                                     primary_metric_name="Accuracy",
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=50,
                                     max_concurrent_runs=10)

In [15]:
# Submit hyperdrive run to the experiment 
hyperdrive_run = experiment.submit(config=hyperdrive_config, show_output=True)

## Run Details

In [16]:
# Show run details with the Jupyter widget
RunDetails(hyperdrive_run).show()
hyperdrive_run.wait_for_completion(show_output=True)
hyperdrive_run.get_metrics()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_20c7b5bb-c186-4099-bf31-851bd327326d
Web View: https://ml.azure.com/runs/HD_20c7b5bb-c186-4099-bf31-851bd327326d?wsid=/subscriptions/2c48c51c-bd47-40d4-abbe-fb8eabd19c8c/resourcegroups/aml-quickstarts-142985/workspaces/quick-starts-ws-142985&tid=660b3398-b80e-49d2-bc5b-ac1dc93b5254

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-04-18T03:27:15.802624][GENERATOR][INFO]Trying to sample '10' jobs from the hyperparameter space<END>\n"<START>[2021-04-18T03:27:16.0970621Z][SCHEDULER][INFO]The execution environment is being prepared. Please be patient as it can take a few minutes.<END>"<START>[2021-04-18T03:27:15.303630][API][INFO]Experiment created<END>\n""<START>[2021-04-18T03:27:16.193799][GENERATOR][INFO]Successfully sampled '10' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_20c7b5bb-c186-4099-bf31-851bd327326d
Web View: https://ml.azure.com/runs/HD_20c7b5bb-c186-4099-bf31-851bd327326d?wsid=/subscriptions/2c48c51c-bd47-40d4

{'HD_20c7b5bb-c186-4099-bf31-851bd327326d_49': {'Accuracy': 0.6688106949173919},
 'HD_20c7b5bb-c186-4099-bf31-851bd327326d_48': {'Accuracy': 0.6664144280489342},
 'HD_20c7b5bb-c186-4099-bf31-851bd327326d_47': {'Accuracy': 0.6804136713330811},
 'HD_20c7b5bb-c186-4099-bf31-851bd327326d_46': {'Accuracy': 0.6773868079202926},
 'HD_20c7b5bb-c186-4099-bf31-851bd327326d_44': {'Accuracy': 0.6753688989784336},
 'HD_20c7b5bb-c186-4099-bf31-851bd327326d_45': {'Accuracy': 0.6801614327153487},
 'HD_20c7b5bb-c186-4099-bf31-851bd327326d_42': {'Accuracy': 0.6666666666666666},
 'HD_20c7b5bb-c186-4099-bf31-851bd327326d_41': {'Accuracy': 0.533610795812839},
 'HD_20c7b5bb-c186-4099-bf31-851bd327326d_40': {'Accuracy': 0.677639046538025},
 'HD_20c7b5bb-c186-4099-bf31-851bd327326d_43': {'Accuracy': 0.6763778534493631},
 'HD_20c7b5bb-c186-4099-bf31-851bd327326d_38': {'Accuracy': 0.6705763652415184},
 'HD_20c7b5bb-c186-4099-bf31-851bd327326d_39': {'Accuracy': 0.6787741203178207},
 'HD_20c7b5bb-c186-4099-bf31-8

## Retrieve and Save Best Model

Here I retrieve and save the best model as well as display all the properties of the model.

In [17]:
best_run = hyperdrive_run.get_best_run_by_primary_metric() 
get_best_metrics = best_run.get_metrics()
parameter_values = best_run.get_details()["runDefinition"]["arguments"]

print('Best Run ID: ', best_run.id, sep='\n')
print('\n Metrics: ', get_best_metrics)
print('\n Best Run Accuracy:', get_best_metrics['Accuracy'])
print('\n Best model hyperparameter values', parameter_values, sep='\n')

Best Run ID: 
HD_20c7b5bb-c186-4099-bf31-851bd327326d_2

 Metrics:  {'Accuracy': 0.6805397906419473}

 Best Run Accuracy: 0.6805397906419473

 Best model hyperparameter values
['--num-leaves', '114', '--min-data-in-leaf', '240', '--learning-rate', '0.05', '--feature-fraction', '0.8767019272422398', '--bagging-fraction', '0.614723534867458', '--bagging-freq', '27', '--max-depth', '25']


In [18]:
best_run.get_file_names()

['azureml-logs/55_azureml-execution-tvmps_d03d575ab23e9efcd21580c90e443f3e7b312f501df41340f95439363e4bb5c4_d.txt',
 'azureml-logs/65_job_prep-tvmps_d03d575ab23e9efcd21580c90e443f3e7b312f501df41340f95439363e4bb5c4_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_d03d575ab23e9efcd21580c90e443f3e7b312f501df41340f95439363e4bb5c4_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'logs/azureml/94_azureml.log',
 'logs/azureml/job_prep_azureml.log',
 'logs/azureml/job_release_azureml.log',
 'outputs/lgb_model.pkl']

In [19]:
best_run.download_file("outputs/lgb_model.pkl","output/hyperdrive_model.pkl")

## Model Deployment

### Register our best model

In [20]:
model = best_run.register_model(model_name = 'best_model', model_path = './outputs/lgb_model.pkl')
print("Model successfully registered.")

Model successfully registered.


In [21]:
print('Name:', model.name)
print('Version:', model.version)

Name: best_model
Version: 2


In [22]:
Model.get_model_path(model_name = 'best_model', _workspace=ws)

'azureml-models/best_model/2/lgb_model.pkl'

### Prepare the `score.py` script

In [23]:
with open('score.py') as f:
    print(f.read())

# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
import json
import logging
import os
import pickle
import werkzeug
import numpy as np
import pandas as pd
import joblib

import azureml.automl.core
from azureml.automl.core.shared import logging_utilities, log_server
from azureml.telemetry import INSTRUMENTATION_KEY

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType

input_sample = pd.DataFrame({"n_tokens_title": pd.Series([0.0], dtype="float64"), "n_tokens_content": pd.Series([0.0], dtype="float64"), "n_unique_tokens": pd.Series([0.0], dtype="float64"), "num_hrefs": pd.Series([0.0], dtype="float64"), "num_self_hrefs": pd.Series([0.0], dtype="float64"), "num_imgs": 

### Inference configuration

Create an inference config and deploy the model as a web service.

In [25]:
script_file_name = './score.py'
inference_config = InferenceConfig(entry_script=script_file_name)

aciconfig = AciWebservice.deploy_configuration(cpu_cores = 2, 
                                               memory_gb = 4, 
                                               tags = {'Company': "Mashable", 'Type': "Hyperdrive", "Version":"1"}, 
                                               description = 'sample service for Capstone Project Hyperdrive Classifier for Online News popularity')
aci_service_name = 'hyperdrive-deployment'
print(aci_service_name)
aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)
aci_service.wait_for_deployment(True)
print(f'\nservice state: {aci_service.state}\n')
print(f'scoring URI: \n{aci_service.scoring_uri}\n')
print(f'swagger URI: \n{aci_service.swagger_uri}\n')

hyperdrive-deployment
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running
2021-04-18 03:53:51+00:00 Creating Container Registry if not exists.
2021-04-18 03:53:52+00:00 Use the existing image.
2021-04-18 03:53:52+00:00 Generating deployment configuration.
2021-04-18 03:53:53+00:00 Submitting deployment to compute.
2021-04-18 03:53:56+00:00 Checking the status of deployment hyperdrive-deployment..
2021-04-18 03:57:54+00:00 Checking the status of inference endpoint hyperdrive-deployment.
Succeeded
ACI service creation operation finished, operation "Succeeded"

service state: Healthy

scoring URI: 
http://d8e0eae9-7db7-408a-9c42-f6f653589413.southcentralus.azurecontainer.io/score

swagger URI: 
http://d8e0eae9-7db7-408a-9c42-f6f653589413.southcentralus.azurecontainer.io/swagger.json



### Testing our web service

Let's send a request to the web service we deployed to test it.

In [26]:
test_data = pd.concat([X_test, y_test], axis=1)
test_data = test_data[10:15]
display(test_data)

Unnamed: 0,n_tokens_title,n_tokens_content,n_unique_tokens,num_hrefs,num_self_hrefs,num_imgs,num_videos,average_token_length,num_keywords,data_channel_is_entertainment,...,avg_positive_polarity,min_positive_polarity,max_positive_polarity,avg_negative_polarity,min_negative_polarity,max_negative_polarity,title_subjectivity,title_sentiment_polarity,abs_title_sentiment_polarity,label
29000,0.52381,0.133231,0.000633,0.042763,0.008621,0.007812,0.010989,0.638734,0.333333,0,...,0.401117,0.05,0.8,0.653536,0.0,0.928571,0.833333,0.75,0.5,0
4914,0.190476,0.069625,0.000745,0.046053,0.034483,0.085938,0.0,0.598376,0.333333,0,...,0.374029,0.033333,0.7,0.777083,0.6,0.875,1.0,0.75,0.5,1
19445,0.428571,0.048265,0.000777,0.019737,0.043103,0.007812,0.0,0.573733,0.444444,0,...,0.457713,0.136364,0.8,0.679082,0.1,0.95,0.0,0.5,0.0,1
32949,0.238095,0.096767,0.000791,0.039474,0.034483,0.015625,0.010989,0.548221,0.333333,0,...,0.397562,0.1,1.0,0.722049,0.2,0.95,0.0,0.5,0.0,0
22685,0.285714,0.041067,0.000952,0.023026,0.0,0.007812,0.0,0.580678,0.444444,0,...,0.348333,0.033333,0.8,0.803125,0.6875,0.9,0.454545,0.568182,0.136364,0


In [27]:
# remove label column
label = test_data.pop('label')

# convert test input data to dictionary form
input_data = json.dumps({'data': test_data.to_dict(orient='records')})

# print test input data
print(input_data)

{"data": [{"n_tokens_title": 0.5238095238095238, "n_tokens_content": 0.13323105971206042, "n_unique_tokens": 0.0006325920646276748, "num_hrefs": 0.042763157894736836, "num_self_hrefs": 0.008620689655172414, "num_imgs": 0.0078125, "num_videos": 0.01098901098901099, "average_token_length": 0.6387343741908725, "num_keywords": 0.3333333333333333, "data_channel_is_entertainment": 0, "data_channel_is_bus": 0, "data_channel_is_socmed": 0, "data_channel_is_tech": 0, "data_channel_is_world": 1, "kw_min_min": 0.0, "kw_max_min": 0.002047587131367292, "kw_min_max": 0.012332503260998459, "kw_avg_max": 0.47818095576900277, "kw_min_avg": 0.5908604678089657, "kw_max_avg": 0.01182515012030831, "kw_avg_avg": 0.05734695866517057, "self_reference_min_shares": 0.001152614727854856, "self_reference_max_shares": 0.001152614727854856, "weekday_is_wednesday": 1, "weekday_is_saturday": 0, "weekday_is_sunday": 0, "is_weekend": 0, "LDA_00": 0.05393875832876898, "LDA_01": 0.3236042844264332, "LDA_02": 0.5982119195

In [28]:
# Set the content type
headers = {'Content-Type': 'application/json'}

# Make the request and display the response
resp = requests.post(aci_service.scoring_uri, input_data, headers=headers)

print("Response Code : ", resp.status_code)
print("Predicted Value : ",resp.json())

Response Code :  200
Predicted Value :  {"result": [0, 1, 1, 0, 0]}


In [29]:
print(aci_service.get_logs())

2021-04-18T03:57:46,297691400+00:00 - rsyslog/run 
2021-04-18T03:57:46,305729100+00:00 - iot-server/run 
2021-04-18T03:57:46,309454800+00:00 - gunicorn/run 
2021-04-18T03:57:46,320630600+00:00 - nginx/run 
rsyslogd: /azureml-envs/azureml_d32c6a42b04d5ead1baaef3bc63324e7/lib/libuuid.so.1: no version information available (required by rsyslogd)
EdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...
2021-04-18T03:57:46,494202200+00:00 - iot-server/finish 1 0
2021-04-18T03:57:46,496798400+00:00 - Exit code 1 is normal. Not restarting iot-server.
Starting gunicorn 19.9.0
Listening at: http://127.0.0.1:31311 (77)
Using worker: sync
worker timeout is set to 300
Booting worker with pid: 105
Initialized PySpark session.
Generating new fontManager, this may take some time...
Initializing logger
Starting up app insights client
Starting up request id generator
Starting up app insight hooks
Invoking user's init function
Users's init has completed successfully
Skipping middleware