# Hyperparameter Tuning using HyperDrive

In [2]:
import os
import sys
import json
import azureml
import logging
import requests
import pandas as pd
import numpy as np
from io import BytesIO
import joblib

from azureml.core.workspace import Workspace
from azureml.core.experiment import Experiment
from azureml.core import Environment

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.train.estimator import Estimator
from azureml.core import ScriptRunConfig
from azureml.train.automl import AutoMLConfig
from azureml.core.dataset import Dataset

from azureml.pipeline.steps import AutoMLStep

from azureml.core.dataset import Dataset
from azureml.widgets import RunDetails
from azureml.train.sklearn import SKLearn
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.sampling import RandomParameterSampling, BayesianParameterSampling
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.parameter_expressions import uniform, quniform, choice

from azureml.core.runconfig import RunConfiguration
from azureml.core.runconfig import EnvironmentDefinition
from azureml.core.runconfig import CondaDependencies
from azureml.core.model import InferenceConfig, Model

from azureml.core.webservice import AciWebservice, Webservice
from azureml.core.model import Model, InferenceConfig

from azureml.train.automl import constants

from train import *

import warnings
warnings.filterwarnings("ignore")

pd.set_option('display.max_rows', None)

# Check system and core SDK version number
print("System version: {}".format(sys.version))
print("SDK version:", azureml.core.VERSION)

System version: 3.6.13 |Anaconda, Inc.| (default, Feb 23 2021, 12:58:59) 
[GCC Clang 10.0.0 ]
SDK version: 1.23.0


In [4]:
interactive_auth = InteractiveLoginAuthentication(tenant_id="660b3398-b80e-49d2-bc5b-ac1dc93b5254")
ws = Workspace(subscription_id="81cefad3-d2c9-4f77-a466-99a7f541c7bb",
                   resource_group="aml-quickstarts-142415",
                   workspace_name="quick-starts-ws-142415",
                   auth=interactive_auth)

experiment_name = 'online_news_project'
experiment=Experiment(ws, experiment_name)
experiment

# ws = Workspace.from_config()
# print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

Name,Workspace,Report Page,Docs Page
online_news_project,quick-starts-ws-142415,Link to Azure Machine Learning studio,Link to Documentation


In [5]:
dic_data = {'Workspace name': ws.name,
            'Azure region': ws.location,
            'Subscription id': ws.subscription_id,
            'Resource group': ws.resource_group,
            'Experiment Name': experiment.name}

az_data = pd.DataFrame.from_dict(data = dic_data, orient='index')
az_data.rename(columns={0:''}, inplace = True)
az_data

Unnamed: 0,Unnamed: 1
Workspace name,quick-starts-ws-142415
Azure region,southcentralus
Subscription id,81cefad3-d2c9-4f77-a466-99a7f541c7bb
Resource group,aml-quickstarts-142415
Experiment Name,online_news_project


## Create or Attach an AmlCompute cluster

In [6]:
# Define CPU cluster name
compute_target_name = "cpu-cluster"

try:
    compute_target = ComputeTarget(workspace=ws, name=compute_target_name)
    print("Found existing cpu-cluster. Use it.")
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_DS12_V2",
                                                           min_nodes=1, 
                                                           max_nodes=4) 
    compute_target = ComputeTarget.create(ws, compute_target_name, compute_config)

compute_target.wait_for_completion(show_output=True)
print(compute_target.get_status().serialize())

Found existing cpu-cluster. Use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned
{'currentNodeCount': 4, 'targetNodeCount': 4, 'nodeStateCounts': {'preparingNodeCount': 0, 'runningNodeCount': 4, 'idleNodeCount': 0, 'unusableNodeCount': 0, 'leavingNodeCount': 0, 'preemptedNodeCount': 0}, 'allocationState': 'Steady', 'allocationStateTransitionTime': '2021-04-11T12:45:30.504000+00:00', 'errors': None, 'creationTime': '2021-04-11T12:18:03.615477+00:00', 'modifiedTime': '2021-04-11T12:18:18.955759+00:00', 'provisioningState': 'Succeeded', 'provisioningStateTransitionTime': None, 'scaleSettings': {'minNodeCount': 1, 'maxNodeCount': 4, 'nodeIdleTimeBeforeScaleDown': 'PT120S'}, 'vmPriority': 'Dedicated', 'vmSize': 'STANDARD_DS12_V2'}


## Dataset

The dataset used in this project is a dataset made available on UCI Machine Learning Repository called [Online News Popularity Data Set](https://archive.ics.uci.edu/ml/datasets/Online+News+Popularity#).

The dataset summarizes heterogeneous set of features about the articles published by Mashable between 2013 and 2015.

- Number of Instances: 39797
- Number of Attributes: 61 
    - 58 predictive attributes 
    - 2 non-predictive (`url` and `timedelta`) 
    - 1 target column
    
We will also apply the `Boruta` step for feature selection before exporting the data to our defined Datastore

In [7]:
DATA_LOC = "https://raw.githubusercontent.com/franckess/AzureML_Capstone/main/data/OnlineNewsPopularity.csv"
BORUTA_LOC = "https://github.com/franckess/AzureML_Capstone/releases/download/1.1/boruta_model_final.pkl"

# Loading data
df = pd.read_csv(DATA_LOC)

# Removing space character in the feature names
df.columns=df.columns.str.replace(' ','')

# Drop URL column
df = df.drop(['url'], axis=1)

# Perform Data pre-processing
df = corr_drop_cols(df)
df = create_label(df)
df = scaling_num(df)
df = feature_selection(df, BORUTA_LOC)
    
# Split train data into train & test
X_train, X_test, y_train, y_test = split_train_test(df)

m, k = X_train.shape
print("{} x {} table of data:".format(m, k))
X_train.info()

31715 x 47 table of data:
<class 'pandas.core.frame.DataFrame'>
Int64Index: 31715 entries, 38512 to 35050
Data columns (total 47 columns):
n_tokens_title                   31715 non-null float64
n_tokens_content                 31715 non-null float64
n_unique_tokens                  31715 non-null float64
num_hrefs                        31715 non-null float64
num_self_hrefs                   31715 non-null float64
num_imgs                         31715 non-null float64
num_videos                       31715 non-null float64
average_token_length             31715 non-null float64
num_keywords                     31715 non-null float64
data_channel_is_entertainment    31715 non-null int64
data_channel_is_bus              31715 non-null int64
data_channel_is_socmed           31715 non-null int64
data_channel_is_tech             31715 non-null int64
data_channel_is_world            31715 non-null int64
kw_min_min                       31715 non-null float64
kw_max_min                     

## Upload data to Azure Datatore

In [8]:
datastore = ws.get_default_datastore()
datastore.upload_files(files = ['./data/OnlineNewsPopularity.csv'], target_path='data/', overwrite=True, show_progress=True)

Uploading an estimated of 1 files
Uploading ./data/OnlineNewsPopularity.csv
Uploaded ./data/OnlineNewsPopularity.csv, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_7f448b8017ee416c86533992b556680c

In [9]:
print(
    "Datastore type: " + datastore.datastore_type,
    "Account name: " + datastore.account_name,
    "Container name: " + datastore.container_name,
    sep="\n",
)

Datastore type: AzureBlob
Account name: mlstrg142415
Container name: azureml-blobstore-e3f99bb8-a492-4d55-add2-2ab0bb5281ce


In [10]:
# Get data reference object for the data path
ds_data = datastore.path('data/')
print(ds_data)

$AZUREML_DATAREFERENCE_ac3f7660eb3b4182afbbc224dc05bdcd


## HyperDrive Configuration

### Create an environment

Define a conda environment YAML file with your training script dependencies and create an Azure ML environment.

In [11]:
%%writefile hyperdrive_dependencies.yml

channels:
- conda-forge
- anaconda
dependencies:
- python=3.6.2
- scikit-learn
- pandas
- numpy
- pip:
  - azureml-defaults 
  - pingouin
  - lightgbm
  - joblib  
  - Boruta
  - inference-schema
  - azureml-interpret
  - azureml-train-automl-runtime

Overwriting hyperdrive_dependencies.yml


In [12]:
udacity_env = Environment.from_conda_specification(name = 'udacity-env', file_path = './hyperdrive_dependencies.yml')
udacity_env.register(ws)

# Specify an Ubuntu base image
udacity_env.docker.enabled = True
udacity_env.python.user_managed_dependencies = False
udacity_env.docker.base_image = 'mcr.microsoft.com/azureml/base:openmpi3.1.2-ubuntu18.04'

Build the image just to confirm it works appropriately or identify any errors prior to deployment

In [None]:
# build = udacity_env.build(workspace=ws)
# build.wait_for_completion(show_output=True)

In [None]:
# details = udacity_env.get_image_details(ws)
# print(details.dockerfile)

### Tune hyperparameters using `HyperDrive`

In the following block, I tune my set of hyperparameters for the `LightGBM` model. The ranges of parameters for the `LightGBM` used were chosen considering the parameters tuning guides for different scenarios provided here.

The code below does a parallel search of the hyperparameter space using a Bayesian sampling method which does not support termination policy. Therefore, `policy=None`.

> __Note that when using Bayesian sampling, the number of concurrent runs has an impact on the effectiveness of the tuning process. Typically, a smaller number of concurrent runs leads to better sampling convergence. That is because some runs start without fully benefiting from runs that are still running.__

In order to compare the performance of HyperDrive with the one of AutoML we chose as objective metric of `LightGBM` __Accuracy__ score.

In [13]:
# Create a SKLearn estimator for use with train.py
src = ScriptRunConfig(source_directory='.',
                      script='train.py',
                      compute_target=compute_target,
                      environment=udacity_env)

# Specify a Policy
#early_termination_policy = BanditPolicy(slack_factor = 0.1, evaluation_interval=2, delay_evaluation=5)

# Specify hyperparameter space
param_sampling = BayesianParameterSampling(
    {
        "--num-leaves": quniform(8, 128, 1),
        "--min-data-in-leaf": quniform(20, 500, 10),
        "--learning-rate": choice(
            1e-4, 1e-3, 5e-3, 1e-2, 1.5e-2, 2e-2, 3e-2, 5e-2, 1e-1
        ),
        "--feature-fraction": uniform(0.1, 1),
        "--bagging-fraction": uniform(0.1, 1),
        "--bagging-freq": quniform(1, 30, 1),
        "--max-depth": quniform(5, 50, 5)
    }
)

# Create a HyperDriveConfig using the estimator, hyperparameter sampler, and policy.
hyperdrive_config = HyperDriveConfig(run_config=src,
                                     hyperparameter_sampling=param_sampling,
                                     policy=None ,
                                     primary_metric_name="Accuracy",
                                     primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
                                     max_total_runs=50,
                                     max_concurrent_runs=10)

In [14]:
# Submit hyperdrive run to the experiment 
hyperdrive_run = experiment.submit(config=hyperdrive_config, show_output=True)

## Run Details

In [15]:
# Show run details with the Jupyter widget
RunDetails(hyperdrive_run).show()
hyperdrive_run.wait_for_completion(show_output=True)
hyperdrive_run.get_metrics()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…

RunId: HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49
Web View: https://ml.azure.com/experiments/online_news_project/runs/HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49?wsid=/subscriptions/81cefad3-d2c9-4f77-a466-99a7f541c7bb/resourcegroups/aml-quickstarts-142415/workspaces/quick-starts-ws-142415

Streaming azureml-logs/hyperdrive.txt

"<START>[2021-04-11T12:49:42.359948][API][INFO]Experiment created<END>\n""<START>[2021-04-11T12:49:43.230604][GENERATOR][INFO]Trying to sample '10' jobs from the hyperparameter space<END>\n""<START>[2021-04-11T12:49:43.591665][GENERATOR][INFO]Successfully sampled '10' jobs, they will soon be submitted to the execution target.<END>\n"

Execution Summary
RunId: HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49
Web View: https://ml.azure.com/experiments/online_news_project/runs/HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49?wsid=/subscriptions/81cefad3-d2c9-4f77-a466-99a7f541c7bb/resourcegroups/aml-quickstarts-142415/workspaces/quick-starts-ws-142415



{'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_49': {'Accuracy': 0.6539286164711817},
 'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_47': {'Accuracy': 0.6763778534493631},
 'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_48': {'Accuracy': 0.6686845756085257},
 'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_45': {'Accuracy': 0.533610795812839},
 'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_46': {'Accuracy': 0.6652793542691386},
 'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_44': {'Accuracy': 0.6743599445075041},
 'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_43': {'Accuracy': 0.533610795812839},
 'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_41': {'Accuracy': 0.533610795812839},
 'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_42': {'Accuracy': 0.6767562113759616},
 'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_39': {'Accuracy': 0.6783957623912221},
 'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_40': {'Accuracy': 0.6818009837306092},
 'HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_37': {'Accuracy': 0.6733509900365746},
 'HD_1ab2b145-4bb1-46b8-a702-2d

## Retrieve and Save Best Model

Here I retrieve and save the best model as well as display all the properties of the model.

In [16]:
get_best_performance = hyperdrive_run.get_best_run_by_primary_metric() 
get_best_metrics = get_best_performance.get_metrics()
parameter_values = get_best_performance.get_details()["runDefinition"]["arguments"]

print('Best Run Id: ', get_best_performance.id)
print('Best Run Accuracy:', get_best_metrics['Accuracy'])
print('Best model hyperparameter values', parameter_values)

Best Run Id:  HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_40
Best Run Accuracy: 0.6818009837306092
Best model hyperparameter values ['--num-leaves', '61', '--min-data-in-leaf', '190', '--learning-rate', '0.05', '--feature-fraction', '0.825491549450686', '--bagging-fraction', '0.631814628503191', '--bagging-freq', '11', '--max-depth', '15']


In [17]:
get_best_performance.get_file_names()

['azureml-logs/55_azureml-execution-tvmps_950586d5edd00087bc07a26b003d754aefbdc3abb42c5d8a4fc09bca1159822e_d.txt',
 'azureml-logs/65_job_prep-tvmps_950586d5edd00087bc07a26b003d754aefbdc3abb42c5d8a4fc09bca1159822e_d.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_950586d5edd00087bc07a26b003d754aefbdc3abb42c5d8a4fc09bca1159822e_d.txt',
 'azureml-logs/process_info.json',
 'azureml-logs/process_status.json',
 'logs/azureml/97_azureml.log',
 'logs/azureml/job_prep_azureml.log',
 'logs/azureml/job_release_azureml.log',
 'outputs/lgb_model.pkl']

In [18]:
get_best_performance.download_file("outputs/lgb_model.pkl","output/hyperdrive_model.pkl")

In [19]:
get_best_performance

Experiment,Id,Type,Status,Details Page,Docs Page
online_news_project,HD_1ab2b145-4bb1-46b8-a702-2dd0b5aedf49_40,azureml.scriptrun,Completed,Link to Azure Machine Learning studio,Link to Documentation


## Model Deployment

### Register our best model

In [20]:
model = get_best_performance.register_model(model_name = 'best_model', 
                                            model_path = './outputs/lgb_model.pkl')
print("Model successfully registered.")

Model successfully registered.


In [30]:
print('Name:', model.name)
print('Version:', model.version)

Name: best_model
Version: 1


In [41]:
Model.get_model_path(model_name = 'best_model', _workspace=ws)

'azureml-models/best_model/1/lgb_model.pkl'

### Create a scoring script `score.py`

In [23]:
%%writefile score.py

import os
import pandas as pd
import json
import pickle
import logging 
import joblib

import pingouin as pg
import numpy as np
import requests
import pandas as pd
import azureml.core
import lightgbm as lgb
from io import BytesIO
from boruta import BorutaPy
from azureml.core.run import Run
from urllib.request import urlopen
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

def init():
    
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'outputs')
    print("Model path ", model_path)
    #load models
    deploy_model = joblib.load(model_path + '/lgb_model.pkl')
    
def corr_drop_cols(df, corr_val = 0.85):
    df_copy = df.copy() # create a copy
    corrmat = pg.pairwise_corr(df_copy, method='pearson')[['X', 'Y', 'r']]
    df_corr = corrmat.sort_values(by='r', ascending=0)[(corrmat['r'] >= corr_val) | (corrmat['r'] <= -1*corr_val)]
    setcols = set(df_corr.Y.to_list())
    # Drop columns high correlation values
    df_copy = df_copy.drop(list(setcols), axis=1)

    return df_copy

def create_label(df):
    df_copy = df.copy() # create a copy
    df_copy['label'] = [1 if x >= 1400 else 0 for x in df_copy['shares']]
    df_copy = df_copy.drop(['shares', 'timedelta'], axis=1)
    y = df_copy['label'].values
    labelencoder = LabelEncoder()
    df_copy['label'] = labelencoder.fit_transform(y)
    col_list = [s for s in df_copy.columns if 'is' in s]
    df_copy[col_list] = df_copy[col_list].apply(lambda x: labelencoder.fit_transform(x))

    return df_copy

def scaling_num(df):
    df_copy = df.copy() # create a copy
    from sklearn.preprocessing import MinMaxScaler
    col_list = [s for s in df_copy.columns if 'is' in s] + ['label']
    num_cols = [m for m in df_copy if m not in col_list]
    scale = MinMaxScaler()
    df_copy[num_cols] = pd.DataFrame(scale.fit_transform(df_copy[num_cols].values), columns=[num_cols], index=df_copy.index)

    return df_copy

def feature_selection(df, OUT_LOC):
    df_copy = df.copy() # create a copy
    mfile = BytesIO(requests.get(OUT_LOC).content) # BytesIO create a file object out of the response from GitHub 
    feat_selector = joblib.load(mfile)
    X = df_copy.drop(['label'], axis=1)
    keep_cols = list(X.columns[feat_selector.support_]) + ['label']
    df_copy = df_copy[keep_cols]

    return df_copy

def split_train_test(df):
    df_copy = df.copy() # create a copy
    X = df_copy.drop('label', axis=1)
    y = df_copy.pop('label')
    # Train-test split 80/20
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, stratify = y, random_state = 100)

    return X_train, X_test, y_train, y_test

def run(data):
    # Boruta model location
    BORUTA_LOC = "https://github.com/franckess/AzureML_Capstone/releases/download/1.1/boruta_model_final.pkl"
    
    try:
        data.columns = data.columns.str.replace(' ','')
        data = data.drop(['url'], axis=1)
        data = corr_drop_cols(data)
        data = create_label(data)
        data = scaling_num(data)
        data = feature_selection(data, BORUTA_LOC)
        y = data.pop('label')
        X = data.drop(['label'], axis=1)
        
        result = deploy_model.predict(X)
        print("Result is ", result)
        return result.tolist()
    except Exception as e:
        error = str(e)
        prinrt("Error occured ", error)
        return error

Overwriting score.py


### Inference configuration

Create an inference config and deploy the model as a web service.

In [45]:
script_file_name = './score.py'
inference_config = InferenceConfig(entry_script=script_file_name)

aciconfig = AciWebservice.deploy_configuration(cpu_cores = 2, 
                                               memory_gb = 4, 
                                               tags = {'Company': "Mashable", 'type': "Hyperdrive"}, 
                                               description = 'sample service for Capstone Project Hyperdrive Classifier for Online News popularity')
aci_service_name = 'hyperdrive-inference'
print(aci_service_name)
aci_service = Model.deploy(ws, aci_service_name, [model], inference_config, aciconfig)
aci_service.wait_for_deployment(True)
print(aci_service.state)
print(aci_service.scoring_uri)
print(aci_service.swagger_uri)

hyperdrive-inference
Tips: You can try get_logs(): https://aka.ms/debugimage#dockerlog or local deployment: https://aka.ms/debugimage#debug-locally to debug if deployment takes longer than 10 minutes.
Running..........................
Failed


Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: d868bfd7-43fe-4da9-aec9-9502f51f87ba
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "statusCode": 400,
  "message": "Aci Deployment failed with exception: Error in entry script, ImportError: cannot import name 'cached_property', please run print(service.get_logs()) to get details.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Error in entry script, ImportError: cannot import name 'cached_property', please run print(service.get_logs()) to get details."
    }
  ]
}



WebserviceException: WebserviceException:
	Message: Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: d868bfd7-43fe-4da9-aec9-9502f51f87ba
More information can be found using '.get_logs()'
Error:
{
  "code": "AciDeploymentFailed",
  "statusCode": 400,
  "message": "Aci Deployment failed with exception: Error in entry script, ImportError: cannot import name 'cached_property', please run print(service.get_logs()) to get details.",
  "details": [
    {
      "code": "CrashLoopBackOff",
      "message": "Error in entry script, ImportError: cannot import name 'cached_property', please run print(service.get_logs()) to get details."
    }
  ]
}
	InnerException None
	ErrorResponse 
{
    "error": {
        "message": "Service deployment polling reached non-successful terminal state, current service state: Failed\nOperation ID: d868bfd7-43fe-4da9-aec9-9502f51f87ba\nMore information can be found using '.get_logs()'\nError:\n{\n  \"code\": \"AciDeploymentFailed\",\n  \"statusCode\": 400,\n  \"message\": \"Aci Deployment failed with exception: Error in entry script, ImportError: cannot import name 'cached_property', please run print(service.get_logs()) to get details.\",\n  \"details\": [\n    {\n      \"code\": \"CrashLoopBackOff\",\n      \"message\": \"Error in entry script, ImportError: cannot import name 'cached_property', please run print(service.get_logs()) to get details.\"\n    }\n  ]\n}"
    }
}

In [46]:
aci_service.get_logs()

'2021-04-11T13:59:10,588265147+00:00 - iot-server/run \n2021-04-11T13:59:10,589661455+00:00 - rsyslog/run \n2021-04-11T13:59:10,590864662+00:00 - gunicorn/run \n/bin/bash: /azureml-envs/azureml_b762c9700eeb682ffc0f3e629098f125/lib/libtinfo.so.5: no version information available (required by /bin/bash)\n/bin/bash: /azureml-envs/azureml_b762c9700eeb682ffc0f3e629098f125/lib/libtinfo.so.5: no version information available (required by /bin/bash)\n/bin/bash: /azureml-envs/azureml_b762c9700eeb682ffc0f3e629098f125/lib/libtinfo.so.5: no version information available (required by /bin/bash)\n/bin/bash: /azureml-envs/azureml_b762c9700eeb682ffc0f3e629098f125/lib/libtinfo.so.5: no version information available (required by /bin/bash)\n2021-04-11T13:59:10,595655090+00:00 - nginx/run \nbash: /azureml-envs/azureml_b762c9700eeb682ffc0f3e629098f125/lib/libtinfo.so.5: no version information available (required by bash)\nEdgeHubConnectionString and IOTEDGE_IOTHUBHOSTNAME are not set. Exiting...\n/bin/bas

In [None]:
test_data = pd.concat([X_test, y_test], axis=1)
test_data = test_data[10:15]
display(test_data)