In [130]:
import azureml.core


# Report which azureml-core release this notebook is running against.
print(f'SDK version: {azureml.core.VERSION}')

SDK version: 1.7.0


In [131]:
from azureml.core import Workspace

# Connect to the workspace and echo its identifying fields, one per line,
# to confirm we are talking to the intended workspace.
# NOTE(review): the subscription id ends up in the saved cell output;
# consider clearing the output before sharing the notebook.
ws = Workspace.from_config()
print('\n'.join(
    str(attr)
    for attr in (ws.name, ws.resource_group, ws.location, ws.subscription_id)
))

hakk-ai
hakk.ai
eastus
b738af92-69d2-4390-a1d8-579dc094b72e


In [132]:
from azureml.core.model import Model

# Register the local model file in the workspace under a fixed name; the
# returned object carries the name and version that deployment refers to.
model = Model.register(
    workspace=ws,
    model_name="xgb_final_model",
    model_path="Model/xgb_really_final.pkl",
)

print(f'Name: {model.name}')
print(f'Version: {model.version}')

Registering model xgb_final_model
Name: xgb_final_model
Version: 2


In [133]:
from azureml.core.conda_dependencies import CondaDependencies 


# Pin package versions where possible so the scoring image is built from the
# same packages the model was trained with.
# NOTE(review): 'joblib' and 'xgboost' are left unpinned below — confirm the
# versions used for training and pin them as well.
myenv = CondaDependencies.create(
    python_version='3.6.2',
    conda_packages=[
        'numpy==1.15.4',
        'scikit-learn==0.19.1',
        'pandas==0.23.4',
        'joblib',
        'xgboost',
    ],
    pip_packages=[
        'azureml-defaults>=1.0.45',
        'inference-schema[numpy-support]',
        'inference-schema[pandas-support]',
    ],
)

# Persist the spec to disk; the deployment cells read myenv.yml back in.
with open("myenv.yml", "w") as env_file:
    env_file.write(myenv.serialize_to_string())

In [134]:
%%writefile score.py
import json
import numpy as np
import joblib
from azureml.core.model import Model
import xgboost as xgb
import pandas as pd

from inference_schema.schema_decorators import input_schema, output_schema
from inference_schema.parameter_types.numpy_parameter_type import NumpyParameterType
from inference_schema.parameter_types.pandas_parameter_type import PandasParameterType

def haversine_km(lon1, lat1, lon2, lat2):
    """
    Great-circle (haversine) distance in kilometres between two points.

    Coordinates are decimal degrees, longitude before latitude. Scalars and
    array-likes (e.g. pandas Series) are accepted; array arguments must all
    have the same length and are processed element-wise.

    The Earth radius used is 6367 km.
    """
    lon1_rad, lat1_rad, lon2_rad, lat2_rad = (
        np.radians(angle) for angle in (lon1, lat1, lon2, lat2)
    )

    half_dlat = (lat2_rad - lat1_rad) / 2.0
    half_dlon = (lon2_rad - lon1_rad) / 2.0

    # Haversine of the central angle between the two points.
    hav = np.sin(half_dlat)**2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(half_dlon)**2

    # Central angle in radians, scaled by the Earth radius.
    central_angle = 2 * np.arcsin(np.sqrt(hav))
    return 6367 * central_angle

def haversine_m(lon1, lat1, lon2, lat2):
    """
    Great-circle (haversine) distance between two points, in MILES.

    Despite the `_m` suffix the result is not in metres: the scale factor
    3956 is the Earth's radius in miles, so for the same inputs the value is
    about 0.621 times what haversine_km returns. The name and the constant
    are kept as they are because the value feeds the model's 'haversine_m'
    feature column.

    Coordinates are decimal degrees, longitude before latitude. Scalars and
    array-likes (e.g. pandas Series) are accepted; array arguments must all
    have the same length and are processed element-wise.
    """
    lon1_rad, lat1_rad, lon2_rad, lat2_rad = (
        np.radians(angle) for angle in (lon1, lat1, lon2, lat2)
    )

    half_dlat = (lat2_rad - lat1_rad) / 2.0
    half_dlon = (lon2_rad - lon1_rad) / 2.0

    # Haversine of the central angle between the two points.
    hav = np.sin(half_dlat)**2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(half_dlon)**2

    # Central angle in radians, scaled by the Earth radius (miles).
    central_angle = 2 * np.arcsin(np.sqrt(hav))
    return 3956 * central_angle

# NOTE: a disabled `preprocessing(data)` helper used to sit here wrapped in a
# bare triple-quoted string. It was never callable and had drifted from the
# feature engineering that run() performs (it added is_weekend / is_weekday
# columns and used a different rush-hour window), so it was removed to keep a
# single source of truth for the model's input features.

def init():
    """
    Start-up hook invoked by the scoring service before any request is served.

    Resolves the local path of the registered model 'xgb_final_model' and
    loads it into the module-level booster `bst`, which run() predicts with.
    """
    global bst

    registered_model_file = Model.get_model_path(model_name='xgb_final_model')

    # Create an empty booster, then populate it from the model file.
    bst = xgb.Booster({'nthread': -1})
    bst.load_model(registered_model_file)
    
# Example request row. It is handed to PandasParameterType on run() as the
# sample that describes an incoming record.
input_sample = pd.DataFrame([
    {
        "latitude_origin": -6.141255,
        "longitude_origin": 106.692710,
        "latitude_destination": -6.141150,
        "longitude_destination": 106.693154,
        "timestamp": 1590487113,
        "hour_of_day": 9,
        "day_of_week": 1,
    },
])

# Example response: a one-element float array. Use the data type that
# reflects the expected result.
output_sample = np.array([360.0])

def _engineer_features(data):
    """
    Derive the model's feature columns from raw request rows.

    `data` is a DataFrame with the columns of `input_sample`: origin and
    destination latitude/longitude, `timestamp` (epoch seconds),
    `hour_of_day` and `day_of_week`.

    Returns a copy of `data` with the engineered columns appended and
    `timestamp` removed; the caller's frame is not modified. The columns are
    appended in a fixed order because the frame is passed to xgb.DMatrix
    as-is — keep that order stable.
    """
    features = data.copy()

    # Epoch seconds -> datetime; only needed to derive the time-window flags.
    features['timestamp'] = pd.to_datetime(features['timestamp'], unit='s')
    time_of_day = features['timestamp'].dt.strftime('%H:%M:%S')

    features['is_wee_hours'] = np.where(features['hour_of_day'].isin([17, 18, 19, 20, 21]), 1, 0)

    # NOTE(review): `between` compares these strings lexicographically, so a
    # window whose lower bound sorts after its upper bound
    # ('23:30:00' > '01:30:00') never matches and this flag is always 0.
    # Deliberately left unchanged: it is a model input, so any fix has to be
    # made together with the training pipeline.
    features['is_rush_hours_morning'] = np.where(time_of_day.between('23:30:00', '01:30:00'), 1, 0)
    features['is_rush_hours_evening'] = np.where(time_of_day.between('09:00:00', '12:00:00'), 1, 0)

    # Cyclical encodings of hour (period 24) and weekday (period 7).
    features['sin_hour_of_day'] = np.sin(2*np.pi*features.hour_of_day/24)
    features['cos_hour_of_day'] = np.cos(2*np.pi*features.hour_of_day/24)
    features['sin_day_of_week'] = np.sin(2*np.pi*features.day_of_week/7)
    features['cos_day_of_week'] = np.cos(2*np.pi*features.day_of_week/7)

    features['haversine_km'] = haversine_km(features['longitude_origin'], features['latitude_origin'],
                                            features['longitude_destination'], features['latitude_destination'])

    features['haversine_m'] = haversine_m(features['longitude_origin'], features['latitude_origin'],
                                          features['longitude_destination'], features['latitude_destination'])

    return features.drop(['timestamp'], axis=1)


# To indicate that we support a variable length of data input,
# set enforce_shape=False
@input_schema('data', PandasParameterType(input_sample, enforce_shape=False))
@output_schema(NumpyParameterType(output_sample))
def run(data):
    """
    Score one or more request rows.

    Returns the booster's predictions as a plain list with one value per
    input row. If anything fails, the exception message is returned as a
    string instead of being raised, so callers must check the type of the
    response body; the full traceback is written to the service log.
    """
    import logging

    try:
        features = _engineer_features(data)
        result = bst.predict(xgb.DMatrix(features))
        return result.tolist()

    except Exception as e:
        # Keep the existing contract (error text as the response) but make
        # the failure visible in the service logs instead of losing it.
        logging.exception("Scoring request failed")
        return str(e)

Overwriting score.py


In [96]:
import pandas as pd
import numpy as np

def haversine_km(lon1, lat1, lon2, lat2):
    """
    Great-circle (haversine) distance in kilometres between two points.

    Notebook-side copy of the function written to score.py; keep both in sync.

    Coordinates are decimal degrees, longitude before latitude. Scalars and
    array-likes (e.g. pandas Series) are accepted; array arguments must all
    have the same length and are processed element-wise.

    The Earth radius used is 6367 km.
    """
    lon1_rad, lat1_rad, lon2_rad, lat2_rad = (
        np.radians(angle) for angle in (lon1, lat1, lon2, lat2)
    )

    half_dlat = (lat2_rad - lat1_rad) / 2.0
    half_dlon = (lon2_rad - lon1_rad) / 2.0

    # Haversine of the central angle between the two points.
    hav = np.sin(half_dlat)**2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(half_dlon)**2

    # Central angle in radians, scaled by the Earth radius.
    central_angle = 2 * np.arcsin(np.sqrt(hav))
    return 6367 * central_angle

def haversine_m(lon1, lat1, lon2, lat2):
    """
    Great-circle (haversine) distance between two points, in MILES.

    Notebook-side copy of the function written to score.py; keep both in sync.

    Despite the `_m` suffix the result is not in metres: the scale factor
    3956 is the Earth's radius in miles, so for the same inputs the value is
    about 0.621 times what haversine_km returns.

    Coordinates are decimal degrees, longitude before latitude. Scalars and
    array-likes (e.g. pandas Series) are accepted; array arguments must all
    have the same length and are processed element-wise.
    """
    lon1_rad, lat1_rad, lon2_rad, lat2_rad = (
        np.radians(angle) for angle in (lon1, lat1, lon2, lat2)
    )

    half_dlat = (lat2_rad - lat1_rad) / 2.0
    half_dlon = (lon2_rad - lon1_rad) / 2.0

    # Haversine of the central angle between the two points.
    hav = np.sin(half_dlat)**2 + np.cos(lat1_rad) * np.cos(lat2_rad) * np.sin(half_dlon)**2

    # Central angle in radians, scaled by the Earth radius (miles).
    central_angle = 2 * np.arcsin(np.sqrt(hav))
    return 3956 * central_angle

    

# Local dry run of the feature engineering that score.py's run() applies, so
# the resulting columns can be inspected without deploying anything.
input_sample = pd.DataFrame(data=[{
    "latitude_origin": -6.141255,
    "longitude_origin": 106.692710,
    "latitude_destination": -6.141150,
    "longitude_destination": 106.693154,
    "timestamp": 1590487113,
    "hour_of_day": 9,
    "day_of_week": 1
}])

# Work on a copy: a plain `data = input_sample` only aliases the frame, so the
# column assignments below would silently rewrite the sample as well.
data = input_sample.copy()

# Epoch seconds -> datetime; only needed to derive the time-window flags.
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')
time_of_day = data['timestamp'].dt.strftime('%H:%M:%S')

data['is_wee_hours'] = np.where(data['hour_of_day'].isin([17, 18, 19, 20, 21]), 1, 0)

# Window bounds match score.py ('23:30:00'..'01:30:00').
# NOTE(review): `between` compares these strings lexicographically, so a
# window whose lower bound sorts after its upper bound never matches and this
# flag is always 0. Left unchanged because it is a model input; fix it
# together with the training pipeline.
data['is_rush_hours_morning'] = np.where(time_of_day.between('23:30:00', '01:30:00'), 1, 0)
data['is_rush_hours_evening'] = np.where(time_of_day.between('09:00:00', '12:00:00'), 1, 0)

# Cyclical encodings of hour (period 24) and weekday (period 7).
data['sin_hour_of_day'] = np.sin(2*np.pi*data.hour_of_day/24)
data['cos_hour_of_day'] = np.cos(2*np.pi*data.hour_of_day/24)
data['sin_day_of_week'] = np.sin(2*np.pi*data.day_of_week/7)
data['cos_day_of_week'] = np.cos(2*np.pi*data.day_of_week/7)


data['haversine_km'] = haversine_km(data['longitude_origin'], data['latitude_origin'],
                                    data['longitude_destination'], data['latitude_destination'])

data['haversine_m'] = haversine_m(data['longitude_origin'], data['latitude_origin'],
                                  data['longitude_destination'], data['latitude_destination'])


data = data.drop(['timestamp'], axis=1)

In [97]:
# Display the engineered feature row built in the previous cell.
data

Unnamed: 0,day_of_week,hour_of_day,latitude_destination,latitude_origin,longitude_destination,longitude_origin,is_wee_hours,is_rush_hours_morning,is_rush_hours_evening,sin_hour_of_day,cos_hour_of_day,sin_day_of_week,cos_day_of_week,haversine_km,haversine_m
0,1,9,-6.14115,-6.141255,106.693154,106.69271,0,0,1,0.707107,-0.707107,0.781831,0.62349,0.050425,0.03133


# Deploy locally

In [129]:
from azureml.core import Webservice
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import LocalWebservice
# Needed by the `except` clause below; without it a missing service turns
# into a NameError instead of being ignored.
from azureml.exceptions import WebserviceException

service_name = 'hakkai-service-xgb-final'

# Remove any existing service under the same name. A WebserviceException here
# is treated as "nothing to delete" and ignored on purpose.
try:
    Webservice(ws, service_name).delete()
except WebserviceException:
    pass

# Rebuild the environment from the spec written earlier. Note that `myenv` is
# rebound here from the CondaDependencies object to an Environment.
myenv = Environment.from_conda_specification(name="env", file_path="myenv.yml")

inference_config = InferenceConfig(entry_script='score.py', environment=myenv)

# No port is requested; the port actually bound is printed after deployment.
deployment_config = LocalWebservice.deploy_configuration()

service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=deployment_config)
service.wait_for_deployment(show_output=True)

print(service.port)

Downloading model xgb_final_model:1 to /tmp/azureml_u67qq_5q/xgb_final_model/1
Generating Docker build context.
Package creation Succeeded
Logging into Docker registry hakkai6fb3fa5e.azurecr.io
Logging into Docker registry hakkai6fb3fa5e.azurecr.io
Building Docker image from Dockerfile...
Step 1/5 : FROM hakkai6fb3fa5e.azurecr.io/azureml/azureml_73d03d5fb609b949b5587123a3620e38
 ---> fc9efa5255e2
Step 2/5 : COPY azureml-app /var/azureml-app
 ---> a4e981c0be29
Step 3/5 : RUN mkdir -p '/var/azureml-app' && echo eyJhY2NvdW50Q29udGV4dCI6eyJzdWJzY3JpcHRpb25JZCI6ImI3MzhhZjkyLTY5ZDItNDM5MC1hMWQ4LTU3OWRjMDk0YjcyZSIsInJlc291cmNlR3JvdXBOYW1lIjoiaGFray5haSIsImFjY291bnROYW1lIjoiaGFray1haSIsIndvcmtzcGFjZUlkIjoiYTkxYzQ5YjUtN2RlMC00Yzg4LTk3ZDAtOTdiNjVjMzgwODE3In0sIm1vZGVscyI6e30sIm1vZGVsc0luZm8iOnt9fQ== | base64 --decode > /var/azureml-app/model_config_map.json
 ---> Running in 0aa0998acacc
 ---> b0c4a8901fb7
Step 4/5 : RUN mv '/var/azureml-app/tmp_y0svlj6.py' /var/azureml-app/main.py
 ---> Running i

KeyboardInterrupt: 

In [128]:
# Dump the service's container logs to diagnose start-up problems
# (e.g. a failure inside score.py's init()).
print(service.get_logs())

Initialized PySpark session.
Initializing logger
Starting up app insights client
Starting up request id generator
Starting up app insight hooks
Invoking user's init function
terminate called after throwing an instance of 'std::length_error'
  what():  basic_string::_M_replace_aux
worker timeout is set to 300
Booting worker with pid: 148
Initialized PySpark session.
Initializing logger
Starting up app insights client
Starting up request id generator
Starting up app insight hooks
Invoking user's init function
terminate called after throwing an instance of 'std::length_error'
  what():  basic_string::_M_replace_aux
worker timeout is set to 300
Booting worker with pid: 150
Initialized PySpark session.
Initializing logger
Starting up app insights client
Starting up request id generator
Starting up app insight hooks
Invoking user's init function
terminate called after throwing an instance of 'std::length_error'
  what():  basic_string::_M_replace_aux
worker timeout is set to 300
Booting work

In [52]:
# Report the current state of the deployed service.
print(service.state)

running


# Deploy on cloud

In [136]:
from azureml.core import Webservice
# Environment and Model are imported here so this cell does not depend on the
# local-deployment cell having been run first.
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice
from azureml.exceptions import WebserviceException


service_name = 'hakkai-service-xgb-final'

# Remove any existing service under the same name. A WebserviceException here
# is treated as "nothing to delete" and ignored on purpose.
try:
    Webservice(ws, service_name).delete()
except WebserviceException:
    pass


# Rebuild the environment from the spec written earlier. Note that `myenv` is
# rebound here from the CondaDependencies object to an Environment.
myenv = Environment.from_conda_specification(name="env", file_path="myenv.yml")

inference_config = InferenceConfig(entry_script='score.py', environment=myenv)
# Container size for the ACI instance; Application Insights is switched on.
aci_config = AciWebservice.deploy_configuration(cpu_cores=3, memory_gb=15, enable_app_insights=True)

service = Model.deploy(workspace=ws,
                       name=service_name,
                       models=[model],
                       inference_config=inference_config,
                       deployment_config=aci_config)
service.wait_for_deployment(show_output=True)

Running........................................
Succeeded
ACI service creation operation finished, operation "Succeeded"


In [137]:
# Dump the ACI container logs to confirm the scoring server started cleanly.
print(service.get_logs())

2020-06-20T12:20:43,746587254+00:00 - rsyslog/run 
2020-06-20T12:20:43,747160972+00:00 - iot-server/run 
2020-06-20T12:20:43,747707590+00:00 - nginx/run 
/usr/sbin/nginx: /azureml-envs/azureml_a0677afa4eacaa88546a43bc78063d41/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a0677afa4eacaa88546a43bc78063d41/lib/libcrypto.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a0677afa4eacaa88546a43bc78063d41/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a0677afa4eacaa88546a43bc78063d41/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
/usr/sbin/nginx: /azureml-envs/azureml_a0677afa4eacaa88546a43bc78063d41/lib/libssl.so.1.0.0: no version information available (required by /usr/sbin/nginx)
2020-06-20T12:20:43,748801024+00:00 - gunicorn/run 
EdgeHubC

In [138]:
# URL of the service's swagger.json document.
print(service.swagger_uri)

http://12929100-4b3e-48aa-b7bc-329316a625ee.eastus.azurecontainer.io/swagger.json


In [139]:
# URL that scoring requests are POSTed to.
print(service.scoring_uri)

http://12929100-4b3e-48aa-b7bc-329316a625ee.eastus.azurecontainer.io/score


In [143]:
import requests
import json

# URL for the web service. Read it from the deployed service object instead of
# pasting it in: a pasted URL can go stale once the service is deleted and
# recreated, which the deployment cell does on every run.
scoring_uri = service.scoring_uri

# Two sets of data to score, so we get two results back. Both rows are the
# same sample record.
record = {
    "latitude_origin": -6.141255,
    "longitude_origin": 106.692710,
    "latitude_destination": -6.141150,
    "longitude_destination": 106.693154,
    "timestamp": 1590487113,
    "hour_of_day": 9,
    "day_of_week": 1
}
# Named `payload` rather than `data` so it does not shadow the feature
# DataFrame used earlier in the notebook. The "data" key is what the
# service's input schema expects.
payload = {"data": [record, record]}

# Convert to JSON string
input_data = json.dumps(payload)

# Set the content type
headers = {'Content-Type': 'application/json'}


# Make the request and display the response. An explicit timeout is set
# because requests otherwise waits indefinitely for an unresponsive service.
resp = requests.post(scoring_uri, data=input_data, headers=headers, timeout=30)

# NOTE: score.py's run() returns the exception text as a normal response on
# failure, so a 200 status alone does not prove success — check that the body
# is a list of numbers.
print(resp.status_code)
print(resp.elapsed)
print(resp.text)

200
0:00:00.059266
[1153.4814453125, 1153.4814453125]
