# Set up the environment

In [2]:
from azureml.core import Workspace
ws = Workspace.get(name='demo-aml',
                   subscription_id='YOUR-SUSCRIPTION-ID',
                   resource_group='demo-aml')

In [3]:
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies

# to install required packages
env = Environment('xgboost-env')
cd = CondaDependencies.create(pip_packages=['azureml-defaults', 'xgboost', 'numpy'],
                              conda_packages = ['scikit-learn', 'joblib', 'pandas'])

env.python.conda_dependencies = cd

# Register environment to re-use later
env.register(workspace = ws)

{
    "databricks": {
        "eggLibraries": [],
        "jarLibraries": [],
        "mavenLibraries": [],
        "pypiLibraries": [],
        "rcranLibraries": []
    },
    "docker": {
        "arguments": [],
        "baseDockerfile": null,
        "baseImage": "mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210531.v1",
        "baseImageRegistry": {
            "address": null,
            "password": null,
            "registryIdentity": null,
            "username": null
        },
        "enabled": false,
        "platform": {
            "architecture": "amd64",
            "os": "Linux"
        },
        "sharedVolumes": true,
        "shmSize": null
    },
    "environmentVariables": {
        "EXAMPLE_ENV_VAR": "EXAMPLE_VALUE"
    },
    "inferencingStackVersion": null,
    "name": "xgboost-env",
    "python": {
        "baseCondaEnvironment": null,
        "condaDependencies": {
            "channels": [
                "anaconda",
                "conda-forge"
  

In [5]:
%%time

# Optional
image = env.build(ws)
image.wait_for_completion(show_output=True)

CPU times: user 30.4 ms, sys: 5.15 ms, total: 35.6 ms
Wall time: 201 ms


<azureml.core.environment.ImageBuildDetails at 0x7f9ca6b80160>

# Create scoring script

In [28]:
import os
import json
import joblib
import numpy as np

def init():
    global model
    #model_path = './outputs/model.h5'
    #model = load_model(model_path)
    model_path = './outputs/model.pkl'
    model = joblib.load(model_path)

def run(raw_data):
    
    data = np.array(json.loads(raw_data)['data'])
    with open("./outputs/columns.txt", "r") as fp:
        columns = json.load(fp)
    df = pd.DataFrame(data, columns=columns)
    
    # make prediction
    y_hat = model.predict(df)
    #Load Scaler
    sc_y = joblib.load('./outputs/std_scaler_y.bin')
    y_hat = sc_y.inverse_transform(y_hat)
    # Pounds to kg
    y_hat = np.multiply(y_hat, 0.454)

    # you can return any data type as long as it is JSON-serializable
    return y_hat.tolist()

In [19]:
init()



In [20]:
import pandas as pd
X_val=pd.read_json('./inputs/X_validation_data.json', orient="split")
X_val.head()
X_val = X_val.values

In [29]:
# Online prediction: send a random row from the validatition set to score
random_index = np.random.randint(0, len(X_val)-1)
input_data = "{\"data\": [" + str(list(X_val[random_index])) + "]}"
run(input_data)

['SEX', 'FAGE', 'GAINED', 'VISITS', 'MAGE', 'TOTALP', 'BDEAD', 'TERMS', 'LOUTCOME', 'WEEKS', 'RACEMOM', 'RACEDAD', 'CIGNUM', 'DRINKNUM', 'ANEMIA', 'CARDIAC', 'ACLUNG', 'DIABETES', 'HERPES', 'HYDRAM', 'HEMOGLOB', 'HYPERCH', 'HYPERPR', 'ECLAMP', 'CERVIX', 'PINFANT', 'PRETERM', 'RENAL', 'RHSEN', 'UTERINE']


AttributeError: 'DataFrame' object has no attribute 'feature_names'

In [8]:
# Batch prediction
batch = json.dumps({"data": X_val.tolist()})
batch = bytes(batch, encoding='utf8')
y_hat = run(batch)
y_hat

[3.319709062576294,
 3.046156883239746,
 3.2568764686584473,
 3.338683605194092,
 3.3013522624969482,
 3.683459520339966,
 3.6008081436157227,
 3.570554733276367,
 3.4407408237457275,
 3.5708460807800293,
 3.182047128677368,
 2.841775417327881,
 2.0266659259796143,
 3.3647069931030273,
 3.4342145919799805,
 3.5120291709899902,
 2.390129566192627,
 3.106153964996338,
 3.4251089096069336,
 3.3541691303253174,
 3.39890193939209,
 1.6730077266693115,
 3.1126489639282227,
 3.522404193878174,
 3.388153553009033,
 3.022202968597412,
 1.9473463296890259,
 2.7303526401519775,
 3.152921199798584,
 2.334705352783203,
 3.4306275844573975,
 3.477069139480591,
 3.3906328678131104,
 3.3609859943389893,
 3.0527641773223877,
 3.100623369216919,
 3.1925549507141113,
 3.2661590576171875,
 3.346280813217163,
 3.021045446395874,
 3.449305772781372,
 3.4340333938598633,
 3.6302011013031006,
 3.530768871307373,
 2.807097911834717,
 3.2571470737457275,
 3.40000319480896,
 3.4762539863586426,
 3.57146096229553

In [35]:
%%writefile score.py
import os
import json
import joblib
import numpy as np
import xgboost as xgb


def init():
    #Load the model
    global model
    model_path = os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'outputs/model.bst')
    model = xgb.Booster()
    model.load_model(model_path)

def run(raw_data):
    # Load the new Data
    data = np.array(json.loads(raw_data)['data'])
    data = xgb.DMatrix(data)
    
    # make prediction
    y_hat = model.predict(data)
    #Load Scaler
    sc_y = joblib.load(os.path.join(os.getenv('AZUREML_MODEL_DIR'), 'outputs/std_scaler_y.bin'))
    y_hat = sc_y.inverse_transform(y_hat)
    # Pounds to kg
    y_hat = np.multiply(y_hat, 0.454)

    # you can return any data type as long as it is JSON-serializable
    return y_hat.tolist()

Overwriting score.py
