# Create Model Schema File For Publishing

This notebook generates the schema file needed by the ML service that will be deployed. It also exercises much of the Python code from the score.py file as a test, to confirm that the code works before deployment so we aren't chasing ghosts afterwards.


# Imports and constants

These are the imports and constants required for the functionality that follows.

In [8]:
# Use the Azure Machine Learning data collector to log various metrics
from azureml.logging import get_azureml_logger
logger = get_azureml_logger()

# Use Azure Machine Learning history magic to control history collection
# History is off by default, options are "on", "off", or "show"
# %azureml history on

from azureml.datacollector import ModelDataCollector
from azureml.api.schema.dataTypes import DataTypes
from azureml.api.schema.sampleDefinition import SampleDefinition
from azureml.api.realtime.services import generate_schema

from azure.storage.blob import BlockBlobService
from azure.storage.blob import PublicAccess
from azure.storage.blob import ContentSettings

import pandas

from sklearn.externals import joblib

import json
import os

# Azure storage and file name information.
# The account name and key are taken from the environment variables
# AZURE_STORAGE_ACCOUNT_NAME / AZURE_STORAGE_ACCOUNT_KEY when they are set, so
# the real key does not have to be typed into (and saved with) this notebook.
# When a variable is not set, the original placeholder text is used as before.
AZURE_STORAGE_ACCOUNT_NAME = os.environ.get("AZURE_STORAGE_ACCOUNT_NAME", "<STORAGE_ACCOUNT_NAME>")
AZURE_STORAGE_ACCOUNT_KEY = os.environ.get("AZURE_STORAGE_ACCOUNT_KEY", "<STORAGE_ACCOUNT_KEY>")
AZURE_STORAGE_CONTAINER_NAME = "readydemo"
AZURE_STORAGE_BLOB_NAME = "factory.pkl"
AZURE_STORAGE_BLOB_NAME_SCHEMA = "factory.schema"

# Local working directory (relative to the notebook) for the model and schema
LOCAL_SYSTEM_DIRECTORY = "modelfile"

# Local paths reuse the blob names so local files and blobs stay in step
MODEL_FILE_LOCAL = "./{}/{}".format(LOCAL_SYSTEM_DIRECTORY, AZURE_STORAGE_BLOB_NAME)
SCHEMA_FILE_LOCAL = "./{}/{}".format(LOCAL_SYSTEM_DIRECTORY, AZURE_STORAGE_BLOB_NAME_SCHEMA)

print("Model File {}".format(MODEL_FILE_LOCAL))
print("Schema File {}".format(SCHEMA_FILE_LOCAL))

Model File ./modelfile/factory.pkl
Schema File ./modelfile/factory.schema


# Download model from storage

In [4]:
# Make sure the local target directory is present before downloading into it
if os.path.exists(LOCAL_SYSTEM_DIRECTORY):
    print('Local directory already exists!')
else:
    os.makedirs(LOCAL_SYSTEM_DIRECTORY)
    print('DONE creating a local directory!')

# Remove any previously downloaded copy of the model file
if os.path.isfile(MODEL_FILE_LOCAL):
    os.remove(MODEL_FILE_LOCAL)
    print("Local model file exists and was deleted")

# Connect to the storage account and pull the model blob to the local path.
# az_blob_service is kept at module level; the schema upload cell reuses it.
az_blob_service = BlockBlobService(
    account_name=AZURE_STORAGE_ACCOUNT_NAME,
    account_key=AZURE_STORAGE_ACCOUNT_KEY,
)
az_blob_service.get_blob_to_path(
    AZURE_STORAGE_CONTAINER_NAME,
    AZURE_STORAGE_BLOB_NAME,
    MODEL_FILE_LOCAL,
)

print("Model file downloaded")

Local directory already exists!
Local model file exists and was deleted
Model file downloaded


# Prepare Python Code
This code is essentially a duplicate of the code found in the score.py file.

In [5]:
# Initialization entry point: loads the downloaded model and sets up collectors
def init():
    """Load the model and create the input/prediction data collectors.

    Reads the module-level ``localFilePath`` (which must be assigned before
    this function is called) and publishes three module-level names used by
    ``run``: ``model``, ``inputs_dc`` and ``prediction_dc``.
    """
    # Only names assigned here need a global declaration; localFilePath is read-only.
    global model, inputs_dc, prediction_dc

    import os  # not referenced in this function body
    from sklearn.externals import joblib

    # Deserialize the model from the local file
    model = joblib.load(localFilePath)

    # One collector per stream, both constructed with the model file path
    inputs_dc = ModelDataCollector(localFilePath, identifier="inputs")
    prediction_dc = ModelDataCollector(localFilePath, identifier="prediction")


In [6]:
# Scoring entry point: collects the inputs, predicts, collects the predictions
def run(input_df):
    """Score ``input_df`` with the loaded model and return the first prediction.

    Relies on the module-level ``model``, ``inputs_dc`` and ``prediction_dc``
    created by ``init()``, so ``init()`` must have been called first.

    Args:
        input_df: Data handed to ``model.predict``; the schema cell supplies a
            pandas DataFrame under this parameter name.

    Returns:
        A JSON-encoded string holding ``str()`` of the first element of the
        prediction result. Any further predictions are collected but not
        returned.
    """
    import json

    # Record the raw inputs before scoring
    inputs_dc.collect(input_df)

    predictions = model.predict(input_df)

    # Record the full prediction output
    prediction_dc.collect(predictions)

    first_prediction = str(predictions[0])
    return json.dumps(first_prediction)

# Generate the model schema
Generates the schema and uploads it to the storage account in the same container as the model.


In [9]:
# Put data collection in debug mode so its output is visible in stdout
os.environ["AML_MODEL_DC_DEBUG"] = 'true'

# init() reads the model location from this module-level name
localFilePath = MODEL_FILE_LOCAL

# Load the model and create the data collectors
init()

# Build a one-row sample frame with the model's input columns
df = pandas.DataFrame(
    data=[[45.9842594460449, 150.513223075022, 277.294013981084, 1.0, 1.0]],
    columns=['temp', 'volt', 'rotate', 'time', 'id'],
)

# The dictionary key must match the parameter name of run()
inputs = {"input_df": SampleDefinition(DataTypes.PANDAS, df)}

# Generate the schema from the run function and the sample inputs
generate_schema(run_func=run, inputs=inputs, filepath=SCHEMA_FILE_LOCAL)
print("Schema generated")

# Upload the schema file to the same container that holds the model
az_blob_service.create_blob_from_path(
    AZURE_STORAGE_CONTAINER_NAME,
    AZURE_STORAGE_BLOB_NAME_SCHEMA,
    SCHEMA_FILE_LOCAL,
    content_settings=ContentSettings(content_type='application/json'),
)

print("Schema uploaded")

Data collection is in debug mode. Set environment variable AML_MODEL_DC_STORAGE_ENABLED to 'true' to send data to the cloud (http://aka.ms/amlmodeldatacollection).
Data collection is in debug mode. Set environment variable AML_MODEL_DC_STORAGE_ENABLED to 'true' to send data to the cloud (http://aka.ms/amlmodeldatacollection).
Schema generated
Schema uploaded
