Model Training for Diabetes Classification in Azure Databricks with Azure Machine Learning integration.

In [2]:
# Import necessary libraries for model training
import requests
import pandas as pd
from sklearn import model_selection
from sklearn.linear_model import LogisticRegression
import joblib

In [3]:
# Mount ADLS to read data source.
adls_source = "abfss://your_container_name@your_adls_name.dfs.core.windows.net/"
adls_mount_point = "/mnt/your_adls_mount_point_name"

# OAuth client-credential settings passed to the mount call; the client
# secret is read from a Databricks secret scope instead of being written here.
configs = {
    "fs.azure.account.auth.type": "OAuth",
    "fs.azure.account.oauth.provider.type": "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
    "fs.azure.account.oauth2.client.id": "your_application_id",
    "fs.azure.account.oauth2.client.secret": dbutils.secrets.get(
        scope="your_adb_secret_name",
        key="your_adb_key_name",
    ),
    "fs.azure.account.oauth2.client.endpoint": "https://login.microsoftonline.com/your_aad_tenant_id/oauth2/token",
}

dbutils.fs.mount(
    source=adls_source,
    mount_point=adls_mount_point,
    extra_configs=configs,
)

In [4]:
# Mount Azure Blob Storage as model artifact store.
# The account-key setting is looked up by storage ACCOUNT host name, so it has
# to use the same account that appears in the wasbs:// URL -- not the DBFS
# mount point name. Building both strings from one value keeps them in sync.
blob_account_host = "your_azure_blob_storage_name.blob.core.windows.net"

dbutils.fs.mount(
    source="wasbs://your_container_name@" + blob_account_host,
    mount_point="/mnt/your_azure_blob_storage_mount_point_name",
    extra_configs={
        # The access key is read from a Databricks secret scope.
        "fs.azure.account.key." + blob_account_host: dbutils.secrets.get(
            scope="your_adb_secret_name",
            key="your_adb_key_name",
        ),
    },
)

In [5]:
# Unmount ADLS as needed.
# dbutils.fs.unmount("/mnt/your_adls_mount_point_name")

In [6]:
# Unmount Azure Blob Storage as needed.
# dbutils.fs.unmount("/mnt/your_azure_blob_storage_mount_point_name")

In [7]:
# Show the contents of the ADLS mount point.
adls_listing_path = "/mnt/your_adls_mount_point_name/"
dbutils.fs.ls(adls_listing_path)

In [8]:
# Show the contents of the Azure Blob Storage mount point.
blob_listing_path = "/mnt/your_azure_blob_storage_mount_point_name/"
dbutils.fs.ls(blob_listing_path)

In [9]:
# Download source data and save it into ADLS.
csv_url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv'

# TLS certificate verification is left at the requests default (enabled) so
# the training data cannot be silently swapped by a man-in-the-middle, and a
# timeout keeps the cell from hanging forever on a stalled connection.
req = requests.get(csv_url, timeout=30)

# Stop here on an HTTP error so an error page is never stored as the dataset.
req.raise_for_status()
url_content = req.content

# The context manager closes the file even when the write raises.
with open('/dbfs/mnt/your_adls_mount_point_name/pima-indians-diabetes.data.csv', 'wb') as csv_file:
    csv_file.write(url_content)

In [10]:
# Load the downloaded CSV from the ADLS mount into a pandas DataFrame.
# The file is read with explicit column names supplied below.
names = [
    'preg', 'plas', 'pres', 'skin',
    'test', 'mass', 'pedi', 'age',
    'class',
]
csv_file = '/dbfs/mnt/your_adls_mount_point_name/pima-indians-diabetes.data.csv'
dataframe = pd.read_csv(filepath_or_buffer=csv_file, names=names)
dataframe

In [11]:
# Split the data into training and test sets.
test_size = 0.33  # fraction of rows held out for testing
seed = 7          # fixed random_state so the split is repeatable

array = dataframe.values
# The first eight columns are the features; the ninth is the label.
X, Y = array[:, 0:8], array[:, 8]

X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=seed,
)

In [12]:
# Fit a logistic-regression classifier on the training split.
max_training_iterations = 200
model = LogisticRegression(max_iter=max_training_iterations)
model.fit(X=X_train, y=Y_train)

In [13]:
# Create folder in model artifact store as needed (dbutils.fs takes the "/mnt/..." path, like the other dbutils.fs calls in this notebook).
# dbutils.fs.mkdirs("/mnt/your_azure_blob_storage_mount_point_name/your_model_folder_name/")

In [14]:
# Serialize the trained model into the model artifact store.
filename = "finalized_model.pkl"
filepath = "/dbfs/mnt/your_azure_blob_storage_mount_point_name/your_model_folder_name/"
filenamepath = f"{filepath}{filename}"
joblib.dump(value=model, filename=filenamepath)

In [15]:
# Serialize the DataFrame's column names next to the model file.
columnsfilename = "finalized_model_column.pkl"
columnsfilepath = "/dbfs/mnt/your_azure_blob_storage_mount_point_name/your_model_folder_name/"
columnsfilenamepath = f"{columnsfilepath}{columnsfilename}"

model_columns = dataframe.columns.tolist()
joblib.dump(value=model_columns, filename=columnsfilenamepath)

In [16]:
# Show the contents of the model folder in Azure Blob Storage.
model_folder_listing_path = "/mnt/your_azure_blob_storage_mount_point_name/your_model_folder_name"
dbutils.fs.ls(model_folder_listing_path)

In [17]:
# Reload the model from the artifact store, score it on the test split and
# print the score.
loaded_model = joblib.load(filename=filenamepath)
result = loaded_model.score(X=X_test, y=Y_test)
print(result)

In [18]:
# Build a one-row DataFrame holding a single sample for testing.
# The values are given as strings and converted through dtype=float.
sample_columns = [
    'preg', 'plas', 'pres', 'skin',
    'test', 'mass', 'pedi', 'age',
    'class',
]
pred_list = ['6', '148', '72', '35', '0', '33.6', '0.627', '50', '1']
df_pred_list = pd.DataFrame(data=[pred_list], columns=sample_columns, dtype=float)
df_pred_list

In [19]:
# Import necessary libraries and configure access into Azure Machine Learning.
import mlflow
import mlflow.azureml
import azureml.mlflow
import azureml.core

from azureml.core import Workspace
from azureml.mlflow import get_portal_url
from azureml.core.model import Model

from azureml.core.authentication import InteractiveLoginAuthentication
# Interactive login against the given Azure AD tenant.
interactive_auth = InteractiveLoginAuthentication(tenant_id="your_aad_tenant_id")

# Coordinates of the Azure Machine Learning workspace:
#   - subscription_id: the Azure subscription
#   - resource_group:  the Azure Machine Learning resource group, NOT the
#                      managed resource group
#   - workspace_name:  the Azure Machine Learning workspace, NOT the Azure
#                      Databricks workspace
subscription_id = 'your_azure_subscription_id'
resource_group = 'your_resource_group_name'
workspace_name = 'your_aml_workspace_name'

# Obtain a handle to the Azure Machine Learning workspace.
ws = Workspace.get(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
    auth=interactive_auth,
)

In [20]:
# Point MLflow at the tracking URI reported by the AML workspace and print
# that URI for reference.
uri = ws.get_mlflow_tracking_uri()
mlflow.set_tracking_uri(uri=uri)
print(uri)

In [21]:
# Select the MLflow experiment used for tracking model telemetry.
experiment_name = 'your_aml_experiment_name'
mlflow.set_experiment(experiment_name=experiment_name)

In [22]:
# Post model accuracy result to AML via MLFlow.
# The `with` block ends the run on exit, so no explicit mlflow.end_run()
# call is needed inside it.
with mlflow.start_run():
    # Log a metric; metrics can be updated throughout the run
    mlflow.log_metric("accuracy", result, step=1)

In [23]:
# Package the model folder as a zip file for later DevOps integration use.
import shutil

# The archive is first created as "<version>.zip" and then moved to its
# final location in the artifact store.
version = '1'
model_version = str(version)
model_version_source = f"{model_version}.zip"

model_zip_path = '/dbfs/mnt/your_azure_blob_storage_mount_point_name/'
model_zip_filename = 'your_model_package_name.zip'
model_zip_filepath = f"{model_zip_path}{model_zip_filename}"
model_version_dest = model_zip_filepath

shutil.make_archive(
    base_name=model_version,
    format='zip',
    root_dir='/dbfs/mnt/your_azure_blob_storage_mount_point_name/your_model_folder_name',
)
shutil.move(src=model_version_source, dst=model_version_dest)

In [24]:
# Register the packaged model zip in the AML model registry.
# NOTE: this cell rebinds `filename` and `model`, which earlier cells use for
# the pickle file name and the trained estimator; re-run those cells before
# dumping the model again.
filename = '/dbfs/mnt/your_azure_blob_storage_mount_point_name/your_model_package_name.zip'
model_name = 'your_sklearn_model_name'

registration_args = {
    "workspace": ws,
    "model_path": filename,
    "model_name": model_name,
    "model_framework": Model.Framework.SCIKITLEARN,
    "tags": {"network": "none"},
    "description": "your_sklearn_model_description",
}
model = Model.register(**registration_args)

In [25]:
# Model API test: POST one sample as JSON to the /predict endpoint and print
# the response object followed by its body.
import requests

url = "http://your_model_api_ip:8080/predict"
headers = {'Content-type': 'application/json'}
datas = {
    "preg": [6],
    "plas": [148],
    "pres": [72],
    "skin": [35],
    "test": [0],
    "mass": [33.6],
    "pedi": [0.627],
    "age": [50],
    "class": [0],
}

rsp = requests.post(url, json=datas, headers=headers)
print(rsp, rsp.text, sep="\n")

In [26]:
# Model web test: GET the /hello endpoint and print the response object
# followed by its body.
import requests

url = "http://your_model_api_ip:8080/hello"
rsp = requests.get(url)
print(rsp, rsp.text, sep="\n")