# LIBRARIES

In [1]:
import os
import mlflow

# EXPERIMENTS OUTSIDE OF DATABRICKS

Configure Tracking Server

In [3]:
# Send subsequent MLflow tracking calls to the Databricks-hosted tracking server.
# NOTE(review): "DPP" is presumably a Databricks CLI profile name — confirm it
# is configured on the machine running this notebook.
mlflow.set_tracking_uri("databricks://DPP")

<div>
    <center>
    <img src="./images/configure_tracking_server.png" width="500"/>
    </center>
</div>

List Experiments

In [4]:
# Fetch the experiments visible on the configured tracking server.
# NOTE(review): `mlflow.list_experiments` appears to have been removed in
# MLflow 2.x in favour of `mlflow.search_experiments` — confirm against the
# MLflow version installed here before upgrading.
experiments = mlflow.list_experiments()

In [5]:
# Show how many experiments were returned.
len(experiments)

6

<div>
    <center>
    <img src="./images/list_experiments.png" width="500"/>
    </center>
</div>

Train Experiment Run

In [7]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits
from sklearn.metrics import balanced_accuracy_score, plot_confusion_matrix
from sklearn.model_selection import train_test_split

# Load the digits dataset as a feature matrix X and a label vector y.
X, y = load_digits(return_X_y=True)

# Hold out 20% of the samples for testing, stratified on the label.
# A fixed random_state makes the split repeatable across kernel restarts, so
# the metrics logged for a run can be compared with a later re-run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, stratify=y, random_state=42
)

In [8]:
# Hyperparameters for the random forest trained in the next cell.
max_depth = 10
n_estimators = 30

# Select the experiment that subsequent runs are recorded under. The call is
# the last expression in the cell, so the returned Experiment is displayed.
mlflow.set_experiment("/Users/daniel.perez@databricks.com/AA/ML with DB - Python/digits_experiment")

<Experiment: artifact_location='dbfs:/databricks/mlflow-tracking/3917805885816983', experiment_id='3917805885816983', lifecycle_stage='active', name='/Users/daniel.perez@databricks.com/AA/ML with DB - Python/digits_experiment', tags={'mlflow.experimentType': 'MLFLOW_EXPERIMENT',
 'mlflow.ownerEmail': 'daniel.perez@databricks.com',
 'mlflow.ownerId': '3418784813438248'}>

In [9]:
# Train a random forest inside an MLflow run so that the model, its
# hyperparameters and its metrics are all recorded against the same run.
with mlflow.start_run(run_name="from local by Jesse"):
    # NOTE(review): no random_state is passed to the classifier, so the fitted
    # forest (and the metrics logged below) can differ between executions.
    model = RandomForestClassifier(max_depth=max_depth,
                                   n_estimators=n_estimators)

    model.fit(X_train, y_train)

    # Log the fitted model as a run artifact under the path "model".
    mlflow.sklearn.log_model(model,
                           artifact_path = "model")

    # Log the hyperparameters used for this fit.
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("n_estimators", n_estimators)

    # Log metrics on the training split, then on the held-out test split.
    # The values are balanced accuracy (balanced_accuracy_score), even though
    # the metric names only say "accuracy".
    y_train_pred = model.predict(X_train)
    accuracy_train = balanced_accuracy_score(y_train, y_train_pred)
    mlflow.log_metric("accuracy_train", accuracy_train)
    
    y_test_pred = model.predict(X_test)
    accuracy_test = balanced_accuracy_score(y_test, y_test_pred)
    mlflow.log_metric("accuracy_test", accuracy_test)

<div>
    <center>
    <img src="./images/register_run.png" width="500"/>
    </center>
</div>

# ON-LINE INFERENCE

## FROM MODEL REGISTRY OUTSIDE OF DATABRICKS

Configure Registry Server

In [11]:
# Client bound to the model registry hosted on Databricks; the cells below use
# it to list registered models.
# NOTE(review): "DPP" is presumably a Databricks CLI profile name — confirm.
mlflow_client = mlflow.tracking.MlflowClient(registry_uri="databricks://DPP")

<div>
    <center>
    <img src="./images/configure_registry_server.png" width="500"/>
    </center>
</div>

LIST MODELS

In [12]:
# Fetch the models registered in the remote registry.
# NOTE(review): `list_registered_models` appears to have been removed in
# MLflow 2.x in favour of `search_registered_models` — confirm against the
# MLflow version installed here before upgrading.
registered_models = mlflow_client.list_registered_models()

In [13]:
# Show how many registered models were returned.
len(registered_models)

13

In [14]:
# Keep only the registry entry named 'aa_digits'. The loop variable is not
# called `model`, to avoid confusion with the notebook-level `model` object.
[entry for entry in registered_models if entry.name == 'aa_digits']

[<RegisteredModel: creation_timestamp=1644274567853, description='', last_updated_timestamp=1645647723238, latest_versions=[<ModelVersion: creation_timestamp=1644274705284, current_stage='None', description='', last_updated_timestamp=1644274708951, name='aa_digits', run_id='815b868dca0c40b8ab9aa7adcad056c3', run_link='', source='dbfs:/databricks/mlflow-tracking/2560579026999475/815b868dca0c40b8ab9aa7adcad056c3/artifacts/model', status='READY', status_message='', tags={}, user_id='daniel.perez@databricks.com', version='2'>,
  <ModelVersion: creation_timestamp=1644276201591, current_stage='Production', description='', last_updated_timestamp=1645593090215, name='aa_digits', run_id='ccd4dc9a0c594e948cad171325993fee', run_link='', source='dbfs:/databricks/mlflow-tracking/2560579026999475/ccd4dc9a0c594e948cad171325993fee/artifacts/model', status='READY', status_message='', tags={}, user_id='daniel.perez@databricks.com', version='3'>,
  <ModelVersion: creation_timestamp=1645596048833, current

<div>
    <center>
    <img src="./images/list_models.png" width="500"/>
    </center>
</div>

INSTALL LIBRARIES FOR MODEL

In [15]:
from mlflow.store.artifact.models_artifact_repo import ModelsArtifactRepository

# Registry URI of the Production-stage version of the registered model.
# (Plain string: the original f-string had no placeholders.)
# NOTE(review): the registry listing and the serving URL elsewhere in this
# notebook use the name 'aa_digits' — confirm 'digits_model' is the intended
# registered model.
model_uri = "models:/digits_model/Production"

# Download the model's artifacts from the remote registry; the call returns
# the local path they were written to.
local_path = ModelsArtifactRepository(model_uri).download_artifacts("")

# Path of the requirements file inside the downloaded artifacts.
requirements_path = os.path.join(local_path, "requirements.txt")

In [16]:
# Print the requirements file that was downloaded with the model
# (IPython expands $requirements_path before running the shell command).
!cat $requirements_path

mlflow
cloudpickle==1.6.0
psutil==5.8.0
scikit-learn==0.24.1
typing-extensions==3.7.4.3

In [17]:
!pip install -r $requirements_path 



<div>
    <center>
    <img src="./images/install_requirements.png" width="500"/>
    </center>
</div>

DEPLOY MODEL

In [18]:
# Re-create the registry client with the same configuration as the client
# built under "Configure Registry Server".
mlflow_client = mlflow.tracking.MlflowClient(registry_uri="databricks://DPP")

In [19]:
# Fetch the list of registered models again through the re-created client.
registered_models = mlflow_client.list_registered_models()

In [20]:
# Inspect the first registered model in the returned list.
registered_models[0]

<RegisteredModel: creation_timestamp=1644274567853, description='', last_updated_timestamp=1645647723238, latest_versions=[<ModelVersion: creation_timestamp=1644274705284, current_stage='None', description='', last_updated_timestamp=1644274708951, name='aa_digits', run_id='815b868dca0c40b8ab9aa7adcad056c3', run_link='', source='dbfs:/databricks/mlflow-tracking/2560579026999475/815b868dca0c40b8ab9aa7adcad056c3/artifacts/model', status='READY', status_message='', tags={}, user_id='daniel.perez@databricks.com', version='2'>,
 <ModelVersion: creation_timestamp=1644276201591, current_stage='Production', description='', last_updated_timestamp=1645593090215, name='aa_digits', run_id='ccd4dc9a0c594e948cad171325993fee', run_link='', source='dbfs:/databricks/mlflow-tracking/2560579026999475/ccd4dc9a0c594e948cad171325993fee/artifacts/model', status='READY', status_message='', tags={}, user_id='daniel.perez@databricks.com', version='3'>,
 <ModelVersion: creation_timestamp=1645596048833, current_st

In [21]:
# Load the model addressed by the "models:/digits_model/Production" registry
# URI through the generic pyfunc interface.
# Note: this rebinds `model`, which earlier in the notebook held the
# scikit-learn estimator trained inside the MLflow run.
model = mlflow.pyfunc.load_model("models:/digits_model/Production")

<div>
    <center>
    <img src="./images/load_model.png" width="500"/>
    </center>
</div>

SCORE MODEL

In [22]:
from sklearn.datasets import load_digits

In [23]:
# Reload the full digits dataset (no train/test split) to score the loaded model.
# NOTE(review): if the loaded model was trained on part of this data, the
# scores below include training samples — confirm before quoting them.
X, y = load_digits(return_X_y = True)

In [24]:
# Predict a label for every sample with the locally loaded model.
y_hat = model.predict(X)

In [25]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [26]:
# Confusion matrix of the true labels `y` against the local predictions `y_hat`.
confusion_matrix(y, y_hat)

array([[175,   3,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0, 182,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,   9, 168,   0,   0,   0,   0,   0,   0,   0],
       [  0,   5,   3, 159,   0,   0,   0,   0,   7,   9],
       [  0,  97,   0,   0,  55,   0,  20,   0,   1,   8],
       [  0,   6,   0,   0,   0, 163,   1,   0,   0,  12],
       [  0,   7,   0,   0,   0,   0, 173,   0,   1,   0],
       [  0,  42,   0,   0,   0,   0,   0, 117,   1,  19],
       [  0,  29,   0,   0,   0,   0,   0,   0, 145,   0],
       [  0,   3,   0,   0,   0,   0,   0,   0,   0, 177]])

In [None]:
# Overall accuracy of the local predictions against the true labels.
accuracy_score(y, y_hat)

## FROM MODEL REGISTRY USING DATABRICKS

In [27]:
import os
import requests
import numpy as np
import pandas as pd

def create_tf_serving_json(data):
    """Wrap ``data`` in the ``{'inputs': ...}`` payload sent to the endpoint.

    A dict is converted value by value with ``.tolist()`` (keys preserved);
    any other object is converted with a single ``data.tolist()`` call.
    """
    if isinstance(data, dict):
        payload = {key: data[key].tolist() for key in data.keys()}
    else:
        payload = data.tolist()
    return {'inputs': payload}

def score_model(dataset, url=None, token=None):
    """Score ``dataset`` against the Databricks model-serving endpoint.

    Args:
        dataset: A pandas DataFrame (sent in ``orient='split'`` form) or any
            object accepted by ``create_tf_serving_json``.
        url: Invocation URL of the served model. Defaults to the
            ``aa_digits`` version 1 endpoint used by this notebook.
        token: Databricks access token. When omitted it is read from the
            ``DATABRICKS_TOKEN`` environment variable.

    Returns:
        The decoded JSON body of the response.

    Raises:
        RuntimeError: If no access token is supplied or configured.
        Exception: If the endpoint answers with a status other than 200.
    """
    # Credentials must never live in the notebook source: anything committed
    # with it is readable by everyone who can see the file. A token that was
    # previously embedded here should be treated as compromised and rotated.
    if token is None:
        token = os.environ.get('DATABRICKS_TOKEN')
    if not token:
        raise RuntimeError(
            'No Databricks token available: pass token=... or set the '
            'DATABRICKS_TOKEN environment variable.'
        )

    if url is None:
        url = 'https://adb-2173364778179441.1.azuredatabricks.net/model/aa_digits/1/invocations'

    headers = {'Authorization': f'Bearer {token}'}

    # DataFrames are sent in pandas "split" orientation; everything else goes
    # through the {'inputs': ...} wrapper.
    if isinstance(dataset, pd.DataFrame):
        data_json = dataset.to_dict(orient='split')
    else:
        data_json = create_tf_serving_json(dataset)

    response = requests.post(url, headers=headers, json=data_json)

    if response.status_code != 200:
        raise Exception(f'Request failed with status {response.status_code}, {response.text}')
    return response.json()

In [28]:
# Score the full dataset through the serving endpoint; this overwrites the
# `y_hat` produced earlier by the locally loaded model.
y_hat = score_model(X)

In [29]:
# Confusion matrix of the true labels `y` against the endpoint's predictions.
confusion_matrix(y, y_hat)

array([[172,   5,   0,   0,   0,   1,   0,   0,   0,   0],
       [  0, 182,   0,   0,   0,   0,   0,   0,   0,   0],
       [  0,  16, 161,   0,   0,   0,   0,   0,   0,   0],
       [  0,   7,   1, 157,   0,   0,   0,   0,  12,   6],
       [  0, 108,   0,   0,  43,   1,  20,   0,   1,   8],
       [  0,  19,   1,   0,   0, 145,   0,   0,   4,  13],
       [  0,  13,   0,   0,   0,   0, 167,   0,   1,   0],
       [  0,  64,   0,   1,   0,   0,   0, 100,   6,   8],
       [  0,  26,   0,   0,   0,   0,   0,   0, 148,   0],
       [  0,  10,   0,   0,   0,   0,   0,   0,   0, 170]])

In [None]:
# Overall accuracy of the endpoint's predictions against the true labels.
accuracy_score(y, y_hat)