# LIBRARIES

In [None]:
import os
import mlflow

# EXPERIMENTS OUTSIDE OF DATABRICKS

Configure Tracking Server

In [None]:
# Point the MLflow tracking client at Databricks. In MLflow, a
# "databricks://<profile>" URI selects the Databricks CLI profile with that
# name, here "DPP".
mlflow.set_tracking_uri("databricks://DPP")

<div>
    <center>
    <img src="./images/configure_tracking_server.png" width="500"/>
    </center>
</div>

List Experiments

In [None]:
# `mlflow.list_experiments()` was removed in MLflow 2.0 in favour of
# `mlflow.search_experiments()`. Prefer the new API and fall back to the old
# one so this cell runs on either major version.
_fetch_experiments = getattr(mlflow, "search_experiments", None) or mlflow.list_experiments
experiments = _fetch_experiments()

In [None]:
# Number of experiments returned by the tracking server.
len(experiments)

<div>
    <center>
    <img src="./images/list_experiments.png" width="500"/>
    </center>
</div>

Train Experiment Run

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_digits
from sklearn.metrics import balanced_accuracy_score, plot_confusion_matrix
from sklearn.model_selection import train_test_split

# Load the scikit-learn digits dataset as a feature matrix X and label vector y.
X, y = load_digits(return_X_y=True)

# Keep 80% of the rows for training and hold out the rest, stratified by label.
# The fixed random_state makes the split identical on every execution, so
# metrics logged by different runs are computed on the same rows.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, stratify=y, random_state=42
)

In [None]:
# Hyperparameters passed to the RandomForestClassifier in the training cell
# and logged as run parameters.
max_depth = 10
n_estimators = 30

# Select the experiment, identified by its workspace path, that subsequent
# runs are logged to.
mlflow.set_experiment("/Users/daniel.perez@databricks.com/AA/ML with DB - Python/digits_experiment")

In [None]:
with mlflow.start_run(run_name="from local"):
    # Seed the forest so that repeated executions of this cell train the same
    # model for a given training split.
    model = RandomForestClassifier(max_depth=max_depth,
                                   n_estimators=n_estimators,
                                   random_state=42)

    model.fit(X_train, y_train)

    # Store the fitted estimator as a run artifact under "model".
    mlflow.sklearn.log_model(model,
                             artifact_path="model")

    # Record the hyperparameters used for this run.
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("n_estimators", n_estimators)

    # Balanced accuracy on the training split (metric key: "accuracy_train").
    y_train_pred = model.predict(X_train)
    accuracy_train = balanced_accuracy_score(y_train, y_train_pred)
    mlflow.log_metric("accuracy_train", accuracy_train)

    # Balanced accuracy on the held-out split (metric key: "accuracy_test").
    y_test_pred = model.predict(X_test)
    accuracy_test = balanced_accuracy_score(y_test, y_test_pred)
    mlflow.log_metric("accuracy_test", accuracy_test)

<div>
    <center>
    <img src="./images/register_run.png" width="500"/>
    </center>
</div>

# ON-LINE INFERENCE

## FROM MODEL REGISTRY OUTSIDE OF DATABRICKS

Configure Registry Server

In [None]:
# Client whose model-registry calls go to the Databricks workspace selected by
# the "DPP" profile.
mlflow_client = mlflow.tracking.MlflowClient(registry_uri="databricks://DPP")

<div>
    <center>
    <img src="./images/configure_registry_server.png" width="500"/>
    </center>
</div>

List Models

In [None]:
# `MlflowClient.list_registered_models()` was removed in MLflow 2.0.
# `search_registered_models()` called without a filter returns the same
# listing and is available on both major versions.
registered_models = mlflow_client.search_registered_models()

In [None]:
# Number of registered models returned by the registry.
len(registered_models)

In [None]:
# Keep only the registry entries whose name is exactly 'aa_digits'.
[entry for entry in registered_models if entry.name == 'aa_digits']

<div>
    <center>
    <img src="./images/list_models.png" width="500"/>
    </center>
</div>

Install Libraries for Model

In [None]:
from mlflow.store.artifact.models_artifact_repo import ModelsArtifactRepository

# Registry URI for the "Production" stage of the registered model "digits_model".
model_uri = "models:/digits_model/Production"

# Download all artifacts of that model from the remote registry to a local directory.
registry_repo = ModelsArtifactRepository(model_uri)
local_path = registry_repo.download_artifacts("")

# The model's requirements file is expected at the root of the downloaded artifacts.
requirements_path = os.path.join(local_path, "requirements.txt")

In [None]:
# Print the downloaded requirements file through the shell (`cat` is a POSIX command).
!cat $requirements_path

In [None]:
!pip install -r $requirements_path 

<div>
    <center>
    <img src="./images/install_requirements.png" width="500"/>
    </center>
</div>

Deploy Model

In [None]:
# Re-create the registry client; this is the same construction as in the
# "Configure Registry Server" cell.
mlflow_client = mlflow.tracking.MlflowClient(registry_uri="databricks://DPP")

In [None]:
# `MlflowClient.list_registered_models()` was removed in MLflow 2.0.
# `search_registered_models()` called without a filter returns the same
# listing and is available on both major versions.
registered_models = mlflow_client.search_registered_models()

In [None]:
# Show the first registered model in the listing. Indexing an empty listing
# would raise IndexError.
registered_models[0]

In [None]:
# Load the "Production" stage of the registered model "digits_model" through
# the generic pyfunc flavor. This rebinds `model`, which previously held the
# locally trained RandomForestClassifier.
model = mlflow.pyfunc.load_model("models:/digits_model/Production")

<div>
    <center>
    <img src="./images/load_model.png" width="500"/>
    </center>
</div>

Score Model

In [None]:
from sklearn.datasets import load_digits

In [None]:
# Reload the complete digits dataset (features X, labels y) for scoring.
X, y = load_digits(return_X_y = True)

In [None]:
# Predict a label for every row of X with the model loaded from the registry.
# NOTE(review): X is the full dataset, so it may include rows the registered
# model was trained on; confirm before reading the scores below as held-out
# performance.
y_hat = model.predict(X)

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [None]:
# Confusion matrix of true labels y against predictions y_hat.
confusion_matrix(y, y_hat)

In [None]:
# Fraction of rows in X whose predicted label equals the true label.
accuracy_score(y, y_hat)

## FROM MODEL REGISTRY USING DATABRICKS

In [None]:
import os
import requests
import numpy as np
import pandas as pd

def create_tf_serving_json(data):
    """Wrap ``data`` in an ``{'inputs': ...}`` payload made of plain lists.

    If ``data`` is a dict, each value is converted with ``.tolist()`` and the
    keys are kept. Otherwise ``.tolist()`` is called on ``data`` itself.
    """
    if isinstance(data, dict):
        payload = {key: data[key].tolist() for key in data.keys()}
    else:
        payload = data.tolist()
    return {'inputs': payload}

def score_model(dataset):
    """Score ``dataset`` against the ``aa_digits`` version 1 serving endpoint.

    The bearer token is read from the ``DATABRICKS_TOKEN`` environment
    variable so that no credential is stored in the notebook.

    Args:
        dataset: A pandas DataFrame, sent in ``split`` orientation, or any
            other input accepted by ``create_tf_serving_json``.

    Returns:
        The decoded JSON body of the endpoint's response.

    Raises:
        RuntimeError: If ``DATABRICKS_TOKEN`` is unset or empty.
        Exception: If the endpoint answers with a status code other than 200.
    """
    # Check the credential first so a missing token fails before any request is sent.
    token = os.environ.get("DATABRICKS_TOKEN")
    if not token:
        raise RuntimeError(
            "Set the DATABRICKS_TOKEN environment variable to a Databricks "
            "access token before calling score_model()."
        )

    url = 'https://adb-2173364778179441.1.azuredatabricks.net/model/aa_digits/1/invocations'
    headers = {'Authorization': f'Bearer {token}'}
    if isinstance(dataset, pd.DataFrame):
        data_json = dataset.to_dict(orient='split')
    else:
        data_json = create_tf_serving_json(dataset)
    response = requests.request(method='POST', headers=headers, url=url, json=data_json)

    if response.status_code != 200:
        raise Exception(f'Request failed with status {response.status_code}, {response.text}')
    return response.json()

In [None]:
# Send the digits feature matrix X to the serving endpoint.
# NOTE(review): score_model returns the decoded JSON response as-is; confirm
# its structure is a flat sequence of labels before comparing it with y below.
y_hat = score_model(X)

In [None]:
# Confusion matrix of true labels y against the endpoint's predictions.
confusion_matrix(y, y_hat)

In [None]:
# Fraction of rows whose endpoint prediction equals the true label.
accuracy_score(y, y_hat)