# 0. Setup

### 0.1 Imports

In [None]:
import os
from datetime import datetime
from random import randint, uniform
import time
from typing import List

try:
    import numpy as np
except ImportError:
    !pip install numpy
    import numpy as np

try:
    import pandas as pd
except ImportError:
    !pip install pandas
    import pandas as pd

try:
    from verta import Client, environment
    from verta.dataset import Path
    from verta.dataset.entities import Dataset
    from verta.deployment._deployedmodel import DeployedModel
    from verta.endpoint import Endpoint
    from verta.registry import VertaModelBase
    from verta.registry.entities import RegisteredModel, RegisteredModelVersion
    from verta.tracking.entities import ExperimentRun
    from verta.utils import ModelAPI
except ImportError:
    !pip install verta
    from verta.dataset import Path
    from verta.dataset.entities import Dataset
    from verta.deployment._deployedmodel import DeployedModel
    from verta.endpoint import Endpoint
    from verta.registry import VertaModelBase
    from verta.registry.entities import RegisteredModel, RegisteredModelVersion
    from verta.tracking.entities import ExperimentRun
    from verta.utils import ModelAPI

### 0.2 Verta Client Setup

In [None]:
# Credentials are expected in the VERTA_EMAIL, VERTA_DEV_KEY and VERTA_HOST
# environment variables.  If they are not set in your shell, uncomment the
# lines below and fill in your own values.
# NOTE: never commit real credentials; a key that has been committed should
# be treated as compromised and rotated.
# os.environ['VERTA_EMAIL'] = '<your-email>'
# os.environ['VERTA_DEV_KEY'] = '<your-dev-key>'
# os.environ['VERTA_HOST'] = '<your-verta-host>'

# NAME is reused for the dataset, project, experiment, registered model and
# endpoint created by this notebook.
NAME: str = "batch_reference_data_example-11"
MODEL_VERSION: str = "v1"
client: Client = Client()

# 1. Sample Data

### 1.1 Generate sample reference data

In [None]:
def generate_day_data(batch_size: int) -> pd.DataFrame:
    """Build a frame of ``batch_size`` random ids with their parity labels.

    Each row holds:
      * ``id``: an integer drawn with ``randint(1, 100)``,
      * ``isOdd``: ``1.0`` when the id is odd, ``0.0`` when it is even,
      * ``isOdd.confidence``: always ``1.0``.

    A ``batch_size`` of zero (or less) yields an empty frame.
    """
    # One randint draw per row, in row order.
    draws = (randint(1, 100) for _ in range(batch_size))
    rows = [
        {
            "id": value,
            "isOdd": 1. if value % 2 != 0 else 0.,
            "isOdd.confidence": 1.0,
        }
        for value in draws
    ]
    return pd.DataFrame(rows)


def generate_data(
        batch_size: int,
        dest: str = "data/examples/reference_data/monitoring-batch-reference-example-data.csv",
        ) -> None:
    """Write ``batch_size`` rows of generated reference data to a CSV file.

    Args:
        batch_size: Number of rows requested from ``generate_day_data``.
        dest: Path of the CSV file to write.  Defaults to the location the
            rest of this notebook reads from.  Missing parent directories
            are created.
    """
    parent_dir = os.path.dirname(dest)
    # os.makedirs("") raises, so only create a directory when dest names one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    # index=False keeps the pandas row index out of the file.
    generate_day_data(batch_size).to_csv(dest, index=False)

# Write a 1000-row reference CSV to disk for the steps that follow.
generate_data(batch_size=1000)

### 1.2 Use generated sample data to create a new versioned dataset

In [None]:
# Directory holding the generated reference CSV.  It is named explicitly
# instead of being taken from the last entry yielded by os.walk("./data"):
# that approach left root_dir undefined when ./data did not exist and picked
# an arbitrary directory whenever ./data contained other subdirectories.
root_dir: str = "./data/examples/reference_data"
if not os.path.isdir(root_dir):
    raise FileNotFoundError(
        f"{root_dir} not found; generate the sample data before versioning it."
    )

dataset: Dataset = client.get_or_create_dataset(NAME)
# Start from an empty path list, then add the reference-data directory.
content: Path = Path([], enable_mdb_versioning=True)
content.add(root_dir)
dataset_version = dataset.create_version(content)

# 2.0 Create and register a model

In [None]:
# Describe the model's interface by example: the first frame shows the input
# columns, the second shows the output columns.
# NOTE(review): the example outputs here are ints while IsOdd.predict returns
# floats - confirm whether the types inferred by ModelAPI matter downstream.
odd_model_api: ModelAPI = ModelAPI(
    pd.DataFrame.from_records([{"id": 1}, {"id": 2}]),
    pd.DataFrame.from_records(
        [
            {"isOdd": 1, "isOdd.confidence": 1},
            {"isOdd": 0, "isOdd.confidence": 1},
        ]
    ),
)

class IsOdd(VertaModelBase):
    """Toy parity classifier whose answers are deliberately noisy."""

    def __init__(self, artifacts):
        """Accept ``artifacts`` without using it; the model keeps no state."""

    def predict(self, data):
        """Return a noisy parity label and a random confidence for ``data['id']``.

        The true parity (1.0 for odd, 0.0 for even) is flipped with
        probability 0.1 when it is odd and 0.3 when it is even.  The
        confidence is then drawn uniformly from [0.8, 1] when the reported
        label is odd and from [0.5, 1] when it is even.
        """
        value = data['id']
        label: float = 1. if value % 2 != 0 else 0.

        # Non-uniform flip: even inputs are mislabelled more often than odd ones.
        flip_chance = 0.1 if label else 0.3
        if uniform(0, 1) < flip_chance:
            label = 1-label

        # Non-uniform confidence: the range depends on the (possibly flipped) label.
        confidence_floor = 0.8 if label else 0.5
        return {"isOdd": label, "isOdd.confidence": uniform(confidence_floor, 1)}

### 2.1 Add a new project and start an experiment run

In [None]:
# Point the client at the project and experiment named NAME.
# (Presumably these are created when they do not exist yet - confirm against
# the Verta client documentation.)
client.set_project(NAME)
client.set_experiment(NAME)
# Name the run after the current local timestamp in ISO 8601 form.
er: ExperimentRun = client.set_experiment_run(datetime.now().isoformat())

### 2.2 Associate model with experiment run

In [None]:
# Log the IsOdd class itself (not an instance) together with its API description.
er.log_model(IsOdd, model_api=odd_model_api)
# Log a Python environment with an empty requirements list.
er.log_environment(environment.Python(requirements=[]))

### 2.3 Associate dataset version with experiment run

In [None]:
# Attach the dataset version to the run under the key "reference".
er.log_dataset_version(key="reference", dataset_version=dataset_version)

### 2.4 Register the model version with Verta

In [None]:
# Fetch (or create) the registered model named NAME, then create the version
# MODEL_VERSION from the experiment run `er`.
rm: RegisteredModel = client.get_or_create_registered_model(NAME)
rmv: RegisteredModelVersion = rm.create_version_from_run(
    run_id=er.id,
    name=MODEL_VERSION,
)

### 2.5 Associate the registered model with an endpoint

_Automatically configures monitoring of the endpoint with default dashboards and drift alerts._

In [None]:
# Fetch (or create) the endpoint named NAME and update it with the registered
# model version; wait=True is passed to the update call.
endpt: Endpoint = client.get_or_create_endpoint(NAME)
endpt.update(rmv, wait=True)
# Pause for a fixed 120 seconds before the endpoint is used.
time.sleep(120)  # Give services on the new pod a moment to complete setup.

# 3.0 Use deployed model to make predictions

_Also logs ground truth after predictions are run._

In [None]:
# Handle for the model deployed behind the endpoint.
deployed_model: DeployedModel = endpt.get_deployed_model()
# Read the previously generated reference CSV back from disk; its "id" and
# "isOdd" columns are used below as prediction inputs and ground truth.
sample_df: pd.DataFrame = pd.read_csv('data/examples/reference_data/monitoring-batch-reference-example-data.csv')
# read from existing sample data

def simulate_predictions(
        endpoint: Endpoint,
        prediction_count: int,
        ground_truth_df: pd.DataFrame,
        ) -> None:
    """Fire a given number of predictions at the endpoint and log ground truth.

    The first ``prediction_count`` rows of ``ground_truth_df`` are used: each
    row's ``id`` is sent as a prediction input, and afterwards the row's
    ``isOdd`` value is logged as ground truth against the returned
    prediction id.  The wall-clock time of each phase is printed.

    Args:
        endpoint: Endpoint to predict against and to log ground truth to.
        prediction_count: Number of leading rows of ``ground_truth_df`` to use.
        ground_truth_df: Frame with an ``id`` column and an ``isOdd`` column.

    Raises:
        ValueError: If ``prediction_count`` is negative or larger than the
            number of rows in ``ground_truth_df``.
    """
    if not 0 <= prediction_count <= len(ground_truth_df):
        raise ValueError(
            f"prediction_count must be between 0 and {len(ground_truth_df)}, "
            f"got {prediction_count}"
        )

    # Resolve the deployed model from the endpoint argument so the function
    # does not depend on a notebook-level global.
    model = endpoint.get_deployed_model()

    # Select rows by position for both columns; .tolist() yields built-in
    # Python scalars rather than NumPy ones.
    rows = ground_truth_df.iloc[:prediction_count]
    input_ids = rows["id"].tolist()
    labels = rows["isOdd"].tolist()

    # `%%time` is a cell magic and is only valid on the first line of a cell,
    # so each phase is timed explicitly instead.
    started = time.perf_counter()
    prediction_ids: List[str] = []
    for input_id in input_ids:
        prediction_id, _ = model.predict_with_id({'id': input_id})
        prediction_ids.append(prediction_id)
    print(f"Sent {len(prediction_ids)} predictions in {time.perf_counter() - started:.2f}s")

    started = time.perf_counter()
    for prediction_id, label in zip(prediction_ids, labels):
        endpoint.log_ground_truth(prediction_id, [label], 'isOdd')
    print(f"Logged {len(prediction_ids)} ground truths in {time.perf_counter() - started:.2f}s")

In [None]:
# Send 500 predictions built from the unmodified sample data and log their
# ground truth.
simulate_predictions(
    endpoint=endpt,
    prediction_count=500,
    ground_truth_df=sample_df,
    )

### 3.1 Introduce drift into the data

In [None]:
# Work on a copy so sample_df itself is left untouched.
drifted_data: pd.DataFrame = sample_df.copy()
# Shift every id by 35 to move the input distribution.  The assignment
# replaces a whole column, so the former `: int` annotation was dropped.
# NOTE(review): 35 is odd, so every shifted id has the opposite parity of the
# original while the 'isOdd' column is left unchanged - confirm that logging
# this now-mismatched ground truth is intended.
drifted_data['id'] = drifted_data['id'] + 35

simulate_predictions(
    endpoint=endpt,
    prediction_count=300,
    ground_truth_df=drifted_data
)
