# 0. Setup

### 0.1 Imports

In [None]:
import os
from datetime import datetime
from random import randint, uniform
from time import sleep
from typing import List

try:
    import numpy as np
except ImportError:
    !pip install numpy
    import numpy as np

try:
    import pandas as pd
except ImportError:
    !pip install pandas
    import pandas as pd

try:
    from verta import Client, environment
    from verta.dataset import Path
    from verta.dataset.entities import Dataset
    from verta.deployment._deployedmodel import DeployedModel
    from verta.endpoint import Endpoint
    from verta.registry import VertaModelBase
    from verta.registry import lock
    from verta.registry.entities import RegisteredModel, RegisteredModelVersion
    from verta.tracking.entities import ExperimentRun
    from verta.utils import ModelAPI
except ImportError:
    !pip install verta
    from verta.dataset import Path
    from verta.dataset.entities import Dataset
    from verta.deployment._deployedmodel import DeployedModel
    from verta.endpoint import Endpoint
    from verta.registry import VertaModelBase
    from verta.registry import lock
    from verta.registry.entities import RegisteredModel, RegisteredModelVersion
    from verta.tracking.entities import ExperimentRun
    from verta.utils import ModelAPI

### 0.2 Verta Client Setup

In [None]:
# Use local env vars or uncomment and fill out the lines below:
# os.environ['VERTA_EMAIL'] = ''
# os.environ['VERTA_DEV_KEY'] = ''
# os.environ['VERTA_HOST'] = ''

# Name shared by the registered model and the endpoint created further down.
NAME: str = "batch_reference_data_example-old"
# Label given to the registered model version.
MODEL_VERSION: str = "v1"
# Client() is called without arguments; presumably it picks up the VERTA_*
# environment variables mentioned above — confirm against the Verta docs.
client: Client = Client()

# 1. Sample Data

### 1.1 Generate sample reference data

In [None]:
def generate_day_data(batch_size: int) -> pd.DataFrame:
    """Build a frame of ``batch_size`` random ids labelled with their parity.

    Every row contains:

    * ``id`` - an integer drawn with ``randint(1, 100)``,
    * ``isOdd`` - ``1.`` when the id is odd, ``0.`` when it is even,
    * ``isOdd.confidence`` - always ``1.0``.

    Args:
        batch_size: Number of rows to generate.

    Returns:
        A DataFrame with one row per generated id.
    """

    def make_row() -> dict:
        # Exactly one random draw per row.
        value = randint(1, 100)
        return {
            "id": value,
            "isOdd": float(value % 2),
            "isOdd.confidence": 1.0,
        }

    return pd.DataFrame([make_row() for _ in range(batch_size)])


# Generate 1000 rows of sample data to act as the reference set.
df_reference = generate_day_data(batch_size=1000)
# Features: a one-column DataFrame holding only "id".
X_reference = df_reference[["id"]]
# Labels: the "isOdd" column as a Series.
y_reference = df_reference["isOdd"]

# 2.0 Create and register a model

In [None]:
# Describe the model's interface with two example inputs and two example
# outputs. NOTE(review): presumably ModelAPI derives the field names and types
# from these sample frames — confirm against the Verta docs.
odd_model_api: ModelAPI = ModelAPI(
    pd.DataFrame.from_records([{"id": 1}, {"id": 2}]),
    pd.DataFrame.from_records(
        [
            {"isOdd": 1, "isOdd.confidence": 1},
            {"isOdd": 0, "isOdd.confidence": 1},
        ]
    ),
)

class IsOdd(VertaModelBase):
    """Toy parity classifier that deliberately returns noisy answers."""

    def __init__(self, artifacts):
        """Accept ``artifacts`` and ignore it; the model keeps no state."""

    def predict(self, data):
        """Predict whether ``data['id']`` is odd, with injected noise.

        The true parity is flipped with probability 0.1 when the id is odd
        and 0.3 when it is even. The reported confidence is drawn uniformly
        from [0.8, 1] when the (possibly flipped) answer is "odd" and from
        [0.5, 1] otherwise.

        Returns:
            A dict with keys ``"isOdd"`` and ``"isOdd.confidence"``.
        """
        value: int = data['id']
        label: float = 1. if value % 2 else 0.

        # Non-uniform flip: even ids are mislabelled more often than odd ones.
        flip_probability = 0.1 if label else 0.3
        if uniform(0, 1) < flip_probability:
            label = 1 - label

        # Non-uniform confidence: "odd" answers get a narrower, higher range.
        lower_bound = 0.8 if label else 0.5
        confidence = uniform(lower_bound, 1)

        return {"isOdd": label, "isOdd.confidence": confidence}

### 2.1 Register the model version with Verta

In [None]:
# Fetch the registered model called NAME, creating it if it does not exist.
rm: RegisteredModel = client.get_or_create_registered_model(NAME)
# Create a version labelled MODEL_VERSION under that registered model.
# NOTE(review): unlike the get_or_create_* calls, create_version presumably
# fails when the version already exists (e.g. on a re-run) — confirm.
rmv: RegisteredModelVersion = rm.create_version(
    name=MODEL_VERSION,
)
# Apply the Redact lock level to the version; see verta.registry.lock for
# what that level permits.
rmv.set_lock_level(lock.Redact())

### 2.2 Associate model with the model version

In [None]:
# Log the IsOdd class itself (not an instance) along with its API description.
rmv.log_model(IsOdd, model_api=odd_model_api)
# Log a Python environment with an empty requirements list.
rmv.log_environment(environment.Python(requirements=[]))

### 2.3 Log reference data to the model version

In [None]:
# Log the reference features ("id") and labels ("isOdd") to the model version.
rmv.log_reference_data(X_reference, y_reference)

### 2.4 Associate the registered model version with an endpoint

_Automatically configures monitoring of the endpoint with default dashboards and drift alerts._

In [None]:
# Fetch the endpoint called NAME, creating it if it does not exist.
endpt: Endpoint = client.get_or_create_endpoint(NAME)
# Point the endpoint at the model version.
# NOTE(review): wait=True presumably blocks until the update completes — confirm.
endpt.update(rmv, wait=True)

# 3.0 Use deployed model to make predictions

_Also logs ground truth after predictions are run._

In [None]:
# Handle for sending prediction requests to the model deployed on the endpoint.
deployed_model: DeployedModel = endpt.get_deployed_model()
# Alias (not a copy) of the reference frame, reused as prediction input.
sample_df: pd.DataFrame = df_reference
# read from existing sample data

def simulate_predictions(
        endpoint: Endpoint,
        prediction_count: int,
        ground_truth_df: pd.DataFrame,
        ) -> None:
    """Fire predictions at the endpoint, then log their ground truth.

    The first ``prediction_count`` rows of ``ground_truth_df`` (by position)
    are sent one at a time to the model deployed on ``endpoint``. Once all
    predictions have been made, the matching ``isOdd`` value of each row is
    logged as ground truth against the prediction id returned for that row.

    Args:
        endpoint: Endpoint that receives both the predictions and the
            ground truth.
        prediction_count: Number of rows to send. Zero or a negative value
            sends nothing.
        ground_truth_df: Frame with an ``id`` column (model input) and an
            ``isOdd`` column (ground truth).

    Raises:
        ValueError: If ``prediction_count`` exceeds the number of rows
            available, checked before any request is sent.
    """
    available = len(ground_truth_df)
    if prediction_count > available:
        raise ValueError(
            f"prediction_count ({prediction_count}) exceeds the number of "
            f"rows in ground_truth_df ({available})"
        )

    # Positional slice, so frames with a non-default index work as well.
    # max() keeps a negative count a no-op, as range() would.
    rows = ground_truth_df.iloc[:max(prediction_count, 0)]
    if rows.empty:
        return

    # Predict against the endpoint that was passed in, so predictions and
    # ground truth always target the same deployment.
    model = endpoint.get_deployed_model()

    prediction_ids: List[str] = []
    # tolist() yields plain Python scalars rather than numpy ones.
    for sample_id in rows["id"].tolist():
        prediction_id, _ = model.predict_with_id({'id': sample_id})
        prediction_ids.append(prediction_id)

    for prediction_id, truth in zip(prediction_ids, rows["isOdd"].tolist()):
        endpoint.log_ground_truth(prediction_id, [truth], 'isOdd')

In [None]:
# Send every row of the sample data (1000 rows were generated) through the
# endpoint and log the matching ground truth.
simulate_predictions(
    endpoint=endpt,
    prediction_count=1000,
    ground_truth_df=sample_df,
    )

### 3.1 Introduce drift into the data

In [None]:
# Work on a copy so the original sample data stays untouched.
drifted_data: pd.DataFrame = sample_df.copy()
# Shift every id by 25, moving the input range from 1-100 to 26-125.
# NOTE(review): 25 is odd and "isOdd" is not recomputed, so the logged ground
# truth no longer matches the parity of the shifted ids — presumably
# intentional for the drift demo; confirm.
drifted_data['id'] = drifted_data['id'] + 25

# Send the first 500 drifted rows through the endpoint.
simulate_predictions(
    endpoint=endpt,
    prediction_count=500,
    ground_truth_df=drifted_data
)
