# Custom model monitoring

Configure the client.

In [1]:
from verta import Client

# URL of the Verta instance handed to the client below.
VERTA_HOST = "https://demo.dev.verta.ai"
# Client handle reused by later cells (e.g. for client.get_endpoint).
client = Client(VERTA_HOST)

Create a custom monitor that combines pre-existing histogram processors with custom processing logic.

In [2]:
# Assumption: there is a single monitor, and it tracks several different monitoring metrics.


In [3]:
# In the voiceover, start with the code cell showing how to attach the monitor to the endpoint,
# and then get into the monitor definition.

# NOTE: the cells are written in the order I would demo them, not the order in which they can actually execute.

In [4]:
# Fetch the endpoint at path "/census" through the previously configured client.
endpoint = client.get_endpoint("/census") # get a deployed model

# NOTE(review): MyModelMonitor is not defined in the cells shown here (the class
# sketched in this notebook is named Monitor) -- confirm it is defined elsewhere.
monitor = MyModelMonitor() # define a monitor with custom statistics and metrics to monitor
# Register the processors: the custom missing-value counter first, then the
# feature histogram processor.
monitor.add_processor(MissingCountProcessor())
monitor.add_processor(FeatureHistogramProcessor())
# NOTE(review): df_train is not defined in the cells shown here -- presumably
# the training DataFrame used as reference data; confirm.
monitor.initialize(df_train)

endpoint.set_monitoring(monitor) # attach the monitor to the endpoint

In [5]:
class Monitor:
    """Container that forwards monitoring calls to a list of processors.

    Processors are invoked in the order in which they were added.
    """

    def __init__(self):
        """Start with an empty processor list."""
        self.processors = []

    def _dispatch(self, method_name, *args):
        """Call ``method_name(*args)`` on every processor, in order."""
        for step in self.processors:
            getattr(step, method_name)(*args)

    def add_processor(self, processor):
        """Append ``processor`` to the end of the processor list."""
        registered = self.processors
        registered.append(processor)

    def initialize(self, df):
        """Pass ``df`` to the ``initialize`` method of each processor."""
        self._dispatch("initialize", df)

    def process(self, inputs, outputs):
        """Pass ``inputs`` and ``outputs`` to each processor's ``process``."""
        self._dispatch("process", inputs, outputs)

In [6]:
import pandas as pd
from verta.monitoring import FeatureHistogramProcessor, BinaryHistogram, Histogram, Monitor

class MissingCountProcessor(Processor):
    """Processor that tracks, per column, how many values are present vs. missing.

    One ``BinaryHistogram`` named ``"missing_<column>"`` is created and
    registered for every column of the reference DataFrame. ``process`` then
    feeds the null flag of every incoming value into the matching histogram.

    NOTE(review): the ``Monitor`` class sketched in this notebook calls
    ``processor.initialize(df)``, whereas this class takes the reference data
    in its constructor -- confirm which protocol the base ``Processor`` expects.
    """

    def __init__(self, df):
        """Build and register one reference histogram per column of ``df``.

        Args:
            df: pandas DataFrame of reference data; each column becomes one
                monitored feature.
        """
        # Name this class (not the base) in super(): super(Processor, self)
        # would start the lookup *after* Processor and skip its initializer.
        super(MissingCountProcessor, self).__init__(df)

        # Collect information about current features to create a metric for
        # missing features. Maps column label -> its BinaryHistogram.
        self.missing_features = {}
        # DataFrame.items() replaces iteritems(), which pandas 2.0 removed.
        for column, vals in df.items():
            missing = vals.isnull().sum()
            present = vals.shape[0] - missing
            # str() keeps the name buildable for non-string column labels.
            hist = BinaryHistogram(
                name="missing_" + str(column),
                reference=[present, missing],
            )
            self.missing_features[column] = hist
            self.register(hist)

    def process(self, inputs, outputs):
        """Observe the null flag of every value in one batch.

        Args:
            inputs: column-oriented mapping accepted by
                ``pandas.DataFrame.from_dict(..., orient="columns")``.
            outputs: column-oriented mapping in the same format; joined to
                ``inputs`` on the index.

        Raises:
            KeyError: if a column seen in the reference data is absent from
                the joined inputs/outputs.
        """
        inputs = pd.DataFrame.from_dict(inputs, orient="columns")
        outputs = pd.DataFrame.from_dict(outputs, orient="columns")
        values = inputs.join(outputs)

        # Update the metric of missing features: each observation is True
        # when the value is null and False otherwise.
        for feature, hist in self.missing_features.items():
            for val in values[feature].isnull().tolist():
                hist.observe(val)