In [None]:
import datetime as dt
from datetime import datetime, timedelta, timezone, tzinfo
import joblib
import pytz
import json
import wallaroo
import wallaroo.assay_config
from wallaroo.object import EntityNotFoundError

import wallaroo.assay
from wallaroo.assay_config import BinMode, Aggregation, Metric

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

import seaborn as sns
import requests
import uuid

from upload_arrow_data import upload_arrow_data

# Notebook-wide display defaults: 12x6 figures and comma-grouped, two-decimal floats in pandas output.
plt.rcParams["figure.figsize"] = (12,6)
pd.options.display.float_format = '{:,.2f}'.format

In [None]:
def status_color(status):
    """Return the plot color for an assay status.

    "Ok" maps to "green" and "Warning" to "orange"; any other value
    maps to "red".
    """
    known_statuses = (("Ok", "green"), ("Warning", "orange"))
    for label, color in known_statuses:
        if status == label:
            return color
    return "red"
    
def create_legend():
    """Add a legend with one colored patch per status (Ok, Warning, Alert) via ``plt.legend``."""
    entries = (
        ('green', 'Status Ok'),
        ('orange', 'Warning'),
        ('red', 'Alert'),
    )
    handles = [mpatches.Patch(color=color, label=label) for color, label in entries]
    plt.legend(handles=handles)
    
def pick_colors(s):
    """Return a list with the ``status_color`` of every status in ``s``, in order."""
    return list(map(status_color, s))

def extract_arrow_prediction(t):
    """Return the first element of ``t['out']['dense_2']``."""
    outputs = t['out']
    dense_values = outputs['dense_2']
    return dense_values[0]

def extract_arrow_prediction_nested(t):
    """Return element ``[0][0]`` of the flattened ``'out.dense_2'`` entry of ``t``."""
    dense_column = t['out.dense_2']
    first_entry = dense_column[0]
    return first_entry[0]

In [None]:
import os
# Publish the (small) deployment resource settings as JSON through the DEPLOYMENT_CONFIG environment variable.
os.environ['DEPLOYMENT_CONFIG'] = json.dumps({"cpus": 0.1, "replica_count": 1, "memory": "100Mi"})
# Choose an auth type depending on whether SDK credentials are present in the environment.
if os.environ.get("WALLAROO_SDK_CREDENTIALS"):
    auth_type="user_password"
else:
    auth_type="none"
# NOTE(review): this unconditional assignment overrides the choice made just above, so the client
# always authenticates with "sso". Confirm which behaviour is intended and drop the other.
auth_type = "sso"
client = wallaroo.Client(auth_type=auth_type, request_timeout=90, interactive=True)

In [None]:
# Deploy a model/pipeline

# Random zero-padded 5-digit suffix shared by the pipeline and model names created in this run.
rand_id = f"{np.random.randint(10000):05d}"
pipeline_name = 'modelinsightse2e' + rand_id
model_name = 'baseline' + rand_id

onnx_file_model_name = "house_price_keras.onnx"

# NOTE(review): the variable is named `fraud`, but the uploaded file is a house-price model — consider renaming.
fraud = client.upload_model(model_name, onnx_file_model_name).configure('onnx')
pipeline = client.build_pipeline(pipeline_name)
pipeline = pipeline.add_model_step(fraud)
deployment = pipeline.deploy()

In [None]:
# Look the pipeline up again by name; exactly one match is expected.
pipelines = client.pipelines_by_name(pipeline_name)
assert len(pipelines) == 1
pipeline = pipelines[0]
pipeline_name

In [None]:
# Resolve the topic name for this pipeline's id; it is used by the upload and log-query cells.
topic = client.get_topic_name(pipeline.id())
topic

In [None]:
# Load canned data into notebook
# NOTE(review): joblib.load unpickles the files, which can execute arbitrary code — only use trusted .pkl files.
X_val = joblib.load("X_val.pkl")
canned_inference_records = joblib.load('inference_records.pkl')
len(canned_inference_records)

In [None]:
# Thin the canned data: keep every tenth record, starting with the first.

canned_inference_records = list(canned_inference_records)[::10]
len(canned_inference_records)

In [None]:
# Upload the canned records for this pipeline/model to the topic and remember how many were uploaded;
# a later cell asserts that the same number can be queried back.
uploaded_logs = upload_arrow_data(canned_inference_records, pipeline_name, model_name, topic=topic)
num_uploaded_logs = len(uploaded_logs)
print(f"\n Uploaded {num_uploaded_logs} canned logs")
uploaded_logs[-1]

In [None]:
# UTC midnight on 2022-01-01 and 2022-01-02; the assay cells pass these as the baseline start and end.
day1 = dt.datetime(2022, 1, 1, tzinfo=pytz.UTC)
day2 = dt.datetime(2022, 1, 2, tzinfo=pytz.UTC)


In [None]:
# Do some test inferences
num_test_inferences = 10
# NOTE(review): this is a naive local timestamp, while the assay cells use timezone-aware UTC datetimes —
# confirm the log queries interpret it as intended.
inference_start = dt.datetime.now()
for i in range(num_test_inferences):
    data = pd.DataFrame.from_dict({"tensor": [X_val[i].tolist()]})
    # `res` is overwritten on every iteration, so only the last inference result is kept.
    res = deployment.infer(data)

In [None]:
# Visual check: display the result of the last test inference.
res

In [None]:
# Visual check: display one of the canned inference records (index 8).
canned_inference_records[8]

In [None]:
# Make sure the prediction extracts correctly, i.e. that it is a number.
# TODO: the live and canned predictions probably should not be the same; this still needs checking.
assert isinstance(extract_arrow_prediction_nested(res), float)

In [6]:
# JAMIESKIP
# NOTE(review): the structure check below is disabled; the cell currently only evaluates to True.
# This can be fleshed out to check the structure of the two records better.
# def inferences_match(i1, i2):
#     return i1.keys() == i2.keys()


# sample = res.loc[0,:]
# canned_sample =  canned_inference_records[0]

# assert inferences_match(sample, canned_sample)

# for key in sample.keys():
#     assert type(sample[key]) == type(canned_sample[key]), f"{key} is not the same {type(sample[key])} {type(canned_sample[key])}"

True

In [None]:
# Fetch the logs of the test inferences through the api-lb, addressed by topic only.
# Logs can take a while to show up, so retry (at most 10 times, 5 seconds apart)
# until the expected number has arrived.
import time

logs = client.get_raw_logs(topic, start=inference_start, end=dt.datetime.now(), parse=True)
counter = 0
while True:
    if len(logs) >= num_test_inferences or counter >= 10:
        break
    time.sleep(5)
    counter += 1
    print(len(logs))
    logs = client.get_raw_logs(topic, start=inference_start, end=dt.datetime.now(), parse=True)
assert len(logs) == num_test_inferences
# assert inferences_match(res[0].raw, logs[0])

In [None]:
# Filtering the raw pipeline logs by this model's name must return every test inference.
assert len(client.get_raw_pipeline_inference_logs(topic, inference_start, dt.datetime.now(), model_name)) == num_test_inferences

In [None]:
# Filtering by a model name that does not exist ("FOOBAR") must return no logs.
assert len(client.get_raw_pipeline_inference_logs(topic, inference_start, dt.datetime.now(), "FOOBAR")) == 0

In [None]:
# The dataframe form of the same query must have one row per test inference.
df = client.get_pipeline_inference_dataframe(topic, inference_start, dt.datetime.now(), model_name)
assert df.shape[0] == num_test_inferences

In [None]:
# Timezone-aware UTC boundaries: the baseline day (2022-01-01 to 2022-01-02) and the last day queried (2022-02-01).
baseline_start = datetime(2022, 1, 1, tzinfo=timezone.utc)
baseline_end = datetime(2022, 1, 2, tzinfo=timezone.utc)
last_day = datetime(2022, 2, 1, tzinfo=timezone.utc)



In [None]:
# Query the uploaded logs back: once for the whole range up to last_day and once for the baseline day only.
all_inferences = client.get_raw_pipeline_inference_logs(topic, baseline_start, last_day, model_name, limit=1_000_000)
baseline_inferences = client.get_raw_pipeline_inference_logs(topic, baseline_start, baseline_end, model_name, limit=1_000_000)

# Every uploaded canned log must be returned, and the baseline day must not be empty.
assert len(all_inferences) == num_uploaded_logs
assert len(baseline_inferences) > 0


In [None]:
# Create dataframes from the inferences

all_preds = pd.DataFrame({"all_preds" : [extract_arrow_prediction(t) for _, t in all_inferences.iterrows()]})
baseline_preds = pd.DataFrame({"baseline_preds" : [extract_arrow_prediction(t)for _, t in baseline_inferences.iterrows()]})

min_pred = all_preds.min()[0]
max_pred = all_preds.max()[0]

In [None]:
# Build an assay for this pipeline/model with day1..day2 as the baseline window and print its JSON config.
assay_name = f"Test Assay {rand_id}"
assay_builder = client.build_assay(assay_name, pipeline, model_name, day1, day2)
print(assay_builder.build().to_json())

In [None]:
# Fetch the inference dataframe using the pipeline id, baseline start/end and model name held by the assay builder.
client.get_pipeline_inference_dataframe(
    client.get_topic_name(assay_builder.pipeline_id),
    assay_builder.baseline_builder.start,
    assay_builder.baseline_builder.end,
    assay_builder.baseline_builder.model_name,
)

In [None]:
# Display the assay builder's baseline dataframe.
assay_builder.baseline_dataframe()

In [None]:
# Display the assay builder's baseline KDE output.
assay_builder.baseline_kde()

In [None]:
# Display the assay builder's baseline ECDF output.
assay_builder.baseline_ecdf()

In [None]:
# Upload the assay and keep the returned id for the result queries below.
assay_id = assay_builder.upload()
assay_id

In [None]:
# First fetch of the uploaded assay's results, from day1 up to the current UTC time.
ar = client.get_assay_results(assay_id, day1, datetime.now(timezone.utc))

In [None]:
# Display the results fetched so far as a dataframe.
ar.to_dataframe()

In [None]:
# Poll the uploaded assay's results every `sleep_interval` seconds until 30 results are
# available or more than `max_sleep` seconds have been spent sleeping.
max_sleep = 90
sleep_interval = 3
elapsed_sleep = 0

assay_start = day1
while True:
    if elapsed_sleep > max_sleep:
        break
    assay_results = client.get_assay_results(assay_id, assay_start, datetime.now(timezone.utc))
    if len(assay_results) == 30:
        break
    time.sleep(sleep_interval)
    elapsed_sleep += sleep_interval

print(f"results available in <{elapsed_sleep} seconds, length {len(assay_results)}")
assert len(assay_results) == 30

In [None]:
# Display the raw payload of the first assay result.
assay_results[0].raw

In [None]:
# Test that the dataframe conversion worked: one row per assay result.
df = assay_results.to_dataframe()
assert len(df) == len(assay_results)
df

In [None]:
# Display the raw payload of the first assay result (same expression as the cell two above).
assay_results[0].raw

In [None]:
# Chart the first assay result.
assay_results[0].chart()

In [None]:
# Chart the second assay result.
assay_results[1].chart()

In [None]:
# Display the basic-statistics comparison for the second assay result.
assay_results[1].compare_basic_stats()

In [None]:
# Per-bin comparison for the second assay result; it must be a non-empty DataFrame.
comparison = assay_results[1].compare_bins()
print(f"Sum of absolute value of differences as percentage per bin {comparison.diff_in_pcts.abs().sum():5.3f}")

# isinstance instead of an exact type comparison, so DataFrame subclasses are accepted too.
assert isinstance(comparison, pd.DataFrame)
assert len(comparison) > 0
comparison

In [None]:
# Build an assay config that runs until last_day, run it interactively and check the resulting
# dataframe has at least one row and one column.
assay_config = client.build_assay("Input Assay", pipeline, model_name, day1, day2).add_run_until(last_day).build()
ardf = assay_config.interactive_run().to_dataframe()
assert ardf.shape[0] > 0
assert ardf.shape[1] > 0
ardf

In [None]:
# Interactive run with 24-hour-wide windows started every 12 hours, up to last_day.
assay_builder = client.build_assay(assay_name, pipeline, model_name, baseline_start, baseline_end)
assay_builder = assay_builder.add_run_until(last_day)

assay_builder.window_builder().add_width(hours=24).add_interval(hours=12)

assay_config = assay_builder.build()

assay_results = assay_config.interactive_run()
assay_results.to_dataframe()

In [None]:
# The 24-hour-wide / 12-hour-interval run above is expected to produce exactly 59 results.
assert len(assay_results) == 59

In [None]:
# Interactive run with one-week windows at one-week intervals, starting on 2022-01-03 (UTC);
# exactly 4 results are expected up to last_day.
report_start = datetime.fromisoformat('2022-01-03T00:00:00+00:00')

assay_builder = client.build_assay(assay_name, pipeline, model_name, baseline_start, baseline_end)
assay_builder = assay_builder.add_run_until(last_day)

assay_builder.window_builder().add_width(weeks=1).add_interval(weeks=1).add_start(report_start)

assay_config = assay_builder.build()

assay_results = assay_config.interactive_run()
assert len(assay_results) == 4

In [None]:
# Interactive *input* run: 4-hour windows over all uploaded inferences, using the names in
# `labels` for the inputs. The resulting dataframe must not be empty.
labels = [
    'bedrooms', 'bathrooms', 'lat', 'long', 'waterfront',
    'sqft_living', 'sqft_lot', 'floors', 'view', 'condition',
    'grade', 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated',
    'sqft_living15', 'sqft_lot15',
]

assay_builder = client.build_assay(
    "Input Assay", pipeline, model_name, day1, day2
).add_run_until(last_day)
assay_builder.window_builder().add_width(hours=4)
assay_config = assay_builder.build()
assay_results = assay_config.interactive_input_run(all_inferences, labels)
iadf = assay_results.to_dataframe()
assert len(iadf) > 0
iadf

In [None]:
# Keep only the rows whose status is anything other than "Ok"; at least one is expected.
not_ok = iadf.loc[iadf["status"] != "Ok"]
assert len(not_ok) > 0
not_ok

In [None]:
# Bin mode: equal-width bins (BinMode.EQUAL) instead of quantiles.
# The bin comparison of the first result is expected to have shape (7, 9).
assay_builder = client.build_assay("Test Assay", pipeline, model_name, day1, day2).add_run_until(last_day)
assay_builder.summarizer_builder.add_bin_mode(BinMode.EQUAL)
assay_results = assay_builder.build().interactive_run()
ar = assay_results[0]
df = ar.compare_bins()
assert df.shape == (7, 9)

In [None]:
# Bin mode: user-provided bin edges (BinMode.PROVIDED).
# The bin comparison of the first result is expected to have shape (7, 9).
edges = [11.0, 12.0, 13.0, 14.0, 15.0, 16.0]
assay_builder = client.build_assay("Test Assay", pipeline, model_name, day1, day2).add_run_until(last_day)
assay_builder.summarizer_builder.add_bin_mode(BinMode.PROVIDED, edges)
assay_results = assay_builder.build().interactive_run()
ar = assay_results[0]
df = ar.compare_bins()
assert df.shape == (7, 9)

In [None]:
# Number of bins: quantile mode with 10 bins.
# The bin comparison of the first result is expected to have shape (12, 9).
assay_builder = client.build_assay("Test Assay", pipeline, model_name, day1, day2).add_run_until(last_day)
assay_builder.summarizer_builder.add_bin_mode(BinMode.QUANTILE).add_num_bins(10)
assay_results = assay_builder.build().interactive_run()
ar = assay_results[0]
df = ar.compare_bins()
assert df.shape == (12, 9)

In [None]:
# Bin weights: six zeros followed by six ones, applied to a 10-bin quantile summarizer.
# The first result must compare to a (12, 9) frame and have a positive score.
weights = [0] * 6 + [1] * 6

assay_builder = client.build_assay(
    "Test Assay", pipeline, model_name, day1, day2
).add_run_until(last_day)
assay_builder.summarizer_builder.add_bin_mode(BinMode.QUANTILE).add_num_bins(10).add_bin_weights(weights)
assay_results = assay_builder.build().interactive_run()
ar = assay_results[0]
df = ar.compare_bins()
assert df.shape == (12, 9)
assert ar.score > 0.0

In [None]:
# Metric: SUMDIFF. The first result must compare to a (7, 9) frame and have a positive score.
assay_builder = client.build_assay("Test Assay", pipeline, model_name, day1, day2).add_run_until(last_day)
assay_builder.summarizer_builder.add_metric(Metric.SUMDIFF)
assay_results = assay_builder.build().interactive_run()
ar = assay_results[0]
df = ar.compare_bins()
assert df.shape == (7, 9)
assert ar.score > 0.0

In [None]:
# Metric: MAXDIFF. The first result must compare to a (7, 9) frame and have a positive score.
assay_builder = client.build_assay("Test Assay", pipeline, model_name, day1, day2).add_run_until(last_day)
assay_builder.summarizer_builder.add_metric(Metric.MAXDIFF)
assay_results = assay_builder.build().interactive_run()
ar = assay_results[0]
df = ar.compare_bins()
assert df.shape == (7, 9)
assert ar.score > 0.0

In [None]:
# Aggregation: DENSITY. The first result must compare to a (7, 9) frame and have a positive score.
assay_builder = client.build_assay("Test Assay", pipeline, model_name, day1, day2).add_run_until(last_day)
assay_builder.summarizer_builder.add_aggregation(Aggregation.DENSITY)
assay_results = assay_builder.build().interactive_run()
ar = assay_results[0]
df = ar.compare_bins()
assert df.shape == (7, 9)
assert ar.score > 0.0

In [None]:
# Aggregation: CUMULATIVE. The first result must compare to a (7, 9) frame and have a positive score.
assay_builder = client.build_assay("Test Assay", pipeline, model_name, day1, day2).add_run_until(last_day)
assay_builder.summarizer_builder.add_aggregation(Aggregation.CUMULATIVE)
assay_results = assay_builder.build().interactive_run()
ar = assay_results[0]
df = ar.compare_bins()
assert df.shape == (7, 9)
assert ar.score > 0.0

In [None]:
# Tear down: undeploy the deployment created at the top of the notebook.
deployment.undeploy()

In [None]:
# Undeploy every pipeline the client lists, not only the one created in this notebook.
# A dedicated loop variable keeps the notebook-level `pipeline` from being rebound to
# whichever pipeline happens to be listed last.
for listed_pipeline in client.list_pipelines():
    listed_pipeline.undeploy()
    

In [None]:
# Display the pipelines the client still lists after the teardown above.
client.list_pipelines()