# Sandbox - using Cloud Datastore to manage metadata for drift monitor runs

In [2]:
import datetime
from google.cloud import datastore

In [1]:
project_id = 'mlops-dev-env'
monitor_namespace = 'ModelMonitors'
monitor_entity_kind = 'DataDriftMonitor'
model_name = 'covertype_tf'
model_version = 'v3' 
monitor_entity_name = '{}-{}'.format(model_name, model_version)
schema_location = 'gs://mlops-dev-workspace/drift_monitor/schema/schema.pbtxt'
reports_location = 'gs://mlops-dev-workspace/drift_monitor/output/tf/test'
baseline_stats_location = None

In [3]:
client = datastore.Client(project_id)

monitor_key = client.key(monitor_entity_kind, monitor_entity_name, namespace=monitor_namespace)
value = {
    'model_name': model_name,
    'model_version': model_version,
    'schema_path': schema_location,
    'reports_location': reports_location,
    'baseline_stats_location': baseline_stats_location,
    'created': datetime.datetime.now()
}

monitor = datastore.Entity(
    monitor_key, exclude_from_indexes=['schema_path', 'reports_location', 'baseline_stats_location'])

monitor.update(value)

monitor

In [7]:
client.put(monitor)

In [8]:
query = client.query(kind=monitor_entity_kind, namespace=monitor_namespace)
query.order = ['created']
list(query.fetch())

In [18]:
report_entity_kind = 'DriftReports'
report_key = client.key(report_entity_kind, namespace=monitor_namespace, parent=monitor_key)
value = {
    'start_time': datetime.datetime.fromisoformat('2020-05-23T19:00:00'),
    'end_time': datetime.datetime.fromisoformat('2020-05-23T20:00:00'),
    'report_path': 'gs://mlops-dev-workspace/drift_monitor/output/tf/test',
    'status': 'scheduled',
    'scheduled_time': datetime.datetime.fromisoformat('2020-05-27T20:00:00'),
    'created_time': datetime.datetime.fromisoformat('2020-05-28T20:00:00')
}

report = datastore.Entity(
    report_key, exclude_from_indexes=['report_path'])

report.update(value)
report

<Entity('DataDriftMonitor', 'covertype_tf-v3', 'DriftReports') {'start_time': datetime.datetime(2020, 5, 23, 19, 0), 'end_time': datetime.datetime(2020, 5, 23, 20, 0), 'report_path': 'gs://mlops-dev-workspace/drift_monitor/output/tf/test', 'status': 'scheduled', 'scheduled_time': datetime.datetime(2020, 5, 27, 20, 0), 'created_time': datetime.datetime(2020, 5, 28, 20, 0)}>

In [19]:
client.put(report)

In [20]:
query = client.query(kind=report_entity_kind, namespace=monitor_namespace)
query.order = ['status']
list(query.fetch())

[<Entity('DataDriftMonitor', 'covertype_tf-v3', 'DriftReports', 5632499082330112) {'end_time': datetime.datetime(2020, 5, 23, 20, 0, tzinfo=<UTC>), 'start_time': datetime.datetime(2020, 5, 23, 19, 0, tzinfo=<UTC>), 'status': 'scheduled', 'created_time': datetime.datetime(2020, 5, 28, 20, 0, tzinfo=<UTC>), 'scheduled_time': datetime.datetime(2020, 5, 27, 20, 0, tzinfo=<UTC>), 'report_path': 'gs://mlops-dev-workspace/drift_monitor/output/tf/test'}>,
 <Entity('DataDriftMonitor', 'covertype_tf-v3', 'DriftReports', 5644004762845184) {'end_time': datetime.datetime(2020, 5, 23, 20, 0, tzinfo=<UTC>), 'start_time': datetime.datetime(2020, 5, 23, 19, 0, tzinfo=<UTC>), 'status': 'scheduled', 'created_time': datetime.datetime(2020, 5, 28, 20, 0, tzinfo=<UTC>), 'scheduled_time': datetime.datetime(2020, 5, 27, 20, 0, tzinfo=<UTC>), 'report_path': 'gs://mlops-dev-workspace/drift_monitor/output/tf/test'}>]