# Deploying and invoking the Drift Detector Dataflow Flex template

In [1]:
# Step up to the parent directory; later cells rely on relative paths
# (the `.` build context and `metadata.json`).
# NOTE(review): re-running this cell moves up one more level each time.
%cd ..

/home/jarekk/repos/drift-monitor/detector


In [2]:
import json
from datetime import datetime
import time

## Build the Flex template docker image

In [3]:
template_name = 'log-analyzer'

project_id = !(gcloud config get-value core/project)
project_id = project_id[0]
template_image = 'gcr.io/{}/{}:latest'.format(project_id, template_name)
location = 'us-central1'

In [4]:
!gcloud builds submit --tag {template_image} .

.3 in /usr/local/lib/python3.5/site-packages (from tensorflow!=2.0.*,<3,>=1.15->tensorflow_data_validation[visualization]==0.22.0) (0.3.3)
Collecting httplib2<=0.12.0,>=0.8
  Downloading httplib2-0.12.0.tar.gz (218 kB)
Collecting typing<3.8.0,>=3.7.0; python_version < "3.5.3"
  Downloading typing-3.7.4.1-py3-none-any.whl (25 kB)
Collecting fastavro<0.22,>=0.21.4
  Downloading fastavro-0.21.24-cp35-cp35m-manylinux1_x86_64.whl (1.2 MB)
Collecting google-api-python-client<2,>=1.7.11
  Downloading google_api_python_client-1.9.1-py3-none-any.whl (59 kB)
Collecting tensorflow-serving-api<3,>=1.15
  Downloading tensorflow_serving_api-2.2.0-py2.py3-none-any.whl (38 kB)
Collecting jedi>=0.10
  Downloading jedi-0.17.0-py2.py3-none-any.whl (1.1 MB)
Collecting pexpect; sys_platform != "win32"
  Downloading pexpect-4.8.0-py2.py3-none-any.whl (59 kB)
Collecting decorator
  Downloading decorator-4.4.2-py2.py3-none-any.whl (9.2 kB)
Collecting traitlets>=4.2
  Downloading traitlets-4.3.3-py2.py3-none-a

## Deploy the Flex template using `gcloud`

In [5]:
# Local metadata file that is passed to the template build command.
metadata_file = 'metadata.json'

# GCS folder holding template spec files; this template's spec is stored
# there as <template_name>.json.
flex_templates_location = 'gs://mlops-dev-workspace/flex-templates'
spec_pattern = '{}/{}.json'
template_path = spec_pattern.format(flex_templates_location, template_name)

In [6]:
!gcloud beta dataflow flex-template build {template_path} \
  --image {template_image} \
  --sdk-language "PYTHON" \
  --metadata-file {metadata_file} \
  --image {template_image} \
  --sdk-language "PYTHON" \
  --metadata-file {metadata_file}

Successfully saved container spec in flex template file.
Template File GCS Location: gs://mlops-dev-workspace/flex-templates/log-analyzer.json
Container Spec:

{
    "image": "gcr.io/mlops-dev-env/log-analyzer:latest",
    "metadata": {
        "description": "Data drift detector Python flex template.",
        "name": "Data drift detector Python flex template",
        "parameters": [
            {
                "helpText": "A full name of the BQ request-response log table",
                "label": "Request response log table.",
                "name": "request_response_log_table",
                "regexes": [
                    "[-_.a-zA-Z0-9]+"
                ]
            },
            {
                "helpText": "A name of the AI Platform Prediction model.",
                "label": "Model name.",
                "name": "model",
                "regexes": [
                    "[-_a-zA-Z0-9]+"
                ]
            },
            {
                "helpText": "A v

## Trigger a run of the template using the REST API (Python client)

In [7]:
import googleapiclient.discovery

# Client for the Dataflow REST API (v1b3).
service = googleapiclient.discovery.build('dataflow', 'v1b3')

# Parameters handed to the template. start_time and end_time bound the
# analyzed log window; start_time previously read '2020-06-203T17:00:00',
# which is not a valid date.
parameters = {
    'request_response_log_table': 'mlops-dev-env.data_validation.covertype_classifier_logs_tf',
    'model': 'covertype_tf',
    'version': 'v3',
    'start_time': '2020-06-03T17:00:00',
    'end_time': '2020-06-03T23:00:00',
    'output_path': 'gs://mlops-dev-workspace/drift-monitor/output/covertype-tf/test',
    'schema_file': 'gs://mlops-dev-workspace/drift-monitor/schema/schema.pbtxt',
    'baseline_stats_file': 'gs://mlops-dev-workspace/drift-monitor/baseline_stats/stats.pbtxt',
    'time_window': '60m',
    'setup_file': './setup.py'
}

# Job name carries a timestamp with one-second resolution.
job_name = "rr-log-analyzer-{}".format(time.strftime("%Y%m%d-%H%M%S"))

# Launch request body: job name, template parameters and the GCS path of
# the template spec.
body = {
    'launch_parameter': {
        'jobName': job_name,
        'parameters': parameters,
        'containerSpecGcsPath': template_path
    }
}

request = service.projects().locations().flexTemplates().launch(
    location=location,
    projectId=project_id,
    body=body)

# Send the launch request and show the service's response.
response = request.execute()
response

{'job': {'id': '2020-06-03_17_25_08-11190091457466143526',
  'projectId': 'mlops-dev-env',
  'name': 'data-drift-20200604-002508',
  'currentStateTime': '1970-01-01T00:00:00Z',
  'createTime': '2020-06-04T00:25:09.197611Z',
  'location': 'us-central1',
  'startTime': '2020-06-04T00:25:09.197611Z'}}

## Trigger a run of the template using the REST API (curl)

In [7]:
service_url = 'https://dataflow.googleapis.com/v1b3/projects/{}/locations/us-central1/flexTemplates:launch'.format(project_id)
headers_content = 'Content-Type: application/json'
access_token = !(gcloud auth print-access-token) 
headers_auth = 'Authorization: Bearer {}'.format(access_token[0])
parameters = {
    'request_response_log_table': 'mlops-dev-env.data_validation.covertype_classifier_logs_tf',
    'start_time': '2020-05-15T00:15:00',
    'end_time': '2020-05-15T05:51:00',
    'output_path': 'gs://mlops-dev-workspace/drift_monitor/output/tf/test2',
    'schema_file': 'gs://mlops-dev-workspace/drift_monitor/schema/schema.pbtxt',
    'setup_file': './setup.py',

}
job_name = "data-drift-{}".format(time.strftime("%Y%m%d-%H%M%S"))
body = {
    'launch_parameter': 
         {
             'jobName': job_name,
             'parameters' : parameters,
             'containerSpecGcsPath': template_path
         }}

json_body = json.dumps(body)

In [8]:
!curl -X POST \
  "{service_url}" \
  -H "{headers_content}" \
  -H "{headers_auth}" \
  -d '{json_body}'

{
  "job": {
    "id": "2020-05-25_10_02_12-10856269937757314054",
    "projectId": "mlops-dev-env",
    "name": "data-drift-20200525-170210",
    "currentStateTime": "1970-01-01T00:00:00Z",
    "createTime": "2020-05-25T17:02:13.459065Z",
    "location": "us-central1",
    "startTime": "2020-05-25T17:02:13.459065Z"
  }
}


## Trigger a run of the template using `gcloud`

In [None]:
job_name = "data-drift-{}".format(time.strftime("%Y%m%d-%H%M%S"))
g_parameters = ','.join(['{}={}'.format(key,value) for key, value in parameters.items()])

!gcloud beta dataflow flex-template run {job_name} \
--template-file-gcs-location {template_path} \
--parameters {g_parameters}