## Define environment variables

In [1]:
# Set `PATH` to include user python binary directory and a directory containing `skaffold`.
PATH=%env PATH
%env PATH={PATH}:/home/jupyter/.local/bin

env: PATH=/usr/local/cuda/bin:/opt/conda/bin:/opt/conda/condabin:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/home/jupyter/.local/bin


In [2]:
# Read GCP project id from env.
shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null
GOOGLE_CLOUD_PROJECT=shell_output[0]
%env GOOGLE_CLOUD_PROJECT={GOOGLE_CLOUD_PROJECT}
print("GCP project ID:" + GOOGLE_CLOUD_PROJECT)

env: GOOGLE_CLOUD_PROJECT=tfx-cloud-project
GCP project ID:tfx-cloud-project


## Create BQ Dataset

In [3]:
from google.cloud import bigquery

# Names of the BigQuery dataset and table used by the following cells.
BQ_DATASET_NAME = 'data_validation'
BQ_TABLE_NAME = 'sentiment_analysis_logs'

client = bigquery.Client(GOOGLE_CLOUD_PROJECT)

# Ids of the datasets currently listed for the project.
dataset_names = [
    existing.dataset_id
    for existing in client.list_datasets(GOOGLE_CLOUD_PROJECT)
]

# Dataset object describing the target dataset and its location.
dataset = bigquery.Dataset("{}.{}".format(GOOGLE_CLOUD_PROJECT, BQ_DATASET_NAME))
dataset.location = "US"

# Create the dataset only when its id is not already listed for the project.
if BQ_DATASET_NAME not in dataset_names:
    dataset = client.create_dataset(dataset)
    print("Created dataset {}.{}".format(client.project, dataset.dataset_id))

print("BigQuery dataset is ready.")

Created dataset tfx-cloud-project.data_validation
BigQuery dataset is ready.


## Create BQ Table

In [4]:
import json

# Schema of the BigQuery log table: one entry per column with its name,
# BigQuery type and mode.
table_schema_json = [
    {"name": "model", "type": "STRING", "mode": "REQUIRED"},
    {"name": "model_version", "type": "STRING", "mode": "REQUIRED"},
    {"name": "time", "type": "TIMESTAMP", "mode": "REQUIRED"},
    {"name": "raw_data", "type": "STRING", "mode": "REQUIRED"},
    {"name": "raw_prediction", "type": "STRING", "mode": "NULLABLE"},
    {"name": "groundtruth", "type": "STRING", "mode": "NULLABLE"},
]

# Write the schema to disk. The context manager closes (and therefore flushes)
# the file as soon as the dump is done, so the complete JSON is on disk before
# anything else reads 'table_schema.json'.
with open('table_schema.json', 'w') as schema_file:
    json.dump(table_schema_json, schema_file)

In [5]:
# Create the BigQuery table {GOOGLE_CLOUD_PROJECT}:{BQ_DATASET_NAME}.{BQ_TABLE_NAME}
# with the `bq` command-line tool, passing 'table_schema.json' as the schema.
!bq mk --table \
 --project_id={GOOGLE_CLOUD_PROJECT} \
 {GOOGLE_CLOUD_PROJECT}:{BQ_DATASET_NAME}.{BQ_TABLE_NAME} \
 'table_schema.json'

Table 'tfx-cloud-project:data_validation.sentiment_analysis_logs' successfully created.


## Activate logging

In [23]:
import googleapiclient.discovery
import re

PIPELINE_NAME = 'sentiment_analysis_tfx'
LOCATION = 'us-central1'
# Matches everything that follows "versions/" in a version resource name,
# i.e. the bare version id.
pattern = re.compile(r'(?<=versions/).+')
service = googleapiclient.discovery.build('ml', 'v1')

# The model resource name is the pipeline name with dashes replaced by underscores.
parent = f"projects/{GOOGLE_CLOUD_PROJECT}/models/{PIPELINE_NAME.replace('-', '_')}"
versions_list = service.projects().models().versions().list(parent=parent).execute()

# Resource names of the versions flagged as default. A response without a
# 'versions' entry is treated the same as an empty list.
default_version_names = [
    version['name']
    for version in versions_list.get('versions', [])
    if version.get('isDefault')
]

# Later cells need both `name` and `VERSION_NAME`; stop here with an explicit
# error rather than printing a message and leaving them undefined (or stale
# from a previous run).
if not default_version_names:
    raise RuntimeError('The request did not return a default version')

name = default_version_names[0]
version_match = pattern.search(name)
if version_match is None:
    raise RuntimeError(f'Unexpected version resource name: {name}')
VERSION_NAME = version_match.group()
print(name)

projects/tfx-cloud-project/models/sentiment_analysis_tfx/versions/v1665514300


In [1]:
# Sampling value sent as "samplingPercentage" — presumably a fraction where 1.0
# means every request is logged; confirm against the API reference.
sampling_percentage = 1.0
# Fully qualified destination table: <project>.<dataset>.<table>.
bq_full_table_name = '{}.{}.{}'.format(GOOGLE_CLOUD_PROJECT, BQ_DATASET_NAME, BQ_TABLE_NAME)

service = googleapiclient.discovery.build('ml', 'v1')

# Request-logging settings, wrapped in the body expected by the patch call.
request_logging = {
    "samplingPercentage": sampling_percentage,
    "bigqueryTableName": bq_full_table_name,
}
logging_config = {"requestLoggingConfig": request_logging}

# Patch only the "requestLoggingConfig" field of the version referenced by `name`.
versions_api = service.projects().models().versions()
versions_api.patch(
    name=name,
    body=logging_config,
    updateMask="requestLoggingConfig",
).execute()

## Make requests

In [8]:
import googleapiclient.discovery
import os

def predict_json(project, model, instances, signature_name=None, version=None):
    """Send JSON instances to a deployed model and return its predictions.

    Args:
        project (str): project where the model is deployed.
        model (str): model name.
        instances ([Mapping[str: Any]]): Keys should be the names of Tensors
            your deployed model expects as inputs. Values should be datatypes
            convertible to Tensors, or (potentially nested) lists of datatypes
            convertible to tensors.
        signature_name (str, optional): name of the model signature to
            invoke; sent as ``signature_name`` in the request body. When
            None, the key is left out of the request body.
        version (str, optional): version of the model to target. When None,
            the request is addressed to the model resource name without a
            ``/versions/...`` suffix.
    Returns:
        Mapping[str: any]: dictionary of prediction results defined by the
            model (the ``predictions`` entry of the response).
    Raises:
        RuntimeError: if the response contains an ``error`` entry; the error
            payload is passed as the exception argument.
    """

    service = googleapiclient.discovery.build('ml', 'v1')
    name = 'projects/{}/models/{}'.format(project, model)

    if version is not None:
        name += '/versions/{}'.format(version)

    # Only include the signature when the caller asked for one, so that a
    # missing signature is not serialized as an explicit null.
    body = {"instances": instances}
    if signature_name is not None:
        body["signature_name"] = signature_name

    response = service.projects().predict(name=name, body=body).execute()

    if 'error' in response:
        raise RuntimeError(response['error'])

    return response['predictions']

In [10]:
import pandas as pd

In [11]:
# Load the CSV file under ./modules/data into a DataFrame; later cells take
# the rows to send for prediction from it.
df_to_predict = pd.read_csv('./modules/data/data.csv')

In [33]:
# Rows at positions 100-109 of the `title` column, as one {'title': value}
# record per row.
my_dict = df_to_predict[['title']].iloc[100:110].to_dict(orient='records')
# Build the request instances: each key gets a '-prod' suffix and each value is
# wrapped in a one-element list.
dict_to_predict = [
    {column + '-prod': [value] for column, value in record.items()}
    for record in my_dict
]

In [34]:
# Send the prepared instances to the deployed model through the "predict_raw"
# signature; no version is passed to predict_json.
signature_name = "predict_raw"
model_name = "sentiment_analysis_tfx"
predicts = predict_json(
    project=GOOGLE_CLOUD_PROJECT,
    model=model_name,
    instances=dict_to_predict,
    signature_name=signature_name,
)

In [35]:
# Display the predictions returned by predict_json for the sampled rows.
predicts

[{'probabilities': [0.0001798063749447465,
   0.9774225950241089,
   0.022397633641958237],
  'prediction_confidence': 0.9774225950241089,
  'label_key': 1},
 {'probabilities': [5.552982474910095e-05,
   0.9983223080635071,
   0.001622054260224104],
  'prediction_confidence': 0.9983223080635071,
  'label_key': 1},
 {'probabilities': [0.0006623921217396855,
   0.9977486729621887,
   0.0015888793859630823],
  'prediction_confidence': 0.9977486729621887,
  'label_key': 1},
 {'probabilities': [0.0020524056162685156,
   0.996985137462616,
   0.0009624735685065389],
  'prediction_confidence': 0.996985137462616,
  'label_key': 1},
 {'probabilities': [0.005938063841313124,
   0.9933116436004639,
   0.0007503399392589927],
  'prediction_confidence': 0.9933116436004639,
  'label_key': 1},
 {'probabilities': [0.00020802860672120005,
   0.9994733929634094,
   0.0003186532121617347],
  'prediction_confidence': 0.9994733929634094,
  'label_key': 1},
 {'probabilities': [6.33958843536675e-05,
   0.992

In [24]:
import create_view

In [25]:
# Call the project-local `create_view` helper (its implementation is not part
# of this notebook). Judging by the recorded output, it creates or replaces a
# BigQuery view over the logs table for this model/version and returns the SQL
# statement — NOTE(review): confirm against the create_view module.
create_view.create_view(GOOGLE_CLOUD_PROJECT, BQ_DATASET_NAME, BQ_TABLE_NAME, PIPELINE_NAME, VERSION_NAME)

View was created or replaced.


'\n    CREATE OR REPLACE VIEW data_validation.vw_sentiment_analysis_logs_v1665514300\n    AS\n    SELECT \n        model, \n        model_version, \n        time,\n     ARRAY(\n          SELECT AS STRUCT\n          JSON_EXTRACT(instances, \'$.title-prod[0]\') AS title, \n          CAST(JSON_EXTRACT(predictions, \'$.label_key\') AS FLOAT64) AS label_key, \r\n    CAST(JSON_EXTRACT(predictions, \'$.prediction_confidence\') AS FLOAT64) AS prediction_confidence\n          FROM \n          UNNEST(JSON_EXTRACT_ARRAY(raw_prediction, "$.predictions")\n          ) predictions WITH OFFSET AS f1 \n          JOIN\n          UNNEST(JSON_EXTRACT_ARRAY(raw_data, "$.instances")) instances WITH OFFSET AS f2\n          ON f1=f2\n      ) as request\n    FROM \n    tfx-cloud-project.data_validation.sentiment_analysis_logs\n    WHERE \n    model = "sentiment_analysis_tfx" AND\n    model_version = "v1665514300"\n    '