In [1]:
!pip install -qq transformers==4.27.1 onnxruntime==1.14.1 datarobot==3.1.0
from transformers import AutoTokenizer, TFBertForQuestionAnswering
import onnxruntime as ort
import numpy as np
from pprint import pprint

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.0/5.0 MB[0m [31m22.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m445.3/445.3 KB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 KB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.5/54.5 KB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.8/86.8 KB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
FOUNDATION_MODEL = "bert-large-uncased-whole-word-masking-finetuned-squad"

# Get BERT for question answering

In [4]:
tokenizer = AutoTokenizer.from_pretrained(FOUNDATION_MODEL)
BASE_PATH = "/content/blog"
tokenizer.save_pretrained(BASE_PATH)

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

('/content/blog/tokenizer_config.json',
 '/content/blog/special_tokens_map.json',
 '/content/blog/vocab.txt',
 '/content/blog/added_tokens.json',
 '/content/blog/tokenizer.json')

# Export the model as ONNX

In [5]:
!python -m transformers.onnx --model=$FOUNDATION_MODEL --feature=question-answering $BASE_PATH

2023-03-25 00:25:57.471893: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
2023-03-25 00:25:57.472018: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/lib64-nvidia
Framework not requested. Using torch to export to ONNX.
Downloading pytorch_model.bin: 100% 1.34G/1.34G [00:05<00:00, 259MB/s]
Using framework PyTorch: 1.13.1+cu116
Overriding 1 configuration item(s)
	- use_cache -> False
Validating ONNX model...
	-[✓] ONNX model output names match reference model ({'start_logits', 'end_logits'})
	- Validating ONNX Model output "start_logits":
		-[✓] (3, 9) matches (3, 9)
		-[✓] all values close (at

# Deploy to DataRobot

## Assemble inference scripts & dependencies

In [6]:
%%writefile $BASE_PATH/custom.py
"""
Copyright 2021 DataRobot, Inc. and its affiliates.
All rights reserved.
This is proprietary source code of DataRobot, Inc. and its affiliates.
Released under the terms of DataRobot Tool and Utility Agreement.
"""
import json
import os
import io
from transformers import AutoTokenizer
import onnxruntime as ort
import numpy as np
import pandas as pd


def load_model(input_dir):
    global model_load_duration
    onnx_path = os.path.join(input_dir, "model.onnx")
    tokenizer_path = os.path.join(input_dir)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
    sess = ort.InferenceSession(onnx_path)
    return sess, tokenizer


def _get_answer_in_text(output, input_ids, idx, tokenizer):
    answer_start = np.argmax(output[0], axis=1)[idx]
    answer_end = (np.argmax(output[1], axis=1) + 1)[idx]
    answer = tokenizer.convert_tokens_to_string(
        tokenizer.convert_ids_to_tokens(input_ids[answer_start:answer_end])
    )
    return answer


def score_unstructured(model, data, query, **kwargs):
    global model_load_duration
    sess, tokenizer = model

    # Assume batch input is sent with mimetype:"text/csv"
    # Treat as single prediction input if no mimetype is set
    is_batch = kwargs["mimetype"] == "text/csv"

    if is_batch:
        input_pd = pd.read_csv(io.StringIO(data), sep="|")
        input_pairs = list(zip(input_pd["context"], input_pd["question"]))

        inputs = tokenizer.batch_encode_plus(
            input_pairs, add_special_tokens=True, padding=True, return_tensors="np"
        )
        input_ids = inputs["input_ids"]
        output = sess.run(["start_logits", "end_logits"], input_feed=dict(inputs))
        responses = []
        for i, row in input_pd.iterrows():
            answer = _get_answer_in_text(output, input_ids[i], i, tokenizer)
            response = {
                "context": row["context"],
                "question": row["question"],
                "answer": answer,
            }
            responses.append(response)
        to_return = json.dumps(
            {
                "predictions": responses
            }
        )
    else:
        data_dict = json.loads(data)
        context, question = data_dict["context"], data_dict["question"]
        inputs = tokenizer(
            question,
            context,
            add_special_tokens=True,
            padding=True,
            return_tensors="np",
        )
        input_ids = inputs["input_ids"][0]
        output = sess.run(["start_logits", "end_logits"], input_feed=dict(inputs))
        answer = _get_answer_in_text(output, input_ids, 0, tokenizer)
        to_return = json.dumps(
            {
                "context": context,
                "question": question,
                "answer": answer
            }
        )

    return to_return

Writing /content/blog/custom.py


In [7]:
%%writefile $BASE_PATH/requirements.txt
transformers

Writing /content/blog/requirements.txt


## Upload using DataRobot Python API

In [8]:
def deploy_to_datarobot(folder_path, env_name, model_name, descr):
  API_TOKEN = "NjNlZjViYTM0ODI4Y2I2ZTZhOGIyZmZhOi1nY0t5c0VsWXhJdXJLN3BYZTlvdHoyc0h6NEtCLU1q"
  import datarobot as dr
  dr.Client(token=API_TOKEN, endpoint='https://app.datarobot.com/api/v2/')
  
  onnx_execution_env = dr.ExecutionEnvironment.list(search_for=env_name)[0]
  custom_model = dr.CustomInferenceModel.create(
      name=model_name,
      target_type=dr.TARGET_TYPE.UNSTRUCTURED,
      description=descr,
      language='python'
  )
  print(f"Creating custom model version on {onnx_execution_env}...")
  
  model_version = dr.CustomModelVersion.create_clean(
      custom_model_id=custom_model.id,
      base_environment_id=onnx_execution_env.id,
      folder_path=folder_path,
      maximum_memory=4096 * 1024 * 1024,
  )
  print(f"Created {model_version}.")

  versions = dr.CustomModelVersion.list(custom_model.id)
  sorted_versions = sorted(versions, key=lambda v: v.label)
  latest_version = sorted_versions[-1]

  print("Building the execution environment with dependency packages...")
  build_info = dr.CustomModelVersionDependencyBuild.start_build(
      custom_model_id=custom_model.id,
      custom_model_version_id=latest_version.id,
      max_wait=3600,
  )
  print(f"Environment build completed with {build_info.build_status}.")

  print("Creating model deployment...")
  default_prediction_server = dr.PredictionServer.list()[0]
  deployment = dr.Deployment.create_from_custom_model_version(latest_version.id,
                                                              label=model_name,
                                                              description=descr,
                                                              default_prediction_server_id=default_prediction_server.id,
                                                              max_wait=600,
                                                              importance=None)
  
  print(f"{deployment} is ready!")
  return deployment

In [9]:
"""
Usage:
    python datarobot-predict.py <input-file> [mimetype] [charset]

This example uses the requests library which you can install with:
    pip install requests
We highly recommend that you update SSL certificates with:
    pip install -U urllib3[secure] certifi
"""
import sys
import json
import requests

API_URL = 'https://mlops-dev.dynamic.orm.datarobot.com/predApi/v1.0/deployments/{deployment_id}/predictionsUnstructured'    # noqa
API_KEY = 'NjNlZjViYTM0ODI4Y2I2ZTZhOGIyZmZhOi1nY0t5c0VsWXhJdXJLN3BYZTlvdHoyc0h6NEtCLU1q'
DATAROBOT_KEY = '406748c1-3d69-84e0-a80e-0596b1e0991a'

# Don't change this. It is enforced server-side too.
MAX_PREDICTION_FILE_SIZE_BYTES = 52428800  # 50 MB


class DataRobotPredictionError(Exception):
    """Raised if there are issues getting predictions from DataRobot"""


def make_datarobot_deployment_unstructured_predictions(data, deployment_id, mimetype, charset):
    """
    Make unstructured predictions on data provided using DataRobot deployment_id provided.
    See docs for details:
         https://app.datarobot.com/docs/predictions/api/dr-predapi.html

    Parameters
    ----------
    data : bytes
        Bytes data read from provided file.
    deployment_id : str
        The ID of the deployment to make predictions with.
    mimetype : str
        Mimetype describing data being sent.
        If mimetype starts with 'text/' or equal to 'application/json',
        data will be decoded with provided or default(UTF-8) charset
        and passed into the 'score_unstructured' hook implemented in custom.py provided with the model.

        In case of other mimetype values data is treated as binary and passed without decoding.
    charset : str
        Charset should match the contents of the file, if file is text.

    Returns
    -------
    data : bytes
        Arbitrary data returned by unstructured model.


    Raises
    ------
    DataRobotPredictionError if there are issues getting predictions from DataRobot
    """
    # Set HTTP headers. The charset should match the contents of the file.
    headers = {
        'Content-Type': '{};charset={}'.format(mimetype, charset),
        'Authorization': 'Bearer {}'.format(API_KEY),
        'DataRobot-Key': DATAROBOT_KEY,
    }

    url = API_URL.format(deployment_id=deployment_id)

    # Make API request for predictions
    predictions_response = requests.post(
        url,
        data=data,
        headers=headers,
    )
    _raise_dataroboterror_for_status(predictions_response)
    # Return raw response content
    return predictions_response.content


def _raise_dataroboterror_for_status(response):
    """Raise DataRobotPredictionError if the request fails along with the response returned"""
    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        err_msg = '{code} Error: {msg}'.format(
            code=response.status_code, msg=response.text)
        raise DataRobotPredictionError(err_msg)


def datarobot_predict_file(filename, deployment_id, mimetype='text/csv', charset='utf-8'):
    """
    Return an exit code on script completion or error. Codes > 0 are errors to the shell.
    Also useful as a usage demonstration of
    `make_datarobot_deployment_unstructured_predictions(data, deployment_id, mimetype, charset)`
    """
    data = open(filename, 'rb').read()
    data_size = sys.getsizeof(data)
    if data_size >= MAX_PREDICTION_FILE_SIZE_BYTES:
        print((
                  'Input file is too large: {} bytes. '
                  'Max allowed size is: {} bytes.'
              ).format(data_size, MAX_PREDICTION_FILE_SIZE_BYTES))
        return 1
    try:
        predictions = make_datarobot_deployment_unstructured_predictions(data, deployment_id, mimetype, charset)
        return predictions
    except DataRobotPredictionError as exc:
        pprint(exc)
        return None

def datarobot_predict(input_dict, deployment_id, mimetype='application/json', charset='utf-8'):
    """
    Return an exit code on script completion or error. Codes > 0 are errors to the shell.
    Also useful as a usage demonstration of
    `make_datarobot_deployment_unstructured_predictions(data, deployment_id, mimetype, charset)`
    """
    data = json.dumps(input_dict).encode(charset)
    data_size = sys.getsizeof(data)
    if data_size >= MAX_PREDICTION_FILE_SIZE_BYTES:
        print((
                  'Input file is too large: {} bytes. '
                  'Max allowed size is: {} bytes.'
              ).format(data_size, MAX_PREDICTION_FILE_SIZE_BYTES))
        return 1
    try:
        predictions = make_datarobot_deployment_unstructured_predictions(data, deployment_id, mimetype, charset)
        return json.loads(predictions)['answer']
    except DataRobotPredictionError as exc:
        pprint(exc)
        return None

## Create deployment

In [10]:
deployment = deploy_to_datarobot(BASE_PATH, 
                                 "ONNX", 
                                 "bert-onnx-questionAnswering", 
                                 "Pretrained BERT model, fine-tuned on SQUAD for question answering")

Creating custom model version on ExecutionEnvironment('[DataRobot] Python 3.9 ONNX Drop-In')...
Created CustomModelVersion('v1.0').
Building the execution environment with dependency packages...
Environment build completed with success.
Creating model deployment...
Deployment(bert-onnx-questionAnswering) is ready!


# Use in production

In [11]:
test_input = {"context": "Healthcare tasks (e.g., patient care via disease treatment) and biomedical research (e.g., scientific discovery of new therapies) require expert knowledge that is limited and expensive. Foundation models present clear opportunities in these domains due to the abundance of data across many modalities (e.g., images, text, molecules) to train foundation models, as well as the value of improved sample efficiency in adaptation due to the cost of expert time and knowledge. Further, foundation models may allow for improved interface design (§2.5: interaction) for both healthcare providers and patients to interact with AI systems, and their generative capabilities suggest potential for open-ended research problems like drug discovery. Simultaneously, they come with clear risks (e.g., exacerbating historical biases in medical datasets and trials). To responsibly unlock this potential requires engaging deeply with the sociotechnical matters of data sources and privacy as well as model interpretability and explainability, alongside effective regulation of the use of foundation models for both healthcare and biomedicine.", "question": "Where can we use foundation models?"}
pprint(test_input)

{'context': 'Healthcare tasks (e.g., patient care via disease treatment) and '
            'biomedical research (e.g., scientific discovery of new therapies) '
            'require expert knowledge that is limited and expensive. '
            'Foundation models present clear opportunities in these domains '
            'due to the abundance of data across many modalities (e.g., '
            'images, text, molecules) to train foundation models, as well as '
            'the value of improved sample efficiency in adaptation due to the '
            'cost of expert time and knowledge. Further, foundation models may '
            'allow for improved interface design (§2.5: interaction) for both '
            'healthcare providers and patients to interact with AI systems, '
            'and their generative capabilities suggest potential for '
            'open-ended research problems like drug discovery. Simultaneously, '
            'they come with clear risks (e.g., exacerbating histori

In [12]:
datarobot_predict(test_input, deployment.id)

'both healthcare and biomedicine'