In [1]:
import cv2
import pickle
import tempfile
import chassisml
import numpy as np
import getpass
import torch
from torch.nn import functional as F
from shutil import rmtree
import json
import onnx
from onnx import backend
from onnx import numpy_helper
import onnxruntime as ort
from transformers import GPT2Model, GPT2LMHeadModel, GPT2Tokenizer

## Enter credentials
Dockerhub creds and Modzy API Key

In [2]:
dockerhub_user = getpass.getpass('docker hub username')
dockerhub_pass = getpass.getpass('docker hub password')
modzy_api_key = getpass.getpass('modzy api key')

docker hub username········
docker hub password········
modzy api key········


## Load ONNX Model and Test Locally
This model was downloaded from the [ONNX Model Zoo](https://github.com/onnx/models/tree/master/text/machine_comprehension/gpt-2), which contains several pre-trained models saved in the ONNX open standard format.

In [3]:
# check gpt-2 and gpt-2 head models are valid onnx models files
model = onnx.load("models/model.onnx")
head_model = onnx.load("models/head_model.onnx")

# check onnx file is valid model
onnx.checker.check_model(model)
onnx.checker.check_model(head_model)

# load input tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

In [4]:
# WORKING FUNTIONS NEEDED
def flatten(inputs):
    return [[flatten(i) for i in inputs] if isinstance(inputs, (list, tuple)) else inputs]
def to_numpy(x):
    if type(x) is not np.ndarray:
        x = x.detach().cpu().numpy().astype(np.int64) if x.requires_grad else x.cpu().numpy().astype(np.int64)
    return x

In [10]:
batch_size = 1
length = 10

text = "this is a test"
tokens = np.array(tokenizer.encode(text, add_special_tokens=True))
tensors = torch.tensor([[tokens]])

prev = tensors
output = tensors

for i in range(length):
    session = ort.InferenceSession("models/head_model.onnx")
    ort_inputs = dict((session.get_inputs()[i].name, to_numpy(input)) for i, input in enumerate(flatten(prev)))
    outputs = session.run(None, ort_inputs)
    logits = torch.from_numpy(outputs[0])
    logits = logits[:, -1, :]
    log_probs = F.softmax(logits, dim=-1)
    _, prev = torch.topk(log_probs, k=1, dim=-1)
#     print(output.shape, prev.shape)
    prev = torch.reshape(prev, (1, prev.shape[2], prev.shape[1]))
#     print(output.shape, prev.shape)
#     sdfadsf
    output = torch.cat((output, prev), dim=1)
#     print(output.shape)

In [None]:
output = output[:, len(tokens):].tolist()
generated = 0
for i in range(batch_size):
    generated += 1
    text = tokenizer.decode(output[i])
    print(text)    

In [None]:
batch_size = 1
length = 10

text = "Here is some text to encode : Hello World!"
tokens = np.array(tokenizer.encode(text))
context = torch.tensor(tokens, device=device, dtype=torch.long).unsqueeze(0).repeat(batch_size, 1)
prev = context
output = context

for i in range(length):
    outputs = model(prev)
    logits = outputs[0]
    logits = logits[:, -1, :]
    log_probs = F.softmax(logits, dim=-1)
    _, prev = torch.topk(log_probs, k=1, dim=-1)
    output = torch.cat((output, prev), dim=1)

output = output[:, len(tokens):].tolist()
generated = 0
for i in range(batch_size):
    generated += 1
    text = tokenizer.decode(output[i])
    print(text)

In [None]:
output = output[:, len(tokens):].tolist()
generated = 0
for i in range(batch_size):
    generated += 1
    text = tokenizer.decode(output[i])
    print(text)

In [112]:
text = "this is a test"
tokens = np.array(tokenizer.encode(text, add_special_tokens=True))
tensors = torch.tensor([[tokens]])

prev = tensors
output = tensors

for i in range(length):
    session = ort.InferenceSession("models/head_model.onnx")
    ort_inputs = dict((session.get_inputs()[i].name, to_numpy(input)) for i, input in enumerate(flatten(prev)))
    outputs = session.run(None, ort_inputs)
    logits = torch.from_numpy(outputs[0])
    logits = logits[:, -1, :]
    log_probs = F.softmax(logits, dim=-1)
    _, prev = torch.topk(log_probs, k=1, dim=-1)
#     print(output.shape, prev.shape)
    prev = torch.reshape(prev, (1, prev.shape[2], prev.shape[1]))
    output = torch.cat((output, prev), dim=1)

In [113]:
output = output[:, len(tokens):].tolist()
generated = 0
for i in range(batch_size):
    generated += 1
    text = tokenizer.decode(output[i])
    print(text)

TypeError: int() argument must be a string, a bytes-like object or a number, not 'list'

In [111]:
generated = 0
for i in range(batch_size):
    generated += 1
#     print(output[i])
    text = tokenizer.decode(output[i])
    print(text)

TypeError: int() argument must be a string, a bytes-like object or a number, not 'list'

In [104]:
print(tensors.shape) 
print(tokens.shape)

torch.Size([1, 1, 4])
(4,)


In [101]:
text = 'this is a test'
tokens = np.array(gpt2_tokenizer.encode(text))
tensors = torch.tensor(tokens, device="cpu", dtype=torch.long).unsqueeze(0)#.repeat(batch_size, 1)
tensors = torch.reshape(tensors, (1, tensors.shape[0], tensors.shape[1]))

session = ort.InferenceSession("models/head_model.onnx")
ort_inputs = dict((session.get_inputs()[i].name, to_numpy(input)) for i, input in enumerate(flatten(tensors)))
outputs = session.run(None, ort_inputs)

In [94]:
tensors.shape

torch.Size([1, 4])

In [None]:
# trying post processing
batch_size = 1
length = 10
text = 'ONNX Models are great. Chassisml makes it so easy to deploy it!'
tokens = np.array(gpt2_tokenizer.encode(text))
tensors = torch.tensor(tokens, device="cpu", dtype=torch.long).unsqueeze(0).repeat(batch_size, 1)
tensors = torch.reshape(tensors, (1, tensors.shape[0], tensors.shape[1]))
prev = tensors
output = tensors

# tensors = torch.tensor([[tokens]])
# tensors = torch.tensor([
#     [tokenizer.encode(text, add_special_tokens=True)]
# ])

# prev = tensors
# # output = torch.reshape(tensors, (1, tensors.shape[2], tensors.shape[1]))
# output = tensors
print(tokens.shape, tensors.shape)

for i in range(length):
    session = ort.InferenceSession("models/head_model.onnx")
    ort_inputs = dict((session.get_inputs()[i].name, to_numpy(input)) for i, input in enumerate(flatten(prev)))
    outputs = session.run(None, ort_inputs)
    logits = torch.from_numpy(outputs[0])
    logits = logits[:, -1, :]
    log_probs = F.softmax(logits, dim=-1)
    _, prev = torch.topk(log_probs, k=1, dim=-1)
    print(output.shape, prev.shape)
    prev = torch.reshape(prev, (1, prev.shape[2], prev.shape[1]))
    output = torch.cat((output, prev), dim=1)

print(output, output.shape)
output = output[:, len(tokens):].tolist()

In [88]:
# trying post processing - not working correctly 
batch_size = 1
length = 10
text = 'ONNX Models are great. Chassisml makes it so easy to deploy it!'
tokens = np.array(gpt2_tokenizer.encode(text))
tensors = torch.tensor(tokens, device="cpu", dtype=torch.long).unsqueeze(0).repeat(batch_size, 1)
tensors = torch.reshape(tensors, (1, tensors.shape[0], tensors.shape[1]))
prev = tensors
output = tensors

# tensors = torch.tensor([[tokens]])
# tensors = torch.tensor([
#     [tokenizer.encode(text, add_special_tokens=True)]
# ])

# prev = tensors
# # output = torch.reshape(tensors, (1, tensors.shape[2], tensors.shape[1]))
# output = tensors
print(tokens.shape, tensors.shape)

for i in range(length):
    session = ort.InferenceSession("models/head_model.onnx")
    ort_inputs = dict((session.get_inputs()[i].name, to_numpy(input)) for i, input in enumerate(flatten(prev)))
    outputs = session.run(None, ort_inputs)
    logits = torch.from_numpy(outputs[0])
    logits = logits[:, -1, :]
    log_probs = F.softmax(logits, dim=-1)
    _, prev = torch.topk(log_probs, k=1, dim=-1)
    print(output.shape, prev.shape)
    prev = torch.reshape(prev, (1, prev.shape[2], prev.shape[1]))
    output = torch.cat((output, prev), dim=1)

print(output, output.shape)
output = output[:, len(tokens):].tolist()

(19,) torch.Size([1, 1, 19])
torch.Size([1, 1, 19]) torch.Size([1, 19, 1])
torch.Size([1, 2, 19]) torch.Size([1, 19, 1])
torch.Size([1, 3, 19]) torch.Size([1, 19, 1])
torch.Size([1, 4, 19]) torch.Size([1, 19, 1])
torch.Size([1, 5, 19]) torch.Size([1, 19, 1])
torch.Size([1, 6, 19]) torch.Size([1, 19, 1])
torch.Size([1, 7, 19]) torch.Size([1, 19, 1])
torch.Size([1, 8, 19]) torch.Size([1, 19, 1])
torch.Size([1, 9, 19]) torch.Size([1, 19, 1])
torch.Size([1, 10, 19]) torch.Size([1, 19, 1])
tensor([[[ 1340,    45,    55, 32329,   389,  1049,    13,   609,   562,  1042,
             75,  1838,   340,   523,  2562,   284,  6061,   340,     0],
         [   13,  4877,    12,   198,  1695,   329,   314,  1817, 21080,   318,
            318,   606,  2562,   345,   284,   651,   290,    13,   198],
         [  198,    13,    16,   198,   287,  4321,  2640,    11,    11,   257,
            257,  1512,   284,   460,   779,   340,   484,   198,   198],
         [  198,   198,    13,   198,   262,  42

In [89]:
output

[[]]

In [78]:
output_text = []
batch_size=1
generated = 0
for i in range(batch_size):
    generated += 1
    print(output[i])
    text = tokenizer.decode(output[i])
    output_text.append(text)

[]


In [70]:
output

[[[13],
  [4877],
  [12],
  [198],
  [1695],
  [329],
  [314],
  [1817],
  [21080],
  [318],
  [318],
  [606],
  [2562],
  [345],
  [284],
  [651],
  [290],
  [13],
  [198],
  [13],
  [4877],
  [12],
  [198],
  [1695],
  [329],
  [314],
  [1817],
  [21080],
  [318],
  [318],
  [606],
  [2562],
  [345],
  [284],
  [651],
  [290],
  [13],
  [198],
  [13],
  [4877],
  [12],
  [198],
  [1695],
  [329],
  [314],
  [1817],
  [21080],
  [318],
  [318],
  [606],
  [2562],
  [345],
  [284],
  [651],
  [290],
  [13],
  [198],
  [13],
  [4877],
  [12],
  [198],
  [1695],
  [329],
  [314],
  [1817],
  [21080],
  [318],
  [318],
  [606],
  [2562],
  [345],
  [284],
  [651],
  [290],
  [13],
  [198],
  [13],
  [4877],
  [12],
  [198],
  [1695],
  [329],
  [314],
  [1817],
  [21080],
  [318],
  [318],
  [606],
  [2562],
  [345],
  [284],
  [651],
  [290],
  [13],
  [198],
  [13],
  [4877],
  [12],
  [198],
  [1695],
  [329],
  [314],
  [1817],
  [21080],
  [318],
  [318],
  [606],
  [2562],
  [345],


In [29]:
# This will be passed to Chassis:
context = {
    "model": head_model,
    "tokenizer": tokenizer,
    "utilities": {
        "flatten": flatten,
        "to_numpy": to_numpy
    }
}

## Write process function

* Must take bytes and context dict as input
* Preprocess bytes, run inference, postprocess model output, return results

In [30]:
def process(input_bytes,context):
    length = 10
    # save model to filepath for inference
    tmp_dir = tempfile.mkdtemp()
    import onnx
    onnx.save(context["model"], "{}/model.onnx".format(tmp_dir))
    
    text = input_bytes.decode()
    tokens = np.array(context["tokenizer"].encode(text, add_special_tokens=False))
    tokens = np.array(gpt2_tokenizer.encode(text, add_special_tokens=False))
    tensors = torch.tensor([[tokens]])

    prev = context
    output = context

    for i in range(length):
        session = ort.InferenceSession("{}/model.onnx".format(tmp_dir))
        ort_inputs = dict((session.get_inputs()[i].name, context["utilities"]["to_numpy"](input)) for i, input in enumerate(context["flatten"](tensor_inputs)))
        outputs = session.run(None, ort_inputs)
        logits = torch.from_numpy(outputs[0])
        logits = logits[:, -1, :]
        log_probs = F.softmax(logits, dim=-1)
        _, prev = torch.topk(log_probs, k=1, dim=-1)
        prev = torch.reshape(prev, (prev.shape[0], prev.shape[1]))
        output = torch.cat((output, prev), dim=1)

    output = output[:, len(tokens):].tolist()
    generated = 0
    output_text = []
    for i in range(1):
        generated += 1
        text = tokenizer.decode(output[i])
        output_text.append(text)
    
    # format results
    structured_result = {
        "data": {
            "result": {"nextWordPredictions": [{"word_{}".format(i): text_pred} for i, text_pred in enumerate(output_text)]}
        }
    }
    
    # remove temp directory
    rmtree(tmp_dir)
    return structured_result

## Initialize Chassis Client
We'll use this to interact with the Chassis service

In [31]:
chassis_client = chassisml.ChassisClient("http://localhost:5000")

## Create and test Chassis model
* Requires `context` dict containing all variables which should be loaded once and persist across inferences
* Requires `process_fn` defined above

In [32]:
# create Chassis model
chassis_model = chassis_client.create_model(context=context,process_fn=process)

# test Chassis model locally (can pass filepath, bufferedreader, bytes, or text here):
sample_filepath = 'data/sample_text.txt'
results = chassis_model.test(sample_filepath)
print(results)

IndexError: too many indices for tensor of dimension 2

In [None]:
# test environment and model within Chassis service, must pass filepath here:

# dry run before build
test_env_result = chassis_model.test_env(sample_filepath)
print(test_env_result)

## Publish model to Modzy
Need to provide model name, model version, Dockerhub credentials, and required Modzy info

In [None]:
response = chassis_model.publish(
    model_name="ONNX MobileNet Image Classification",
    model_version="0.0.1",
    registry_user=dockerhub_user,
    registry_pass=dockerhub_pass,
    modzy_sample_input_path=sample_filepath,
    modzy_api_key=modzy_api_key
)

job_id = response.get('job_id')
final_status = chassis_client.block_until_complete(job_id)

In [None]:
if chassis_client.get_job_status(job_id)["result"] is not None:
    print("New model URL: {}".format(chassis_client.get_job_status(job_id)["result"]["container_url"]))
else:
    print("Chassis job failed \n\n {}".format(chassis_client.get_job_status(job_id)))

## Run sample job using Modzy SDK
Submit inference job to our newly-deploy model running on Modzy

In [None]:
from modzy import ApiClient

client = ApiClient(base_url='https://integration.modzy.engineering/api', api_key=modzy_api_key)

input_name = final_status['result']['inputs'][0]['name']
model_id = final_status['result'].get("model").get("modelId")
model_version = final_status['result'].get("version")

inference_job = client.jobs.submit_file(model_id, model_version, {input_name: sample_filepath})
inference_job_result = client.results.block_until_complete(inference_job, timeout=None)
inference_job_results_json = inference_job_result.get_first_outputs()['results.json']
print(inference_job_results_json)