## Connect to workspace

In [None]:
from azureml.core import Workspace

# Obtain the workspace handle and print its identifying details, one per line.
ws = Workspace.from_config()
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

## Create score script

- init() function is executed upon deployment
- run() function is executed on each service call

In [None]:
%%writefile score.py
import os
import json
import tensorflow as tf
from transformers import BertTokenizer
import logging
# Raise the threshold of the tokenizer utilities logger so that only errors
# from it are reported.
logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR)

# Fixed sequence length: run() zero-pads every encoded input up to this size.
max_seq_length = 128

def init():
    """Create the BERT tokenizer and store it in the module-level ``tokenizer``.

    run() reads that global on every call, so the tokenizer is built here once
    instead of per request.
    """
    global tokenizer
    pretrained_name = 'bert-base-cased'
    tokenizer = BertTokenizer.from_pretrained(pretrained_name)

def run(raw_data):
    """Tokenize the request text and return padded BERT model inputs.

    Args:
        raw_data: JSON string holding an object with a ``text`` field, the
            text to encode.

    Returns:
        A dict with the keys ``input_ids``, ``attention_mask`` and
        ``token_type_ids``. Each value is a one-element list wrapping the
        corresponding sequence, zero-padded up to ``max_seq_length``.

    Raises:
        KeyError: if the decoded JSON object has no ``text`` field.
    """
    text = json.loads(raw_data)['text']

    # Encode inputs using tokenizer. The length limit is taken from
    # max_seq_length (not a separate literal) so it always agrees with the
    # padding target used below.
    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=max_seq_length
    )
    input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"]

    # The mask has 1 for real tokens and 0 for padding tokens.
    # Only real tokens are attended to.
    attention_mask = [1] * len(input_ids)

    # Zero-pad up to the sequence length.
    padding_length = max_seq_length - len(input_ids)
    input_ids = input_ids + ([0] * padding_length)
    attention_mask = attention_mask + ([0] * padding_length)
    token_type_ids = token_type_ids + ([0] * padding_length)

    # Each sequence is wrapped in an outer list, i.e. a batch of one.
    result = {
        'input_ids': [input_ids],
        'attention_mask': [attention_mask],
        'token_type_ids': [token_type_ids]
    }

    return result

## Specify dependencies and inference config

In [None]:
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies 

# Packages for the scoring environment, grouped by the installer that provides them.
conda_pkgs = ['numpy', 'pandas']
pip_pkgs = [
    'inference-schema[numpy-support]',
    'azureml-defaults',
    'tensorflow==2.0.0',
    'transformers==2.0.0',
]
myenv = CondaDependencies.create(conda_packages=conda_pkgs,
                                 pip_packages=pip_pkgs)

# Write the serialized dependency specification to myenv.yml.
with open("myenv.yml", "w") as env_file:
    env_file.write(myenv.serialize_to_string())

In [None]:
from azureml.core.model import InferenceConfig

# Inference settings: Python runtime, score.py as the entry script and
# myenv.yml as the conda file, with the current directory as the source.
inference_settings = {
    "source_directory": "./",
    "runtime": "python",
    "entry_script": "score.py",
    "conda_file": "myenv.yml",
}
inference_config = InferenceConfig(**inference_settings)

## Deploy ACI Service
(takes about 10 min)

In [None]:
from azureml.core.model import Model
from azureml.core.webservice import Webservice
from azureml.exceptions import WebserviceException
from azureml.core.webservice import AciWebservice, Webservice

# Resources and metadata for the Azure Container Instances (ACI) deployment.
aci_config = AciWebservice.deploy_configuration(cpu_cores=2,
                                               memory_gb=4,
                                               tags={"model": "BERT encoder",  "method" : "tensorflow"},
                                               description='Encodes text using bert tokenizer')

aci_service_name = 'bert-encoder-aciservice'

try:
    # Look up an existing service with this name. Since the ACI name needs to
    # be unique in the subscription, delete the existing service (if any)
    # before aci_service_name is reused for the new deployment.
    aci_service = Webservice(ws, name=aci_service_name)
    if aci_service:
        aci_service.delete()
except WebserviceException as e:
    # Best effort: carry on with the deployment, but report why nothing was
    # deleted instead of silently printing an empty line.
    print('No existing service was deleted:', e)

# No registered models are attached (empty list); the entry script loads what
# it needs by itself.
aci_service = Model.deploy(ws, aci_service_name, [], inference_config, aci_config)

# Block until the deployment finishes, streaming its output, then show the
# final service state.
aci_service.wait_for_deployment(True)
print(aci_service.state)

## Test Service

In [None]:
import json
# Request body in the shape score.py's run() parses: an object with a 'text' field.
request_body = {'text': 'Hello from BERT World'}
raw_data = json.dumps(request_body)

# Call the deployed service through the SDK and show what it returns.
result = aci_service.run(input_data=raw_data)
print(result)

## Test service using HTTP

In [None]:
# Show the service's scoring URI; the HTTP test further down posts to it.
print(aci_service.scoring_uri)

In [None]:
import json
import requests

query = 'Hello from BERT World'
# Serialize with json.dumps so that quotes, backslashes and control characters
# in the query are escaped properly; assembling the JSON text by string
# concatenation yields an invalid body for such input.
input_data = json.dumps({'text': query})
headers = {'Content-Type':'application/json'}
response = requests.post(aci_service.scoring_uri, input_data, headers=headers)

print(response.text)