# Deploy as Web Service

There are various ways to put a model into production; here we'll see how to make it available as a web service hosted in the cloud, leveraging **Microsoft Azure Machine Learning Services** and **Azure Container Instances (ACI)**.

The following code is based on the official Microsoft Azure Machine Learning documentation tutorial:  
https://docs.microsoft.com/en-us/azure/machine-learning/service/tutorial-deploy-models-with-aml

## Prepare Model for Production

In [None]:
from fastai.text import *

In [None]:
DATA_PATH = Path('../datasets/20news')
DATA_PATH.mkdir(exist_ok=True)

In [None]:
bs = 32

In [None]:
drop_mult = 0.5

In [None]:
data_clas = TextClasDataBunch.load(DATA_PATH, 'tmp_clas', bs=bs)

In [None]:
learn = text_classifier_learner(data_clas, drop_mult=drop_mult)
learn.load('final')

The following parameters should match the ones used to train the model.  
*In this specific example* most of them are fastai default values, so we can get them from the fastai library source code,  
specifically from the internal implementation of the fastai.text learner and its related modules.

In [None]:
# Dropout values: fastai's classifier defaults scaled by drop_mult
drop_mult = 0.5
dps = default_dropout['classifier'] * drop_mult

# Settings handed to get_rnn_classifier by the scoring scripts
bptt, max_len = 70, 70*20
emb_sz, nh, nl = 400, 1150, 3
pad_token = 1
qrnn = False

# Sizes read from the loaded data bunch
vocab_size = len(data_clas.vocab.itos)
n_class = data_clas.c

# Classifier head: layer sizes and the dropout applied to each layer
lin_ftrs = [50]
layers = [emb_sz*3] + lin_ftrs + [n_class]
ps = [dps[4], 0.1]

In [None]:
# Bundle everything the scoring scripts need into a single checkpoint:
# the trained weights, the parameters required to rebuild the network,
# the vocabulary (index -> token) and the class labels.
torch.save(
    { "model": learn.model.state_dict(),
      "model_params": {
          "drop_mult": drop_mult,
          "dps": dps,
          "bptt": bptt,
          "emb_sz": emb_sz,
          "nh": nh,
          "nl": nl,
          "pad_token": pad_token,
          "qrnn": qrnn,
          "max_len": max_len,
          "lin_ftrs": lin_ftrs,
          "ps": ps,  # listed once; a repeated key in a dict literal is silently overwritten
          "vocab_size": vocab_size,
          "n_class": n_class,
          "layers": layers},
      "vocab": data_clas.vocab.itos,
      "classes": data_clas.classes
    }, DATA_PATH/'models'/'final_for_prod.pth')

In fastai v1.0, there is a built-in way to perform a similar production export for supported learners.

In [None]:
learn.export()

In [None]:
# `learn.export()` writes its file under the learner's path, which here is the
# DATA_PATH the data bunch was loaded from, so load the export from there.
learn = load_learner(DATA_PATH)

In [None]:
pred_class, pred_idx, outputs = learn.predict("text to predict")

## Scoring script

In [None]:
%%writefile ./score_cmd.py
from fastai.text import *
from html.parser import HTMLParser

class HTMLTextExtractor(HTMLParser):
    """HTML parser that keeps only the text found between tags.

    Text fragments are collected in `self.result` in the order the parser
    reports them; `get_text` joins them into a single string.
    """

    def __init__(self):
        super().__init__()
        self.result = []

    def handle_data(self, d):
        """Collect one fragment of text content reported by the parser."""
        self.result.append(d)

    def get_text(self):
        """Return all collected fragments concatenated without a separator."""
        return ''.join(self.result)

    def error(self, message):
        """Do nothing when called with an error message."""
        return


def html_to_text(html):
    """Return the text content of `html` with the markup removed.

    If parsing fails, `html` is returned unchanged so that pre-processing
    never aborts a prediction.
    """
    s = HTMLTextExtractor()
    try:
        s.feed(html)
        # `feed` may hold back trailing text (e.g. input ending in "AT&T",
        # which could be the start of a character reference). `close` forces
        # the parser to flush it; without this call such text was dropped.
        s.close()
        return s.get_text()
    except Exception:
        # Best-effort fallback to the raw input. Unlike a bare `except:`,
        # this lets KeyboardInterrupt / SystemExit propagate.
        return html

def custom_tagstrip(x:str) -> str:
    """Tokenizer pre-rule: return `x` with HTML markup stripped by `html_to_text`."""
    stripped = html_to_text(x)
    return stripped

def load_model(classifier_filename):
    """Load the exported classifier checkpoint and rebuild the model.

    Args:
        classifier_filename: path of a checkpoint written with `torch.save`.
            It must hold a mapping with exactly the keys 'model',
            'model_params', 'vocab' and 'classes'.

    Returns:
        A tuple `(stoi, classes, model)`: the token -> id mapping (unknown
        tokens map to 0), the class labels stored in the checkpoint, and the
        classifier with its weights loaded, in evaluation mode.

    Raises:
        RuntimeError: if the checkpoint does not have exactly the expected keys.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
    # The context manager closes the file handle as soon as the checkpoint has
    # been read (it was previously left open).
    with Path(classifier_filename).open('rb') as checkpoint_file:
        state = torch.load(checkpoint_file, map_location=device)

    if set(state.keys()) != {'model', 'model_params', 'vocab', 'classes'}:
        raise RuntimeError("Invalid model provided.")

    model_state = state['model']
    model_params = state['model_params']
    itos = state['vocab']
    classes = state['classes']

    # Turn it into a string to int mapping (which is what we need)
    stoi = collections.defaultdict(lambda:0, {str(v):int(k) for k,v in enumerate(itos)})

    # Rebuild the network from the stored parameters so the saved weights fit
    model = get_rnn_classifier(bptt=model_params['bptt'],
                               max_seq=model_params['max_len'],
                               vocab_sz=model_params['vocab_size'],
                               emb_sz=model_params['emb_sz'],
                               n_hid=model_params['nh'],
                               n_layers=model_params['nl'],
                               pad_token=model_params['pad_token'],
                               layers=model_params['layers'],
                               drops=model_params['ps'],
                               input_p=model_params['dps'][0],
                               weight_p=model_params['dps'][1],
                               embed_p=model_params['dps'][2],
                               hidden_p=model_params['dps'][3],
                               qrnn=model_params['qrnn'])

    # Load the trained classifier
    model.load_state_dict(model_state)

    # Put the classifier into evaluation mode
    model.reset()
    model.eval()

    return stoi, classes, model

def predict_text(stoi, model, lang, text):
    """Classify a single `text` and return its class scores.

    Args:
        stoi: mapping from token string to integer id.
        model: the classifier to run.
        lang: language code handed to the fastai `Tokenizer`.
        text: raw text to classify; `custom_tagstrip` is applied before the
            default fastai pre-processing rules.

    Returns:
        NumPy array: the first row of the softmax taken over the last axis of
        the model's first output.
    """
    # The tokenizer works on a list of texts, so wrap the single input
    texts = [text]

    # Tokenize using the fastai wrapper around spaCy
    pre_rules = [custom_tagstrip] + defaults.text_pre_rules
    tokens = Tokenizer(lang=lang, pre_rules=pre_rules, n_cpus=1).process_all(texts)

    # Turn into integers for each word
    encoded = np.array([[stoi[o] for o in p] for p in tokens], dtype=np.int64)

    # NOTE(review): `encoded` has shape (1, n_tokens); confirm this matches the
    # input layout (batch-first vs. sequence-first) of the installed fastai version.
    data = torch.from_numpy(encoded)

    # Inference only: skip autograd bookkeeping while running the model
    with torch.no_grad():
        predictions = model(data)

    # Get class probability from classifier predictions
    res = F.softmax(predictions[0], -1).detach().cpu().numpy()

    return res[0]

def init():
    """Load the exported classifier and publish it through module globals.

    Sets the globals `stoi`, `classes` and `model`, which `run` reads.
    """
    global stoi, classes, model

    # Local checkpoint file, relative to the current working directory
    checkpoint = "../datasets/20news/models/final_for_prod.pth"
    stoi, classes, model = load_model(checkpoint)

def run(raw_data):
    """Score one request given as a dict with the keys 'lang' and 'text'.

    Prints the winning class and returns a JSON-serializable dict holding the
    predicted label, its index, its score and the scores of all classes.
    """
    lang, text = raw_data['lang'], raw_data['text']

    # Scores for every class; the highest one wins
    scores = predict_text(stoi, model, lang, text)
    best = np.argmax(scores)
    label = classes[best]
    best_score = scores[best]

    print(f"Class: {label} ({best_score})")

    # NumPy scalars and arrays are not JSON-serializable: cast to built-in types
    return {
        "label": label,
        "label_index": int(best),
        "label_score": float(best_score),
        "all_scores": scores.tolist(),
    }

if __name__ == '__main__':
    # Usage: python score_cmd.py <lang> <text>
    if len(sys.argv) < 3:
        # Fail fast with a usage hint instead of an IndexError that is raised
        # only after the model has already been loaded.
        sys.exit("Usage: python score_cmd.py <lang> <text>")
    init()
    run({"lang": sys.argv[1], "text": sys.argv[2]})

We can test it by launching from the command line:

`python score_cmd.py en "Example text to classify"`

## Setup Azure ML Workspace

In [None]:
import azureml
from azureml.core import Workspace, Run
from azureml.core.model import Model
from azureml.core.image import ContainerImage
from azureml.core.conda_dependencies import CondaDependencies 
from azureml.core.webservice import Webservice
from azureml.core.webservice import AciWebservice

In [None]:
print("Azure ML SDK Version: ", azureml.core.VERSION)

You can choose to set up your workspace directly from the Azure Portal, or by running the code below.

In [None]:
# Identifiers of the Azure subscription, resource group and ML workspace used
# by the cells below. Use the values of your own Azure account here.
# NOTE(review): a subscription ID is hard-coded in the notebook — consider
# reading it from an environment variable instead of committing it.
azure_subscription_id = '35d018a1-fd64-4a56-91e9-75f463fbfd0d'
azure_resource_group  = 'ps-fastai-rg2'
azure_mlworkspace_name  = 'ps-fastai'

In [None]:
# Create Azure Machine Learning Workspace
ws = Workspace.create(name=azure_mlworkspace_name,
                      subscription_id=azure_subscription_id, 
                      resource_group=azure_resource_group,
                      create_resource_group=True,
                      location='westeurope' # Or other supported Azure region   
                     )

# Save the configuration file
ws.write_config()

In [None]:
ws = Workspace.from_config()

If you created the Workspace from the Azure Portal, you can get a reference to it by running the following cell:

In [None]:
# Get a reference to an existing workspace and save its configuration locally
try:
    ws = Workspace(subscription_id = azure_subscription_id, resource_group = azure_resource_group, workspace_name = azure_mlworkspace_name)
    ws.write_config()
    print('Library configuration succeeded')
except Exception as exc:
    # Show the underlying reason (authentication, wrong identifiers, ...)
    # instead of hiding it; a bare `except:` would also swallow KeyboardInterrupt.
    print('Workspace not found')
    print(exc)

In [None]:
ws.get_details()

### Connect to Workspace

In [None]:
# Load the workspace from the saved configuration file and show its main properties
ws = Workspace.from_config()
# Each property is printed once (`ws.location` was listed twice)
print(ws.name, ws.location, ws.resource_group, sep = '\t')

### Register the Model

In [None]:
model_path = '../datasets/20news/models/final_for_prod.pth'
model_name = "ps-fastai-nlp-classification"

In [None]:
model = Model.register(model_path = model_path,
                       model_name = model_name,
                       tags = {"key": "0.1"},
                       description = "Pluralsight Fast.AI NLP Classification Model",
                       workspace = ws)

### Retrieve the Model

In [None]:
model = Model.list(ws, name=model_name)[0]

## Create Scoring Script for AML Services

In [None]:
%%writefile ./score.py
from fastai.text import *
from azureml.core.model import Model
from html.parser import HTMLParser

class HTMLTextExtractor(HTMLParser):
    """HTML parser that keeps only the text found between tags.

    Text fragments are collected in `self.result` in the order the parser
    reports them; `get_text` joins them into a single string.
    """

    def __init__(self):
        super().__init__()
        self.result = []

    def handle_data(self, d):
        """Collect one fragment of text content reported by the parser."""
        self.result.append(d)

    def get_text(self):
        """Return all collected fragments concatenated without a separator."""
        return ''.join(self.result)

    def error(self, message):
        """Do nothing when called with an error message."""
        return


def html_to_text(html):
    """Return the text content of `html` with the markup removed.

    If parsing fails, `html` is returned unchanged so that pre-processing
    never aborts a prediction.
    """
    s = HTMLTextExtractor()
    try:
        s.feed(html)
        # `feed` may hold back trailing text (e.g. input ending in "AT&T",
        # which could be the start of a character reference). `close` forces
        # the parser to flush it; without this call such text was dropped.
        s.close()
        return s.get_text()
    except Exception:
        # Best-effort fallback to the raw input. Unlike a bare `except:`,
        # this lets KeyboardInterrupt / SystemExit propagate.
        return html

def custom_tagstrip(x:str) -> str:
    """Tokenizer pre-rule: return `x` with HTML markup stripped by `html_to_text`."""
    stripped = html_to_text(x)
    return stripped

def load_model(classifier_filename):
    """Load the exported classifier checkpoint and rebuild the model.

    Args:
        classifier_filename: path of a checkpoint written with `torch.save`.
            It must hold a mapping with exactly the keys 'model',
            'model_params', 'vocab' and 'classes'.

    Returns:
        A tuple `(stoi, classes, model)`: the token -> id mapping (unknown
        tokens map to 0), the class labels stored in the checkpoint, and the
        classifier with its weights loaded, in evaluation mode.

    Raises:
        RuntimeError: if the checkpoint does not have exactly the expected keys.
    """
    # Query CUDA availability once and reuse the answer for the device and the log line
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda') if use_cuda else torch.device('cpu')

    if use_cuda:
        print('USING CUDA-GPU')
    else:
        print('USING CPU')

    # NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
    # The context manager closes the file handle as soon as the checkpoint has
    # been read (it was previously left open).
    with Path(classifier_filename).open('rb') as checkpoint_file:
        state = torch.load(checkpoint_file, map_location=device)

    if set(state.keys()) != {'model', 'model_params', 'vocab', 'classes'}:
        raise RuntimeError("Invalid model provided.")

    model_state = state['model']
    model_params = state['model_params']
    itos = state['vocab']
    classes = state['classes']

    # Turn it into a string to int mapping (which is what we need)
    stoi = collections.defaultdict(lambda:0, {str(v):int(k) for k,v in enumerate(itos)})

    # Rebuild the network from the stored parameters so the saved weights fit
    model = get_rnn_classifier(bptt=model_params['bptt'],
                               max_seq=model_params['max_len'],
                               vocab_sz=model_params['vocab_size'],
                               emb_sz=model_params['emb_sz'],
                               n_hid=model_params['nh'],
                               n_layers=model_params['nl'],
                               pad_token=model_params['pad_token'],
                               layers=model_params['layers'],
                               drops=model_params['ps'],
                               input_p=model_params['dps'][0],
                               weight_p=model_params['dps'][1],
                               embed_p=model_params['dps'][2],
                               hidden_p=model_params['dps'][3],
                               qrnn=model_params['qrnn'])

    # Load the trained classifier
    model.load_state_dict(model_state)

    # Put the classifier into evaluation mode
    model.reset()
    model.eval()

    return stoi, classes, model

def predict_text(stoi, model, lang, text):
    """Classify a single `text` and return its class scores.

    Args:
        stoi: mapping from token string to integer id.
        model: the classifier to run.
        lang: language code handed to the fastai `Tokenizer`.
        text: raw text to classify; `custom_tagstrip` is applied before the
            default fastai pre-processing rules.

    Returns:
        NumPy array: the first row of the softmax taken over the last axis of
        the model's first output.
    """
    # The tokenizer works on a list of texts, so wrap the single input
    texts = [text]

    # Tokenize using the fastai wrapper around spaCy
    pre_rules = [custom_tagstrip] + defaults.text_pre_rules
    tokens = Tokenizer(lang=lang, pre_rules=pre_rules, n_cpus=1).process_all(texts)

    # Turn into integers for each word
    encoded = np.array([[stoi[o] for o in p] for p in tokens], dtype=np.int64)

    # NOTE(review): `encoded` has shape (1, n_tokens); confirm this matches the
    # input layout (batch-first vs. sequence-first) of the installed fastai version.
    data = torch.from_numpy(encoded)

    # Inference only: skip autograd bookkeeping while running the model
    with torch.no_grad():
        predictions = model(data)

    # Get class probability from classifier predictions
    res = F.softmax(predictions[0], -1).detach().cpu().numpy()

    return res[0]

def init():
    """Load the registered classifier and publish it through module globals.

    Sets the globals `stoi`, `classes` and `model`, which `run` reads.
    """
    global stoi, classes, model

    # Resolve the location of the model file from its registered name
    registered_path = Model.get_model_path(model_name='ps-fastai-nlp-classification')
    stoi, classes, model = load_model(registered_path)

def run(raw_data):
    """Score one request and return a JSON-serializable result.

    Args:
        raw_data: JSON text of an object with exactly the keys 'lang' and 'text'.

    Returns:
        `{"error": "invalid data"}` when `raw_data` is not valid JSON, is not a
        JSON object, or does not have exactly the expected keys. Otherwise a
        dict with the predicted label, its index, its score and the scores of
        all classes.
    """
    try:
        deser_obj = json.loads(raw_data)
    except (TypeError, ValueError):
        # Malformed payload (json.JSONDecodeError is a ValueError): answer with
        # the same error response as any other invalid request instead of raising.
        return { "error": "invalid data" }

    # Only a JSON object with exactly the expected keys is accepted
    if not isinstance(deser_obj, dict) or set(deser_obj.keys()) != {'lang', 'text'}:
        return { "error": "invalid data" }

    lang = deser_obj['lang']
    text = deser_obj['text']

    # Make prediction
    scores = predict_text(stoi, model, lang, text)
    pred_class = np.argmax(scores)

    # You can return any data type as long as it is JSON-serializable
    # We have to cast numpy data types (non-serializable) to standard types
    return { "label": classes[pred_class], "label_index": int(pred_class), "label_score": float(scores[pred_class]), "all_scores": scores.tolist() }

## Create Environment Files

In [None]:
# Environment of the scoring container: pinned Python version plus pip packages
myenv = CondaDependencies()
myenv.set_python_version("3.6.6")
# Alternative source for torch, kept for reference (presumably the CUDA 10.0 wheel):
# https://download.pytorch.org/whl/cu100/torch-1.0.0-cp36-cp36m-linux_x86_64.whl
for pip_requirement in ("torch==1.0.0", "torchvision==0.2.1", "fastai==1.0.42"):
    myenv.add_pip_package(pip_requirement)

# Write the environment definition to the file the image configuration points to
with open("myenv.yml","w") as env_file:
    env_file.write(myenv.serialize_to_string())

In [None]:
%%writefile ./Dockerfile
ARG buildtime_scoring_var=30000
ENV SCORING_TIMEOUT_MS=$buildtime_scoring_var
RUN apt-get -y update && apt-get install -y gcc

## Create an Image Configuration

For details, see: https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.core.image.containerimage?view=azure-ml-py

In [None]:
image_config = ContainerImage.image_configuration(execution_script = "score.py",
                                                  runtime = "python",
                                                  conda_file = "myenv.yml",
                                                  docker_file="Dockerfile",
                                                  enable_gpu=False,
                                                  description = "Image with Fast.AI NLP classification model",
                                                  tags = {"data": "20newsgroups", "type": "classification"}
                                                 )

## Create the Image

In [None]:
%%time
image = ContainerImage.create(name = "myimage", 
                              models = [model],
                              image_config = image_config,
                              workspace = ws
                              )
image.wait_for_creation(show_output=True)

In [None]:
print(image.image_build_log_uri)

In [None]:
ws.images

In [None]:
# `ws.images` is looked up by image name elsewhere in this notebook; the
# "name:version" form is the image id accepted by `ContainerImage(ws, id=...)`.
print(ws.images['myimage'].image_build_log_uri)

## Deploy the image in ACI

In [None]:
aciconfig = AciWebservice.deploy_configuration(cpu_cores = 1, 
                                               memory_gb = 1, 
                                               tags = {"data": "20newsgroups", "type": "classification"}, 
                                               description = 'fastai NLP Classification')

In [None]:
image = ws.images["myimage"]

In [None]:
image = ContainerImage(ws, id="myimage:1")

In [None]:
print(image)

In [None]:
%%time
service_name = 'aci-fastai-1'
service = Webservice.deploy_from_image(deployment_config = aciconfig,
                                            image = image,
                                            name = service_name,
                                            workspace = ws)
service.wait_for_deployment(show_output = True)
print(service.state)

In [None]:
print(service.scoring_uri)

### Troubleshooting

In [None]:
log = service.get_logs()

In [None]:
log

In [None]:
log.rstrip().split('\n')

In [None]:
print(image.image_location)

`az container logs --resource-group <resource-group> --name <containergroup> --container-name <container>`

https://docs.microsoft.com/en-us/azure/container-instances/container-instances-get-logs

https://docs.microsoft.com/en-us/azure/container-instances/container-instances-troubleshooting