# REST Inference

## Setup

Verify that the following variable settings match your deployed model's resource name and REST URL. The code below assumes that the Kubernetes service is in the same namespace as this notebook; if it is not, use the service's namespace-qualified name instead (for example, `modelmesh-serving.<namespace>`).

In [None]:
# Upgrade pip and install all the required libraries
! pip install --upgrade pip
! pip install torch==2.3.0+cpu accelerate -f https://download.pytorch.org/whl/torch_stable.html
! pip install tensorflow transformers numpy

Next, let's define the variables needed to reach our deployed ML model:


In [None]:
# Name of the deployed model; must match the model's resource name on the serving platform.
deployed_model_name = "itsmticketsovir"
# Base REST URL of the serving service. The short host name assumes the service
# is in the same namespace as this notebook.
rest_url = "http://modelmesh-serving:8008"
# Full inference endpoint for the model named above.
infer_url = f"{rest_url}/v2/models/{deployed_model_name}/infer"

## Request Function

Load the tokenizer and define a function that serializes the input text and submits the REST inference request:

In [None]:
import numpy as np
import requests
import tensorflow as tf
from transformers import DistilBertTokenizer

# Turn on NumPy-style behavior for TensorFlow tensors. This must run before
# rest_request is called, which uses `.tolist()` on the tokenizer's tensor output
# (presumably made available by this switch -- confirm against the TF docs).
tf.experimental.numpy.experimental_enable_numpy_behavior()

# Tokenizer loaded once at cell execution and shared by every rest_request call.
tokenizer = DistilBertTokenizer.from_pretrained('alezzandro/itsm_tickets')


def rest_request(text, timeout=60):
    """Tokenize ``text`` and submit it to the deployed model over REST.

    The text is encoded with the module-level ``tokenizer``; the resulting
    ``input_ids`` and ``attention_mask`` tensors are serialized to JSON and
    posted to ``infer_url``.

    Args:
        text: Text to send to the model.
        timeout: Seconds to wait for the server before giving up. Passed
            unchanged to ``requests.post``; use ``None`` to wait indefinitely.

    Returns:
        The ``data`` field of the first entry of ``outputs`` in the JSON response.

    Raises:
        requests.exceptions.HTTPError: The server replied with a 4xx/5xx status.
        requests.exceptions.Timeout: No response arrived within ``timeout``.
    """
    encoded_input = tokenizer(text, return_tensors='tf')

    # Both tensors are serialized the same way, so build the entries in one pass.
    json_data = {
        "inputs": [
            {
                "name": tensor_name,
                "shape": encoded_input[tensor_name].shape.as_list(),
                "datatype": "INT64",
                "data": encoded_input[tensor_name].tolist()
            }
            for tensor_name in ("input_ids", "attention_mask")
        ]
    }

    response = requests.post(infer_url, json=json_data, timeout=timeout)
    # Fail loudly on an HTTP error instead of surfacing it later as an opaque
    # KeyError / JSON decoding error when reading the response body.
    response.raise_for_status()
    response_dict = response.json()
    return response_dict['outputs'][0]['data']

Now we can test the deployed model:

In [None]:
# Sample ticket text used to exercise the deployed model.
text = "Web server logs indicate multiple 404 Not Found errors for resources that should exist.  File paths appear correct in the codebase. Need to investigate potential caching issues, configuration mismatches, or incorrect deployments."
# Send the text to the model; rest_request returns the `data` of the first output in the response.
prediction = rest_request(text)
# Bare last expression so the notebook displays the returned values.
prediction

Finally, we can map the prediction to its corresponding category:

In [None]:
def get_max_value_position(arr):
    """Return the position of the largest element of ``arr``.

    When the largest value occurs more than once, the position of its first
    occurrence is returned. An empty ``arr`` raises ``ValueError``.
    """
    # `index` finds the first match, so ties resolve to the lowest position.
    return arr.index(max(arr))

# Lookup table from the position of a score in the prediction to its category name.
id2label = {0: "WebServer", 1: "Database", 2: "Filesystem"}

# The position of the highest score selects the category.
index = get_max_value_position(prediction)
predicted_label = id2label[index]

print("The ticket type is: " + predicted_label)
