# 1. Install the Hugging Face hub library

This notebook uses a model hosted on the Hugging Face portal.

https://huggingface.co/docs/huggingface_hub/index

In [10]:
!pip install transformers torch huggingface_hub -q

# 2. Create the Inference Client

The client will use the model hosted on the Hugging Face portal.

**Class**

https://huggingface.co/docs/huggingface_hub/v0.20.2/en/package_reference/inference_client#huggingface_hub.InferenceClient

**Supported tasks**

https://huggingface.co/docs/huggingface_hub/guides/inference#supported-tasks

**NOTE:**

Sometimes API calls fail due to heavy usage of the model on HF. If you get an invocation error, try again!

In [53]:
import os
from getpass import getpass

from huggingface_hub import InferenceClient

In [12]:

# Never hardcode the API token in the notebook: anyone who can read the file
# (or its version history) can use it. Read it from the environment, and fall
# back to an interactive prompt when the variable is not set.
# NOTE: any token that was previously committed in this cell should be revoked
# in the Hugging Face account settings and replaced with a new one.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN") or getpass(
    "Hugging Face API token: "
)

# Model that the inference client targets by default.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# Client bound to the model above and authenticated with the token.
client = InferenceClient(model=model_name, token=HUGGINGFACEHUB_API_TOKEN)


# 3. List deployed models

Returns a subset of models for the specified framework

https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.list_deployed_models

**Note:**

An invalid framework throws an HTTP error.

In [61]:
# Ask the client which models are deployed for a single framework.
# (The original note also listed "text-to-speech" as a value to try.)
framework = "text-generation-inference"
deployed_models = client.list_deployed_models(frameworks=[framework])
print(deployed_models)

# To list the deployed models for every framework instead, pass "all":
#   deployed_models = client.list_deployed_models("all")
#   print(deployed_models)


{'text2text-generation': ['google/flan-t5-xxl', 'google/flan-ul2'], 'text-generation': ['bigcode/starcoder', 'bigscience/bloom', 'codellama/CodeLlama-13b-hf', 'codellama/CodeLlama-34b-Instruct-hf', 'HuggingFaceH4/starchat-beta', 'HuggingFaceH4/zephyr-7b-alpha', 'HuggingFaceH4/zephyr-7b-beta', 'HuggingFaceM4/idefics-80b-instruct', 'mistralai/Mistral-7B-Instruct-v0.1', 'mistralai/Mistral-7B-Instruct-v0.2', 'mistralai/Mistral-7B-v0.1', 'openchat/openchat-3.5-0106', 'TheBloke/vicuna-7B-v1.5-GPTQ', 'tiiuae/falcon-7b-instruct']}


# 4. Check if a specific model is available as an endpoint

In [55]:
# Identifier of the model whose hosting status is queried.
model_id = "distilbert-base-uncased-finetuned-sst-2-english"

# The call is the cell's last expression, so the returned status is displayed.
client.get_model_status(model=model_id)

ModelStatus(loaded=False, state='Loadable', compute_type='cpu', framework='transformers')

# 5. Inference

In [56]:
%%time

text = "I loved the restaurant"

client.text_classification(text)

CPU times: total: 31.2 ms
Wall time: 74.4 ms


[{'label': 'POSITIVE', 'score': 0.9998492002487183},
 {'label': 'NEGATIVE', 'score': 0.00015075372357387096}]

In [57]:
%%time

text = "i hated it"

client.text_classification(text)

CPU times: total: 0 ns
Wall time: 61.8 ms


[{'label': 'NEGATIVE', 'score': 0.9996846914291382},
 {'label': 'POSITIVE', 'score': 0.00031535723246634007}]