# 1. Install the Hugging Face hub library

This notebook calls models hosted on the Hugging Face portal through the `huggingface_hub` library.

https://huggingface.co/docs/huggingface_hub/index

### If using a local machine - uncomment and run the following cell

In [None]:
# !pip install huggingface_hub

### On Google Colab - uncomment and run the following cell

In [None]:
# !pip install transformers torch huggingface_hub -q

# 2. Create the Inference Client

The client will use the model hosted on the Hugging Face portal.

**Class**

https://huggingface.co/docs/huggingface_hub/v0.20.2/en/package_reference/inference_client#huggingface_hub.InferenceClient

**Supported tasks**

https://huggingface.co/docs/huggingface_hub/guides/inference#supported-tasks

**NOTE:**

Sometimes API calls fail due to heavy usage of the model on HF. If you get an invocation error, try again!

In [10]:
from huggingface_hub import InferenceClient
import getpass

# Prompt for the Hugging Face access token without echoing it on screen, so
# the token is never stored in the notebook source or its saved output.
# The message is passed as the getpass prompt so the input box is labelled
# with it instead of getpass's default "Password: " text.
# Surrounding whitespace is stripped because a copy/pasted token can carry a
# stray space or newline that would otherwise be sent as part of the token.
HUGGINGFACEHUB_API_TOKEN = getpass.getpass(
    "Copy/paste HuggingFace token and hit <enter>: "
).strip()

Copy/paste HuggingFace token and hit <enter>


 ········


In [12]:
# Hosted model to query; replace this id to try out a different model
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# Build the inference client bound to that model, authenticated with the
# token collected earlier in the notebook
client = InferenceClient(
    token=HUGGINGFACEHUB_API_TOKEN,
    model=model_name,
)


# 3. List deployed models

Returns a subset of models for the specified framework

https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.list_deployed_models

**Note:**

An invalid framework throws an HTTP error.

In [13]:
# List the deployed models for one specific framework.
# The framework is passed as a one-element list; the captured output of this
# cell is a dict mapping a task name to a list of model ids.
framework = "text-generation-inference"  # author's alternative: "text-to-speech" (TODO confirm it is a valid framework)
deployed_models = client.list_deployed_models([framework])
print(deployed_models)

## Get all the deployed models
## (uncomment the two lines below to list every framework instead)
# deployed_models = client.list_deployed_models("all")
# print(deployed_models)


{'image-text-to-text': ['HuggingFaceM4/idefics2-8b', 'HuggingFaceM4/idefics2-8b-chatty'], 'text-generation': ['01-ai/Yi-1.5-34B-Chat', 'bigcode/octocoder', 'bigcode/santacoder', 'bigcode/starcoder', 'bigcode/starcoder2-15b', 'bigcode/starcoder2-3b', 'bigcode/starcoderplus', 'bigscience/bloom', 'codellama/CodeLlama-13b-hf', 'codellama/CodeLlama-34b-Instruct-hf', 'codellama/CodeLlama-7b-hf', 'CohereForAI/aya-23-35B', 'CohereForAI/c4ai-command-r-plus', 'deepseek-ai/DeepSeek-Coder-V2-Instruct', 'EleutherAI/gpt-neox-20b', 'google/gemma-1.1-2b-it', 'google/gemma-1.1-7b-it', 'google/gemma-2b', 'google/gemma-7b', 'HuggingFaceH4/starchat-beta', 'HuggingFaceH4/starchat2-15b-v0.1', 'HuggingFaceH4/zephyr-7b-alpha', 'HuggingFaceH4/zephyr-7b-beta', 'HuggingFaceM4/idefics-9b-instruct', 'kashif/stack-llama-2', 'llhf/Meta-Llama-3.1-70B-Instruct', 'llhf/Meta-Llama-3.1-8B-Instruct', 'meta-llama/Llama-2-13b-chat-hf', 'meta-llama/Llama-2-13b-hf', 'meta-llama/Llama-2-70b-chat-hf', 'meta-llama/Llama-2-7b-cha

# 4. Check if a specific model is available as an endpoint

In [14]:
# Check the same model the client was created for. Reusing `model_name`
# (set in the client-creation cell) keeps this status check in sync when the
# reader switches to a different model; previously the id was duplicated
# here as a second string literal and could silently drift.
model_id = model_name

# Last expression of the cell: displays the status object for the model
client.get_model_status(model_id)

ModelStatus(loaded=False, state='Loadable', compute_type='cpu', framework='transformers')

# 5. Inference

In [15]:
%%time

# NOTE: the %%time cell magic above must remain the first line of the cell.
# Sample sentence to classify.
text = "I loved the restaurant"

# Classify the text with the model the client was created with; the captured
# output of this cell is a list of label/score elements.
client.text_classification(text)

CPU times: total: 0 ns
Wall time: 101 ms


[TextClassificationOutputElement(label='POSITIVE', score=0.9998492002487183),
 TextClassificationOutputElement(label='NEGATIVE', score=0.0001507535926066339)]

In [None]:
%%time

# NOTE: the %%time cell magic above must remain the first line of the cell.
# Second sample sentence; this rebinds `text` from the previous inference cell.
text = "i hated it"

# Classify the text with the model the client was created with.
client.text_classification(text)