In [1]:
#The main package that contains functions to use Hugging Face
import transformers

#Raise the transformers log threshold to ERROR so that warning messages are not shown.
transformers.logging.set_verbosity(transformers.logging.ERROR)

## 05.02. Reviewing the Pipeline

Use the pipeline registry to list the available pipeline tasks and to explore the details of a specific task.

In [2]:
from transformers.pipelines import PIPELINE_REGISTRY

#List every task name known to the Hugging Face pipeline registry
supported_tasks = PIPELINE_REGISTRY.get_supported_tasks()
print(supported_tasks)


2023-11-02 14:40:51.673841: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


['audio-classification', 'automatic-speech-recognition', 'conversational', 'depth-estimation', 'document-question-answering', 'feature-extraction', 'fill-mask', 'image-classification', 'image-segmentation', 'image-to-image', 'image-to-text', 'mask-generation', 'ner', 'object-detection', 'question-answering', 'sentiment-analysis', 'summarization', 'table-question-answering', 'text-classification', 'text-generation', 'text-to-audio', 'text-to-speech', 'text2text-generation', 'token-classification', 'translation', 'video-classification', 'visual-question-answering', 'vqa', 'zero-shot-audio-classification', 'zero-shot-classification', 'zero-shot-image-classification', 'zero-shot-object-detection']


In [3]:
#Look up one task in the registry and show the default model it is configured with
print("\nDefault Model for Sentiment Analysis: ")
sentiment_task_spec = PIPELINE_REGISTRY.check_task('sentiment-analysis')[1]
print(sentiment_task_spec.get('default'))


Default Model for Sentiment Analysis: 
{'model': {'pt': ('distilbert-base-uncased-finetuned-sst-2-english', 'af0f99b'), 'tf': ('distilbert-base-uncased-finetuned-sst-2-english', 'af0f99b')}}


## 05.03. Loading a Pipeline

In [4]:
from transformers import pipeline
import os

#Build the sentiment-analysis pipeline with its default model. On first use the model
#checkpoint is downloaded from Hugging Face and cached on local disk; later runs reuse
#the cached copy. The first download can take a while, depending on network bandwidth.

sentiment_classifier = pipeline(task="sentiment-analysis")

#Cache usually available at : <<user-home>>/.cache/huggingface/hub
#Environment variables can relocate it, so they are consulted before using that default.

def resolve_hf_cache_dir(env=None):
    """Return the directory where the Hugging Face hub cache is expected to live.

    Lookup order (intended to follow the Hugging Face documentation;
    NOTE(review): confirm against the installed library version):
      1. TRANSFORMERS_CACHE, HF_HUB_CACHE or HUGGINGFACE_HUB_CACHE, used as-is
      2. HF_HOME, with "hub" appended
      3. XDG_CACHE_HOME, with "huggingface/hub" appended
      4. <<user-home>>/.cache/huggingface/hub

    Args:
        env: Mapping of environment variables to consult. Defaults to os.environ.
            Variables that are unset or empty are ignored.

    Returns:
        The cache directory path as a string. The directory is not created.
    """
    env = os.environ if env is None else env

    #Variables that point directly at the hub cache win over everything else
    for name in ("TRANSFORMERS_CACHE", "HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE"):
        if env.get(name):
            return os.path.expanduser(env[name])

    if env.get("HF_HOME"):
        hf_home = os.path.expanduser(env["HF_HOME"])
    else:
        cache_root = env.get("XDG_CACHE_HOME") or os.path.join(os.path.expanduser("~"), ".cache")
        hf_home = os.path.join(os.path.expanduser(cache_root), "huggingface")

    return os.path.join(hf_home, "hub")

cache_dir = resolve_hf_cache_dir()
print("Huggingface Cache directory is : ", cache_dir)

#Contents of cache directory (an empty list if the directory does not exist yet)
os.listdir(cache_dir) if os.path.isdir(cache_dir) else []

Downloading (…)lve/main/config.json: 100%|██████████| 629/629 [00:00<00:00, 41.2kB/s]
Downloading model.safetensors: 100%|██████████| 268M/268M [00:11<00:00, 24.3MB/s] 
Downloading (…)okenizer_config.json: 100%|██████████| 48.0/48.0 [00:00<00:00, 7.18kB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 18.6MB/s]


Huggingface Cache directory is :  /Users/hu3745fi/.cache/huggingface/hub


['models--distilbert-base-uncased-finetuned-sst-2-english', 'version.txt']

## 05.04. Predicting Sentiment with Default Model

In [8]:
#Run the classifier on one sentence and show the predicted label with its score
course_review = "This is a fantastic course"
sentiment_results = sentiment_classifier(course_review)
print(sentiment_results)


[{'label': 'POSITIVE', 'score': 0.9998838901519775}]


In [9]:
#Another sentence, this time a complaint
download_complaint = "The download speed is soooo slow"
sentiment_results = sentiment_classifier(download_complaint)
print(sentiment_results)

[{'label': 'NEGATIVE', 'score': 0.9995249509811401}]


## 05.05. Using a Custom Model

In [10]:
#Replace the default classifier with one built from an explicitly named model checkpoint
custom_model_id = "finiteautomata/bertweet-base-sentiment-analysis"
sentiment_classifier = pipeline(task="sentiment-analysis", model=custom_model_id)

#Classify a sentence with the newly loaded model
sentiment_result = sentiment_classifier("This is a great course")
print(sentiment_result)

#Contents of cache directory
os.listdir(cache_dir)

Downloading (…)lve/main/config.json: 100%|██████████| 949/949 [00:00<00:00, 1.07MB/s]
Downloading pytorch_model.bin: 100%|██████████| 540M/540M [00:24<00:00, 21.7MB/s] 
Downloading (…)okenizer_config.json: 100%|██████████| 338/338 [00:00<00:00, 396kB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 843k/843k [00:00<00:00, 9.83MB/s]
Downloading (…)solve/main/bpe.codes: 100%|██████████| 1.08M/1.08M [00:00<00:00, 9.79MB/s]
Downloading (…)in/added_tokens.json: 100%|██████████| 22.0/22.0 [00:00<00:00, 14.1kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 167/167 [00:00<00:00, 192kB/s]


[{'label': 'POS', 'score': 0.9920700192451477}]


['models--distilbert-base-uncased-finetuned-sst-2-english',
 'version.txt',
 'models--finiteautomata--bertweet-base-sentiment-analysis']