In [1]:
#The main package that contains functions to use Hugging Face
import transformers

#Raise the transformers logging threshold to ERROR so that warning and info
#messages emitted by the library do not clutter the cell outputs.
transformers.logging.set_verbosity_error()

  from .autonotebook import tqdm as notebook_tqdm


## 05.02. Reviewing the Pipeline

Use the pipeline registry to list the available pipeline tasks and to inspect the configuration of a specific task.

In [2]:
from transformers.pipelines import PIPELINE_REGISTRY

#Ask the registry for the name of every task the Hugging Face pipeline supports,
#then show the resulting list
supported_tasks = PIPELINE_REGISTRY.get_supported_tasks()
print(supported_tasks)


['audio-classification', 'automatic-speech-recognition', 'conversational', 'depth-estimation', 'document-question-answering', 'feature-extraction', 'fill-mask', 'image-classification', 'image-segmentation', 'image-to-text', 'ner', 'object-detection', 'question-answering', 'sentiment-analysis', 'summarization', 'table-question-answering', 'text-classification', 'text-generation', 'text2text-generation', 'token-classification', 'translation', 'visual-question-answering', 'vqa', 'zero-shot-classification', 'zero-shot-image-classification', 'zero-shot-object-detection']


In [3]:
#Look up the registry entry for a single task and show its 'default' setting
task_name = 'sentiment-analysis'
print("\nDefault Model for Sentiment Analysis: ")
#check_task returns a sequence; index 1 is the entry that carries the 'default' key
task_details = PIPELINE_REGISTRY.check_task(task_name)[1]
print(task_details.get('default'))


Default Model for Sentiment Analysis: 
{'model': {'pt': ('distilbert-base-uncased-finetuned-sst-2-english', 'af0f99b'), 'tf': ('distilbert-base-uncased-finetuned-sst-2-english', 'af0f99b')}}


## 05.03. Loading a Pipeline

In [4]:
import os

from transformers import pipeline

#Load a pipeline. This downloads the model checkpoint from Hugging Face and caches it
#locally on disk. If the model is already in the cache, the cached copy is used instead.
#The first download can take a long time, depending on network bandwidth.
sentiment_classifier = pipeline("sentiment-analysis")

#Work out where the checkpoints were cached. The default location is
#<user-home>/.cache/huggingface/hub, but it can be relocated through environment
#variables, so honour those instead of hard-coding the default.
#An explicit cache-directory variable wins; otherwise the cache is the "hub"
#folder under HF_HOME, which itself defaults to $XDG_CACHE_HOME/huggingface.
#NOTE(review): the exact precedence of these variables differs slightly between
#library versions - confirm against the installed transformers/huggingface_hub.
explicit_cache_vars = ("TRANSFORMERS_CACHE", "HF_HUB_CACHE", "HUGGINGFACE_HUB_CACHE")
cache_dir = next(
    (os.environ[var] for var in explicit_cache_vars if os.environ.get(var)),
    None,
)
if cache_dir is None:
    xdg_cache_home = os.environ.get("XDG_CACHE_HOME") or os.path.join("~", ".cache")
    hf_home = os.environ.get("HF_HOME") or os.path.join(xdg_cache_home, "huggingface")
    cache_dir = os.path.join(hf_home, "hub")

#expanduser resolves a leading "~"; os.path.join keeps the separators right on every OS
cache_dir = os.path.expanduser(cache_dir)
print("Huggingface Cache directory is : ", cache_dir)

#Contents of cache directory
os.listdir(cache_dir)

Downloading: 100%|██████████████████████████████████████████████████████████████████████████████| 629/629 [00:00<00:00, 155kB/s]
Downloading: 100%|███████████████████████████████████████████████████████████████████████████| 268M/268M [00:09<00:00, 28.7MB/s]
Downloading: 100%|███████████████████████████████████████████████████████████████████████████| 48.0/48.0 [00:00<00:00, 28.7kB/s]
Downloading: 100%|████████████████████████████████████████████████████████████████████████████| 232k/232k [00:00<00:00, 632kB/s]


Huggingface Cache directory is :  /Users/linkedin/.cache/huggingface/hub


['models--distilbert-base-uncased-finetuned-sst-2-english', 'version.txt']

## 05.04. Predicting Sentiment with Default Model

In [5]:
#Run the loaded pipeline on one sentence and show what it returns
sample_text = "This is a great course"
sentiment_results = sentiment_classifier(sample_text)
print(sentiment_results)


[{'label': 'POSITIVE', 'score': 0.9998713731765747}]


In [6]:
#Second example: a sentence with a negative tone, scored by the same pipeline
sample_text = "The download speed is really bad"
sentiment_results = sentiment_classifier(sample_text)
print(sentiment_results)

[{'label': 'NEGATIVE', 'score': 0.9998070597648621}]


## 05.05. Using a Custom Model

In [7]:
#Build the pipeline again, this time naming a specific checkpoint instead of
#relying on the task default.
#NOTE: this rebinds sentiment_classifier, so the previously loaded pipeline is no
#longer reachable through that name once this cell has run.
custom_model_name = "finiteautomata/bertweet-base-sentiment-analysis"
sentiment_classifier = pipeline(task="sentiment-analysis", model=custom_model_name)

#Score the same sentence as before so the two models' outputs can be compared
sentiment_result = sentiment_classifier("This is a great course")
print(sentiment_result)

#Contents of cache directory, listed again after loading the custom checkpoint
os.listdir(cache_dir)

Downloading: 100%|██████████████████████████████████████████████████████████████████████████████| 890/890 [00:00<00:00, 445kB/s]
Downloading: 100%|███████████████████████████████████████████████████████████████████████████| 540M/540M [00:18<00:00, 29.5MB/s]
Downloading: 100%|██████████████████████████████████████████████████████████████████████████████| 295/295 [00:00<00:00, 112kB/s]
Downloading: 100%|███████████████████████████████████████████████████████████████████████████| 843k/843k [00:00<00:00, 1.70MB/s]
Downloading: 100%|█████████████████████████████████████████████████████████████████████████| 1.08M/1.08M [00:00<00:00, 1.74MB/s]
Downloading: 100%|███████████████████████████████████████████████████████████████████████████| 17.0/17.0 [00:00<00:00, 9.05kB/s]
Downloading: 100%|█████████████████████████████████████████████████████████████████████████████| 150/150 [00:00<00:00, 76.8kB/s]


[{'label': 'POS', 'score': 0.9905492663383484}]


['models--distilbert-base-uncased-finetuned-sst-2-english',
 'version.txt',
 'models--finiteautomata--bertweet-base-sentiment-analysis']