In [1]:
!pip install --quiet transformers

In [2]:
#Hugging Face Transformers: the library that provides the pipeline API and the
#pipeline registry used throughout this notebook
import transformers

#Limit the library's own log output to errors so that informational and warning
#messages do not clutter the cell outputs.
#NOTE(review): this presumably only affects messages logged after this call; notices
#raised while the import above runs may still be shown - confirm.
transformers.logging.set_verbosity_error()

  from .autonotebook import tqdm as notebook_tqdm


## Reviewing the Pipeline

Use the pipeline registry to list the available pipeline tasks and to inspect the details of a specific task.

In [3]:
from transformers.pipelines import PIPELINE_REGISTRY

#Ask the registry for every task name that the Hugging Face pipeline API accepts,
#then print the resulting list
supported_tasks = PIPELINE_REGISTRY.get_supported_tasks()
print(supported_tasks)

['audio-classification', 'automatic-speech-recognition', 'conversational', 'depth-estimation', 'document-question-answering', 'feature-extraction', 'fill-mask', 'image-classification', 'image-feature-extraction', 'image-segmentation', 'image-to-image', 'image-to-text', 'mask-generation', 'ner', 'object-detection', 'question-answering', 'sentiment-analysis', 'summarization', 'table-question-answering', 'text-classification', 'text-generation', 'text-to-audio', 'text-to-speech', 'text2text-generation', 'token-classification', 'translation', 'video-classification', 'visual-question-answering', 'vqa', 'zero-shot-audio-classification', 'zero-shot-classification', 'zero-shot-image-classification', 'zero-shot-object-detection']


In [4]:
#Look up the registry entry for the summarization task. check_task returns a
#sequence; the element at index 1 is the mapping whose 'default' entry is printed here.
summarization_task_details = PIPELINE_REGISTRY.check_task("summarization")[1]

print("\nDefault Model for Summarization: ")
print(summarization_task_details.get("default"))


Default Model for Summarization: 
{'model': {'pt': ('sshleifer/distilbart-cnn-12-6', 'a4f8f3e'), 'tf': ('google-t5/t5-small', 'd769bba')}}


## Loading a Pipeline

In [6]:
from transformers import pipeline
import os

#Load a pipeline. This will download the model checkpoint from Hugging Face and cache it
#locally on disk. If the model is already available in the cache, the cached version is used.
#The download can take a long time, depending on network bandwidth.

text_summarizarion_classifier = pipeline("summarization")

#Resolve the hub cache directory. By default it is <user-home>/.cache/huggingface/hub,
#but it can be relocated through environment variables, so honour them (most specific
#first) instead of hard-coding the default:
#  HF_HUB_CACHE   -> the hub cache directory itself
#  HF_HOME        -> the Hugging Face home directory (the cache is its "hub" sub-directory)
#  XDG_CACHE_HOME -> the generic per-user cache root (replaces <user-home>/.cache)
default_cache_root = os.path.join(os.path.expanduser("~"), ".cache")
hf_home = os.path.expanduser(
    os.environ.get(
        "HF_HOME",
        os.path.join(os.environ.get("XDG_CACHE_HOME", default_cache_root), "huggingface"),
    )
)
cache_dir = os.environ.get("HF_HUB_CACHE", os.path.join(hf_home, "hub"))
print("Huggingface Cache directory is : ", cache_dir)

#Contents of cache directory
os.listdir(cache_dir)



Huggingface Cache directory is :  /home/codespace/.cache/huggingface/hub


['models--facebook--bart-large-cnn',
 'version.txt',
 '.locks',
 'models--distilbert--distilbert-base-uncased-finetuned-sst-2-english',
 'models--finiteautomata--bertweet-base-sentiment-analysis',
 'models--sshleifer--distilbart-cnn-12-6']

## Generating a Summary with the Default Model

In [7]:
#Run the default summarization pipeline on a short sample sentence and show the
#result, which is printed as a list of dicts keyed by 'summary_text'
summarizarion_results = text_summarizarion_classifier(
    "This is a great course"
)
print(summarizarion_results)

[{'summary_text': " This is a great course for people who want to take part in the Olympics . This is the first of its kind in the history of the Olympics and Paralympics . It's a great way to learn how to use the language of diplomacy and diplomacy to help people around the world ."}]


## Using a Custom Model for Summarization

In [None]:
#Build the summarization pipeline again, this time naming the model checkpoint
#explicitly instead of relying on the task's default. The task must stay
#"summarization" so that the pipeline returns summaries rather than sentiment labels.
text_summarizarion_classifier = pipeline(task="summarization",
                                model="facebook/bart-large-cnn")

summarizarion_results=text_summarizarion_classifier("This is a great course")

print(summarizarion_results)

#Contents of cache directory (the custom checkpoint is cached alongside the default one)
os.listdir(cache_dir)