In [1]:
from transformers import pipeline

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
text = """Moby-Dick; or, The Whale is an 1851 novel by American writer Herman Melville.
The book is the sailor Ishmael's narrative of the maniacal quest of Ahab, captain of the whaling ship Pequod, for vengeance against Moby Dick, the giant white sperm whale that bit off his leg on the ship's previous voyage. 
A contribution to the literature of the American Renaissance, Moby-Dick was published to mixed reviews, was a commercial failure, and was out of print at the time of the author's death in 1891.
"""

## Text Classification: judge given texts are positive or not

In [3]:
classifier = pipeline("text-classification")

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Downloading model.safetensors: 100%|█████████████████████████████████| 268M/268M [01:06<00:00, 4.02MB/s]
Downloading (…)okenizer_config.json: 100%|███████████████████████████| 48.0/48.0 [00:00<00:00, 10.3kB/s]
Downloading (…)solve/main/vocab.txt: 100%|████████████████████████████| 232k/232k [00:00<00:00, 995kB/s]
Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


In [5]:
import pandas as pd
outputs = classifier(text)

pd.DataFrame(outputs)

Unnamed: 0,label,score
0,NEGATIVE,0.98976


## Named Entity Recognition

In [6]:
ner_tagger = pipeline("ner", aggregation_strategy="normal")
outputs = ner_tagger(text)

pd.DataFrame(outputs)

No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Downloading (…)lve/main/config.json: 100%|██████████████████████████████| 998/998 [00:00<00:00, 247kB/s]
Downloading pytorch_model.bin:  17%|█████▎                          | 220M/1.33G [00:49<05:02, 3.68MB/s]

KeyboardInterrupt: 

Downloading pytorch_model.bin:  17%|█████▎                          | 220M/1.33G [01:07<05:02, 3.68MB/s]

## Question Answering

In [None]:
reader = pipeline("question-answering")
question = "What is the given text about?"
outputs = reader(question=question, context=text)

pd.DataFrame([outputs])

## Text Summarization

In [None]:
summarizer = pipeline("summarization")
outputs = summarizer(text, max_length=45, clean_up_tokenization_spaces=True)

pd.DataFrame(outputs[0]["summary_text"])

## Translation

In [None]:
translator = pipeline("translation_en_to_ja")
outputs = translator(text, clean_up_tokenization_spaces=True, min_length=100)

print(outputs[0]["translation_text"])

## Text Generation

In [None]:
generator = pipeline("text-generation")
customer_response = "I've read this book about 30 years ago and this is my bible since that moment."

full_text = text + "Customer response: " + customer_response
outputs = generator(full_text, max_length=300)
print(outputs[0]["generated_text"])