In [3]:
from transformers import pipeline

# Pipeline of 3 main steps 
1. The text is preprocessed into a format that the model can understand
2. The preprocessed inputs are passed to the model 
3. The predictions of the model are post-processed

# Available pipelines
- sentiment-analysis
- zero-shot-classification
- text-generation
- feature-extraction (get the vector representation of a text)
- fill-mask
- ner (named entity recognition)
- question-answering
- summarization
- translation

In [2]:
# Build the default sentiment-analysis pipeline. On first use, the model (one
# that has been fine-tuned on English text) is downloaded and cached.
classifier = pipeline("sentiment-analysis")

# The classifier accepts either a single sentence or a list of sentences.
# Each entry below is passed to the pipeline in turn and its result printed.
sentiment_inputs = [
    "I've been waiting for a Hugging Face course my whole life",
    ["I've been waiting for a Hugging Face course my whole life", "I hate this so much"],
    # The same sentence in upper and lower case, to compare the predictions.
    ["I HATE THIS SO MUCH", "I hate this so much"],
]

for text in sentiment_inputs:
    output = classifier(text)
    print(output)

Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/268M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

[{'label': 'POSITIVE', 'score': 0.9982088208198547}]
[{'label': 'POSITIVE', 'score': 0.9982088208198547}, {'label': 'NEGATIVE', 'score': 0.9995144605636597}]
[{'label': 'NEGATIVE', 'score': 0.9995144605636597}, {'label': 'NEGATIVE', 'score': 0.9995144605636597}]


In [4]:
from transformers import pipeline

# Zero-shot classification returns a probability score for every label in a
# list you supply, without any training on those labels, so you do not need
# to rely on a fixed set of labels.
classifier = pipeline("zero-shot-classification")

sequences = [
    "This is a netflix series about the Queens Gambit",
    "NVIDIA GPUs are commonly used for deep learning training",
    "This is a tumbler",
]
labels = ["drama", "computer", "random"]

output = classifier(sequences, candidate_labels=labels)

print(output)

[{'sequence': 'This is a netflix series about the Queens Gambit', 'labels': ['drama', 'computer', 'random'], 'scores': [0.9526435732841492, 0.03914288803935051, 0.008213580586016178]}, {'sequence': 'NVIDIA GPUs are commonly used for deep learning training', 'labels': ['computer', 'random', 'drama'], 'scores': [0.9674652218818665, 0.01740030199289322, 0.015134437009692192]}, {'sequence': 'This is a tumbler', 'labels': ['drama', 'computer', 'random'], 'scores': [0.6555460095405579, 0.19893909990787506, 0.14551487565040588]}]


In [12]:
# Text generation involves randomness, so it's normal not to get the same results every time

from transformers import pipeline


prompt_text = "Hello World. "
generator = pipeline("text-generation")

# First call: generate from the prompt with the pipeline's default settings.
output = generator(prompt_text)
print(output)

# Second call: tune the generation with keyword arguments.
#   num_return_sequences - how many different sequences are generated
#   max_length           - the total length of the output text
generation_options = {"num_return_sequences": 2, "max_length": 50}
output = generator(prompt_text, **generation_options)
print(output)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'Hello World. \xa0It\'s also worth noting that this week\'s "Wizard of Oz" has had a few slight alterations to the story and overall tone. \xa0It\'s a bit more like "The Wizard of Oz," a version we'}]
[{'generated_text': 'Hello World. \xa0The name is derived from a French word for water, water, water and water. \xa0There may be many meanings of the phrase "water," and we might think of the word (and it is quite a simple name'}, {'generated_text': 'Hello World. \xa0On the third and final day of the conference, I asked my colleagues who my main focus should be here because I knew they would have to be ready for that meeting and also had to accept that a lot of these kinds of'}]


In [15]:
from transformers import pipeline


# Same text-generation task, but with an explicitly chosen model
# ("distilgpt2") passed to pipeline() via the `model` argument.
generator = pipeline("text-generation", model="distilgpt2")

prompt_text = "Christmas Eve is "
output = generator(
    prompt_text,
    max_length=50,
    num_return_sequences=2,
)

print(output)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'Christmas Eve is iced from apple juice for a healthy and easy to prepare holiday dinner with healthy options.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n'}, {'generated_text': 'Christmas Eve is iced with vanilla ice cream and vanilla ice cream. You can also just use something frozen, ice cream, or a bit more salt or coconut.'}]


In [20]:
from transformers import pipeline

unmasker = pipeline("fill-mask")

# The mask token is the placeholder word that the model is asked to fill in.
# Different mask-filling models use different mask tokens, so read the token
# from the pipeline's tokenizer instead of hard-coding "<mask>". This keeps
# the prompt valid if another model is passed to pipeline().
mask_token = unmasker.tokenizer.mask_token

# top_k controls how many possibilities are displayed.
# The call is left as the last expression so the notebook displays its result.
unmasker(f"Women is {mask_token} human", top_k=2)

[{'sequence': 'Women is inherently human',
  'score': 0.303619384765625,
  'token': 22646,
  'token_str': ' inherently'},
 {'sequence': 'Women is fundamentally human',
  'score': 0.10839539021253586,
  'token': 16894,
  'token_str': ' fundamentally'}]

In [27]:
from transformers import pipeline

# The "ner" (named entity recognition) pipeline tags the parts of the input
# text that name an entity, e.g. PER or LOC.
#
# aggregation_strategy="simple" regroups the parts of the sentence that
# belong to the same entity. It replaces the deprecated grouped_entities=True
# flag and produces the same grouping.
classifier = pipeline("ner", aggregation_strategy="simple")

output = classifier("Benny stays in an orphanage at Penang, Malaysia")

print(output)

[{'entity_group': 'PER', 'score': 0.9976969957351685, 'word': 'Benny', 'start': 0, 'end': 5}, {'entity_group': 'LOC', 'score': 0.9990065693855286, 'word': 'Penang', 'start': 31, 'end': 37}, {'entity_group': 'LOC', 'score': 0.9998182654380798, 'word': 'Malaysia', 'start': 39, 'end': 47}]


In [26]:
from transformers import pipeline

# The question-answering pipeline extracts its answer from the supplied
# context; it does not generate new text.
classifier = pipeline("question-answering")

qa_input = {
    "question": "Where do i work",
    "context": "Hi there. Intel is where I worked at. ",
}
output = classifier(**qa_input)

print(output)

Downloading:   0%|          | 0.00/473 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/261M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

{'score': 0.9885739684104919, 'start': 10, 'end': 15, 'answer': 'Intel'}


In [28]:
from transformers import pipeline

summarizer = pipeline("summarization")

# As with text generation, you can specify a max_length or min_length for the result.

# News article to be summarized, kept verbatim.
article = """KUALA LUMPUR: The Malay rulers are of the opinion that there is no need for the state of emergency to be extended after the Aug 1, 2021 deadline.

This was after today's Special Discussion of the Malay Rulers with the Yang di-Pertuan Agong, held at Istana Negara, said Keeper of the Rulers' Seal, Tan Sri Syed Danial Syed Ahmad.

He said in a statement today the Malay rulers, including Yang di-Pertuan Agong Al-Sultan Abdullah Ri'ayatuddin Mustafa Billah Shah, had expressed their opinions on several matters.

These include the high daily number of Covid-19 cases, the country's worrying state of finance and economy, unstable political climate, disunity among the people, issues on well-being, unemployment, the education system, the inability of Muslims to perform religious obligations such as the Friday prayers as well as people of other religions being restricted from their religious activities.

MORE NEWS
Emergency and Sarawak state elections
Sabah gov't supports State of Emergency move
Reconvene by all means, but no more drama, please!
PM: Malaysia still open for business despite State of Emergency
"The Malay rulers are of the same opinion on these matters: The people's lives and livelihood must be prioritised above all else. (Second) the vaccination process must be expedited by reducing bureaucracy so that the 80 per cent herd immunity target can be achieved.

"(Third) Covid-19 management programmes must be understood and supported by the public without raising any doubts or being perceived as a political agenda.


"(Fourth) the methods of handling the Covid-19 virus must be inclusive, involving various stakeholders while instilling the willing spirit to listen, learn, making adjustments and improvements and willingness to explore new methods, so that the people will be confident and give their support.

"(Fifth) the hot political climate must be curbed. (Sixth) it is important to have a stable government that has the support and confidence of the majority of the people.

"(Lastly), there is no necessity to place the country under a state of emergency after Aug 1, 2021," the statement read.

The Malay rulers also defended the Agong's call earlier for Parliament to sit as soon as possible, saying that the check and balance mechanism between the executives, legislative and judiciary must be respected.

They said this was to ensure transparent administrative works, integrity and accountability to the people, especially on matters involving finance and the country's spending.

The rulers also called on state legislative assemblies to reconvene immediately by observing all Covid-19 standard operating procedure (SOP) to avoid further infection.

"Methods and procedures practiced by other countries when sitting (for parliament) proved that the chain of Covid-19 infection can be curbed and therefore, it is fitting that (these methods) be introduced and practiced in this country."

"""

output = summarizer(article)

print(output)

Downloading:   0%|          | 0.00/1.80k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448216815/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


[{'summary_text': " The Malay rulers are of the opinion that there is no need for the state of emergency to be extended after the Aug 1, 2021 deadline . This was after today's Special Discussion of the Malay Rulers with the Yang di-Pertuan Agong, held at Istana Negara ."}]


In [1]:
#!pip install transformers[sentencepiece]

from transformers import pipeline

# As with text generation and summarization, you can specify a max_length or
# min_length for the result.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en")

french_text = "Ce cours est produit par Hugging Face."
output = translator(french_text)

print(output)


Downloading:   0%|          | 0.00/802k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/778k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.34M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448216815/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


[{'translation_text': 'This course is produced by Hugging Face.'}]
