In [1]:
#text classification

import pandas as pd
from transformers import pipeline, set_seed

# Sample customer complaint fed to the pipelines in this notebook.
# The literal begins and ends with a newline and its internal whitespace is
# significant: character offsets (start/end) reported by the pipelines index
# into this exact string, so do not reflow or strip it.
text = '''
Dear Amazon, last week I ordered an Optimus Prime action figure from your online store in Germany. Unfortunately, when I  opened the package, I discovered to my horror that I 
has been sent an action figure of Megatron instead! As a lifelong enemy of the Decepticons, I hope you can understand my dilemma. To resolve the issue, I demand an exchange of Megatron for the Optimus Prime
figure I ordered. Enclosed are copies of my records concerning this purchase. I expect to hear from you soon. Sincerely, Bumblebee.
'''

# Sentiment classification of the complaint.
# The checkpoint is named explicitly instead of relying on the library's
# default-model fallback: this keeps the result reproducible across
# transformers versions and avoids the "No model was supplied" warning.
classifier = pipeline(
    'text-classification',
    model='distilbert-base-uncased-finetuned-sst-2-english',
)
outputs = classifier(text)
# The pipeline returns a list of {label, score} dicts; show it as a table.
pd.DataFrame(outputs)

  from .autonotebook import tqdm as notebook_tqdm
No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


Unnamed: 0,label,score
0,NEGATIVE,0.934849


In [2]:
# Named entity recognition
# The checkpoint is named explicitly instead of relying on the library's
# default-model fallback, so the tagger is reproducible and the
# "No model was supplied" warning is not emitted.
ner_tagger = pipeline(
    "ner",
    model="dbmdz/bert-large-cased-finetuned-conll03-english",
    # Ask the pipeline to group tokens into entity spans; the result rows
    # carry an `entity_group` plus start/end character offsets into `text`.
    aggregation_strategy="simple",
)
outputs = ner_tagger(text)
pd.DataFrame(outputs)

No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


Unnamed: 0,entity_group,score,word,start,end
0,ORG,0.881478,Amazon,6,12
1,MISC,0.990439,Optimus Prime,37,50
2,LOC,0.999753,Germany,91,98
3,MISC,0.564085,Mega,211,215
4,PER,0.588245,##tron,215,219
5,ORG,0.672827,Decept,256,262
6,MISC,0.495363,##icons,262,267
7,MISC,0.765792,Megatron,353,361
8,MISC,0.987283,Optimus Prime,370,383
9,PER,0.812155,Bumblebee,505,514


In [3]:
# Question answering (extractive: the answer is a span of `text`)

# The checkpoint is named explicitly instead of relying on the library's
# default-model fallback, so the reader is reproducible and the
# "No model was supplied" warning is not emitted.
reader = pipeline(
    'question-answering',
    model='distilbert-base-cased-distilled-squad',
)
question = 'What does the customer want?'
outputs = reader(question=question, context=text)
# A single question yields one result dict, so wrap it in a list to get a
# one-row table (score, start, end, answer).
pd.DataFrame([outputs])

No model was supplied, defaulted to distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


Unnamed: 0,score,start,end,answer
0,0.622535,338,361,an exchange of Megatron


In [4]:
# Summarization

# The checkpoint is named explicitly instead of relying on the library's
# default-model fallback, so the summary is reproducible and the
# "No model was supplied" warning is not emitted.
summarizer = pipeline('summarization', model='sshleifer/distilbart-cnn-12-6')

# min_length / max_length bound the length of the generated summary.
# Known issue: the summary is not fully faithful to the input -- the second
# sentence it produces is inaccurate -- so read the printed text critically.
outputs = summarizer(
    text,
    min_length=20,
    max_length=60,
    clean_up_tokenization_spaces=True,
)
print(outputs[0]['summary_text'])

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.


 Bumblebee demands an exchange of Megatron for the Optimus Prime figure he ordered. The Decepticons are a lifelong enemy of the Decepticon.


In [5]:
# Translation (English -> Hindi)
# Known issue: the Hindi produced for this input is not usable (names are
# mangled and parts of the letter are dropped). The fine-tuned variant of the
# same base model was tried as well, with the same result:
#   pipeline('translation_en_to_hi', model='anjankumar/Anjan-finetuned-iitbombay-en-to-hi')
translator = pipeline(
    task='translation_en_to_hi',
    model='Helsinki-NLP/opus-mt-en-hi',
)
outputs = translator(text, clean_up_tokenization_spaces=True)
print(outputs[0]['translation_text'])


प्रिय एमिला, पिछले सप्ताह मैंने जर्मनी में आपके ऑनलाइन स्टोर से एक ऑपिश क्रिया आकृति का आदेश दिया. दुःख की बात है, जब मैंने पैकेज खोला है, मुझे डर लगा कि मैं मेगानो की एक कार्य आकृति भेजा गया है! इसके बजाय, मैं आशा करता हूँ कि आप मेरी दुविधा को समझ सकते हैं. मुझे लगता है कि Megoligigiocighate के मामले की मांग की मांग की है, मैं अपनी दुकान के बारे में जल्द ही सुन रहा हूँ.


In [6]:
# Text generation: let the model draft a customer-service reply.
generator = pipeline('text-generation', model='gpt2-large')
prompt = text + '\n\nCustomer service response: Dear Bumblebee, I am sorry to hear that your order was mixed up.'

# Generation may sample tokens, so fix the RNG seed right before generating
# to make the printed continuation repeatable across runs.
set_seed(42)
outputs = generator(
    prompt,
    min_length=100,
    max_length=200,
    # Pass the padding token explicitly (same value the library falls back
    # to) to avoid the "Setting `pad_token_id` to `eos_token_id`" warning.
    pad_token_id=generator.tokenizer.eos_token_id,
)
# `generated_text` contains the prompt followed by the continuation.
print(outputs[0]['generated_text'])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Dear Amazon, last week I ordered an Optimus Prime action figure from your online store in Germany. Unfortunately, when I  opened the package, I discovered to my horror that I 
has been sent an action figure of Megatron instead! As a lifelong enemy of the Decepticons, I hope you can understand my dilemma. To resolve the issue, I demand an exchange of Megatron for the Optimus Prime
figure I ordered. Enclosed are copies of my records concerning this purchase. I expect to hear from you soon. Sincerely, Bumblebee.


Customer service response: Dear Bumblebee, I am sorry to hear that your order was mixed up. Please understand that our standard policy does not allow for such actions so, please accept my apologies. I'll have your order dispatched as soon as possible and should receive a response from you soon thereafter. Thanks for your understanding.

Dear Bumblebee, I have no problem with Transformers: The Last Knight, but


In [19]:
# Datasets: list the dataset identifiers available on the Hub and preview
# the first ten of them.
# NOTE(review): `list_datasets` may be deprecated or removed in newer
# `datasets` releases -- confirm against the installed version.
from datasets import list_datasets
all_datasets = list_datasets()
# Only a slice is printed; the full list is long.
print(f"The first 10 datasets are: {all_datasets[:10]}")

The first 10 datasets are: ['acronym_identification', 'ade_corpus_v2', 'adversarial_qa', 'aeslc', 'afrikaans_ner_corpus', 'ag_news', 'ai2_arc', 'air_dialogue', 'ajgt_twitter_ar', 'allegro_reviews']
All datasets are: ['acronym_identification', 'ade_corpus_v2', 'adversarial_qa', 'aeslc', 'afrikaans_ner_corpus', 'ag_news', 'ai2_arc', 'air_dialogue', 'ajgt_twitter_ar', 'allegro_reviews', 'allocine', 'alt', 'amazon_polarity', 'amazon_reviews_multi', 'amazon_us_reviews', 'ambig_qa', 'americas_nli', 'ami', 'amttl', 'anli', 'app_reviews', 'aqua_rat', 'aquamuse', 'ar_cov19', 'ar_res_reviews', 'ar_sarcasm', 'arabic_billion_words', 'arabic_pos_dialect', 'arabic_speech_corpus', 'arcd', 'arsentd_lev', 'art', 'arxiv_dataset', 'ascent_kb', 'aslg_pc12', 'asnq', 'asset', 'assin', 'assin2', 'atomic', 'autshumato', 'facebook/babi_qa', 'banking77', 'bbaw_egyptian', 'bbc_hindi_nli', 'bc2gm_corpus', 'beans', 'best2009', 'bianet', 'bible_para', 'big_patent', 'billsum', 'bing_coronavirus_query_set', 'biomrc',

In [None]:
# Load the 'emotion' dataset and print the returned object to inspect it.
from datasets import load_dataset

emotions = load_dataset(path='emotion')
print(emotions)