## 1. Token Classification / Named Entity Recognition

In [None]:
!pip install transformers

In [1]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# Single source of truth for the checkpoint used by both tokenizer and model
# (per its name: cased BERT-large fine-tuned on CoNLL-03 English).
NER_CHECKPOINT = "dbmdz/bert-large-cased-finetuned-conll03-english"

tokenizer = AutoTokenizer.from_pretrained(NER_CHECKPOINT)
model = AutoModelForTokenClassification.from_pretrained(NER_CHECKPOINT)

# aggregation_strategy="simple" groups word pieces into whole entities and
# yields dicts carrying an 'entity_group' key. It is the replacement for the
# deprecated grouped_entities=True flag and produces the same grouping.
nlp = pipeline("ner", model=model, tokenizer=tokenizer, aggregation_strategy="simple")

In [2]:
def extract_ents(sent, entity_group='PER'):
    """
    Author: Deepak John Reji
    Description: function to extract entities of one type (person names by
        default) from sentence/paragraphs, using the module-level NER
        pipeline `nlp`
    Input:
        sent: raw sentence/paragraph (str)
        entity_group: value of the 'entity_group' field to keep, e.g.
            'PER' (default), 'LOC', 'MISC' or 'ORG'
    Output: extracted entities in the form of list (the 'word' field of
        every matching result, in the order the pipeline returned them,
        duplicates kept). An empty list is returned when `sent` is not a
        non-blank string, e.g. None/NaN from an empty spreadsheet cell.

    """
    # Missing cells reach us as None/NaN when applied over a DataFrame column;
    # report "no entities" rather than passing a non-string to the pipeline.
    if not isinstance(sent, str) or not sent.strip():
        return []

    # `nlp` is resolved at call time from the notebook's global namespace.
    return [
        entity['word']
        for entity in nlp(sent)
        if entity['entity_group'] == entity_group
    ]


##### Example usages

Single sentence

In [3]:
# Quick check on one sentence; the call is the cell's last expression, so its
# result is displayed as the cell output.
example_sentence = "Deepak and Roshan are childhood friends."
extract_ents(example_sentence)

['Deepak', 'Roshan']

Executing on a batch or dataframe

In [4]:
import os
from pathlib import Path

import pandas as pd

# Folder that holds the workbook. Set the HF_TUTORIAL_DATA_DIR environment
# variable to point elsewhere; the fallback is the original author's folder.
# The file is read through an explicit path instead of os.chdir(), so the
# kernel's working directory is left untouched for every other cell.
DATA_DIR = Path(os.environ.get(
    'HF_TUTORIAL_DATA_DIR',
    r'C:\Users\deepak.reji\Documents\Python Scripts\Huggingface Tutorial',
))

data = pd.read_excel(DATA_DIR / 'Marvel Datastore.xlsx')
data.head()

Unnamed: 0,Sentences
0,"Thor Odinson is the Asgardian God of Thunder, ..."
1,"Upon being welcomed back to Asgard as a hero, ..."
2,Thor returned to Asgard having defeated his br...
3,Loki Laufeyson was the biological son of Laufe...
4,"Transported by the wormhole to Sanctuary, Loki..."


In [5]:
# extract_ents takes one text per call, so it can be handed to apply() as-is;
# every row of 'Sentences' gets its own list of extracted entities.
data['entity list'] = data['Sentences'].apply(extract_ents)
data.head()

Unnamed: 0,Sentences,entity list
0,"Thor Odinson is the Asgardian God of Thunder, ...","[Thor Odinson, God of Thunder, Thor, Odin, Tho..."
1,"Upon being welcomed back to Asgard as a hero, ...","[Thor, Loki, Thor, Loki, Loki]"
2,Thor returned to Asgard having defeated his br...,"[Thor, Thor, Thor, Jane Foster, Thor, Loki, Fo..."
3,Loki Laufeyson was the biological son of Laufe...,"[Loki Laufeyson, Laufey, Odin, Loki, Frigga, T..."
4,"Transported by the wormhole to Sanctuary, Loki...","[Loki, The Other, Thanos, Loki, Loki, Thanos, ..."


## 2. Text Classification

In [6]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Same checkpoint for tokenizer and model.
sst2_checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(sst2_checkpoint)
model = AutoModelForSequenceClassification.from_pretrained(sst2_checkpoint)

# Optionally pass device=0 to pipeline() to run on the first CUDA GPU,
# depending on GPU availability.
classifier = pipeline(
    task='text-classification',
    model=model,
    tokenizer=tokenizer,
)

Downloading:   0%|          | 0.00/629 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/268M [00:00<?, ?B/s]

Single sentence

In [8]:
# Classify one piece of text; the pipeline returns a list of label/score dicts.
sample_text = "The irony, of course, is that the exhibit that invites people to throw trash at vacuuming Ivanka Trump lookalike reflects every stereotype feminists claim to stand against, oversexualizing Ivanka’s body and ignoring her hard work."
classi_out = classifier(sample_text)
classi_out

[{'label': 'NEGATIVE', 'score': 0.9919543266296387}]

In [9]:
# Label of the first prediction in the list returned for the single input.
top_prediction = classi_out[0]
top_prediction['label']

'NEGATIVE'

In [10]:
# Score attached to the first prediction in the list.
top_prediction = classi_out[0]
top_prediction['score']

0.9919543266296387

Executing on a batch or dataframe

In [11]:
# Run the classifier once per row; each cell ends up holding the raw
# pipeline result (a list containing a label/score dict).
data['classification'] = data['Sentences'].map(
    lambda sentence: classifier(sentence)
)
data

Unnamed: 0,Sentences,entity list,classification
0,"Thor Odinson is the Asgardian God of Thunder, ...","[Thor Odinson, God of Thunder, Thor, Odin, Tho...","[{'label': 'POSITIVE', 'score': 0.987520217895..."
1,"Upon being welcomed back to Asgard as a hero, ...","[Thor, Loki, Thor, Loki, Loki]","[{'label': 'POSITIVE', 'score': 0.970530450344..."
2,Thor returned to Asgard having defeated his br...,"[Thor, Thor, Thor, Jane Foster, Thor, Loki, Fo...","[{'label': 'NEGATIVE', 'score': 0.992479979991..."
3,Loki Laufeyson was the biological son of Laufe...,"[Loki Laufeyson, Laufey, Odin, Loki, Frigga, T...","[{'label': 'NEGATIVE', 'score': 0.828045129776..."
4,"Transported by the wormhole to Sanctuary, Loki...","[Loki, The Other, Thanos, Loki, Loki, Thanos, ...","[{'label': 'NEGATIVE', 'score': 0.958619356155..."
5,Asgard was one of the Nine Realms and the home...,"[Thor, Loki, Surtur, Hela]","[{'label': 'POSITIVE', 'score': 0.939087033271..."
6,"Mjølnir (""that which smashes"")[1] was a powerf...","[Hela, Thor, Loki, Thor, Captain America, Than...","[{'label': 'POSITIVE', 'score': 0.996382296085..."
7,Stormbreaker is an enchanted axe used by Thor....,[Thor],"[{'label': 'POSITIVE', 'score': 0.987600088119..."
8,Doctor Jane Foster is one of the world's leadi...,"[Jane Foster, Thor, Loki, Loki]","[{'label': 'POSITIVE', 'score': 0.994690060615..."
9,She found out of Thor's return through the foo...,"[Thor, Foster, Thor, Thor, Malekith, Foster, T...","[{'label': 'NEGATIVE', 'score': 0.996562004089..."


In [13]:
# Unwrap the first prediction of every row once, then split it into a label
# column and a score column.
top_predictions = data['classification'].map(lambda predictions: predictions[0])
data['classification label'] = top_predictions.map(lambda prediction: prediction['label'])
data['classification score'] = top_predictions.map(lambda prediction: prediction['score'])
data

Unnamed: 0,Sentences,entity list,classification,classification label,classification score
0,"Thor Odinson is the Asgardian God of Thunder, ...","[Thor Odinson, God of Thunder, Thor, Odin, Tho...","[{'label': 'POSITIVE', 'score': 0.987520217895...",POSITIVE,0.98752
1,"Upon being welcomed back to Asgard as a hero, ...","[Thor, Loki, Thor, Loki, Loki]","[{'label': 'POSITIVE', 'score': 0.970530450344...",POSITIVE,0.97053
2,Thor returned to Asgard having defeated his br...,"[Thor, Thor, Thor, Jane Foster, Thor, Loki, Fo...","[{'label': 'NEGATIVE', 'score': 0.992479979991...",NEGATIVE,0.99248
3,Loki Laufeyson was the biological son of Laufe...,"[Loki Laufeyson, Laufey, Odin, Loki, Frigga, T...","[{'label': 'NEGATIVE', 'score': 0.828045129776...",NEGATIVE,0.828045
4,"Transported by the wormhole to Sanctuary, Loki...","[Loki, The Other, Thanos, Loki, Loki, Thanos, ...","[{'label': 'NEGATIVE', 'score': 0.958619356155...",NEGATIVE,0.958619
5,Asgard was one of the Nine Realms and the home...,"[Thor, Loki, Surtur, Hela]","[{'label': 'POSITIVE', 'score': 0.939087033271...",POSITIVE,0.939087
6,"Mjølnir (""that which smashes"")[1] was a powerf...","[Hela, Thor, Loki, Thor, Captain America, Than...","[{'label': 'POSITIVE', 'score': 0.996382296085...",POSITIVE,0.996382
7,Stormbreaker is an enchanted axe used by Thor....,[Thor],"[{'label': 'POSITIVE', 'score': 0.987600088119...",POSITIVE,0.9876
8,Doctor Jane Foster is one of the world's leadi...,"[Jane Foster, Thor, Loki, Loki]","[{'label': 'POSITIVE', 'score': 0.994690060615...",POSITIVE,0.99469
9,She found out of Thor's return through the foo...,"[Thor, Foster, Thor, Thor, Malekith, Foster, T...","[{'label': 'NEGATIVE', 'score': 0.996562004089...",NEGATIVE,0.996562


## 3. Zero Shot Classification

In [14]:
from transformers import pipeline
# Note: this rebinds `classifier`, which referred to the text-classification
# pipeline until this cell runs; from here on it is the zero-shot pipeline.
zero_shot_checkpoint = "facebook/bart-large-mnli"
classifier = pipeline(task="zero-shot-classification", model=zero_shot_checkpoint)

Downloading:   0%|          | 0.00/1.15k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

Single sentence

In [15]:
# Text to classify, and the labels the zero-shot model chooses between.
sequence_to_classify = "one day I will see the world"
candidate_labels = [
    'travel',
    'cooking',
    'dancing',
]

In [17]:
# Score the single sequence against every candidate label.
zero_shot_out = classifier(
    sequence_to_classify,
    candidate_labels=candidate_labels,
)
zero_shot_out

{'sequence': 'one day I will see the world',
 'labels': ['travel', 'dancing', 'cooking'],
 'scores': [0.9938651323318481, 0.0032737781293690205, 0.0028610320296138525]}

Executing on a batch or dataframe

In [19]:
# New label set for the Marvel sentences; every row is scored against all of
# them and the full result dict is stored in the new column.
candidate_labels = ['comics', 'nature', 'warrior']
data['zero shot classification'] = data['Sentences'].map(
    lambda sentence: classifier(sentence, candidate_labels=candidate_labels)
)
data

Unnamed: 0,Sentences,entity list,classification,classification label,classification score,zero shot classification
0,"Thor Odinson is the Asgardian God of Thunder, ...","[Thor Odinson, God of Thunder, Thor, Odin, Tho...","[{'label': 'POSITIVE', 'score': 0.987520217895...",POSITIVE,0.98752,{'sequence': 'Thor Odinson is the Asgardian Go...
1,"Upon being welcomed back to Asgard as a hero, ...","[Thor, Loki, Thor, Loki, Loki]","[{'label': 'POSITIVE', 'score': 0.970530450344...",POSITIVE,0.97053,{'sequence': 'Upon being welcomed back to Asga...
2,Thor returned to Asgard having defeated his br...,"[Thor, Thor, Thor, Jane Foster, Thor, Loki, Fo...","[{'label': 'NEGATIVE', 'score': 0.992479979991...",NEGATIVE,0.99248,{'sequence': 'Thor returned to Asgard having d...
3,Loki Laufeyson was the biological son of Laufe...,"[Loki Laufeyson, Laufey, Odin, Loki, Frigga, T...","[{'label': 'NEGATIVE', 'score': 0.828045129776...",NEGATIVE,0.828045,{'sequence': 'Loki Laufeyson was the biologica...
4,"Transported by the wormhole to Sanctuary, Loki...","[Loki, The Other, Thanos, Loki, Loki, Thanos, ...","[{'label': 'NEGATIVE', 'score': 0.958619356155...",NEGATIVE,0.958619,{'sequence': 'Transported by the wormhole to S...
5,Asgard was one of the Nine Realms and the home...,"[Thor, Loki, Surtur, Hela]","[{'label': 'POSITIVE', 'score': 0.939087033271...",POSITIVE,0.939087,{'sequence': 'Asgard was one of the Nine Realm...
6,"Mjølnir (""that which smashes"")[1] was a powerf...","[Hela, Thor, Loki, Thor, Captain America, Than...","[{'label': 'POSITIVE', 'score': 0.996382296085...",POSITIVE,0.996382,"{'sequence': 'Mjølnir (""that which smashes"")[1..."
7,Stormbreaker is an enchanted axe used by Thor....,[Thor],"[{'label': 'POSITIVE', 'score': 0.987600088119...",POSITIVE,0.9876,{'sequence': 'Stormbreaker is an enchanted axe...
8,Doctor Jane Foster is one of the world's leadi...,"[Jane Foster, Thor, Loki, Loki]","[{'label': 'POSITIVE', 'score': 0.994690060615...",POSITIVE,0.99469,{'sequence': 'Doctor Jane Foster is one of the...
9,She found out of Thor's return through the foo...,"[Thor, Foster, Thor, Thor, Malekith, Foster, T...","[{'label': 'NEGATIVE', 'score': 0.996562004089...",NEGATIVE,0.996562,{'sequence': 'She found out of Thor's return t...


In [20]:
# Each stored result is a dict; copy its whole 'labels' list and its whole
# 'scores' list into separate columns (the cells hold lists, not single values).
zero_shot_results = data['zero shot classification']
data['zero shot classification label'] = zero_shot_results.map(lambda result: result['labels'])
data['zero shot classification score'] = zero_shot_results.map(lambda result: result['scores'])
data

Unnamed: 0,Sentences,entity list,classification,classification label,classification score,zero shot classification,zero shot classification label,zero shot classification score
0,"Thor Odinson is the Asgardian God of Thunder, ...","[Thor Odinson, God of Thunder, Thor, Odin, Tho...","[{'label': 'POSITIVE', 'score': 0.987520217895...",POSITIVE,0.98752,{'sequence': 'Thor Odinson is the Asgardian Go...,"[warrior, nature, comics]","[0.9035865664482117, 0.053326088935136795, 0.0..."
1,"Upon being welcomed back to Asgard as a hero, ...","[Thor, Loki, Thor, Loki, Loki]","[{'label': 'POSITIVE', 'score': 0.970530450344...",POSITIVE,0.97053,{'sequence': 'Upon being welcomed back to Asga...,"[warrior, comics, nature]","[0.872516393661499, 0.06743672490119934, 0.060..."
2,Thor returned to Asgard having defeated his br...,"[Thor, Thor, Thor, Jane Foster, Thor, Loki, Fo...","[{'label': 'NEGATIVE', 'score': 0.992479979991...",NEGATIVE,0.99248,{'sequence': 'Thor returned to Asgard having d...,"[warrior, nature, comics]","[0.850460946559906, 0.11994372308254242, 0.029..."
3,Loki Laufeyson was the biological son of Laufe...,"[Loki Laufeyson, Laufey, Odin, Loki, Frigga, T...","[{'label': 'NEGATIVE', 'score': 0.828045129776...",NEGATIVE,0.828045,{'sequence': 'Loki Laufeyson was the biologica...,"[warrior, nature, comics]","[0.7911844849586487, 0.16940189898014069, 0.03..."
4,"Transported by the wormhole to Sanctuary, Loki...","[Loki, The Other, Thanos, Loki, Loki, Thanos, ...","[{'label': 'NEGATIVE', 'score': 0.958619356155...",NEGATIVE,0.958619,{'sequence': 'Transported by the wormhole to S...,"[warrior, nature, comics]","[0.7974494099617004, 0.11554315686225891, 0.08..."
5,Asgard was one of the Nine Realms and the home...,"[Thor, Loki, Surtur, Hela]","[{'label': 'POSITIVE', 'score': 0.939087033271...",POSITIVE,0.939087,{'sequence': 'Asgard was one of the Nine Realm...,"[warrior, nature, comics]","[0.5355643033981323, 0.4095378518104553, 0.054..."
6,"Mjølnir (""that which smashes"")[1] was a powerf...","[Hela, Thor, Loki, Thor, Captain America, Than...","[{'label': 'POSITIVE', 'score': 0.996382296085...",POSITIVE,0.996382,"{'sequence': 'Mjølnir (""that which smashes"")[1...","[warrior, nature, comics]","[0.7085233330726624, 0.20401310920715332, 0.08..."
7,Stormbreaker is an enchanted axe used by Thor....,[Thor],"[{'label': 'POSITIVE', 'score': 0.987600088119...",POSITIVE,0.9876,{'sequence': 'Stormbreaker is an enchanted axe...,"[warrior, nature, comics]","[0.6454160809516907, 0.2828575670719147, 0.071..."
8,Doctor Jane Foster is one of the world's leadi...,"[Jane Foster, Thor, Loki, Loki]","[{'label': 'POSITIVE', 'score': 0.994690060615...",POSITIVE,0.99469,{'sequence': 'Doctor Jane Foster is one of the...,"[warrior, nature, comics]","[0.7060877680778503, 0.18263615667819977, 0.11..."
9,She found out of Thor's return through the foo...,"[Thor, Foster, Thor, Thor, Malekith, Foster, T...","[{'label': 'NEGATIVE', 'score': 0.996562004089...",NEGATIVE,0.996562,{'sequence': 'She found out of Thor's return t...,"[warrior, nature, comics]","[0.7804948687553406, 0.12850791215896606, 0.09..."


## 4. Question Answering

In [28]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

# Note: this rebinds `nlp`. extract_ents() looks `nlp` up when it is called,
# so after this cell runs it would receive this question-answering pipeline
# rather than the NER pipeline; re-run the NER setup cell before using
# extract_ents() again.
model_name = "distilbert-base-cased-distilled-squad"
nlp = pipeline(
    task='question-answering',
    model=model_name,
    tokenizer=model_name,
)

Downloading:   0%|          | 0.00/473 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/261M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/436k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Single sentence

In [29]:
# A question together with the passage the answer must be extracted from.
question_text = 'Why is model conversion important?'
context_text = 'The option to convert models between FARM and transformers gives freedom to the user and let people easily switch between frameworks.'
QA_input = dict(question=question_text, context=context_text)

res = nlp(QA_input)
res

{'score': 0.29852819442749023,
 'start': 59,
 'end': 132,
 'answer': 'gives freedom to the user and let people easily switch between frameworks'}

Executing on a batch or dataframe

In [30]:
# Use every row of the 'Sentences' column, joined into one passage, as context.
all_sentences = data['Sentences'].tolist()
combined_context = '. '.join(all_sentences)
QA_input = dict(question='Who is thor?', context=combined_context)

res = nlp(QA_input)
res

{'score': 0.8303152322769165, 'start': 7529, 'end': 7535, 'answer': 'father'}

In [31]:
# Same combined passage as context, with a different question.
all_sentences = data['Sentences'].tolist()
combined_context = '. '.join(all_sentences)
QA_input = dict(question="What is thor's weapon?", context=combined_context)

res = nlp(QA_input)
res

{'score': 0.641624391078949,
 'start': 4564,
 'end': 4576,
 'answer': 'Stormbreaker'}