In [1]:
import torch

import transformers
from transformers import pipeline
from transformers import AutoModelForSequenceClassification, AutoTokenizer

import numpy as np
import pandas as pd


In [None]:
# Zero-shot classification pipeline using the `facebook/bart-large-mnli` checkpoint.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

In [3]:
# Demo inputs: the label set and the free-text sequences to score against it.
candidate_labels = ['travel', 'cooking', 'dancing', 'exploration']
sequences_to_classify = [
    "one day I will see the world",
    "best steak in america",
]

# Default (single-label) call: returns one dict per sequence, with the labels
# sorted by descending score.
classifier(sequences_to_classify, candidate_labels=candidate_labels)

[{'sequence': 'one day I will see the world',
  'labels': ['travel', 'exploration', 'dancing', 'cooking'],
  'scores': [0.7957560420036316,
   0.1993318647146225,
   0.0026212281081825495,
   0.0022907459642738104]},
 {'sequence': 'best steak in america',
  'labels': ['cooking', 'exploration', 'travel', 'dancing'],
  'scores': [0.7510271668434143,
   0.1638369858264923,
   0.06331372261047363,
   0.02182212844491005]}]

In [4]:
# Inspect which concrete pipeline class `pipeline(...)` returned.
classifier.__class__

transformers.pipelines.zero_shot_classification.ZeroShotClassificationPipeline

In [4]:
# Same inputs, but with multi_label=True; in this mode the scores of one
# sequence are no longer constrained to sum to 1 across the labels.
results = classifier(
    sequences_to_classify,
    candidate_labels=candidate_labels,
    multi_label=True,
)

In [5]:
# Display the multi-label results: one dict per input sequence with the keys
# 'sequence', 'labels' and 'scores'.
results

[{'sequence': 'one day I will see the world',
  'labels': ['travel', 'exploration', 'dancing', 'cooking'],
  'scores': [0.994511067867279,
   0.9383884072303772,
   0.005706187337636948,
   0.001819287077523768]},
 {'sequence': 'best steak in america',
  'labels': ['cooking', 'exploration', 'travel', 'dancing'],
  'scores': [0.5870057344436646,
   0.007772187702357769,
   0.0012448065681383014,
   9.23419720493257e-05]}]

In [14]:
# Pivot the multi-label results into a (sequence x label) score table.
#
# The frame is initialised with 0.0 (float) rather than 0 (int): the scores are
# floats, and writing floats into an int64 column relies on silent upcasting,
# which recent pandas versions deprecate with a FutureWarning.
df = pd.DataFrame(0.0, index=sequences_to_classify, columns=candidate_labels)

for r in results:
    # Each result lists its labels sorted by score, so the order differs from
    # `candidate_labels`; place every score by label name, not by position.
    for label, score in zip(r['labels'], r['scores']):
        df.loc[r['sequence'], label] = score

df

Unnamed: 0,travel,cooking,dancing,exploration
one day I will see the world,0.994511,0.001819,0.005706,0.938388
best steak in america,0.001245,0.587006,9.2e-05,0.007772


In [7]:
# Manual, step-by-step walk-through: load the NLI checkpoint and its tokenizer
# directly instead of going through `pipeline(...)`.
checkpoint = 'facebook/bart-large-mnli'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
nli_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

In [21]:
# Walk through a single example: the first demo sequence. (The token ids and
# scores displayed in the following cells correspond to this sequence.)
sequence = sequences_to_classify[0]

# NLI framing: the text is the premise, and every candidate label is turned
# into one hypothesis sentence, giving one (premise, hypothesis) pair per label.
hypotheses = [f'This example is {label}.' for label in candidate_labels]
premises = [sequence] * len(hypotheses)

# `truncation='only_first'` truncates only the premise if a pair is too long
# (it replaces the deprecated `truncation_strategy=` keyword).
# `padding=True` keeps the batch rectangular when labels tokenize to different
# lengths; it is a no-op when all pairs already have the same length.
x = tokenizer(premises, hypotheses, return_tensors='pt',
              padding=True, truncation='only_first')

x



{'input_ids': tensor([[   0, 1264,  183,   38,   40,  192,    5,  232,    2,    2,  713, 1246,
           16, 1504,    4,    2],
        [   0, 1264,  183,   38,   40,  192,    5,  232,    2,    2,  713, 1246,
           16, 6836,    4,    2],
        [   0, 1264,  183,   38,   40,  192,    5,  232,    2,    2,  713, 1246,
           16, 7950,    4,    2],
        [   0, 1264,  183,   38,   40,  192,    5,  232,    2,    2,  713, 1246,
           16, 6942,    4,    2]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [29]:
# Forward pass over the tokenized (premise, hypothesis) pairs.
y = nli_model(**x)

# Raw class scores: one row per pair, i.e. one row per candidate label.
logits = y["logits"]
logits

tensor([[-3.0856,  1.0612,  2.1139],
        [ 2.5710,  1.2230, -3.7365],
        [ 1.5588,  1.7296, -3.6017],
        [-1.9937,  1.3917,  0.7296]], grad_fn=<AddmmBackward0>)

In [31]:
# Discard the "neutral" logit (column 1) and keep columns 0 and 2. After a
# softmax over that pair, the second output column -- the "entailment"
# probability -- is taken as the probability that the label is true.
# (Column 0 is presumably "contradiction", going by the variable name.)
e_c_logits = logits[:, [0, 2]]
torch.softmax(e_c_logits, dim=1)

tensor([[0.0055, 0.9945],
        [0.9982, 0.0018],
        [0.9943, 0.0057],
        [0.0616, 0.9384]], grad_fn=<SoftmaxBackward0>)