In [None]:
from transformers import pipeline

In [None]:
# Build the zero-shot classification pipeline that the later cells reuse.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

In [None]:
# Sample 1: score a strongly negative review against the sentiment labels.
candidate_labels_sentiment = ["positive", "negative"]
text1 = "I hate this movie and I don't recommend people to watch this."

# multi_label=True asks the pipeline to score each candidate label on its own
# (see the transformers zero-shot pipeline documentation).
classifier(
    text1,
    candidate_labels=candidate_labels_sentiment,
    multi_label=True,
)

In [None]:
# Sample 2: score a positive review against the sentiment labels defined earlier.
text2 = "I really love this movie!"

classifier(
    text2,
    candidate_labels=candidate_labels_sentiment,
    multi_label=True,
)

In [None]:
# Sample 3: score a football match report against the news-topic labels.
candidate_labels_news = ["world", "sports", "business", "sci/tech"]
text3 = "Liverpool were not given a penalty by VAR in the final minute of their pulsating 1-1 draw against Manchester City on Super Sunday."

classifier(
    text3,
    candidate_labels=candidate_labels_news,
    multi_label=True,
)

In [None]:
# Sample 4: score a technology announcement against the news-topic labels defined earlier.
text4 = "Microsoft has expanded the availability of its AI-powered cybersecurity assistant, Copilot for Security, using the power of generative AI (Gen AI)"

classifier(
    text4,
    candidate_labels=candidate_labels_news,
    multi_label=True,
)

In [None]:
# Evaluation
from datasets import load_dataset, load_metric
import numpy as np
import math
from sklearn.metrics import classification_report

### IMDB: Sentiment Analysis

In [None]:
# Load the IMDB dataset; the cells below read its "train" and "test" splits.
dataset = load_dataset("imdb")

In [None]:
# Show the first training record to see which fields each example carries.
first_train_example = dataset["train"][0]
print(first_train_example)

In [None]:
from tqdm.notebook import tqdm

# Number of reviews handed to the classifier per call.
BATCH_SIZE = 16

candidate_labels = ["positive", "negative"]
# Integer ids the predicted label strings are encoded as before they are
# compared with the dataset's "label" column in the report cell.
label_to_id = {"negative": 0, "positive": 1}

test_split = dataset["test"]

predictions = []
for start in tqdm(range(0, len(test_split), BATCH_SIZE)):
    # Slicing a split returns a dict of column lists and clips at the end of
    # the data, so the final (shorter) batch needs no explicit bounds check
    # and rows are not fetched one by one.
    batch_texts = test_split[start:start + BATCH_SIZE]["text"]
    preds = classifier(batch_texts, candidate_labels)
    # Take the highest-scoring label for each text.
    pred_labels = [pred["labels"][np.argmax(pred["scores"])] for pred in preds]
    predictions.extend(label_to_id[label] for label in pred_labels)

In [None]:
# Read the gold labels straight from the "label" column instead of iterating
# over whole rows, which would also materialise every review's text.
y_true = list(dataset["test"]["label"])
print(classification_report(y_true, predictions, digits=4))

### AG_News: News Categorization

In [None]:
# Load AG News; note this rebinds `dataset`, which previously held IMDB.
dataset = load_dataset("ag_news")

In [None]:
# Number of articles handed to the classifier per call.
BATCH_SIZE = 16

# The position of each label in this list is used as its predicted class id,
# so the order has to follow the integer labels stored in the dataset.
candidate_labels = ["world", "sports", "business", "sci/tech"]
label_to_id = {label: idx for idx, label in enumerate(candidate_labels)}

test_split = dataset["test"]

predictions = []
for start in tqdm(range(0, len(test_split), BATCH_SIZE)):
    # Slicing a split returns a dict of column lists and clips at the end of
    # the data, so the final batch needs no explicit bounds check and rows
    # are not fetched one by one.
    batch_texts = test_split[start:start + BATCH_SIZE]["text"]
    preds = classifier(batch_texts, candidate_labels)
    # Take the highest-scoring label for each text.
    pred_labels = [pred["labels"][np.argmax(pred["scores"])] for pred in preds]
    predictions.extend(label_to_id[label] for label in pred_labels)

In [None]:
# Read the gold labels straight from the "label" column instead of iterating
# over whole rows, which would also materialise every article's text.
y_true = list(dataset["test"]["label"])
print(classification_report(y_true, predictions, digits=4))