This is an example of a zero-shot classifier

In [1]:
from transformers import pipeline
from datasets import Dataset
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


Load dataset, define classes, and take validation subset

In [2]:
df = pd.read_csv('data/GB-GOV-1.csv')
classes_verbalized = df.label.unique()
id2label = {i: label for i, label in enumerate(classes_verbalized)}
label2id = {id2label[i]: i for i in id2label.keys()}
dataset = Dataset.from_pandas(df).class_encode_column("label").train_test_split(
    test_size=0.3,
    stratify_by_column="label",
    shuffle=True,
)

Casting to class labels: 100%|██████████| 3878/3878 [00:00<00:00, 231600.16 examples/s]


Load model as zero-shot pipeline

In [3]:
zeroshot_classifier = pipeline("zero-shot-classification", model="MoritzLaurer/deberta-v3-large-zeroshot-v1.1-all-33")
hypothesis_template = "This example is {}"

Iterate through validation set and score

In [4]:
correct = 0
total = 0
for i, text in enumerate(dataset['test']['text']):
    total += 1
    output = zeroshot_classifier(text, classes_verbalized, hypothesis_template=hypothesis_template, multi_label=False)
    guess = output['labels'][0]
    correct_answer = id2label[dataset['test']['label'][i]]
    if guess == correct_answer:
        correct += 1
    if i % 10 == 0:
        print("{}.{}\nAnswer: {}\nGuess: {}\n\n".format(i, text, correct_answer, guess))
    if i == 100:
        break

0.PROCOFSERVICES and P0110 for Research inspired Water Policy
Answer: Not related to climate
Guess: Not related to climate


10.Part Time Secondment of Climate Finance Adviser into Agence Francaise de Developpment Office Jakarta
Answer: Related to climate
Guess: Related to climate


20.NOTFORPROFITORG and P0209 for ICG Programmatic Funding
Answer: Not related to climate
Guess: Not related to climate


30.Funding for Treatment Chemicals through UNICEF
Answer: Not related to climate
Guess: Not related to climate


40.Global Environment Facility 7th replenishment - Environment
Answer: Not related to climate
Guess: Related to climate


50.Climate Change and Environmental Governance Coordination
Answer: Related to climate
Guess: Related to climate


60.CSSF State Formation Strand
Answer: Not related to climate
Guess: Not related to climate


70.Frontline Diplomatic Enabling Activity in Haiti
Answer: Not related to climate
Guess: Not related to climate


80.World Bank Impact Evaluation Suppo

Print accuracy

In [5]:
accuracy = correct / total
print(
    "{}%".format(
        round(accuracy * 100, 2)
    )
)

75.25%
