<a href="https://colab.research.google.com/github/akdeniz27/Huggingface_Evaluate_for_zero-shot-text-classification/blob/main/HF_Evaluator_Text_Classification_TTC4900_ipynb_adl%C4%B1_dosyan%C4%B1n_kopyas%C4%B1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
%%capture
!pip install transformers
!pip install evaluate
!pip install datasets
!pip install sentencepiece

In [None]:
import pandas as pd
import sentencepiece
import torch
from datasets import load_dataset, get_dataset_split_names
from evaluate import evaluator
from sklearn.pipeline import Pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

In [None]:
class ScikitEvalPipeline:
    """Adapter that makes a zero-shot pipeline answer like a text classifier.

    The wrapped callable is invoked with the stored candidate labels and
    hypothesis template, and each of its results is reduced to a single
    ``{"label": ..., "score": ...}`` dict built from the first entry of the
    result's ``"labels"`` and ``"scores"`` lists.

    Attributes:
        pipeline: Callable accepting ``sequences``, ``candidate_labels`` and
            ``hypothesis_template`` keyword arguments.
        task: Always ``"text-classification"``.
            NOTE(review): presumably read by the ``evaluate`` evaluator to
            check that the pipeline matches its task -- confirm.
        labels: Candidate labels forwarded on every call.
        template: Hypothesis template forwarded on every call. For the
            zero-shot classification pipeline the default template is
            "This example is {}.".
    """

    def __init__(self, pipeline, labels, template):
        self.pipeline = pipeline
        self.task = "text-classification"
        self.labels = labels
        self.template = template

    def __call__(self, input_text, **kwargs):
        """Classify ``input_text`` and return one prediction per input.

        Args:
            input_text: A single string or a list of strings.
            **kwargs: Accepted so callers may pass extra pipeline options;
                they are ignored and not forwarded to the wrapped pipeline.

        Returns:
            A list of ``{"label": str, "score": float}`` dicts, one per
            input. A single string input yields a one-element list.
        """
        result_zs = self.pipeline(
            sequences=input_text,
            candidate_labels=self.labels,
            hypothesis_template=self.template,
        )
        # For a single string the wrapped pipeline may hand back one result
        # dict instead of a list; iterating that dict would walk its keys and
        # fail, so normalise to a list first.
        if isinstance(result_zs, dict):
            result_zs = [result_zs]
        return [{"label": r["labels"][0], "score": r["scores"][0]} for r in result_zs]

In [None]:
import sys
import csv


def raise_csv_field_size_limit(start=sys.maxsize):
    """Set ``csv.field_size_limit`` to the largest value the platform accepts.

    Tries ``start`` first and, whenever ``csv.field_size_limit`` raises
    ``OverflowError``, retries with a tenth of the previous value.

    Args:
        start: First limit to try; defaults to ``sys.maxsize``.

    Returns:
        The limit that was finally accepted.
    """
    limit = int(start)
    while True:
        try:
            csv.field_size_limit(limit)
            return limit
        except OverflowError:
            # Floor division keeps the arithmetic in exact integers; going
            # through float division loses precision on 64-bit values.
            limit //= 10


# NOTE(review): presumably needed because the dataset is parsed from CSV rows
# whose text fields exceed the default field size limit -- confirm.
maxInt = raise_csv_field_size_limit()

In [None]:
# Hugging Face Hub identifier of the dataset to evaluate on.
dataset_name = "ttc4900"

In [None]:
# List the splits this dataset provides, to choose one for evaluation.
get_dataset_split_names(dataset_name)

Downloading builder script:   0%|          | 0.00/2.11k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/1.27k [00:00<?, ?B/s]

['train']

In [None]:
# Name of the dataset split to load and evaluate on.
split_type = "train"

In [None]:
# Load only the selected split of the dataset.
dataset = load_dataset(dataset_name, split=split_type)

Downloading and preparing dataset ttc4900/ttc4900 (download: 10.14 MiB, generated: 10.15 MiB, post-processed: Unknown size, total: 20.28 MiB) to /root/.cache/huggingface/datasets/ttc4900/ttc4900/1.0.0/991f49f6526d95d6da4dd4b668c376cfe05c1f0522234a44c5fd98e89b3b0224...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/3.44M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/4900 [00:00<?, ? examples/s]

Dataset ttc4900 downloaded and prepared to /root/.cache/huggingface/datasets/ttc4900/ttc4900/1.0.0/991f49f6526d95d6da4dd4b668c376cfe05c1f0522234a44c5fd98e89b3b0224. Subsequent calls will reuse this data.


In [None]:
# Checkpoints to benchmark, given by their repository id on huggingface.co.
# Each entry is a (repository id, evaluation mode) pair; every model listed
# here is evaluated in "zero-shot" mode.
models = [
    (repo_id, "zero-shot")
    for repo_id in (
        "vicgalle/xlm-roberta-large-xnli-anli",
        "joeddav/xlm-roberta-large-xnli",
        "emrecan/bert-base-turkish-cased-allnli_tr",
        "MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7",
    )
]

In [None]:
# English label set. `label_dict` maps each candidate label to its integer
# class id, which is simply the label's position in `candidate_labels`.
candidate_labels = ["politics", "world", "economy", "culture", "health", "sport", "technology"]
label_dict = {name: class_id for class_id, name in enumerate(candidate_labels)}
hypothesis_template = "This example is {}."  # default for zero-shot text classification pipeline

In [None]:
# Turkish label set. `label_dict` maps each candidate label to its integer
# class id, which is simply the label's position in `candidate_labels`.
candidate_labels = ["siyaset", "dünya", "ekonomi", "kültür", "sağlık", "spor", "teknoloji"]
label_dict = {name: class_id for class_id, name in enumerate(candidate_labels)}
# Turkish template, roughly "This text is about {}.".
# Alternative wording: "Bu metin {} kategorisine aittir."
# (roughly "This text belongs to the {} category.").
hypothesis_template = "Bu yazı {} konusundadır."

In [None]:
# Evaluate every configured model on the dataset and collect one metrics dict
# per model in `results`, in the same order as `models`.
results = []

# The evaluator does not depend on the model, so build it once for all runs.
task_evaluator = evaluator("text-classification")

# Pipelines take a CUDA device index, or -1 to run on CPU. Fall back to CPU
# instead of crashing when no GPU is attached to the runtime.
device = 0 if torch.cuda.is_available() else -1

for model in models:
    model_name, model_kind = model
    print(f"Model: {model} \n")
    # Load Model and Tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    checkpoint = AutoModelForSequenceClassification.from_pretrained(model_name)

    # Create a pipeline for text classification
    if model_kind == "fine-tuned":
        text_clf = pipeline("text-classification", model=checkpoint, tokenizer=tokenizer, device=device)
    elif model_kind == "zero-shot":
        zero_shot_pipe = pipeline("zero-shot-classification", model=checkpoint, tokenizer=tokenizer, device=device)
        # Adapt the zero-shot output to the text-classification format.
        text_clf = ScikitEvalPipeline(zero_shot_pipe, candidate_labels, hypothesis_template)
    else:
        # Without this guard an unknown kind would silently re-evaluate the
        # previous iteration's pipeline under the wrong model name.
        raise ValueError(
            f"Unknown model kind {model_kind!r} for {model_name!r}; "
            "expected 'fine-tuned' or 'zero-shot'."
        )

    # Test pipeline: classify one sample sentence as a sanity check before the full run
    example = ["Galatasaray'dan sezon başında ayrılan ve Rayo Vallecano'nun yolunu tutan Radamel Falcao, İspanya'da şov yapmaya devam ediyor. Kolombiyalı yıldızın, Barcelona karşısında attığı gol maça damga vurdu."]
    result = text_clf(example)
    print(f"Result of {model} for example: {result} \n")

    # run baseline: `label_dict` translates predicted label strings into the
    # integer ids stored in the dataset's "category" column
    results.append(
        task_evaluator.compute(
            model_or_pipeline=text_clf,
            data=dataset,
            metric="accuracy",
            input_column="text",
            label_column="category",
            label_mapping=label_dict,
        )
    )


Model: ('vicgalle/xlm-roberta-large-xnli-anli', 'zero-shot') 

Result of ('vicgalle/xlm-roberta-large-xnli-anli', 'zero-shot') for example: [{'label': 'spor', 'score': 0.9873095154762268}] 

Model: ('joeddav/xlm-roberta-large-xnli', 'zero-shot') 



Some weights of the model checkpoint at joeddav/xlm-roberta-large-xnli were not used when initializing XLMRobertaForSequenceClassification: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Result of ('joeddav/xlm-roberta-large-xnli', 'zero-shot') for example: [{'label': 'dünya', 'score': 0.5395767688751221}] 

Model: ('emrecan/bert-base-turkish-cased-allnli_tr', 'zero-shot') 

Result of ('emrecan/bert-base-turkish-cased-allnli_tr', 'zero-shot') for example: [{'label': 'spor', 'score': 0.8837675452232361}] 

Model: ('MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7', 'zero-shot') 

Result of ('MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7', 'zero-shot') for example: [{'label': 'spor', 'score': 0.9702922105789185}] 



In [None]:
# Echo the settings currently in effect, then show one row of metrics per model.
print(f"candidate_labels = {candidate_labels} \nhypothesis_template = {hypothesis_template} \nsplit_type = {split_type}")
df = pd.DataFrame(data=results, index=models)
# Keep only the reported metric columns; the bare last expression is rendered as a table.
df.loc[:, ["accuracy", "total_time_in_seconds", "samples_per_second", "latency_in_seconds"]]

candidate_labels = ['siyaset', 'dünya', 'ekonomi', 'kültür', 'sağlık', 'spor', 'teknoloji'] 
hypothesis_template = Bu metin {} kategorisine aittir. 
split_type = train


Unnamed: 0,accuracy,total_time_in_seconds,samples_per_second,latency_in_seconds
"(vicgalle/xlm-roberta-large-xnli-anli, zero-shot)",0.739388,1822.072382,2.689246,0.371852
"(joeddav/xlm-roberta-large-xnli, zero-shot)",0.480408,1655.903693,2.959109,0.33794
"(emrecan/bert-base-turkish-cased-allnli_tr, zero-shot)",0.445102,495.019287,9.898604,0.101024
"(MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7, zero-shot)",0.667347,1233.554487,3.972261,0.251746


In [None]:
# Echo the settings currently in effect, then show one row of metrics per model.
print(f"candidate_labels = {candidate_labels} \nhypothesis_template = {hypothesis_template} \nsplit_type = {split_type}")
df = pd.DataFrame(data=results, index=models)
# Keep only the reported metric columns; the bare last expression is rendered as a table.
df.loc[:, ["accuracy", "total_time_in_seconds", "samples_per_second", "latency_in_seconds"]]

candidate_labels = ['siyaset', 'dünya', 'ekonomi', 'kültür', 'sağlık', 'spor', 'teknoloji'] 
hypothesis_template = Bu yazı {} konusundadır. 
split_type = train


Unnamed: 0,accuracy,total_time_in_seconds,samples_per_second,latency_in_seconds
"(vicgalle/xlm-roberta-large-xnli-anli, zero-shot)",0.711837,1650.881359,2.968112,0.336915
"(joeddav/xlm-roberta-large-xnli, zero-shot)",0.541633,1650.371826,2.969028,0.336811
"(emrecan/bert-base-turkish-cased-allnli_tr, zero-shot)",0.47,496.080986,9.877419,0.101241
"(MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7, zero-shot)",0.669796,1234.813256,3.968211,0.252003
