# Zero-shot classification 


In [1]:
from transformers import pipeline
import pandas as pd
from datasets import load_dataset
from sentence_transformers import SentenceTransformer

## Prepare Data

In [12]:
sample_size = 100  # Running model is resourceful, so we limit sample size

In [2]:
dataset = load_dataset("PolyAI/banking77", revision="main") # taking the data from the main branch
train_data = pd.DataFrame(dataset['train'])
test_data = pd.DataFrame(dataset['test'])

label_names = [
            "activate_my_card",
            "age_limit",
            "apple_pay_or_google_pay",
            "atm_support",
            "automatic_top_up",
            "balance_not_updated_after_bank_transfer",
            "balance_not_updated_after_cheque_or_cash_deposit",
            "beneficiary_not_allowed",
            "cancel_transfer",
            "card_about_to_expire",
            "card_acceptance",
            "card_arrival",
            "card_delivery_estimate",
            "card_linking",
            "card_not_working",
            "card_payment_fee_charged",
            "card_payment_not_recognised",
            "card_payment_wrong_exchange_rate",
            "card_swallowed",
            "cash_withdrawal_charge",
            "cash_withdrawal_not_recognised",
            "change_pin",
            "compromised_card",
            "contactless_not_working",
            "country_support",
            "declined_card_payment",
            "declined_cash_withdrawal",
            "declined_transfer",
            "direct_debit_payment_not_recognised",
            "disposable_card_limits",
            "edit_personal_details",
            "exchange_charge",
            "exchange_rate",
            "exchange_via_app",
            "extra_charge_on_statement",
            "failed_transfer",
            "fiat_currency_support",
            "get_disposable_virtual_card",
            "get_physical_card",
            "getting_spare_card",
            "getting_virtual_card",
            "lost_or_stolen_card",
            "lost_or_stolen_phone",
            "order_physical_card",
            "passcode_forgotten",
            "pending_card_payment",
            "pending_cash_withdrawal",
            "pending_top_up",
            "pending_transfer",
            "pin_blocked",
            "receiving_money",
            "Refund_not_showing_up",
            "request_refund",
            "reverted_card_payment?",
            "supported_cards_and_currencies",
            "terminate_account",
            "top_up_by_bank_transfer_charge",
            "top_up_by_card_charge",
            "top_up_by_cash_or_cheque",
            "top_up_failed",
            "top_up_limits",
            "top_up_reverted",
            "topping_up_by_card",
            "transaction_charged_twice",
            "transfer_fee_charged",
            "transfer_into_account",
            "transfer_not_received_by_recipient",
            "transfer_timing",
            "unable_to_verify_identity",
            "verify_my_identity",
            "verify_source_of_funds",
            "verify_top_up",
            "virtual_card_not_working",
            "visa_or_mastercard",
            "why_verify_identity",
            "wrong_amount_of_cash_received",
            "wrong_exchange_rate_for_cash_withdrawal"]

train_data["label_name"] = train_data["label"].apply(lambda x: label_names[x])
test_data["label_name"] = test_data["label"].apply(lambda x: label_names[x])
eval_data: pd.DataFrame = train_data.sample(sample_size)

## Import Model
More about this model: https://huggingface.co/MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli


In [9]:
# Initialize classificator
classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli")

## Run Predictions

In [10]:
# Run prediction
predictions = []
def get_predicted_label(prediction):
    highest_score_idx = prediction['scores'].index(max(prediction['scores']))
    return prediction['labels'][highest_score_idx]

predictions: list = classifier(eval_data['text'].tolist(), label_names)
predicted_labels = list(map(get_predicted_label, predictions))

eval_data['pred'] = predicted_labels
percentage_correct = (eval_data['label_name'] == eval_data['pred']).mean()
print(percentage_correct)

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


0.33


In [11]:
from sklearn.metrics import classification_report
print(classification_report(eval_data['label_name'], eval_data['pred']))

                                                  precision    recall  f1-score   support

                           Refund_not_showing_up       1.00      0.50      0.67         2
                                activate_my_card       0.00      0.00      0.00         3
                                       age_limit       0.00      0.00      0.00         1
                                     atm_support       0.00      0.00      0.00         1
                                automatic_top_up       1.00      0.50      0.67         2
         balance_not_updated_after_bank_transfer       1.00      1.00      1.00         1
balance_not_updated_after_cheque_or_cash_deposit       0.75      0.60      0.67         5
                                 cancel_transfer       0.00      0.00      0.00         1
                            card_about_to_expire       0.67      1.00      0.80         2
                                 card_acceptance       0.00      0.00      0.00         0
         

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
