In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the train and test splits from CSV files in the working directory.
# Later cells read the `text` and `category` columns of these frames.
train = pd.read_csv(filepath_or_buffer="train.csv")
test = pd.read_csv(filepath_or_buffer="test.csv")

In [3]:
# Inspect the first training example to see the available fields
train.iloc[0]

text        I am still waiting on my card?
category                      card_arrival
Name: 0, dtype: object

In [4]:
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)

# Hugging Face Hub checkpoint id; both the tokenizer and the model weights
# are loaded from this same repository.
model_id = 'philschmid/BERT-Banking77'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSequenceClassification.from_pretrained(model_id)

# Bundle the tokenizer and model into a single text-classification callable
classifier = pipeline(task='text-classification', model=model, tokenizer=tokenizer)

# Quick sanity check on one query; the result is displayed as the cell output
classifier('What is the base of the exchange rates?')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/313 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/5.95k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


[{'label': 'exchange_rate', 'score': 0.9740924835205078}]

In [5]:
# Run the inference code on the entire testing set.
#
# The pipeline accepts a list of strings and batches it internally, so the
# whole column is passed in one call rather than invoking the model once per
# row from a slow `DataFrame.iterrows()` loop.
texts = test['text'].tolist()

# `truncation=True` is forwarded to the tokenizer so an over-length input is
# clipped to the model's maximum length instead of raising.
# The empty-list guard keeps the cell working on an empty test set.
predictions = classifier(texts, batch_size=32, truncation=True) if texts else []

# One record per test row, in the original row order.
results = [
    {
        'text': text,  # Storing the text for reference
        'true_category': true_category,  # Storing the true category
        'predicted_label': prediction['label'],  # Storing the predicted label
        'score': prediction['score'],  # Storing the score of the prediction
    }
    for text, true_category, prediction in zip(texts, test['category'], predictions)
]

In [6]:
# Assemble the per-example prediction records into one table
results_df = pd.DataFrame(results)

# Flag each row whose predicted label equals the true category
results_df['is_correct'] = results_df['true_category'].eq(results_df['predicted_label'])

# The mean of the boolean flag within each true category is that category's accuracy
accuracy_per_category = results_df.groupby('true_category')['is_correct'].mean()

# Keep only the misclassified rows
incorrect_predictions = results_df[~results_df['is_correct']]

In [7]:
# Preview the first ten misclassified examples
incorrect_predictions.head(10)

Unnamed: 0,text,true_category,predicted_label,score,is_correct
3,Is there a way to know when my card will arrive?,card_arrival,card_delivery_estimate,0.944849,False
5,When will I get my card?,card_arrival,card_delivery_estimate,0.967209,False
11,How long does a card delivery take?,card_arrival,card_delivery_estimate,0.967351,False
21,Status of the card I ordered.,card_arrival,lost_or_stolen_card,0.374373,False
32,How do I know when my card will arrive?,card_arrival,card_delivery_estimate,0.96585,False
93,Is it a good time to exchange?,exchange_rate,exchange_via_app,0.888302,False
138,Why am I being charged more ?,card_payment_wrong_exchange_rate,card_payment_fee_charged,0.903134,False
156,How can I check the exchange rate applied to m...,card_payment_wrong_exchange_rate,exchange_rate,0.357621,False
173,Where did this fee come from?,extra_charge_on_statement,card_payment_fee_charged,0.964972,False
178,Why are there so many fees on my statement?,extra_charge_on_statement,card_payment_fee_charged,0.469085,False


In [8]:
# Distinct true categories that have at least one misclassified example
incorrect_predictions['true_category'].unique()

array(['card_arrival', 'exchange_rate',
       'card_payment_wrong_exchange_rate', 'extra_charge_on_statement',
       'pending_cash_withdrawal', 'fiat_currency_support',
       'card_delivery_estimate', 'automatic_top_up', 'card_not_working',
       'exchange_via_app', 'lost_or_stolen_card', 'pin_blocked',
       'contactless_not_working', 'top_up_by_bank_transfer_charge',
       'pending_top_up', 'cancel_transfer', 'top_up_limits',
       'wrong_amount_of_cash_received', 'card_payment_fee_charged',
       'transfer_not_received_by_recipient',
       'supported_cards_and_currencies', 'getting_virtual_card',
       'card_acceptance', 'top_up_reverted',
       'balance_not_updated_after_cheque_or_cash_deposit',
       'card_payment_not_recognised', 'why_verify_identity',
       'unable_to_verify_identity', 'get_physical_card',
       'visa_or_mastercard', 'topping_up_by_card',
       'disposable_card_limits', 'compromised_card', 'atm_support',
       'direct_debit_payment_not_recognised

In [9]:
# Number of misclassified examples per true category (most frequent first)
incorrect_predictions['true_category'].value_counts()

topping_up_by_card                         11
declined_transfer                          11
balance_not_updated_after_bank_transfer     9
pin_blocked                                 7
pending_transfer                            7
                                           ..
unable_to_verify_identity                   1
get_physical_card                           1
atm_support                                 1
getting_spare_card                          1
lost_or_stolen_phone                        1
Name: true_category, Length: 64, dtype: int64

In [10]:
# Count the misclassified examples for each true category
category_counts = incorrect_predictions['true_category'].value_counts()

# Keep the categories with more than 7 incorrect predictions
top_categories = category_counts.loc[category_counts.gt(7)].index

# Restrict the misclassified rows to those high-error categories
in_top_category = incorrect_predictions['true_category'].isin(top_categories)
err_df = incorrect_predictions.loc[in_top_category]

In [11]:
# Display the misclassified rows from the categories with more than 7 errors
err_df

Unnamed: 0,text,true_category,predicted_label,score,is_correct
1324,How do I top up my card using your app? I'm ne...,topping_up_by_card,apple_pay_or_google_pay,0.372667,False
1326,Can I top up using my car?,topping_up_by_card,top_up_limits,0.481315,False
1330,I followed the instructions to transfer money ...,topping_up_by_card,top_up_reverted,0.559969,False
1331,Where's the money that got charged to my card?...,topping_up_by_card,transfer_not_received_by_recipient,0.780983,False
1332,how can i top up?,topping_up_by_card,top_up_limits,0.917691,False
1333,I am missing some funds from my account - I tr...,topping_up_by_card,top_up_reverted,0.559718,False
1337,"I did a top-up, but I'm not seeing it in my wa...",topping_up_by_card,pending_top_up,0.335356,False
1338,"i know i entered the right info, but my top up...",topping_up_by_card,pending_top_up,0.619046,False
1340,Is there a way to transfer funds directly from...,topping_up_by_card,card_linking,0.210411,False
1351,Why is my money gone right when I attempted to...,topping_up_by_card,top_up_failed,0.634921,False
