In [1]:
import tensorflow as tf
import numpy as np 
import pandas as pd

In [2]:
from transformers import pipeline

classifier = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli")

  from .autonotebook import tqdm as notebook_tqdm
Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFDebertaV2ForSequenceClassification: ['deberta.embeddings.position_ids']
- This IS expected if you are initializing TFDebertaV2ForSequenceClassification from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDebertaV2ForSequenceClassification from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDebertaV2ForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDebertaV2ForSequenceClassification for predictions without further training.


In [3]:
df = pd.read_excel('data/Book1.xlsx')

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 353 entries, 0 to 352
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   text       353 non-null    object
 1   Sentiment  353 non-null    object
 2   Category   353 non-null    object
dtypes: object(3)
memory usage: 8.4+ KB


In [5]:
df.head(5)

Unnamed: 0,text,Sentiment,Category
0,The SMS reminders from CreditCardCo are so va...,Negative,Reminder System
1,The credit card rewards program from CreditCa...,Negative,Reward Programme
2,The text message from CreditCardCo notifying ...,Negative,Miscellaneous
3,"Hey CreditCardCo, integrating payment reminde...",Neutral,Reminder System
4,Just discovered that CreditCardCo's so-called...,Negative,MisSelling


In [6]:
candidate_labels = ['Reminder System',
'Reward Programme',
'Miscellaneous',
'MisSelling',
'Channel of Communication',
'Inappropriate pricing',
'Transparency of communication',
'Ineffectual Customer Service'
]
# Create an empty DataFrame to store the results
results_df = pd.DataFrame(columns=['text'] + candidate_labels + ['Category'])

for risk in df['text']:
    sequence_to_classify = risk
    classification = classifier(sequence_to_classify, candidate_labels, multi_label=False)

    # Sort labels based on scores in descending order
    sorted_labels = [label for _, label in sorted(zip(classification['scores'], classification['labels']), reverse=True)]

    # Create a dictionary for the current risk and its sorted labels
    result = {'text': risk}
    for label in candidate_labels:
        result[label] = classification['scores'][classification['labels'].index(label)]

    # Convert the result dictionary to a DataFrame and append it to results_df
    result_df = pd.DataFrame(result, index=[0])
    results_df = pd.concat([results_df, result_df], ignore_index=True)

print(results_df)

                                                  text  Reminder System  \
0     The SMS reminders from CreditCardCo are so va...         0.473942   
1     The credit card rewards program from CreditCa...         0.003299   
2     The text message from CreditCardCo notifying ...         0.015354   
3     Hey CreditCardCo, integrating payment reminde...         0.600265   
4     Just discovered that CreditCardCo's so-called...         0.000569   
..                                                 ...              ...   
348  BarclaysUK just written to my wife to say they...         0.002000   
349  JetBlue Applied for a JetBlue plus Barclays ca...         0.001229   
350  BarclaysUKHelp Your staff has cancelled my eld...         0.000755   
351  JenniButtonUSA SarahLiz2006 adamundefined I’m ...         0.008721   
352  Hello there Thanks for your post! I'm very sor...         0.001767   

     Reward Programme  Miscellaneous  MisSelling  Channel of Communication  \
0            0.001478

In [7]:
# Add a new column 'output' with the name of the column with the highest value
results_df['output'] = results_df[candidate_labels].idxmax(axis=1)

# Display the updated dataframe
print(results_df)


                                                  text  Reminder System  \
0     The SMS reminders from CreditCardCo are so va...         0.473942   
1     The credit card rewards program from CreditCa...         0.003299   
2     The text message from CreditCardCo notifying ...         0.015354   
3     Hey CreditCardCo, integrating payment reminde...         0.600265   
4     Just discovered that CreditCardCo's so-called...         0.000569   
..                                                 ...              ...   
348  BarclaysUK just written to my wife to say they...         0.002000   
349  JetBlue Applied for a JetBlue plus Barclays ca...         0.001229   
350  BarclaysUKHelp Your staff has cancelled my eld...         0.000755   
351  JenniButtonUSA SarahLiz2006 adamundefined I’m ...         0.008721   
352  Hello there Thanks for your post! I'm very sor...         0.001767   

     Reward Programme  Miscellaneous  MisSelling  Channel of Communication  \
0            0.001478

In [8]:
results_df.head()

Unnamed: 0,text,Reminder System,Reward Programme,Miscellaneous,MisSelling,Channel of Communication,Inappropriate pricing,Transparency of communication,Ineffectual Customer Service,Category,output
0,The SMS reminders from CreditCardCo are so va...,0.473942,0.001478,0.009313,0.00472,0.023786,0.000707,0.001407,0.484647,,Ineffectual Customer Service
1,The credit card rewards program from CreditCa...,0.003299,0.765192,0.04378,0.011599,0.002752,0.003639,0.003616,0.166123,,Reward Programme
2,The text message from CreditCardCo notifying ...,0.015354,0.007836,0.025312,0.011044,0.165733,0.013699,0.019644,0.741378,,Ineffectual Customer Service
3,"Hey CreditCardCo, integrating payment reminde...",0.600265,0.025903,0.134723,0.022479,0.075658,0.014682,0.093894,0.032396,,Reminder System
4,Just discovered that CreditCardCo's so-called...,0.000569,0.001752,0.008983,0.959757,0.000915,0.026779,0.000459,0.000786,,MisSelling


In [9]:
results_df.to_csv('output/Book1.csv', index=False)