In [2]:
!pip install --quiet transformers evaluate datasets

In [3]:
!huggingface-cli login --token

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
from collections import Counter

def compute_metrics(ground_truth, predictions):
    unique_labels = set(ground_truth + predictions)
    true_positives = Counter()
    false_positives = Counter()
    false_negatives = Counter()

    for true_label, predicted_label in zip(ground_truth, predictions):
        if true_label == predicted_label:
            true_positives[true_label] += 1
        else:
            false_positives[predicted_label] += 1
            false_negatives[true_label] += 1

    precision = {}
    recall = {}
    f1_score = {}

    for label in unique_labels:
        precision[label] = true_positives[label] / (true_positives[label] + false_positives[label]) if (true_positives[label] + false_positives[label]) else 0
        recall[label] = true_positives[label] / (true_positives[label] + false_negatives[label]) if (true_positives[label] + false_negatives[label]) else 0
        f1_score[label] = 2 * (precision[label] * recall[label]) / (precision[label] + recall[label]) if (precision[label] + recall[label]) else 0

    macro_precision = sum(precision.values()) / len(unique_labels)
    macro_recall = sum(recall.values()) / len(unique_labels)
    macro_f1_score = sum(f1_score.values()) / len(unique_labels)
    accuracy = sum(true_positives.values()) / len(ground_truth)

    return accuracy, macro_precision, macro_recall, macro_f1_score

# Example usage:
ground_truth = ['a', 'b', 'c', 'a', 'b', 'c', "d", "d"]
predictions = ['a', 'b', 'c', 'c', 'b', 'a', "e", "d"]
accuracy, precision, recall, f1_score = compute_metrics(ground_truth, predictions)
print(f"Accuracy: {accuracy}\nPrecision: {precision}\nRecall: {recall}\nF1 Score: {f1_score}")

Accuracy: 0.625
Precision: 0.6
Recall: 0.5
F1 Score: 0.5333333333333333


In [5]:
import pandas as pd
df = pd.read_excel('test data annotated far from msa sept 2024 v2.xlsx')
ground_truths = df['intent'].tolist()
df['darija'] = df['darija'].astype(str)
ar = df['arabic'].tolist()
dar = df['darija'].tolist()

In [6]:
df["different from msa from 1 to 3"] = df["different from msa from 1 to 3"].astype(int)

In [7]:
score_of_differences_dar_msa = df["different from msa from 1 to 3"].tolist()

In [8]:
df

Unnamed: 0,arabic,darija,different from msa from 1 to 3,intent
0,هذا رائع جدا,هادشي واعر,3,general_positive_feedback
1,كيف يمكنني أن أضع طابعة جلدية ؟,كيفاش نصلح طابعة عالقة؟,2,oodoos
2,أيمكنني إلغاء أمر للحصول على شهادة إيداع جديدة ؟,واش نقدر نلغي طلبية لشهادة ايداع جديدة؟,2,cancel_order
3,معاملات مشكوك فيها على حسابي.,عمليات مشبوهة فحسابي.,1,compromised_card
4,تم تأجيل المباراة ، ماذا علي أن أفعل ؟,تأجل الماتش، شنو لازم ندير؟,3,get_refund
...,...,...,...,...
357,أيمكنني أن أسأل عميلا ما هي ساعات دعم العملاء ؟,تقدر تسول الوكيل على ساعات دعم الزبائن؟,2,contact_customer_service
358,كيف لي أن أعرف إن كانت سياسة التأمين الخاصة بي...,كيفاش نعرف بلي تأميني مازال ساري؟,2,insurance
359,أريد تغيير المعلومات عن بروفايلي ، كيف يمكنني ...,بغيت نعدل المعلومات فبروفيل ديالي، كيفاش نديرها؟,2,edit_account
360,أيمكنك على الأقل أن تحاول أن تعالج شواغلي بدلا...,تقدر تحاول تواجه مشاكلي بلا ما تعطيني جوابات ج...,1,general_negative_feedback


# Bert like model Finetuning

In [9]:
from transformers import pipeline
from tqdm import tqdm
model = pipeline('text-classification', model="AbderrahmanSkiredj1/BERTouch")
ar_classified = model(ar)
dar_classified = model(dar)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/2.03k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/541M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/751k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.77M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [10]:
intent_pred_ara = ar_classified
intent_pred_ara = [x['label'] for x in intent_pred_ara]
intent_pred_dar = dar_classified
intent_pred_dar = [x['label'] for x in intent_pred_dar]
acc_msa, prec_msa, rec_msa, f1_msa = compute_metrics(ground_truths, intent_pred_ara)
acc_dar, prec_dar, rec_dar, f1_dar = compute_metrics(ground_truths, intent_pred_dar)
print(acc_msa, prec_msa, rec_msa, f1_msa)
print(acc_dar, prec_dar, rec_dar, f1_dar)

0.9585635359116023 0.9575797466422467 0.9550175518925519 0.9555177498683695
0.9806629834254144 0.9798349628312862 0.9786095848595849 0.9786949074595171


In [11]:
from sklearn.metrics import classification_report

In [16]:
report_ar = classification_report(ground_truths, intent_pred_ara, output_dict=True)
report_dar = classification_report(ground_truths, intent_pred_dar, output_dict=True)

# Convert dictionaries to DataFrames
df_ar = pd.DataFrame(report_ar).transpose()
df_dar = pd.DataFrame(report_dar).transpose()

# Remove the 'accuracy' row for sorting purposes
df_ar = df_ar.drop(['accuracy'])
df_dar = df_dar.drop(['accuracy'])

# Sort intents by F1-score
sorted_ar = df_ar.sort_values(by='f1-score', ascending=False)
sorted_dar = df_dar.sort_values(by='f1-score', ascending=False)

In [17]:
sorted_ar

Unnamed: 0,precision,recall,f1-score,support
create_account,1.0,1.0,1.0,19.0
delete_account,1.0,1.0,1.0,19.0
cancel_order,1.0,1.0,1.0,15.0
cancel_transfer,1.0,1.0,1.0,13.0
general_positive_feedback,1.0,1.0,1.0,13.0
change_order,1.0,1.0,1.0,18.0
insurance,1.0,1.0,1.0,13.0
contact_customer_service,1.0,1.0,1.0,19.0
edit_account,1.0,1.0,1.0,14.0
get_refund,1.0,1.0,1.0,18.0


In [18]:
sorted_dar

Unnamed: 0,precision,recall,f1-score,support
create_account,1.0,1.0,1.0,19.0
get_refund,1.0,1.0,1.0,18.0
general_positive_feedback,1.0,1.0,1.0,13.0
general_negative_feedback,1.0,1.0,1.0,14.0
lost_or_stolen_phone,1.0,1.0,1.0,12.0
age_limit,1.0,1.0,1.0,12.0
edit_account,1.0,1.0,1.0,14.0
delete_account,1.0,1.0,1.0,19.0
deactivate_my_card,1.0,1.0,1.0,13.0
get_invoice,1.0,1.0,1.0,20.0


In [None]:
#for arabic, besides idoos, compromised card scores are low, so then if an utterance is compromised card, it is often misclassified as what?


In [12]:
df['intent_pred_ara'] = intent_pred_ara
df['intent_pred_dar'] = intent_pred_dar
df

Unnamed: 0,arabic,darija,different from msa from 1 to 3,intent,intent_pred_ara,intent_pred_dar
0,هذا رائع جدا,هادشي واعر,3,general_positive_feedback,general_positive_feedback,general_positive_feedback
1,كيف يمكنني أن أضع طابعة جلدية ؟,كيفاش نصلح طابعة عالقة؟,2,oodoos,oodoos,oodoos
2,أيمكنني إلغاء أمر للحصول على شهادة إيداع جديدة ؟,واش نقدر نلغي طلبية لشهادة ايداع جديدة؟,2,cancel_order,cancel_order,cancel_order
3,معاملات مشكوك فيها على حسابي.,عمليات مشبوهة فحسابي.,1,compromised_card,compromised_card,compromised_card
4,تم تأجيل المباراة ، ماذا علي أن أفعل ؟,تأجل الماتش، شنو لازم ندير؟,3,get_refund,get_refund,get_refund
...,...,...,...,...,...,...
357,أيمكنني أن أسأل عميلا ما هي ساعات دعم العملاء ؟,تقدر تسول الوكيل على ساعات دعم الزبائن؟,2,contact_customer_service,contact_customer_service,contact_customer_service
358,كيف لي أن أعرف إن كانت سياسة التأمين الخاصة بي...,كيفاش نعرف بلي تأميني مازال ساري؟,2,insurance,insurance,insurance
359,أريد تغيير المعلومات عن بروفايلي ، كيف يمكنني ...,بغيت نعدل المعلومات فبروفيل ديالي، كيفاش نديرها؟,2,edit_account,edit_account,edit_account
360,أيمكنك على الأقل أن تحاول أن تعالج شواغلي بدلا...,تقدر تحاول تواجه مشاكلي بلا ما تعطيني جوابات ج...,1,general_negative_feedback,general_negative_feedback,general_negative_feedback


In [27]:
compro_card = df[df['intent']=="compromised_card"]
compro_card_preds = compro_card['intent_pred_ara'].tolist()

In [30]:
compro_card_preds, len(compro_card_preds)

(['compromised_card',
  'compromised_card',
  'compromised_card',
  'compromised_card',
  'compromised_card',
  'compromised_card',
  'compromised_card',
  'compromised_card',
  'compromised_card',
  'general_negative_feedback',
  'compromised_card',
  'compromised_card',
  'activate_my_card',
  'compromised_card',
  'compromised_card'],
 15)

In [None]:
#only two errors but as they are few, the scores get low fast

In [29]:
df['intent'].value_counts()

Unnamed: 0_level_0,count
intent,Unnamed: 1_level_1
get_invoice,20
contact_customer_service,19
delete_account,19
contact_human_agent,19
create_account,19
get_refund,18
change_order,18
card_swallowed,16
exchange,15
cancel_order,15


In [13]:


# Iterate through each level of differences (0, 1, 2, 4) and filter by indices
for level in [1, 2, 3]:
    print(f"\nMetrics for Darija texts where difference level is {level}:")
    sub_df = df[df["different from msa from 1 to 3"] == level]
    sub_ground_truths = sub_df['intent'].tolist()
    sub_intent_pred_ara = sub_df['intent_pred_ara'].tolist()
    sub_intent_pred_dar = sub_df['intent_pred_dar'].tolist()
    #acc_msa, prec_msa, rec_msa, f1_msa = compute_metrics(sub_ground_truths, sub_intent_pred_ara)
    acc_dar, prec_dar, rec_dar, f1_dar = compute_metrics(sub_ground_truths, sub_intent_pred_dar)
    #print(acc_msa, prec_msa, rec_msa, f1_msa)
    print(acc_dar, prec_dar, rec_dar, f1_dar)
    print('-----------')


Metrics for Darija texts where difference level is 1:
0.9848484848484849 0.9848484848484848 0.9772727272727273 0.9757575757575758
-----------

Metrics for Darija texts where difference level is 2:
0.9775280898876404 0.9748842592592594 0.9725694444444445 0.9730312233589088
-----------

Metrics for Darija texts where difference level is 3:
1.0 1.0 1.0 1.0
-----------


# Retriever Intent Detector

In [14]:
!pip install --quiet faiss-gpu datasets
!pip install --quiet  -U sentence-transformers

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.1/249.1 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [15]:
from sentence_transformers import SentenceTransformer

pretrained_model = SentenceTransformer("intfloat/multilingual-e5-base")

modules.json:   0%|          | 0.00/387 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/179k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/57.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

In [16]:
import pandas as pd
df_train = pd.read_excel('train set 7 february 2024 from V6 reshaped.xlsx')

In [17]:
embeddings_normed = pretrained_model.encode(df_train['text'].tolist(), normalize_embeddings=True)

In [18]:
embeddings_test_dar_normed = pretrained_model.encode(dar, normalize_embeddings=True)
embeddings_test_ar_normed = pretrained_model.encode(ar, normalize_embeddings=True)

In [14]:
df_train

Unnamed: 0,text,label,language
0,"i want a refund, help me egt one",get_refund,en
1,"I want to download an invoice, can you show me...",get_invoice,en
2,can u ask an agent how I can obtain an invoice?,get_invoice,en
3,How do I dispute a charge on my credit card?,idoos,en
4,At what age can a person open an account?,age_limit,en
...,...,...,...
5775,كيفاش نقدر نلغي تحويلة دارت عبر حوالة مالية؟,cancel_transfer,ar
5776,ما بغيتش حسابي، عاونوني نمسحوه,delete_account,ar
5777,تم الغاء الحفلة، غادي تحصّل ردّة.,get_refund,ar
5778,ما كتعاونيش، أشنو لازم ندير باش نهدر مع إنسان؟,contact_human_agent,ar


In [19]:
ground_truths_train = df_train['label'].tolist()

In [20]:
import faiss

index = faiss.IndexFlatL2(embeddings_normed.shape[1])
index.add(embeddings_normed)

query = embeddings_test_ar_normed
#y = y_test_darija
k = 1 # number of nearest neighbors to return
D, I = index.search(query, k)
predicted_intents_for_ar = [ground_truths_train[x[0]] for x in I.tolist()]

In [21]:
index = faiss.IndexFlatL2(embeddings_normed.shape[1])
index.add(embeddings_normed)

query = embeddings_test_dar_normed
#y = y_test_darija
k = 1 # number of nearest neighbors to return
D, I = index.search(query, k)
predicted_intents_for_dar = [ground_truths_train[x[0]] for x in I.tolist()]

In [22]:
ground_truths_test = df['intent'].tolist()

In [33]:
acc_msa, prec_msa, rec_msa, f1_msa = compute_metrics(ground_truths_test, predicted_intents_for_ar)
acc_dar, prec_dar, rec_dar, f1_dar = compute_metrics(ground_truths_test, predicted_intents_for_dar)
print(acc_msa, prec_msa, rec_msa, f1_msa)
print(acc_dar, prec_dar, rec_dar, f1_dar)

0.8977900552486188 0.9049829277189958 0.8890199925561767 0.8891016422543996
0.8812154696132597 0.8819146922765345 0.8724208357432043 0.8714264631329186


In [35]:
report_ar = classification_report(ground_truths_test, predicted_intents_for_ar, output_dict=True)
report_dar = classification_report(ground_truths_test, predicted_intents_for_dar, output_dict=True)

# Convert dictionaries to DataFrames
df_ar = pd.DataFrame(report_ar).transpose()
df_dar = pd.DataFrame(report_dar).transpose()

# Remove the 'accuracy' row for sorting purposes
df_ar = df_ar.drop(['accuracy'])
df_dar = df_dar.drop(['accuracy'])

# Sort intents by F1-score
sorted_ar = df_ar.sort_values(by='f1-score', ascending=False)
sorted_dar = df_dar.sort_values(by='f1-score', ascending=False)

In [36]:
sorted_ar

Unnamed: 0,precision,recall,f1-score,support
general_positive_feedback,1.0,1.0,1.0,13.0
contact_customer_service,1.0,1.0,1.0,19.0
get_invoice,0.952381,1.0,0.97561,20.0
change_order,0.947368,1.0,0.972973,18.0
get_refund,1.0,0.944444,0.971429,18.0
cancel_transfer,0.928571,1.0,0.962963,13.0
insurance,0.928571,1.0,0.962963,13.0
age_limit,1.0,0.916667,0.956522,12.0
delete_account,0.947368,0.947368,0.947368,19.0
cancel_order,0.882353,1.0,0.9375,15.0


In [37]:
sorted_dar

Unnamed: 0,precision,recall,f1-score,support
insurance,1.0,1.0,1.0,13.0
get_refund,0.947368,1.0,0.972973,18.0
contact_human_agent,1.0,0.947368,0.972973,19.0
cancel_order,1.0,0.933333,0.965517,15.0
edit_account,0.933333,1.0,0.965517,14.0
contact_customer_service,0.904762,1.0,0.95,19.0
change_order,0.9,1.0,0.947368,18.0
get_invoice,0.904762,0.95,0.926829,20.0
create_account,0.9,0.947368,0.923077,19.0
general_positive_feedback,1.0,0.846154,0.916667,13.0


In [None]:
# the intents compromised_card, general_negative_feedback, idoos, activate_my_card, fee have low scores for Arabic
# the intents fee, delete_account, loan and idoos have low scores for Darija

In [23]:
df['intent_pred_ara_retriever'] = predicted_intents_for_ar
df['intent_pred_dar_retriever'] = predicted_intents_for_dar

In [24]:
# Iterate through each level of differences (0, 1, 2, 4) and filter by indices
for level in [1, 2, 3]:
    print(f"\nMetrics for Darija texts where difference level is {level}:")
    sub_df = df[df["different from msa from 1 to 3"] == level]
    sub_ground_truths = sub_df['intent'].tolist()
    sub_intent_pred_ara = sub_df['intent_pred_ara_retriever'].tolist()
    sub_intent_pred_dar = sub_df['intent_pred_dar_retriever'].tolist()
    #acc_msa, prec_msa, rec_msa, f1_msa = compute_metrics(sub_ground_truths, sub_intent_pred_ara)
    acc_dar, prec_dar, rec_dar, f1_dar = compute_metrics(sub_ground_truths, sub_intent_pred_dar)
    #print(acc_msa, prec_msa, rec_msa, f1_msa)
    print(acc_dar, prec_dar, rec_dar, f1_dar)
    print('-----------')


Metrics for Darija texts where difference level is 1:
0.8181818181818182 0.7130434782608696 0.7401656314699794 0.7012260686173729
-----------

Metrics for Darija texts where difference level is 2:
0.9101123595505618 0.9159705694632166 0.8994858364101787 0.9024178136907812
-----------

Metrics for Darija texts where difference level is 3:
0.7586206896551724 0.6833333333333333 0.65 0.65
-----------


In [43]:
# when the darija is too close to MSA, the model gets lost
# when the darija is too far from MSA, the model gets lost too
# when the darija is simple darija not very far from MSA and not very close to it, the model have good understanding of it

# Intent Classification by ChatGPT Prompting

In [5]:
prompt = """Context: You are an advanced banking chatbot designed for a Moroccan bank, equipped to assist customers with a range of inquiries and services related to banking. Your capabilities extend from handling basic account management to addressing complex service requests. Your primary objective is to accurately discern the customer's intent from their utterances, using the list of predefined intents to provide relevant assistance or guide them to the appropriate service channel.

Here is the list of all intents and their meanings:
- activate_my_card: Initiate the use of a new banking card.
- age_limit: Inquire about the minimum age requirement for a service.
- cancel_order: Request to cancel a previously placed order.
- cancel_transfer: Request to cancel a previously initiated money transfer.
- card_swallowed: Report an ATM machine retaining a banking card.
- change_order: Modify details of a previously placed order.
- compromised_card: Report a banking card suspected of being at risk of fraud.
- contact_customer_service: Request for assistance from the bank’s customer service.
- contact_human_agent: Seek to speak with a live customer support agent.
- create_account: Initiate the process of opening a new bank account.
- deactivate_my_card: Disable a currently active banking card.
- delete_account: Request the closure of a bank account.
- edit_account: Make changes to the account information.
- exchange: Inquire about currency exchange services.
- fee: Question about the charges associated with a service.
- general_negative_feedback: Provide negative feedback on the overall service.
- general_positive_feedback: Provide positive feedback on the overall service.
- get_invoice: Request a bill or invoice for a transaction.
- get_refund: Request a return of funds for a transaction.
- insurance: Inquire about insurance products offered by the bank.
- loan: Request information on loan products.
- lost_or_stolen_phone: Report a lost or stolen phone linked to mobile banking.
- idoos: An intent not in the list of intents but within the banking domain, like asking for Western Union facilities.
- oodoos: An intent not in the list of intents and not related to banking, like asking the distance between the Earth and the Moon.

When you receive the 5 utterances from a customer, analyze the content to determine the most applicable intents. Consider the context of banking practices in Morocco, including services and customer expectations.

Instructions:
1. Read the customer's utterances carefully.
2. Identify the most relevant intent for each utterance from the predefined list.
3. Return the detected intents in JSON format for easy parsing:

```
{"intents": ["intent1", "intent2", "intent3", "intent4", "intent5"]}
```

Make sure to return only one intent for each utterance. Select the intent that best matches the customer's query or service need for each of the five utterances. If an utterance does not fit any predefined intents or falls outside the banking domain, use "oodoos" for unrelated queries and "idoos" for banking-related queries not listed among the predefined intents.
Here are the five utterances:
"""

In [33]:
import pandas as pd
df = pd.read_excel('test data annotated far from msa sept 2024.xlsx')
ground_truths = df['intent'].tolist()
df['darija'] = df['darija'].astype(str)
dar = df['darija'].tolist()

In [7]:

batched_darija_texts = [dar[k:k+5] for k in range(0,len(dar),5)]

In [8]:
len(dar), len(batched_darija_texts)

(362, 73)

In [9]:
prompted_batched_dar = [prompt + "\n".join(x) for x in batched_darija_texts]

In [10]:
len(prompted_batched_dar)

73

In [11]:
from random import choice
print(choice(prompted_batched_dar))

Context: You are an advanced banking chatbot designed for a Moroccan bank, equipped to assist customers with a range of inquiries and services related to banking. Your capabilities extend from handling basic account management to addressing complex service requests. Your primary objective is to accurately discern the customer's intent from their utterances, using the list of predefined intents to provide relevant assistance or guide them to the appropriate service channel.

Here is the list of all intents and their meanings:
- activate_my_card: Initiate the use of a new banking card.
- age_limit: Inquire about the minimum age requirement for a service.
- cancel_order: Request to cancel a previously placed order.
- cancel_transfer: Request to cancel a previously initiated money transfer.
- card_swallowed: Report an ATM machine retaining a banking card.
- change_order: Modify details of a previously placed order.
- compromised_card: Report a banking card suspected of being at risk of fra

In [12]:
!pip install openai

Collecting openai
  Downloading openai-1.46.0-py3-none-any.whl.metadata (24 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.6 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl.metadata (20 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.46.0-py3-none-any.whl (375 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m375.0/375.0 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K   [90m━━

In [15]:
from openai import OpenAI
client = OpenAI(api_key="sk-...")

def use_chatgpt(prompt):
    response = client.chat.completions.create(
      model="gpt-4-turbo-preview",
      messages=[
        {"role": "user", "content": prompt}
      ]
    )
    return response.choices[0].message.content

In [16]:
resp0 = use_chatgpt(prompted_batched_dar[0])
resp0

'```\n{"intents": ["general_positive_feedback", "oodoos", "cancel_order", "compromised_card", "oodoos"]}\n```'

In [17]:
from tqdm import tqdm
answers = [resp0]
for x in tqdm(prompted_batched_dar[1:]):
    answers.append(use_chatgpt(x))

100%|██████████| 72/72 [01:30<00:00,  1.26s/it]


In [18]:
import pickle

with open('chatgpt4_darija_intent_detection_on_test_17sept2024.pickle', 'wb') as handle:
    pickle.dump(answers, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [25]:
import pickle
with open('chatgpt4_darija_intent_detection_on_test_17sept2024.pickle', 'rb') as handle:
    answers = pickle.load(handle)

In [19]:
answers

['```\n{"intents": ["general_positive_feedback", "oodoos", "cancel_order", "compromised_card", "oodoos"]}\n```',
 '```\n{"intents": ["oodoos", "cancel_order", "contact_customer_service", "get_refund", "contact_customer_service"]}\n```',
 '```\n{"intents": ["card_swallowed", "fee", "contact_customer_service", "contact_customer_service", "change_order"]}\n```',
 '```\n{"intents": ["delete_account", "compromised_card", "oodoos", "contact_customer_service", "contact_human_agent"]}\n```',
 '```\n{"intents": ["change_order", "contact_human_agent", "loan", "contact_human_agent", "compromised_card"]}\n```',
 '```\n{"intents": ["get_refund", "get_refund", "card_swallowed", "idoos", "delete_account"]}\n```',
 '```\n{"intents": ["compromised_card", "insurance", "activate_my_card", "get_invoice", "delete_account"]}\n```',
 '```\n{"intents": ["change_order", "idoos", "change_order", "idoos", "idoos"]}\n```',
 '```\n{"intents": ["cancel_order", "idoos", "idoos", "delete_account", "loan"]}\n```',
 '`

In [39]:
predicted_intents_dar = []
for j, x in enumerate(answers):
    next_predictions = eval(x.strip("`").strip().strip('json').strip())['intents']
    if len(next_predictions) != 5:
        print('bizarre')
        print(j)
    predicted_intents_dar.extend(next_predictions)

bizarre
56
bizarre
72


In [41]:
x = answers[56]
x

'```\n{"intents": ["card_swallowed", "contact_human_agent", "loan", "change_order"]}\n```'

In [43]:
print(prompted_batched_dar[56])

Context: You are an advanced banking chatbot designed for a Moroccan bank, equipped to assist customers with a range of inquiries and services related to banking. Your capabilities extend from handling basic account management to addressing complex service requests. Your primary objective is to accurately discern the customer's intent from their utterances, using the list of predefined intents to provide relevant assistance or guide them to the appropriate service channel.

Here is the list of all intents and their meanings:
- activate_my_card: Initiate the use of a new banking card.
- age_limit: Inquire about the minimum age requirement for a service.
- cancel_order: Request to cancel a previously placed order.
- cancel_transfer: Request to cancel a previously initiated money transfer.
- card_swallowed: Report an ATM machine retaining a banking card.
- change_order: Modify details of a previously placed order.
- compromised_card: Report a banking card suspected of being at risk of fra

In [44]:
#we redo the inference to get 5 answers not 4
use_chatgpt(prompted_batched_dar[56])

'```\n{"intents": ["card_swallowed", "contact_human_agent", "loan", "change_order"]}\n```'

In [26]:
answers[56] = '```{"intents": ["card_swallowed","exchange", "contact_human_agent", "loan", "change_order"]}```'

In [27]:
predicted_intents_dar = []
for j, x in enumerate(answers):
    next_predictions = eval(x.strip("`").strip().strip('json').strip())['intents']
    if len(next_predictions) != 5:
        print('bizarre')
        print(j)
    predicted_intents_dar.extend(next_predictions)

bizarre
72


In [47]:
predicted_intents_dar

['general_positive_feedback',
 'oodoos',
 'cancel_order',
 'compromised_card',
 'oodoos',
 'oodoos',
 'cancel_order',
 'contact_customer_service',
 'get_refund',
 'contact_customer_service',
 'card_swallowed',
 'fee',
 'contact_customer_service',
 'contact_customer_service',
 'change_order',
 'delete_account',
 'compromised_card',
 'oodoos',
 'contact_customer_service',
 'contact_human_agent',
 'change_order',
 'contact_human_agent',
 'loan',
 'contact_human_agent',
 'compromised_card',
 'get_refund',
 'get_refund',
 'card_swallowed',
 'idoos',
 'delete_account',
 'compromised_card',
 'insurance',
 'activate_my_card',
 'get_invoice',
 'delete_account',
 'change_order',
 'idoos',
 'change_order',
 'idoos',
 'idoos',
 'cancel_order',
 'idoos',
 'idoos',
 'delete_account',
 'loan',
 'get_refund',
 'cancel_order',
 'lost_or_stolen_phone',
 'contact_human_agent',
 'idoos',
 'insurance',
 'general_positive_feedback',
 'oodoos',
 'oodoos',
 'change_order',
 'idoos',
 'contact_customer_service

In [48]:
accuracy, precision, recall, f1_score = compute_metrics(ground_truths, predicted_intents_dar)
accuracy, precision, recall, f1_score

(0.8038674033149171, 0.8444655951283857, 0.766803129618919, 0.7803068132866307)

In [49]:
len(ground_truths)

362

In [50]:
len(predicted_intents_dar)

362

In [53]:
report_dar = classification_report(ground_truths, predicted_intents_dar, output_dict=True)

df_dar = pd.DataFrame(report_dar).transpose()

df_dar = df_dar.drop(['accuracy'])

sorted_dar = df_dar.sort_values(by='f1-score', ascending=False)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [55]:
df_dar.sort_values(by='f1-score', ascending=False)

Unnamed: 0,precision,recall,f1-score,support
change_order,1.0,1.0,1.0,18.0
cancel_order,1.0,1.0,1.0,15.0
delete_account,1.0,1.0,1.0,19.0
activate_my_card,1.0,0.923077,0.96,13.0
lost_or_stolen_phone,1.0,0.916667,0.956522,12.0
exchange,1.0,0.866667,0.928571,15.0
fee,0.923077,0.923077,0.923077,13.0
get_invoice,1.0,0.85,0.918919,20.0
cancel_transfer,0.8125,1.0,0.896552,13.0
edit_account,1.0,0.785714,0.88,14.0


In [None]:
#create_account, general_negative_feedback, idoos have lower scores
# gpt4 have invented an intent "reactivate_my_card" which was not one of the predefined intents

In [28]:
df['intent_pred_dar_chatgpt'] = predicted_intents_dar
df

Unnamed: 0,arabic,darija,different from msa from 1 to 3,intent,intent_pred_ara,intent_pred_dar,intent_pred_ara_retriever,intent_pred_dar_retriever,intent_pred_dar_chatgpt
0,هذا رائع جدا,هادشي واعر,3,general_positive_feedback,general_positive_feedback,general_positive_feedback,general_positive_feedback,general_positive_feedback,general_positive_feedback
1,كيف يمكنني أن أضع طابعة جلدية ؟,كيفاش نصلح طابعة عالقة؟,2,oodoos,oodoos,oodoos,oodoos,oodoos,oodoos
2,أيمكنني إلغاء أمر للحصول على شهادة إيداع جديدة ؟,واش نقدر نلغي طلبية لشهادة ايداع جديدة؟,2,cancel_order,cancel_order,cancel_order,cancel_order,cancel_order,cancel_order
3,معاملات مشكوك فيها على حسابي.,عمليات مشبوهة فحسابي.,1,compromised_card,compromised_card,compromised_card,compromised_card,delete_account,compromised_card
4,تم تأجيل المباراة ، ماذا علي أن أفعل ؟,تأجل الماتش، شنو لازم ندير؟,3,get_refund,get_refund,get_refund,get_refund,get_refund,oodoos
...,...,...,...,...,...,...,...,...,...
357,أيمكنني أن أسأل عميلا ما هي ساعات دعم العملاء ؟,تقدر تسول الوكيل على ساعات دعم الزبائن؟,2,contact_customer_service,contact_customer_service,contact_customer_service,contact_customer_service,contact_customer_service,contact_human_agent
358,كيف لي أن أعرف إن كانت سياسة التأمين الخاصة بي...,كيفاش نعرف بلي تأميني مازال ساري؟,2,insurance,insurance,insurance,insurance,insurance,insurance
359,أريد تغيير المعلومات عن بروفايلي ، كيف يمكنني ...,بغيت نعدل المعلومات فبروفيل ديالي، كيفاش نديرها؟,2,edit_account,edit_account,edit_account,edit_account,edit_account,edit_account
360,أيمكنك على الأقل أن تحاول أن تعالج شواغلي بدلا...,تقدر تحاول تواجه مشاكلي بلا ما تعطيني جوابات ج...,1,general_negative_feedback,general_negative_feedback,general_negative_feedback,edit_account,general_negative_feedback,contact_human_agent


In [29]:


# Iterate through each level of differences (0, 1, 2, 4) and filter by indices
for level in [1, 2, 3]:
    print(f"\nMetrics for Darija texts where difference level is {level}:")
    sub_df = df[df["different from msa from 1 to 3"] == level]
    sub_ground_truths = sub_df['intent'].tolist()
    sub_intent_pred_dar = sub_df['intent_pred_dar_chatgpt'].tolist()
    #acc_msa, prec_msa, rec_msa, f1_msa = compute_metrics(sub_ground_truths, sub_intent_pred_ara)
    acc_dar, prec_dar, rec_dar, f1_dar = compute_metrics(sub_ground_truths, sub_intent_pred_dar)
    #print(acc_msa, prec_msa, rec_msa, f1_msa)
    print(acc_dar, prec_dar, rec_dar, f1_dar)
    print('-----------')


Metrics for Darija texts where difference level is 1:
0.8181818181818182 0.7877846790890269 0.7355072463768116 0.7402346445824708
-----------

Metrics for Darija texts where difference level is 2:
0.8164794007490637 0.8650980392156863 0.778271345613451 0.79458270454029
-----------

Metrics for Darija texts where difference level is 3:
0.6551724137931034 0.7 0.5833333333333334 0.6076190476190476
-----------
