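Zero-shot prompting: prompt GPT-3.5 with instructions only (no worked examples) for a classification task, assigning each French phrase to one of 11 categories.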

In [None]:
# import sys
# !{sys.executable} -m pip install openai

In [None]:
import os
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
from tqdm import tqdm

from openai import OpenAI
# Do not paste a key into the notebook. Keep whatever OPENAI_API_KEY the
# environment already provides; only fall back to an empty placeholder when
# the variable is unset, so later code that reads it still finds an entry.
os.environ.setdefault("OPENAI_API_KEY", "")

In [None]:
# Load the validation and test splits (validation first, then test) from the
# CSV files that sit next to the notebook's parent directory.
val_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../data/mtob_domain_en_fr_val.csv",
        "../data/mtob_domain_en_fr_test.csv",
    )
)

In [None]:
# Category names. The two lists are parallel: labels_fr[i] is the French
# wording used in the prompt for the English label labels_en[i].
labels_fr = [
    "la messagerie",
    "l'appel",
    "l'événement",
    "la minuterie",
    "la musique",
    "le temps",
    "l'alarme",
    "les personnes",
    "le rappel",
    "les recettes",
    "les nouvelles",
]
labels_en = [
    "messaging",
    "calling",
    "event",
    "timer",
    "music",
    "weather",
    "alarm",
    "people",
    "reminder",
    "recipes",
    "news",
]

# French category -> English label.
labels_fr2en = {fr: en for fr, en in zip(labels_fr, labels_en)}

# English label -> integer id. Each id is the label's position in labels_en;
# keys are inserted in alphabetical order.
label2id = {name: labels_en.index(name) for name in sorted(labels_en)}

In [None]:
# Zero-shot prompt: English instructions, French category list, and a
# {text_fr} placeholder that is filled in with the phrase to classify.
# The pieces are concatenated as-is; each one carries its own line break.
en_prompt_template = "".join((
    "Your task is to determine what category a phrase is related to. Both the phrase and the category will be in French.\n",
    "Possible categories: la messagerie, l'appel, l'événement, la minuterie, la musique, le temps, l'alarme, les personnes, le rappel, les recettes, les nouvelles.\n",
    "Phrase: {text_fr} \n",
    "Your answer (a single category from the list of possible categories): ",
))
print(en_prompt_template)

Your task is to determine what category a phrase is related to. Both the phrase and the category will be in French.
Possible categories: la messagerie, l'appel, l'événement, la minuterie, la musique, le temps, l'alarme, les personnes, le rappel, les recettes, les nouvelles.
Phrase: {text_fr} 
Your answer (a single category from the list of possible categories): 


In [None]:
# model_name is the human-readable name; model_version is the identifier
# used as the default model for API requests.
model_name, model_version = "GPT-3.5", "gpt-3.5-turbo"

In [None]:
# API client pointed at the Vocareum endpoint; the key is read from the
# OPENAI_API_KEY environment variable.
client = OpenAI(
    api_key=os.getenv("OPENAI_API_KEY"),
    base_url="https://openai.vocareum.com/v1",
)

def query_openai(prompt, model_name=model_version, temperature=0.0):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text; it is sent as the only message of the chat.
        model_name: Model identifier passed to the API. Defaults to the
            module-level ``model_version``.
        temperature: Sampling temperature forwarded to the API. Defaults to
            0.0 so that re-running the classification gives results that are
            as repeatable as possible.

    Returns:
        The content of the first returned choice with leading and trailing
        whitespace removed, or ``""`` when the reply carries no text content.
    """
    response = client.chat.completions.create(
        model=model_name,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )
    answer = response.choices[0].message.content
    # The content may be missing, and callers look the answer up by exact
    # string match, so normalise to a stripped string here.
    return (answer or "").strip()

In [None]:
# Take the first validation row whose label_text is "news" as a worked example.
example = val_df.loc[val_df["label_text"] == "news"].iloc[0]
example

id                                             3136333730363831
text_en              where will american federal aid be sent to
text_fr       Où est-ce que l'aide fédérale américaine sera-...
label                                                        10
label_text                                                 news
Name: 28, dtype: object

In [None]:
# Fill the template's placeholders from the example row's fields and show
# the exact text that will be sent to the model.
en_prompt = en_prompt_template.format(**example.to_dict())
print(en_prompt)

Your task is to determine what category a phrase is related to. Both the phrase and the category will be in French.
Possible categories: la messagerie, l'appel, l'événement, la minuterie, la musique, le temps, l'alarme, les personnes, le rappel, les recettes, les nouvelles.
Phrase: Où est-ce que l'aide fédérale américaine sera-t-elle envoyée ? 
Your answer (a single category from the list of possible categories): 


In [None]:
# Sanity check: send the single example prompt and display the raw reply.
query_openai(prompt=en_prompt)

'les nouvelles'

In [None]:
# Directory that receives the prediction CSVs; created if it does not exist yet.
output_dir = "../zero_shot_prompting"
os.makedirs(name=output_dir, exist_ok=True)

In [None]:
# One prompt per validation row. Building from row dicts (rather than a
# row-wise apply) also yields a plain empty list for an empty frame.
val_prompts = [en_prompt_template.format_map(row) for row in val_df.to_dict("records")]
# One API call per prompt, in order; tqdm shows progress.
val_answers = [query_openai(p) for p in tqdm(val_prompts)]

# Copy the id column so that adding "label" modifies an independent frame
# instead of a selection taken from val_df.
val_preds_df = val_df[["id"]].copy()
# Map each French answer to its English label and then to its integer id.
# Answers are normalised (missing -> "", surrounding whitespace and a trailing
# period removed, lower-cased) before the exact-match lookup; anything that is
# still not a known category becomes -1.
val_preds_df["label"] = [
    label2id.get(labels_fr2en.get((a or "").strip().rstrip(".").lower(), "unknown"), -1)
    for a in val_answers
]

100%|██████████| 1577/1577 [18:12<00:00,  1.44it/s]


In [None]:
# Write the validation predictions (without the index column) into output_dir
# and report where they went.
val_preds_path = os.path.join(output_dir, "val_preds.csv")
val_preds_df.to_csv(path_or_buf=val_preds_path, index=False)
print(f"Saved val preds to {val_preds_path}")

Saved val preds to zero-shot-prompting/val_preds.csv


In [None]:
# One prompt per test row. Building from row dicts (rather than a row-wise
# apply) also yields a plain empty list for an empty frame.
test_prompts = [en_prompt_template.format_map(row) for row in test_df.to_dict("records")]
# One API call per prompt, in order; tqdm shows progress.
test_answers = [query_openai(p) for p in tqdm(test_prompts)]

# Copy the id column so that adding "label" modifies an independent frame
# instead of a selection taken from test_df.
test_preds_df = test_df[["id"]].copy()
# Map each French answer to its English label and then to its integer id.
# Answers are normalised (missing -> "", surrounding whitespace and a trailing
# period removed, lower-cased) before the exact-match lookup; anything that is
# still not a known category becomes -1.
test_preds_df["label"] = [
    label2id.get(labels_fr2en.get((a or "").strip().rstrip(".").lower(), "unknown"), -1)
    for a in test_answers
]

100%|██████████| 3193/3193 [36:43<00:00,  1.45it/s]


In [None]:
# Write the test predictions (without the index column) into output_dir and
# report where they went.
test_preds_path = os.path.join(output_dir, "test_preds.csv")
test_preds_df.to_csv(path_or_buf=test_preds_path, index=False)
print(f"Saved test preds to {test_preds_path}")

Saved test preds to zero-shot-prompting/test_preds.csv
