In [1]:
import os
import pandas as pd

# Switch the working directory two levels up before importing the project
# modules; presumably this is the project root, so that `scripts.*` and the
# relative `data/` and `results/` paths used below resolve -- TODO confirm.
# NOTE(review): the path is relative, so re-running this cell without
# restarting the kernel moves up another two levels.
os.chdir("../../")
from scripts.llm import get_completion
from scripts.utils import save_obj_as_pickle, read_obj_from_pickle
from scripts.data import make_database, make_prompts_for_clf

# Show up to 150 characters per cell when a DataFrame is displayed.
pd.set_option("display.max_colwidth", 150)

In [2]:
def loadData(dire):
    """Load one WiC split from a directory into a two-column DataFrame.

    The directory's own name is taken as the split name, and the files
    ``<split>.data.txt`` and ``<split>.gold.txt`` inside it are read as
    header-less, tab-separated tables.

    Parameters
    ----------
    dire : str
        Path of the split directory (e.g. ``".../WiC/dev"``). A trailing path
        separator is tolerated.

    Returns
    -------
    pandas.DataFrame
        One row per example, indexed ``0..n-1``, with the columns ``label``
        (``"Yes"`` for gold ``T``, ``"No"`` for gold ``F``) and ``text``
        (target word and both contexts, each terminated by a newline).

    Raises
    ------
    ValueError
        If the data and gold files have different row counts, or the gold
        file contains a label other than ``T``/``F``.
    """
    # normpath drops a trailing separator, which would otherwise yield an
    # empty split name and make the function look for ".data.txt".
    dsType = os.path.basename(os.path.normpath(dire))
    fp1 = os.path.join(dire, f"{dsType}.data.txt")
    fp2 = os.path.join(dire, f"{dsType}.gold.txt")

    # NOTE(review): read_table applies pandas' default quote handling; confirm
    # that double quotes inside the context sentences are parsed as intended.
    wic_data = pd.read_table(fp1, header=None, names=["word", "pos", "ix", "context1", "context2"])
    wic_label = pd.read_table(fp2, header=None, names=["label"])

    # concat(axis=1) would silently pad the shorter table with NaN.
    if len(wic_data) != len(wic_label):
        raise ValueError(
            f"{fp1} has {len(wic_data)} rows but {fp2} has {len(wic_label)} rows"
        )

    # Series.map turns unmapped values into NaN, so reject them up front.
    label_names = {"T": "Yes", "F": "No"}
    unknown = set(wic_label["label"].unique()) - set(label_names)
    if unknown:
        raise ValueError(f"Unexpected gold labels in {fp2}: {sorted(map(str, unknown))}")

    df = pd.concat([wic_data, wic_label], axis=1)
    df["text"] = (
        "Target word: " + df["word"] + "\n"
        + "Context 1: " + df["context1"] + "\n"
        + "Context 2: " + df["context2"] + "\n"
    )
    df["label"] = df["label"].map(label_names)
    df = df.drop(columns=["word", "pos", "ix", "context1", "context2"])

    return df.reset_index(drop=True)


clf_task = "WiC"

# The raw benchmark files sit in one sub-directory per split.
raw_dire = f"data/raw/text classification/{clf_task}"
test_dire, dev_dire = f"{raw_dire}/test", f"{raw_dire}/dev"

test_df, dev_df = loadData(test_dire), loadData(dev_dire)

# Preview the first test examples.
test_df.head()

Unnamed: 0,label,text
0,Yes,Target word: defeat\nContext 1: It was a narrow defeat .\nContext 2: The army 's only defeat .\n
1,Yes,Target word: groom\nContext 1: Groom the dogs .\nContext 2: Sheila groomed the horse .\n
2,Yes,"Target word: penetration\nContext 1: The penetration of upper management by women .\nContext 2: Any penetration , however slight , is sufficient t..."
3,No,Target word: hit\nContext 1: We hit Detroit at one in the morning but kept driving through the night .\nContext 2: An interesting idea hit her .\n
4,No,Target word: deliberation\nContext 1: He was a man of judicial deliberation .\nContext 2: A little deliberation would have deterred them .\n


### Database

Sources for the prompt data

In [3]:
# make_database keyword arguments:
#   num_instance      - how many instances to build for the multi-problem prompts;
#                       every instance holds several problems
#   max_instance_size - upper bound on the number of problems drawn from the
#                       benchmark dataset for a single instance
# dev_df is passed along for purposes such as testing prompts or generating exemplars.
database = make_database(
    test_df,
    dev_df,
    num_instance=100,
    max_instance_size=100,
)
database.keys()

dict_keys(['num_instance', 'max_instance_size', 'labels', 'testData', 'testInstances', 'devData', 'devInstances'])

#### 0-shot

In [4]:
# 0-shot prompt templates, one per task. The $-prefixed names ($text, $texts,
# $num, $be) are placeholders; presumably make_prompts_for_clf substitutes
# them when it builds the prompts -- TODO confirm.

# One target word per prompt, answered with Yes/No.
SingleClf = (
    "Analyze the usage of the given target word in the two subsequent contexts. "
    "The target word may appear in various grammatical forms in each context. "
    "Respond with 'Yes' if it maintains the same meaning across both contexts, and 'No' if it does not.\n\n"
    "$text\nAnswer: "
)

# Several target words per prompt, one Yes/No answer per line.
BatchClf = (
    "Analyze the usage of the following $num target words in the two contexts that immediately follow them. "
    "These target words may appear in different grammatical forms across the two subsequent contexts. "
    "Determine if each target word maintains the same meaning in the two subsequent contexts. "
    "Provide your answers line by line, indicating 'Yes' if it does and 'No' if it does not.\n\n$texts\n"
    "Answers:\n"
)

# Several target words per prompt; the answer lists the indices matching one condition.
# NOTE(review): "'None.'." carries an extra period compared with "'All.'" and with
# the SelectAll template; left untouched because changing the prompt text would
# change the experiment.
SelectOne = (
    "Analyze the following $num target words and determine the index numbers of the target words where "
    "the same meaning $be maintained across the two contexts that immediately follow them. "
    "These target words may appear in different grammatical forms in each context.\n"
    "If none of the target words satisfy this condition, write 'None.'.\n"
    "If all the target words satisfy this condition, write 'All.'\n"
    "Otherwise, provide the index numbers.\n\n"
    "Output your responses in JSON format with the key 'answer'.\nA formatted example output is provided below.\n"
    "{'answer': [None/All or index numbers of the target words where the same meaning $be maintained in the two subsequent contexts]}\n\n"
    "Here are the target words along with their contexts:\n\n$texts\n"
    "JSON output:\n"
)

# Several target words per prompt; the answer lists the indices for both conditions.
SelectAll = (
    "Analyze the following $num target words, which may appear in different grammatical forms in the two subsequent contexts. "
    "First, list the index numbers of target words that maintain the same meaning in the two subsequent contexts. "
    "Then, list the index numbers of target words that do not maintain the same meaning in the two subsequent contexts.\n"
    "If none of the target words satisfy a condition, write 'None.'\nIf all the target words satisfy a condition, write 'All.'\n"
    "Otherwise, provide the index numbers of the target words that satisfy each condition.\n\n"
    "Output your responses in JSON format with two keys: 'yes' for target words used with consistent meanings and "
    "'no' for those used with inconsistent meanings.\nA formatted example output is provided below.\n"
    "{'yes': [None/All or index numbers of target words used with consistent meanings], "
    "'no': [None/All or index numbers of target words used with inconsistent meanings]}\n\n"
    "Here are the target words along with their contexts:\n\n$texts\n"
    "JSON output:\n"
)

tasks = ["SingleClf", "BatchClf", "SelectOne", "SelectAll"]
promptTemplates = [SingleClf, BatchClf, SelectOne, SelectAll]

# Register the templates in the database under promptTemplates -> "0-shot" -> task name.
database["promptTemplates"] = {"0-shot": dict(zip(tasks, promptTemplates))}

In [5]:
# Make sure the target folder exists, then pickle the database under the task name.
db_dir = "data/databases/text classification/"
os.makedirs(db_dir, exist_ok=True)
save_obj_as_pickle(database, f"{db_dir}{clf_task}.pkl")

Saved object to data/databases/text classification/WiC.pkl


#### Test Prompts

- The main purpose is to check whether the LLM produces output in the desired format for each prompt.

In [6]:
def label_attr_converter(t):
    """Return the verb form for a target label: "Yes" -> "is", "No" -> "isn't"."""
    return {"Yes": "is", "No": "isn't"}[t]


# Small settings: this cell only builds a handful of dev prompts.
num_instance = 2
taskSizes = [3, 5]

dev_parts = []
for promptMode in ["0-shot"]:
    for task in tasks:
        if task == "SingleClf":
            # SingleClf is called without a task size; keep the first rows only.
            single_prompts = make_prompts_for_clf(database, task, "dev", promptMode)
            dev_parts.append(single_prompts[:num_instance])
        else:
            for taskSize in taskSizes:
                dev_parts.append(
                    make_prompts_for_clf(
                        database, task, "dev", promptMode, taskSize,
                        attr="be",
                        label_attr_converter=label_attr_converter,
                        num_instance=num_instance,
                    )
                )

dev = pd.concat(dev_parts).reset_index(drop=True)

In [7]:
# Print the prompts with taskIndex 1 and taskSize <= 3, each followed by a rule and a blank line.
small_first = (dev["taskSize"] <= 3) & (dev["taskIndex"] == 1)
separator = "-" * 50
for prompt_text in dev.loc[small_first, "prompt"]:
    print(prompt_text, separator, "", sep="\n")

Analyze the usage of the given target word in the two subsequent contexts. The target word may appear in various grammatical forms in each context. Respond with 'Yes' if it maintains the same meaning across both contexts, and 'No' if it does not.

Target word: board
Context 1: Room and board .
Context 2: He nailed boards across the windows .

Answer: 
--------------------------------------------------

Analyze the usage of the following 3 target words in the two contexts that immediately follow them. These target words may appear in different grammatical forms across the two subsequent contexts. Determine if each target word maintains the same meaning in the two subsequent contexts. Provide your answers line by line, indicating 'Yes' if it does and 'No' if it does not.

1. Target word: twist
Context 1: Do n't twist my words .
Context 2: Twist the dough into a braid .

2. Target word: brush
Context 1: Brush aside the objections .
Context 2: Brush the dust from the jacket .

3. Target wo

In [8]:
# Pass every dev prompt to get_completion and store what it returns next to the prompt.
dev_prompts = dev["prompt"]
dev["preds"] = dev_prompts.apply(get_completion)
dev

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,preds
0,1,Analyze the usage of the given target word in the two subsequent contexts. The target word may appear in various grammatical forms in each context...,No,,SingleClf,0,False,1,No
1,2,Analyze the usage of the given target word in the two subsequent contexts. The target word may appear in various grammatical forms in each context...,No,,SingleClf,0,False,1,Yes
2,1,Analyze the usage of the following 3 target words in the two contexts that immediately follow them. These target words may appear in different gra...,"[No, Yes, Yes]",,BatchClf,0,False,3,1. No\n2. Yes\n3. No
3,2,Analyze the usage of the following 3 target words in the two contexts that immediately follow them. These target words may appear in different gra...,"[No, Yes, Yes]",,BatchClf,0,False,3,1. No\n2. Yes\n3. Yes
4,1,Analyze the usage of the following 5 target words in the two contexts that immediately follow them. These target words may appear in different gra...,"[No, Yes, Yes, No, Yes]",,BatchClf,0,False,5,1. Yes\n2. Yes\n3. No\n4. No\n5. No
5,2,Analyze the usage of the following 5 target words in the two contexts that immediately follow them. These target words may appear in different gra...,"[No, Yes, Yes, No, No]",,BatchClf,0,False,5,1. No\n2. No\n3. No\n4. No\n5. No
6,1,Analyze the following 3 target words and determine the index numbers of the target words where the same meaning isn't maintained across the two co...,{1},No,SelectOne,0,False,3,{'answer': [All]}
7,1,Analyze the following 3 target words and determine the index numbers of the target words where the same meaning is maintained across the two conte...,"{2, 3}",Yes,SelectOne,0,False,3,{'answer': [None]}
8,2,Analyze the following 3 target words and determine the index numbers of the target words where the same meaning isn't maintained across the two co...,{1},No,SelectOne,0,False,3,{'answer': [All]}
9,2,Analyze the following 3 target words and determine the index numbers of the target words where the same meaning is maintained across the two conte...,"{2, 3}",Yes,SelectOne,0,False,3,{'answer': [None]}


### Make prompts

In [9]:
# Reload the database that was pickled above for this task.
# NOTE(review): only read pickle files from a trusted source.
db_path = f"data/databases/text classification/{clf_task}.pkl"
database = read_obj_from_pickle(db_path)

Read object from data/databases/text classification/WiC.pkl


In [10]:
num_instance = 100
taskSizes = [3, 5, 10, 20, 50]

test_parts = []
for promptMode in ["0-shot"]:
    for task in tasks:
        if task == "SingleClf":
            # SingleClf is called without a task size or an instance count.
            test_parts.append(make_prompts_for_clf(database, task, "test", promptMode))
        else:
            # One frame of prompts per task size for the multi-problem tasks.
            test_parts.extend(
                [
                    make_prompts_for_clf(
                        database, task, "test", promptMode, taskSize,
                        attr="be",
                        label_attr_converter=label_attr_converter,
                        num_instance=num_instance,
                    )
                    for taskSize in taskSizes
                ]
            )

out = pd.concat(test_parts).reset_index(drop=True)

# Write all prompts as JSON Lines, one record per row.
results_dir = "results/text classification/"
os.makedirs(results_dir, exist_ok=True)
out.to_json(f"{results_dir}{clf_task}.json", orient="records", lines=True)

In [11]:
# Number of generated prompts per task.
out["task"].value_counts()

SingleClf    1400
SelectOne    1000
BatchClf      500
SelectAll     500
Name: task, dtype: int64

In [12]:
# Rows with taskIndex 1 and taskSize <= 3, returned as an independent copy.
out.loc[(out["taskIndex"] == 1) & (out["taskSize"] <= 3)].copy()

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize
0,1,Analyze the usage of the given target word in the two subsequent contexts. The target word may appear in various grammatical forms in each context...,Yes,,SingleClf,0,False,1
1400,1,Analyze the usage of the following 3 target words in the two contexts that immediately follow them. These target words may appear in different gra...,"[Yes, No, Yes]",,BatchClf,0,False,3
1900,1,Analyze the following 3 target words and determine the index numbers of the target words where the same meaning isn't maintained across the two co...,{2},No,SelectOne,0,False,3
1901,1,Analyze the following 3 target words and determine the index numbers of the target words where the same meaning is maintained across the two conte...,"{1, 3}",Yes,SelectOne,0,False,3
2900,1,"Analyze the following 3 target words, which may appear in different grammatical forms in the two subsequent contexts. First, list the index number...","{'no': {2}, 'yes': {1, 3}}",,SelectAll,0,False,3
