In [1]:
import os
import pandas as pd

# The project-local imports below and every relative path used in this
# notebook ("data/...", "results/...") are resolved against the current
# working directory, so move two levels up (presumably the repository root
# that holds the `scripts` package -- confirm for your checkout).
# The guard keeps the cell idempotent: an unconditional chdir would climb two
# more levels every time the cell is re-run in the same kernel.
if not os.path.isdir("scripts"):
    os.chdir("../../")
from scripts.llm import get_completion
from scripts.utils import save_obj_as_pickle, read_obj_from_pickle
from scripts.data import make_database, make_prompts_for_clf

# Widen text columns so more of each prompt is visible in rendered frames.
pd.set_option("display.max_colwidth", 150)

In [2]:
def loadData(fp):
    """Load a tab-separated sentence-pair file into a label/text DataFrame.

    The file must start with one header line (skipped), followed by rows of
    five tab-separated fields: label, id1, id2, text1, text2. Every field is
    stripped of surrounding whitespace. Blank lines are ignored.

    Args:
        fp: Path to the TSV file.

    Returns:
        A DataFrame with two columns:
          * ``label`` -- "Yes" for raw label "1", "No" for "0" (NaN otherwise).
          * ``text``  -- both sentences rendered as a "Text A: ..." line and a
            "Text B: ..." line, each terminated by a newline.
    """
    columns = ["label", "id1", "id2", "text1", "text2"]

    # Fields are split on raw tabs only, so quote characters inside the
    # sentences are kept literally. The context manager guarantees the file
    # handle is closed, and the explicit encoding avoids depending on the
    # platform default.
    with open(fp, encoding="utf-8") as f:
        next(f, None)  # skip the header line (no-op on an empty file)
        data = [
            [field.strip() for field in line.split("\t")]
            for line in f
            if line.strip()  # a blank line would otherwise become an all-NaN row
        ]

    raw = pd.DataFrame(data, columns=columns)
    label = raw["label"].map({"0": "No", "1": "Yes"})
    text = "Text A: " + raw["text1"] + "\nText B: " + raw["text2"] + "\n"

    # Build the result as a fresh frame instead of mutating a column slice.
    return pd.DataFrame({"label": label, "text": text})

# Benchmark to process; its name is also the sub-directory holding the TSV splits.
clf_task = "MRPC"
raw_dir = f"data/raw/text classification/{clf_task}"

# Test split.
test_fp = f"{raw_dir}/test.tsv"
test_df = loadData(test_fp)

# Validation split, used as the dev set below.
dev_fp = f"{raw_dir}/val.tsv"
dev_df = loadData(dev_fp)

test_df.head()

Unnamed: 0,label,text
0,Yes,"Text A: PCCW's chief operating officer, Mike Butcher, and Alex Arena, the chief financial officer, will report directly to Mr So.\nText B: Current..."
1,Yes,Text A: The world's two largest automakers said their U.S. sales declined more than predicted last month as a late summer sales frenzy caused more...
2,Yes,"Text A: According to the federal Centers for Disease Control and Prevention (news - web sites), there were 19 reported cases of measles in the Uni..."
3,No,Text A: A tropical storm rapidly developed in the Gulf of Mexico Sunday and was expected to hit somewhere along the Texas or Louisiana coasts by M...
4,No,Text A: The company didn't detail the costs of the replacement and repairs.\nText B: But company officials expect the costs of the replacement wor...


### Database

Sources for the prompt data

In [3]:
# Settings forwarded to make_database:
#   num_instance      - number of instances used to compose multi-problem
#                       prompts; each instance contains multiple problems
#   max_instance_size - maximum number of problems sampled from the benchmark
#                       dataset to compose one instance
# dev_df is passed as well; it can be used for purposes such as testing the
# prompts or generating exemplars.
sampling = {"num_instance": 100, "max_instance_size": 100}
database = make_database(test_df, dev_df, **sampling)
database.keys()

dict_keys(['num_instance', 'max_instance_size', 'labels', 'testData', 'testInstances', 'devData', 'devInstances'])

#### 0-shot

In [4]:
# Zero-shot prompt templates, one per task type. The "$name" tokens
# ($text, $texts, $num, $be) are placeholders left in the template text.

# One text pair per prompt.
SingleClf = (
    "Compare text A with text B and determine if text A is a paraphrase of text B. "
    "Respond with 'Yes' if text A is a paraphrase, and 'No' if it is not.\n\n"
    "$text\nAnswer:"
)

# Several text pairs per prompt, answered line by line.
BatchClf = (
    "Compare text A with text B for the following $num text pairs and determine if text A is a paraphrase of text B line by line. "
    "Respond with 'Yes' if text A is a paraphrase, and 'No' if it is not. Provide your answers line by line.\n\n"
    "$texts\nAnswers:\n"
)

# Several text pairs per prompt; list the indices matching one target condition.
SelectOne = (
    "Go over the $num text pairs below and list the index numbers of the text pairs where text A $be a paraphrase of text B according to the following instructions:\n"
    "If none of the text pairs satisfy this condition, write 'None.'\n"
    "If all the text pairs satisfy this condition, write 'All.'\n"
    "Otherwise, provide the index numbers of the text pairs where text A $be a paraphrase of text B.\n\n"
    "Output your responses in JSON format with the key 'answer'.\nA formatted example output is provided below.\n"
    "{'answer': [None/All or index numbers of the text pairs where text A $be a paraphrase of text B]}\n\n"
    "Here are the text pairs:\n\n$texts\n"
    "JSON output:\n"
)

# Several text pairs per prompt; list the indices for both classes.
SelectAll = (
    "Go over the $num text pairs below. First, list the index numbers of the text pairs that contain paraphrases. "
    "Then, list the index numbers of the text pairs that contain non-paraphrases.\n"
    "If none of the text pairs satisfy a condition, write 'None.'\n"
    "If all the text pairs satisfy a condition, write 'All.'\n"
    "Otherwise, provide the index numbers of the text pairs that satisfy each condition.\n\n"
    "Output your responses in JSON format with two keys: 'yes' for paraphrases and 'no' for non-paraphrases."
    "\nA formatted example output is provided below.\n"
    "{'yes': [None/All or index numbers of text pairs that contain paraphrases], "
    "'no': [None/All or index numbers of text pairs that contain non-paraphrases]}\n\n"
    "Here are the text pairs:\n\n$texts\n"
    "JSON output:\n"
)

tasks = ["SingleClf", "BatchClf", "SelectOne", "SelectAll"]
promptTemplates = [SingleClf, BatchClf, SelectOne, SelectAll]

# Register the templates under database["promptTemplates"]["0-shot"][<task name>].
database["promptTemplates"] = {"0-shot": dict(zip(tasks, promptTemplates))}

In [5]:
# Persist the database (instances plus prompt templates) for this task.
database_dir = "data/databases/text classification/"
os.makedirs(database_dir, exist_ok=True)
save_obj_as_pickle(database, f"{database_dir}{clf_task}.pkl")

Saved object to data/databases/text classification/MRPC.pkl


#### Test Prompts

- The main purpose is to check whether the LLM produces output in the desired format for each prompt type.

In [6]:
# Small dev-split sample: a couple of prompts per task and task size.
num_instance = 2
taskSizes = [3, 5]


def label_attr_converter(t):
    """Map the label "Yes" to "is" and "No" to "isn't"; other labels raise KeyError."""
    return {"Yes": "is", "No": "isn't"}[t]


dev_frames = []
for prompt_mode in ["0-shot"]:
    for task_name in tasks:
        if task_name == "SingleClf":
            # SingleClf is built without a task size; keep the first num_instance rows.
            single = make_prompts_for_clf(database, task_name, "dev", prompt_mode)
            dev_frames.append(single[:num_instance])
        else:
            for size in taskSizes:
                dev_frames.append(
                    make_prompts_for_clf(
                        database, task_name, "dev", prompt_mode, size,
                        attr="be",
                        label_attr_converter=label_attr_converter,
                        num_instance=num_instance,
                    )
                )

dev = pd.concat(dev_frames).reset_index(drop=True)

In [7]:
# Print the first instance of every small (task size <= 3) prompt, each
# followed by a 50-dash rule and a blank line.
first_small = (dev["taskSize"] <= 3) & (dev["taskIndex"] == 1)
for prompt_text in dev.loc[first_small, "prompt"]:
    print(prompt_text, "-" * 50, "", sep="\n")

Compare text A with text B and determine if text A is a paraphrase of text B. Respond with 'Yes' if text A is a paraphrase, and 'No' if it is not.

Text A: Stocks have rallied sharply for more than three months in anticipation of a rebound in the second half of the year.
Text B: Stocks have rallied sharply for more than three months in anticipation of an economic rebound in the year's second half.

Answer:
--------------------------------------------------

Compare text A with text B for the following 3 text pairs and determine if text A is a paraphrase of text B line by line. Respond with 'Yes' if text A is a paraphrase, and 'No' if it is not. Provide your answers line by line.

1. Text A: Food and Drug Administration (news - web sites) Commissioner Mark McClellan said Kraft's initiative could start an important trend.
Text B: U.S. Food and Drug Administration Commissioner Mark McClellan said Kraft's could start an important trend.

2. Text A: "There were more people surrounding the c

In [8]:
# Run get_completion on every dev prompt and keep the result next to the gold answer.
dev = dev.assign(preds=dev["prompt"].apply(get_completion))
dev

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,preds
0,1,"Compare text A with text B and determine if text A is a paraphrase of text B. Respond with 'Yes' if text A is a paraphrase, and 'No' if it is not....",Yes,,SingleClf,0,False,1,No
1,2,"Compare text A with text B and determine if text A is a paraphrase of text B. Respond with 'Yes' if text A is a paraphrase, and 'No' if it is not....",No,,SingleClf,0,False,1,No
2,1,Compare text A with text B for the following 3 text pairs and determine if text A is a paraphrase of text B line by line. Respond with 'Yes' if te...,"[Yes, Yes, No]",,BatchClf,0,False,3,1. No\n2. No\n3. No
3,2,Compare text A with text B for the following 3 text pairs and determine if text A is a paraphrase of text B line by line. Respond with 'Yes' if te...,"[No, Yes, Yes]",,BatchClf,0,False,3,1. No\n2. No\n3. Yes
4,1,Compare text A with text B for the following 5 text pairs and determine if text A is a paraphrase of text B line by line. Respond with 'Yes' if te...,"[Yes, Yes, No, No, Yes]",,BatchClf,0,False,5,1. No\n2. No\n3. No\n4. No\n5. No
5,2,Compare text A with text B for the following 5 text pairs and determine if text A is a paraphrase of text B line by line. Respond with 'Yes' if te...,"[No, Yes, Yes, Yes, No]",,BatchClf,0,False,5,1. No\n2. No\n3. No\n4. No\n5. No
6,1,Go over the 3 text pairs below and list the index numbers of the text pairs where text A isn't a paraphrase of text B according to the following i...,{3},No,SelectOne,0,False,3,"{'answer': [1, 2]}"
7,1,Go over the 3 text pairs below and list the index numbers of the text pairs where text A is a paraphrase of text B according to the following inst...,"{1, 2}",Yes,SelectOne,0,False,3,{'answer': [None]}
8,2,Go over the 3 text pairs below and list the index numbers of the text pairs where text A isn't a paraphrase of text B according to the following i...,{1},No,SelectOne,0,False,3,"{'answer': [1, 2]}"
9,2,Go over the 3 text pairs below and list the index numbers of the text pairs where text A is a paraphrase of text B according to the following inst...,"{2, 3}",Yes,SelectOne,0,False,3,{'answer': [None]}


### Make prompts

In [9]:
# Reload the database saved earlier in this notebook.
database_fp = f"data/databases/text classification/{clf_task}.pkl"
database = read_obj_from_pickle(database_fp)

Read object from data/databases/text classification/MRPC.pkl


In [10]:
# Full test-split prompt set: every task, and every task size for the
# multi-problem tasks.
num_instance = 100
taskSizes = [3, 5, 10, 20, 50]

test_frames = []
for prompt_mode in ["0-shot"]:
    for task_name in tasks:
        if task_name == "SingleClf":
            # SingleClf is built without a task size or instance limit.
            test_frames.append(make_prompts_for_clf(database, task_name, "test", prompt_mode))
        else:
            for size in taskSizes:
                test_frames.append(
                    make_prompts_for_clf(
                        database, task_name, "test", prompt_mode, size,
                        attr="be",
                        label_attr_converter=label_attr_converter,
                        num_instance=num_instance,
                    )
                )

out = pd.concat(test_frames).reset_index(drop=True)

# Write one JSON record per line.
results_dir = "results/text classification/"
os.makedirs(results_dir, exist_ok=True)
out.to_json(f"{results_dir}{clf_task}.json", orient="records", lines=True)

In [11]:
# Number of prompts generated per task type.
out["task"].value_counts()

SingleClf    1725
SelectOne    1000
BatchClf      500
SelectAll     500
Name: task, dtype: int64

In [12]:
# Preview the first instance of each task at the smallest task sizes.
first_small_rows = (out["taskIndex"] == 1) & (out["taskSize"] <= 3)
out.loc[first_small_rows]

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize
0,1,"Compare text A with text B and determine if text A is a paraphrase of text B. Respond with 'Yes' if text A is a paraphrase, and 'No' if it is not....",Yes,,SingleClf,0,False,1
1725,1,Compare text A with text B for the following 3 text pairs and determine if text A is a paraphrase of text B line by line. Respond with 'Yes' if te...,"[No, Yes, Yes]",,BatchClf,0,False,3
2225,1,Go over the 3 text pairs below and list the index numbers of the text pairs where text A isn't a paraphrase of text B according to the following i...,{1},No,SelectOne,0,False,3
2226,1,Go over the 3 text pairs below and list the index numbers of the text pairs where text A is a paraphrase of text B according to the following inst...,"{2, 3}",Yes,SelectOne,0,False,3
3225,1,"Go over the 3 text pairs below. First, list the index numbers of the text pairs that contain paraphrases. Then, list the index numbers of the text...","{'no': {1}, 'yes': {2, 3}}",,SelectAll,0,False,3
