In [1]:
import os
import pandas as pd

# Move three directories up exactly once per kernel session.
# The relative `data/...` and `results/...` paths used throughout this notebook
# are resolved against the working directory (presumably the repository root,
# which is also where the `scripts` package is imported from -- confirm).
# An unguarded os.chdir is not idempotent: re-running this cell in the same
# kernel would climb three more levels and break every later cell.
# A kernel restart resets both the working directory and this flag together.
if "_PROJECT_ROOT" not in globals():
    os.chdir("../../../")
    _PROJECT_ROOT = os.getcwd()

from scripts.llm import get_completion
from scripts.utils import save_obj_as_pickle, read_obj_from_pickle
from scripts.data import make_database, make_prompts_for_clf

# Show up to 150 characters per cell so prompts/sentences stay readable in previews.
pd.set_option("display.max_colwidth", 150)

In [2]:
def loadData(fp):
    """Read a header-less, tab-separated ``label<TAB>text`` file into a DataFrame.

    Parameters
    ----------
    fp : str or path-like
        Location of the file to read.

    Returns
    -------
    pandas.DataFrame
        Columns ``label`` and ``text``; numeric labels 0/1 are replaced by
        "Negative"/"Positive". Any other label value maps to NaN.
    """
    label_names = {0: "Negative", 1: "Positive"}
    frame = pd.read_table(fp, header=None, names=["label", "text"])
    frame["label"] = frame["label"].map(label_names)
    return frame

clf_task = "SST-2"

# Locations of the raw test/dev splits for this task.
test_fp = f"data/raw/text classification/{clf_task}/test.tsv"
dev_fp = f"data/raw/text classification/{clf_task}/dev.tsv"

# Load both splits with string sentiment labels.
test_df, dev_df = loadData(test_fp), loadData(dev_fp)

test_df.head()

Unnamed: 0,label,text
0,Negative,"no movement , no yuks , not much of anything ."
1,Negative,"a gob of drivel so sickly sweet , even the eager consumers of moore 's pasteurized ditties will retch it up like rancid crème brûlée ."
2,Negative,"gangs of new york is an unapologetic mess , whose only saving grace is that it ends by blowing just about everything up ."
3,Negative,"we never really feel involved with the story , as all of its ideas remain just that : abstract ideas ."
4,Positive,this is one of polanski 's best films .


In [3]:
def construct_pairs(df, num_pairs=1000, random_state=None):
    """Sample distinct unordered row pairs and label whether their sentiments match.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``text`` and ``label`` columns. Index labels are assumed to be
        unique (true for a default RangeIndex) -- duplicated labels are not handled.
    num_pairs : int, default 1000
        Number of distinct pairs to draw.
    random_state : int or None, default None
        Seed for reproducible sampling. ``None`` keeps the previous behaviour
        (sampling from NumPy's global random state).

    Returns
    -------
    pandas.DataFrame
        One row per pair with columns:
        ``text`` ("Text A: ...\\nText B: ...\\n"), ``label`` ("Yes" when both
        rows carry the same label, otherwise "No") and ``originalLabels``
        (the two source labels, in A/B order).

    Raises
    ------
    ValueError
        If ``num_pairs`` exceeds the number of distinct unordered pairs in ``df``
        (previously this spun forever in the rejection loop).
    """
    import numpy as np  # local import: numpy is not imported at file level

    n_rows = len(df)
    max_pairs = n_rows * (n_rows - 1) // 2
    if num_pairs > max_pairs:
        raise ValueError(
            f"Cannot draw {num_pairs} distinct pairs from {n_rows} rows "
            f"(at most {max_pairs} are possible)."
        )

    # A single RandomState must be shared across draws: passing the same int seed
    # to every df.sample call would return the same two rows each time.
    rng = None if random_state is None else np.random.RandomState(random_state)

    out = []
    seen = set()

    # Rejection sampling: keep drawing until enough unseen pairs are collected.
    while len(out) < num_pairs:
        i, j = df.sample(2, random_state=rng).index
        key = tuple(sorted([i, j]))  # (i, j) and (j, i) count as the same pair

        if key in seen:
            continue
        seen.add(key)

        text_pair = "Text A: " + df.loc[i, "text"] + "\nText B: " + df.loc[j, "text"] + "\n"
        label_pair = [df.loc[i, "label"], df.loc[j, "label"]]

        label = "Yes" if label_pair[0] == label_pair[1] else "No"
        out.append([text_pair, label, label_pair])

    return pd.DataFrame(out, columns=["text", "label", "originalLabels"])

In [4]:
# Build 1000 sentence pairs per split (test first, then dev).
# Sampling inside construct_pairs is unseeded here, so the pairs differ between runs.
new_test_df, new_dev_df = (
    construct_pairs(split_df, num_pairs=1000) for split_df in (test_df, dev_df)
)

In [5]:
# Eyeball five random pairs together with their Yes/No label.
for _, pair_row in new_test_df.sample(5).iterrows():
    pair_text = pair_row["text"]
    pair_label = pair_row["label"]
    print(pair_text[:-1])  # drop the trailing newline appended by construct_pairs
    print(pair_label)
    print("-" * 100)

Text A: the new guy does have a heart .
Text B: guillen rarely gets beneath the surface of things .
No
----------------------------------------------------------------------------------------------------
Text A: binoche and magimel are perfect in these roles .
Text B: all the well-meaningness in the world ca n't erase the fact that the believer feels like a 12-step program for the jewish nazi .
No
----------------------------------------------------------------------------------------------------
Text A: between bedroom scenes , viewers may find themselves wishing they could roll over and take a nap .
Text B: good for a few unintentional laughs , `` extreme ops '' was obviously made for the `` xxx '' crowd , people who enjoy mindless action without the benefit of decent acting , writing , and direction .
Yes
----------------------------------------------------------------------------------------------------
Text A: there is no entry portal in the rules of attraction , and i spent most 

In [6]:
# Class balance of the pairwise test set.
new_test_df["label"].value_counts()

No     504
Yes    496
Name: label, dtype: int64

In [12]:
# From here on `clf_task` names the derived pairwise task, not the raw SST-2 data.
clf_task = "SST-2-inference"
os.makedirs(f"data/raw/text classification/{clf_task}", exist_ok=True)

# NOTE(review): lines=True together with indent=4 may pretty-print each record over
# several lines, in which case the file is not strict one-record-per-line JSON --
# confirm that whatever reads these files expects this layout.
json_opts = dict(orient="records", lines=True, indent=4)
new_test_df.to_json(f"data/raw/text classification/{clf_task}/test.json", **json_opts)
new_dev_df.to_json(f"data/raw/text classification/{clf_task}/dev.json", **json_opts)

### Database

Build the database that holds the source data for the prompts.

In [13]:
# Assemble the prompt database from the pairwise test/dev frames.
database = make_database(
    new_test_df,
    new_dev_df,
    num_instance=100,
    max_instance_size=100,
)
database.keys()

dict_keys(['num_instance', 'max_instance_size', 'labels', 'testData', 'testInstances', 'devData', 'devInstances'])

#### 0-shot

In [14]:
# Zero-shot prompt templates. `$text`, `$texts`, `$num` and `$does` are placeholders
# left in the strings here; they are filled in when the prompts are built.

# One text pair per prompt.
SingleClf = (
    "Compare text A with text B and determine if text A shares the same sentiment with text B. "
    "The sentiment is either positive or negative. "
    "Respond with 'Yes' if text A shares the same sentiment with text B, and 'No' if it does not.\n\n"
    "$text\nAnswer:"
)

# Several text pairs per prompt, answered line by line.
BatchClf = (
    "Compare text A with text B for the following $num text pairs and determine if text A shares the same sentiment with text B line by line. "
    "The sentiment is either positive or negative. "
    "Respond with 'Yes' if text A shares the same sentiment with text B, and 'No' if it does not. Provide your answers line by line.\n\n"
    "$texts\nAnswers:\n"
)

# Select the indices of the pairs matching ONE condition (`$does`).
SelectOne = (
    "Go over the $num text pairs below and list the index numbers of the text pairs where text A $does the same sentiment with text B according to the following instructions:\n"
    "If none of the text pairs satisfy this condition, write 'None.'\n"
    "If all the text pairs satisfy this condition, write 'All.'\n"
    "Otherwise, provide the index numbers of the text pairs where text A $does the same sentiment with text B.\n\n"
    "Output your responses in JSON format with the key 'answer'.\nA formatted example output is provided below.\n"
    "{'answer': [None/All or index numbers of the text pairs where text A $does the same sentiment with text B]}\n\n"
    "Here are the text pairs:\n\n$texts\n"
    "JSON output:\n"
)

# Select the indices for BOTH conditions ('yes' and 'no') in one response.
SelectAll = (
    "Go over the $num text pairs below. First, list the index numbers of the text pairs where text A shares the same sentiment with text B. "
    "Then, list the index numbers of the text pairs where text A does not share the same sentiment with text B.\n"
    "If none of the text pairs satisfy a condition, write 'None.'\n"
    "If all the text pairs satisfy a condition, write 'All.'\n"
    "Otherwise, provide the index numbers of the text pairs that satisfy each condition.\n\n"
    "Output your responses in JSON format with two keys: 'yes' for consistent sentiments and 'no' for inconsistent sentiments."
    "\nA formatted example output is provided below.\n"
    "{'yes': [None/All or index numbers of text pairs with consistent sentiments], "
    "'no': [None/All or index numbers of text pairs with inconsistent sentiments]}"
    "\n\nHere are the text pairs:\n\n$texts\n"
    "JSON output:\n"
)

tasks = ["SingleClf", "BatchClf", "SelectOne", "SelectAll"]
promptTemplates = [SingleClf, BatchClf, SelectOne, SelectAll]

# Register every template under its task name for the "0-shot" prompt mode
# (replaces any existing "promptTemplates" entry).
database["promptTemplates"] = {"0-shot": dict(zip(tasks, promptTemplates))}

In [15]:
# Persist the database (including the prompt templates) for later cells/runs.
os.makedirs("data/databases/text classification/", exist_ok=True)
database_fp = f"data/databases/text classification/{clf_task}.pkl"
save_obj_as_pickle(database, database_fp)

Saved object to data/databases/text classification/SST-2-inference.pkl


#### Test Prompts

- The main purpose is to check whether LLMs produce output in the desired format for the given prompts.

In [16]:
def label_attr_converter(t):
    """Translate a 'Yes'/'No' target label into the verb phrase used for `attr="does"`."""
    return {"Yes": "shares", "No": "does not share"}[t]


num_instance = 2      # only a couple of prompts per setting for this format check
taskSizes = [3, 5]    # number of text pairs per multi-pair prompt

dev_frames = []
for prompt_mode in ["0-shot"]:
    for task in tasks:
        if task == "SingleClf":
            # Single-pair prompts take no task size; just keep the first few.
            single_prompts = make_prompts_for_clf(database, task, "dev", prompt_mode)
            dev_frames.append(single_prompts[:num_instance])
        else:
            for taskSize in taskSizes:
                dev_frames.append(
                    make_prompts_for_clf(
                        database, task, "dev", prompt_mode, taskSize,
                        attr="does",
                        label_attr_converter=label_attr_converter,
                        num_instance=num_instance,
                    )
                )

dev = pd.concat(dev_frames).reset_index(drop=True)

In [17]:
# Print the first prompt of every small (<= 3 pairs) setting for a visual check.
small_first = (dev["taskSize"] <= 3) & (dev["taskIndex"] == 1)
for prompt_text in dev.loc[small_first, "prompt"]:
    print(prompt_text)
    print("-" * 50)
    print()

Compare text A with text B and determine if text A shares the same sentiment with text B. The sentiment is either positive or negative. Respond with 'Yes' if text A shares the same sentiment with text B, and 'No' if it does not.

Text A: -lrb- w -rrb- hile long on amiable monkeys and worthy environmentalism , jane goodall 's wild chimpanzees is short on the thrills the oversize medium demands .
Text B: it gets onto the screen just about as much of the novella as one could reasonably expect , and is engrossing and moving in its own right .

Answer:
--------------------------------------------------

Compare text A with text B for the following 3 text pairs and determine if text A shares the same sentiment with text B line by line. The sentiment is either positive or negative. Respond with 'Yes' if text A shares the same sentiment with text B, and 'No' if it does not. Provide your answers line by line.

1. Text A: there 's ... tremendous energy from the cast , a sense of playfulness and 

In [18]:
# Call get_completion once per dev prompt and keep the raw response next to the gold answer.
dev_responses = dev["prompt"].apply(get_completion)
dev["preds"] = dev_responses
dev

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,preds
0,1,Compare text A with text B and determine if text A shares the same sentiment with text B. The sentiment is either positive or negative. Respond wi...,No,,SingleClf,0,False,1,No
1,2,Compare text A with text B and determine if text A shares the same sentiment with text B. The sentiment is either positive or negative. Respond wi...,No,,SingleClf,0,False,1,No
2,1,Compare text A with text B for the following 3 text pairs and determine if text A shares the same sentiment with text B line by line. The sentimen...,"[No, Yes, No]",,BatchClf,0,False,3,1. No\n2. No\n3. No
3,2,Compare text A with text B for the following 3 text pairs and determine if text A shares the same sentiment with text B line by line. The sentimen...,"[No, Yes, No]",,BatchClf,0,False,3,1. No\n2. No\n3. Yes
4,1,Compare text A with text B for the following 5 text pairs and determine if text A shares the same sentiment with text B line by line. The sentimen...,"[No, Yes, No, Yes, No]",,BatchClf,0,False,5,1. No\n2. No\n3. No\n4. No\n5. Yes
5,2,Compare text A with text B for the following 5 text pairs and determine if text A shares the same sentiment with text B line by line. The sentimen...,"[No, Yes, No, No, No]",,BatchClf,0,False,5,1. No\n2. No\n3. No\n4. No\n5. No
6,1,Go over the 3 text pairs below and list the index numbers of the text pairs where text A does not share the same sentiment with text B according t...,"{1, 3}",No,SelectOne,0,False,3,"{'answer': [1, 2]}"
7,1,Go over the 3 text pairs below and list the index numbers of the text pairs where text A shares the same sentiment with text B according to the fo...,{2},Yes,SelectOne,0,False,3,{'answer': [None]}
8,2,Go over the 3 text pairs below and list the index numbers of the text pairs where text A does not share the same sentiment with text B according t...,"{1, 3}",No,SelectOne,0,False,3,"{'answer': [1, 3]}"
9,2,Go over the 3 text pairs below and list the index numbers of the text pairs where text A shares the same sentiment with text B according to the fo...,{2},Yes,SelectOne,0,False,3,{'answer': [None]}


### Make prompts

In [19]:
# Reload the saved database so this section can start from the pickle on disk.
clf_task = "SST-2-inference"
database_fp = f"data/databases/text classification/{clf_task}.pkl"
database = read_obj_from_pickle(database_fp)

Read object from data/databases/text classification/SST-2-inference.pkl


In [20]:
num_instance = 100
taskSizes = [3, 5, 10, 20, 50]   # number of text pairs per multi-pair prompt

prompt_frames = []
for prompt_mode in ["0-shot"]:
    for task in tasks:
        if task == "SingleClf":
            # Single-pair prompts take no task size.
            prompt_frames.append(make_prompts_for_clf(database, task, "test", prompt_mode))
        else:
            for taskSize in taskSizes:
                prompt_frames.append(
                    make_prompts_for_clf(
                        database, task, "test", prompt_mode, taskSize,
                        attr="does",
                        label_attr_converter=label_attr_converter,
                        num_instance=num_instance,
                    )
                )

# One frame with a fresh 0..n-1 index covering every task/size combination.
out = pd.concat(prompt_frames).reset_index(drop=True)

# Written as JSON Lines: one prompt record per line.
os.makedirs("results/text classification/", exist_ok=True)
out.to_json(f"results/text classification/{clf_task}.json", orient="records", lines=True)

In [21]:
# Number of generated prompts per task.
out["task"].value_counts()

SingleClf    1000
SelectOne    1000
BatchClf      500
SelectAll     500
Name: task, dtype: int64

In [22]:
# First prompt of each single-pair / 3-pair setting.
first_small = (out["taskIndex"] == 1) & (out["taskSize"] <= 3)
out.copy()[first_small]

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize
0,1,Compare text A with text B and determine if text A shares the same sentiment with text B. The sentiment is either positive or negative. Respond wi...,No,,SingleClf,0,False,1
1000,1,Compare text A with text B for the following 3 text pairs and determine if text A shares the same sentiment with text B line by line. The sentimen...,"[No, No, Yes]",,BatchClf,0,False,3
1500,1,Go over the 3 text pairs below and list the index numbers of the text pairs where text A does not share the same sentiment with text B according t...,"{1, 2}",No,SelectOne,0,False,3
1501,1,Go over the 3 text pairs below and list the index numbers of the text pairs where text A shares the same sentiment with text B according to the fo...,{3},Yes,SelectOne,0,False,3
2500,1,"Go over the 3 text pairs below. First, list the index numbers of the text pairs where text A shares the same sentiment with text B. Then, list the...","{'no': {1, 2}, 'yes': {3}}",,SelectAll,0,False,3
