In [1]:
import os
import pandas as pd

# Move the kernel two directories up before importing the project modules
# (presumably the project root, so that `scripts` and the relative `data/...`
# paths used below resolve — confirm against the repository layout).
# A bare os.chdir("../../") is relative to the *current* directory, so re-running
# this cell would climb two more levels each time. Resolving the target once per
# kernel and always changing to that absolute path keeps the cell idempotent.
if "_PROJECT_ROOT" not in globals():
    _PROJECT_ROOT = os.path.abspath("../../")
os.chdir(_PROJECT_ROOT)
from scripts.llm import get_completion
from scripts.utils import save_obj_as_pickle, read_obj_from_pickle
from scripts.data import make_database, make_prompts_for_clf

# Widen text columns so prompt previews are readable in rendered DataFrames.
pd.set_option("display.max_colwidth", 150)

In [2]:
def loadData(fp):
    """Read a headerless, tab-separated file and return its labelled sentences.

    The file is parsed as four columns named source, label, note and text.
    Only ``label`` and ``text`` are kept, and the integer labels are mapped to
    names: 0 -> "Unacceptable", 1 -> "Acceptable". Any other label value
    becomes NaN, because that is what ``Series.map`` does with missing keys.

    Parameters
    ----------
    fp : path or file-like object accepted by ``pd.read_table``.

    Returns
    -------
    pd.DataFrame with the columns ``label`` and ``text``, in that order.
    """
    column_names = ["source", "label", "note", "text"]
    label_names = {0: "Unacceptable", 1: "Acceptable"}

    raw = pd.read_table(fp, header=None, names=column_names)
    kept = raw[["label", "text"]]
    return kept.assign(label=kept["label"].map(label_names))

clf_task = "CoLA"
# The public CoLA test set is not labelled, so the labelled dev split (dev.tsv)
# is used as our test set.
test_fp = f"data/raw/text classification/{clf_task}/dev.tsv"
test_df = loadData(test_fp)

# The original train split (train.tsv) plays the role of our dev set: it can be
# used to tune prompts and other hyperparameters, and may also be used to
# construct few-shot and CoT exemplars.
dev_fp = f"data/raw/text classification/{clf_task}/train.tsv"
dev_df = loadData(dev_fp)

# Preview the first rows of the test split.
test_df.head()

Unnamed: 0,label,text
0,Acceptable,The sailors rode the breeze clear of the rocks.
1,Acceptable,The weights made the rope stretch over the pulley.
2,Acceptable,The mechanical doll wriggled itself loose.
3,Acceptable,"If you had eaten more, you would want less."
4,Unacceptable,"As you eat the most, you want the least."


### Database

Sources for the prompt data

In [3]:
# num_instance: number of instances used to compose multi-problem prompts; each instance contains multiple problems.
# max_instance_size: maximum number of problems sampled from the benchmark dataset to compose one instance.
# dev_df can be used for purposes such as testing the prompts or generating exemplars.
database = make_database(test_df, dev_df, num_instance=100, max_instance_size=100)
# Show which entries the database exposes.
database.keys()

dict_keys(['num_instance', 'max_instance_size', 'labels', 'testData', 'testInstances', 'devData', 'devInstances'])

#### 0-shot prompts

In [4]:
# Zero-shot prompt templates, one per task type. The `$`-prefixed names
# ($text, $texts, $num, $acceptability) are placeholders.
# NOTE(review): presumably substituted by make_prompts_for_clf — confirm there.

# SingleClf: classify one sentence per prompt.
SingleClf = (
    "Indicate the grammatical acceptability for the following line of text. "
    "The acceptability shall be either 'Acceptable' or 'Unacceptable.'\n\n"
    "Text: $text\nGrammatical acceptability:"
)

# BatchClf: classify $num sentences in one prompt, one answer per line.
BatchClf = (
    "Indicate the grammatical acceptabilities for each of the $num following lines of text. "
    "The acceptability shall be either 'Acceptable' or 'Unacceptable.'\n\nTexts, one per line:\n\n"
    "$texts\n\nGrammatical acceptabilities for each of the $num lines of text, one per line:\n"
)

# SelectOne: list the indices of the sentences matching a single target label.
SelectOne = (
    "Go over the $num lines of text below and list the index numbers of the lines that are grammatically $acceptability according to the following instructions:\n"
    "If none of the texts are grammatically $acceptability, write 'None.'\n"
    "If all the texts are grammatically $acceptability, write 'All.'\n"
    "Otherwise, provide the index numbers for each grammatically $acceptability text.\n\n"
    "Output your responses in JSON format with the key '$acceptability'.\nA formatted example output is provided below.\n"
    "{'$acceptability': [None/All or index numbers of $acceptability sentences]}\n\n"
    "Texts, one per line:\n\n$texts\n\n"
    "JSON output:\n"
)

# SelectAll: list the indices for both labels in a single JSON answer.
SelectAll = (
    "Go over the $num lines of text below. First, list the index numbers of the lines that are grammatically acceptable. "
    "Then, list the index numbers of the lines that are grammatically unacceptable.\n"
    "If none of the sentences show a particular acceptability, write 'None.'\n"
    "If all the sentences show a particular acceptability, write 'All.'\n"
    "Otherwise, provide the index numbers of the texts that fit a particular category.\n"
    "Output your responses in JSON format with two keys 'acceptable' and 'unacceptable.'\nA formatted example output is provided below. \n"
    "{'acceptable': [None/All or index numbers of acceptable texts], 'unacceptable': [None/All or index numbers of unacceptable texts]}"
    "\n\nTexts, one per line:\n\n$texts\n\n"
    "JSON output:\n"
)

tasks = ["SingleClf", "BatchClf", "SelectOne", "SelectAll"]
promptTemplates = [SingleClf, BatchClf, SelectOne, SelectAll]

# Register the templates under database["promptTemplates"]["0-shot"][<task name>].
database["promptTemplates"] = {"0-shot": dict(zip(tasks, promptTemplates))}

In [5]:
# Persist the database (now including the prompt templates) so it can be
# reloaded from disk later in the notebook.
os.makedirs("data/databases/text classification/", exist_ok=True)
save_obj_as_pickle(database, f"data/databases/text classification/{clf_task}.pkl")

Saved object to data/databases/text classification/CoLA.pkl


#### Test Prompts

- The main purpose is to check if LLMs can output the desired formats given the prompts 

In [6]:
# Build a small sample of dev prompts for every task type (and, for the
# multi-problem tasks, every task size) to check the model's output format.
num_instance = 2
taskSizes = [5, 10]

dev_parts = []
for prompt_mode in ["0-shot"]:
    for task in tasks:
        if task != "SingleClf":
            # Multi-problem tasks: one batch of prompts per task size.
            for size in taskSizes:
                dev_parts.append(
                    make_prompts_for_clf(database, task, "dev", prompt_mode, size, attr="acceptability",
                                         label_attr_converter=None, num_instance=num_instance)
                )
        else:
            # Single-sentence prompts take no size arguments; keep only the first few.
            single_prompts = make_prompts_for_clf(database, task, "dev", prompt_mode)
            dev_parts.append(single_prompts[:num_instance])

dev = pd.concat(dev_parts).reset_index(drop=True)

In [7]:
# Print the first instance of each task at the smallest sizes, separated by a rule.
first_small = dev[(dev["taskIndex"] == 1) & (dev["taskSize"] <= 5)]
for prompt_text in first_small["prompt"]:
    print(prompt_text, "-" * 50, "", sep="\n")

Indicate the grammatical acceptability for the following line of text. The acceptability shall be either 'Acceptable' or 'Unacceptable.'

Text: Our friends won't buy this analysis, let alone the next one we propose.
Grammatical acceptability:
--------------------------------------------------

Indicate the grammatical acceptabilities for each of the 5 following lines of text. The acceptability shall be either 'Acceptable' or 'Unacceptable.'

Texts, one per line:

1. He turned from a prince.
2. Shaving myself is difficult for me.
3. John inquired which book he should read.
4. John met Mary in Vienna.
5. Harry has claimed but I do not believe that Melvin is a Communist.

Grammatical acceptabilities for each of the 5 lines of text, one per line:

--------------------------------------------------

Go over the 5 lines of text below and list the index numbers of the lines that are grammatically acceptable according to the following instructions:
If none of the texts are grammatically accept

In [8]:
# Run every sample prompt through get_completion and store the raw response
# next to the expected answer for a side-by-side format check.
# NOTE(review): get_completion presumably issues one LLM request per row —
# confirm the cost before re-running this cell.
dev["preds"] = dev.prompt.apply(get_completion)
dev

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,preds
0,1,Indicate the grammatical acceptability for the following line of text. The acceptability shall be either 'Acceptable' or 'Unacceptable.'\n\nText: ...,Acceptable,,SingleClf,0,False,1,Acceptable
1,2,Indicate the grammatical acceptability for the following line of text. The acceptability shall be either 'Acceptable' or 'Unacceptable.'\n\nText: ...,Acceptable,,SingleClf,0,False,1,Acceptable
2,1,Indicate the grammatical acceptabilities for each of the 5 following lines of text. The acceptability shall be either 'Acceptable' or 'Unacceptabl...,"[Unacceptable, Acceptable, Acceptable, Acceptable, Acceptable]",,BatchClf,0,False,5,1. Acceptable\n2. Unacceptable\n3. Acceptable\n4. Acceptable\n5. Acceptable
3,2,Indicate the grammatical acceptabilities for each of the 5 following lines of text. The acceptability shall be either 'Acceptable' or 'Unacceptabl...,"[Acceptable, Acceptable, Unacceptable, Acceptable, Acceptable]",,BatchClf,0,False,5,1. Acceptable\n2. Unacceptable\n3. Unacceptable\n4. Unacceptable\n5. Acceptable
4,1,Indicate the grammatical acceptabilities for each of the 10 following lines of text. The acceptability shall be either 'Acceptable' or 'Unacceptab...,"[Unacceptable, Acceptable, Acceptable, Acceptable, Acceptable, Acceptable, Acceptable, Acceptable, Acceptable, Acceptable]",,BatchClf,0,False,10,1. Acceptable\n2. Acceptable\n3. Acceptable\n4. Acceptable\n5. Acceptable\n6. Acceptable\n7. Acceptable\n8. Acceptable\n9. Acceptable\n10. Acceptable
5,2,Indicate the grammatical acceptabilities for each of the 10 following lines of text. The acceptability shall be either 'Acceptable' or 'Unacceptab...,"[Acceptable, Acceptable, Unacceptable, Acceptable, Acceptable, Acceptable, Acceptable, Acceptable, Acceptable, Acceptable]",,BatchClf,0,False,10,1. Acceptable\n2. Unacceptable\n3. Unacceptable\n4. Unacceptable\n5. Acceptable\n6. Acceptable\n7. Acceptable\n8. Acceptable\n9. Acceptable\n10. A...
6,1,Go over the 5 lines of text below and list the index numbers of the lines that are grammatically acceptable according to the following instruction...,"{2, 3, 4, 5}",Acceptable,SelectOne,0,False,5,"{'acceptable': [2, 3, 4]}"
7,1,Go over the 5 lines of text below and list the index numbers of the lines that are grammatically unacceptable according to the following instructi...,{1},Unacceptable,SelectOne,0,False,5,{'unacceptable': [2]}
8,2,Go over the 5 lines of text below and list the index numbers of the lines that are grammatically acceptable according to the following instruction...,"{1, 2, 4, 5}",Acceptable,SelectOne,0,False,5,{'acceptable': [5]}
9,2,Go over the 5 lines of text below and list the index numbers of the lines that are grammatically unacceptable according to the following instructi...,{3},Unacceptable,SelectOne,0,False,5,"{'unacceptable': [2, 3, 4]}"


### Make prompts

In [9]:
# Reload the database from the pickle path written earlier in this notebook.
clf_task = "CoLA"
database = read_obj_from_pickle(f"data/databases/text classification/{clf_task}.pkl")

Read object from data/databases/text classification/CoLA.pkl


In [10]:
# Generate the full set of test prompts for every task type and task size,
# then write them to disk as JSON Lines (one record per line).
num_instance = 100
tasks = ["SingleClf", "BatchClf", "SelectOne", "SelectAll"]
taskSizes = [5, 10, 20, 50, 100]

test_parts = []
for prompt_mode in ["0-shot"]:
    for task in tasks:
        if task != "SingleClf":
            # Multi-problem tasks: num_instance prompts are requested per task size.
            for size in taskSizes:
                test_parts.append(
                    make_prompts_for_clf(database, task, "test", prompt_mode, size, attr="acceptability",
                                         label_attr_converter=None, num_instance=num_instance)
                )
        else:
            # Single-sentence prompts are generated without size arguments and kept in full.
            test_parts.append(make_prompts_for_clf(database, task, "test", prompt_mode))

out = pd.concat(test_parts).reset_index(drop=True)

os.makedirs("results/text classification/", exist_ok=True)
out.to_json(f"results/text classification/{clf_task}.json", orient="records", lines=True)

In [11]:
# Number of prompts generated per task type.
out.task.value_counts()

SingleClf    1043
SelectOne    1000
BatchClf      500
SelectAll     500
Name: task, dtype: int64

In [12]:
# Preview the first instance of each task at the smallest sizes (taskSize <= 5).
# Boolean-mask indexing already returns a new frame, so copying all of `out`
# before filtering was redundant work.
out[(out.taskIndex == 1) & (out.taskSize <= 5)]

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize
0,1,Indicate the grammatical acceptability for the following line of text. The acceptability shall be either 'Acceptable' or 'Unacceptable.'\n\nText: ...,Acceptable,,SingleClf,0,False,1
1043,1,Indicate the grammatical acceptabilities for each of the 5 following lines of text. The acceptability shall be either 'Acceptable' or 'Unacceptabl...,"[Unacceptable, Acceptable, Unacceptable, Acceptable, Unacceptable]",,BatchClf,0,False,5
1543,1,Go over the 5 lines of text below and list the index numbers of the lines that are grammatically acceptable according to the following instruction...,"{2, 4}",Acceptable,SelectOne,0,False,5
1544,1,Go over the 5 lines of text below and list the index numbers of the lines that are grammatically unacceptable according to the following instructi...,"{1, 3, 5}",Unacceptable,SelectOne,0,False,5
2543,1,"Go over the 5 lines of text below. First, list the index numbers of the lines that are grammatically acceptable. Then, list the index numbers of t...","{'acceptable': {2, 4}, 'unacceptable': {1, 3, 5}}",,SelectAll,0,False,5
