In [1]:
import os
import pandas as pd

# Move the working directory three levels up before importing the project
# helpers below (presumably so that the `scripts` package and the relative
# `data/` and `results/` paths used later resolve from the repository root —
# TODO confirm).
# NOTE(review): the path is relative, so re-running this cell in the same
# kernel moves up another three levels; restart the kernel before re-running.
os.chdir("../../../")
from scripts.utils import save_obj_as_pickle, read_obj_from_pickle
from scripts.data import make_prompts_for_clf, make_exemplars_for_clf

# Display up to 150 characters per column so long prompt strings stay readable.
pd.set_option("display.max_colwidth", 150)

In [2]:
# Choose the classification task and load its pickled database.
clf_task = "CoLA"
database_path = f"data/databases/text classification/{clf_task}.pkl"
database = read_obj_from_pickle(database_path)

Read object from data/databases/text classification/CoLA.pkl


In [3]:
# Build the few-shot prompt templates for the four task formats, store them in
# the database under the current prompt mode, and write the database back.

# Number of exemplars per prompt and number of texts per multi-text task.
num_shot, taskSize = 2, 5
# Key under which the templates are stored; derived from `num_shot` so the two
# cannot drift apart ("2-shot" for num_shot == 2).
prompt_mode = f"{num_shot}-shot"

# Create the template container only if it is missing: the database is saved
# back to the same pickle at the end of this cell, so resetting the whole
# container would erase templates stored for other prompt modes.
if "promptTemplates" not in database:
    database["promptTemplates"] = dict()
# The entry for the current prompt mode is always rebuilt from scratch.
database["promptTemplates"][prompt_mode] = dict()

# Exemplar layouts, one per task format. The `$...` placeholders are left for
# `make_exemplars_for_clf` to fill in.
SingleClf_exemplar = "Text: $text\nGrammatical acceptability: $answer"
BatchClf_exemplar = "Texts, one per line:\n\n$texts\n\nGrammatical acceptabilities for each of the following lines of text, one per line:\n$answer"
SelectOne_exemplar = "Texts, one per line:\n\n$texts\n\nJSON output:\n{'$acceptability': $answer}"
SelectAll_exemplar = "Texts, one per line:\n\n$texts\n\nJSON output:\n$answer"

exemplar_templates = {"SingleClf": SingleClf_exemplar,
                      "BatchClf": BatchClf_exemplar,
                      "SelectOne": SelectOne_exemplar,
                      "SelectAll": SelectAll_exemplar}

# Prompt templates, one per task format. Only `$exemplars` is substituted in
# this cell; the remaining placeholders stay in the stored template.
SingleClf = (
    "Indicate the grammatical acceptability for the following line of text. "
    "The acceptability shall be either 'Acceptable' or 'Unacceptable.'\n\n$exemplars\n\n"
    "Text: $text\nGrammatical acceptability:"
)

BatchClf = (
    "Indicate the grammatical acceptabilities for each of the $num following lines of text. "
    "The acceptability shall be either 'Acceptable' or 'Unacceptable.'\n\n$exemplars\n\nTexts, one per line:\n\n"
    "$texts\n\nGrammatical acceptabilities for each of the $num lines of text, one per line:\n"
)

SelectOne = (
    "Go over the $num lines of text below and list the index numbers of the lines that are grammatically $acceptability according to the following instructions:\n"
    "If none of the texts are grammatically $acceptability, write 'None.'\n"
    "If all the texts are grammatically $acceptability, write 'All.'\n"
    "Otherwise, provide the index numbers for each grammatically $acceptability text.\n\n"
    "Output your responses in JSON format with the key '$acceptability'.\nA formatted example output is provided below.\n"
    "{'$acceptability': [None/All or index numbers of $acceptability sentences]}\n\n$exemplars\n\n"
    "Texts, one per line:\n\n$texts\n\n"
    "JSON output:\n"
)

SelectAll = (
    "Go over the $num lines of text below. First, list the index numbers of the lines that are grammatically acceptable. "
    "Then, list the index numbers of the lines that are grammatically unacceptable.\n"
    "If none of the sentences show a particular acceptability, write 'None.'\n"
    "If all the sentences show a particular acceptability, write 'All.'\n"
    "Otherwise, provide the index numbers of the texts that fit a particular category.\n"
    "Output your responses in JSON format with two keys 'acceptable' and 'unacceptable.'\nA formatted example output is provided below. \n"
    "{'acceptable': [None/All or index numbers of acceptable texts], 'unacceptable': [None/All or index numbers of unacceptable texts]}"
    "\n\n$exemplars\n\nTexts, one per line:\n\n$texts\n\n"
    "JSON output:\n"
)

tasks = ["SingleClf", "BatchClf", "SelectOne", "SelectAll"]
promptTemplates = [SingleClf, BatchClf, SelectOne, SelectAll]

for task, template in zip(tasks, promptTemplates):
    exemplars = make_exemplars_for_clf(database, task, exemplar_templates,
                                       num_shot=num_shot, taskSize=taskSize, attr="acceptability",
                                       label_attr_converter=None)
    # Substitute the exemplars once and reuse the result for both the stored
    # template and the printed preview.
    filled_template = template.replace("$exemplars", exemplars)
    database["promptTemplates"][prompt_mode][task] = filled_template
    print(filled_template)
    print("-" * 100 + "\n")

# Persist the updated database to the same file it was loaded from.
save_obj_as_pickle(database, f"data/databases/text classification/{clf_task}.pkl")

Indicate the grammatical acceptability for the following line of text. The acceptability shall be either 'Acceptable' or 'Unacceptable.'

Text: Our friends won't buy this analysis, let alone the next one we propose.
Grammatical acceptability: Acceptable

Text: One more pseudo generalization and I'm giving up.
Grammatical acceptability: Acceptable

Text: $text
Grammatical acceptability:
----------------------------------------------------------------------------------------------------

Indicate the grammatical acceptabilities for each of the $num following lines of text. The acceptability shall be either 'Acceptable' or 'Unacceptable.'

Texts, one per line:

1. He turned from a prince.
2. Shaving myself is difficult for me.
3. John inquired which book he should read.
4. John met Mary in Vienna.
5. Harry has claimed but I do not believe that Melvin is a Communist.

Grammatical acceptabilities for each of the following lines of text, one per line:
Unacceptable
Acceptable
Acceptable
Accep

In [4]:
# Generate the 2-shot prompts on the "test" split for every task format and
# concatenate them into `out`, which the following cells save and inspect.
num_instance = 100
tasks = ["SingleClf", "BatchClf", "SelectOne", "SelectAll"]
taskSizes = [5]

# Collect the per-task results under their own name so `out` is only ever
# bound to the final concatenated frame.
frames = []
for prompt_mode in ["2-shot"]:
    for task in tasks:
        if task == "SingleClf":
            # SingleClf is requested without a task size or `num_instance`.
            frames.append(make_prompts_for_clf(database, task, "test", prompt_mode))
        else:
            for taskSize in taskSizes:
                frames.append(make_prompts_for_clf(database, task, "test", prompt_mode, taskSize,
                                                   attr="acceptability", label_attr_converter=None,
                                                   num_instance=num_instance))

# `ignore_index=True` gives the result a fresh 0..n-1 index in one step.
out = pd.concat(frames, ignore_index=True)

In [5]:
# Write the prompts as JSON Lines (one record per line), creating the results
# folder first if it does not exist yet.
results_dir = "results/text classification/"
os.makedirs(results_dir, exist_ok=True)
out.to_json(f"{results_dir}{clf_task}_2_shot.json", orient="records", lines=True)

In [6]:
# Count the generated prompts per task format.
out["task"].value_counts()

SingleClf    1043
SelectOne     200
BatchClf      100
SelectAll     100
Name: task, dtype: int64

In [7]:
# Preview the rows with taskIndex == 1 and taskSize <= 5.
# Boolean indexing already returns a new object, so the frame is not copied
# beforehand.
preview_mask = (out["taskIndex"] == 1) & (out["taskSize"] <= 5)
out.loc[preview_mask]

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize
0,1,Indicate the grammatical acceptability for the following line of text. The acceptability shall be either 'Acceptable' or 'Unacceptable.'\n\nText: ...,Acceptable,,SingleClf,2,False,1
1043,1,Indicate the grammatical acceptabilities for each of the 5 following lines of text. The acceptability shall be either 'Acceptable' or 'Unacceptabl...,"[Unacceptable, Acceptable, Unacceptable, Acceptable, Unacceptable]",,BatchClf,2,False,5
1143,1,Go over the 5 lines of text below and list the index numbers of the lines that are grammatically acceptable according to the following instruction...,"{2, 4}",Acceptable,SelectOne,2,False,5
1144,1,Go over the 5 lines of text below and list the index numbers of the lines that are grammatically unacceptable according to the following instructi...,"{1, 3, 5}",Unacceptable,SelectOne,2,False,5
1343,1,"Go over the 5 lines of text below. First, list the index numbers of the lines that are grammatically acceptable. Then, list the index numbers of t...","{'acceptable': {2, 4}, 'unacceptable': {1, 3, 5}}",,SelectAll,2,False,5
