In [1]:
import os
import pandas as pd

# Work from the directory two levels above the notebook (presumably the project root,
# since `scripts` is imported right after this and all paths below are relative).
# The starting directory is remembered on first execution so that re-running this
# cell lands in the same place instead of climbing two more levels each time.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, "..", ".."))
from scripts.llm import get_completion
from scripts.utils import save_obj_as_pickle, read_obj_from_pickle
from scripts.data import make_database, make_prompts_for_clf

# Show up to 150 characters per cell when frames are displayed.
pd.options.display.max_colwidth = 150

In [2]:
def loadData(fp):
    """Read a headerless, tab-separated file into a two-column frame.

    The two columns are named ``label`` and ``text``. Integer labels are
    replaced by their names (0 -> "Negative", 1 -> "Positive"); any other
    label value becomes NaN because it has no entry in the mapping.

    Parameters
    ----------
    fp : path or file-like object accepted by ``pandas.read_table``

    Returns
    -------
    pandas.DataFrame with columns ``label`` and ``text``.
    """
    label_names = {0: "Negative", 1: "Positive"}
    frame = pd.read_table(fp, header=None, names=["label", "text"])
    frame["label"] = frame["label"].map(label_names)
    return frame

# Benchmark name; it is interpolated into every data/result path in this notebook.
clf_task = "SST-2"

# Locations of the raw test and dev splits.
test_fp = f"data/raw/text classification/{clf_task}/test.tsv"
dev_fp = f"data/raw/text classification/{clf_task}/dev.tsv"

test_df = loadData(test_fp)
dev_df = loadData(dev_fp)

# Preview the first rows of the test split.
test_df.head()

Unnamed: 0,label,text
0,Negative,"no movement , no yuks , not much of anything ."
1,Negative,"a gob of drivel so sickly sweet , even the eager consumers of moore 's pasteurized ditties will retch it up like rancid crème brûlée ."
2,Negative,"gangs of new york is an unapologetic mess , whose only saving grace is that it ends by blowing just about everything up ."
3,Negative,"we never really feel involved with the story , as all of its ideas remain just that : abstract ideas ."
4,Positive,this is one of polanski 's best films .


### Database

Sources for the prompt data

In [3]:
# Arguments passed to make_database:
#   num_instance      - number of instances to compose multi-problem prompts;
#                       each instance contains multiple problems
#   max_instance_size - maximum number of problems sampled from the benchmark
#                       dataset to compose an instance
# dev_df can be used for purposes such as testing the prompts or generating exemplars.
database = make_database(
    test_df,
    dev_df,
    num_instance=100,
    max_instance_size=100,
)
database.keys()

dict_keys(['num_instance', 'max_instance_size', 'labels', 'testData', 'testInstances', 'devData', 'devInstances'])

#### 0-shot

In [4]:
# Zero-shot prompt templates, one per task type. The $-prefixed names ($text, $texts,
# $num, $polarity) are placeholders; presumably they are substituted when
# make_prompts_for_clf builds the actual prompts -- confirm in scripts.data.

# One text per prompt.
SingleClf = (
    "Indicate the sentiment for the following line of text. "
    "The sentiment shall be either 'Positive' or 'Negative.'\n\n"
    "Text: $text\nSentiment: "
)

# Several texts per prompt, one label expected per line.
BatchClf = (
    "Indicate the sentiment for each of the $num following lines of text. "
    "The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:\n\n"
    "$texts\n\nThe sentiments for each of the $num lines of text, one per line:\n"
)

# Several texts per prompt; the model lists the indices matching one target polarity.
SelectOne = (
    "Go over the $num lines of text below and list the index numbers of the lines with $polarity sentiment according to the following instructions:\n"
    "If none of the texts show $polarity sentiment, write 'None.'\n"
    "If all the texts show $polarity sentiment, write 'All.'\n"
    "Otherwise, provide the index numbers for each text with $polarity sentiment.\n\n"
    "Output your responses in JSON format with the key '$polarity'.\nA formatted example output is provided below.\n"
    "{'$polarity': [None/All or index numbers for the texts with $polarity sentiment]}\n\n"
    "Texts, one per line:\n\n$texts\n\n"
    "JSON output:\n"
)

# Several texts per prompt; the model lists the indices for both polarities.
SelectAll = (
    "Go over the $num lines of text below. First, list the index numbers of the lines with positive sentiment. "
    "Then, list the index numbers of the lines with negative sentiment.\n"
    "If none of the texts show a particular sentiment, write 'None.'\n"
    "If all the texts show a particular sentiment, write 'All.'\n"
    "Otherwise, provide the index numbers of the texts that fit a particular category.\n\n"
    "Output your responses in JSON format with two keys: 'positive' and 'negative.'\nA formatted example output is provided below.\n"
    "{'positive': [None/All or index numbers of positive sentences], 'negative': [None/All or index numbers of negative sentences]}\n\n"
    "Texts, one per line:\n\n$texts\n\n"
    "JSON output:\n"
)

# Task names and their templates, kept in matching order.
tasks = ["SingleClf", "BatchClf", "SelectOne", "SelectAll"]
promptTemplates = [SingleClf, BatchClf, SelectOne, SelectAll]

# Register the templates under database["promptTemplates"]["0-shot"][<task name>].
database["promptTemplates"] = {"0-shot": dict(zip(tasks, promptTemplates))}

In [5]:
# Persist the database (including the prompt templates added above) for later reuse.
database_dir = "data/databases/text classification/"
os.makedirs(database_dir, exist_ok=True)
save_obj_as_pickle(database, f"{database_dir}{clf_task}.pkl")

Saved object to data/databases/text classification/SST-2.pkl


#### Test Prompts

- The main purpose is to check if LLMs can output the desired formats given the prompts 

In [7]:
# Build a small set of prompts from the dev split: only the first `num_instance`
# prompts for SingleClf, and `num_instance` instances per task size otherwise.
num_instance = 2
taskSizes = [5, 10]

dev_frames = []
for prompt_mode in ["0-shot"]:
    for task in tasks:
        if task == "SingleClf":
            # Single-text prompts take no task size; keep only the first few.
            single_prompts = make_prompts_for_clf(database, task, "dev", prompt_mode)
            dev_frames.append(single_prompts[:num_instance])
        else:
            for taskSize in taskSizes:
                dev_frames.append(
                    make_prompts_for_clf(
                        database, task, "dev", prompt_mode, taskSize,
                        attr="polarity",
                        label_attr_converter=None,
                        num_instance=num_instance,
                    )
                )

# Stack the per-task frames into one frame with a fresh 0..n-1 index.
dev = pd.concat(dev_frames).reset_index(drop=True)

In [8]:
# Print the first instance (taskIndex == 1) of every task with at most 5 problems,
# each followed by a 50-dash separator line and a blank line.
first_small = (dev["taskSize"] <= 5) & (dev["taskIndex"] == 1)
for prompt_text in dev.loc[first_small, "prompt"]:
    print(prompt_text, "-" * 50, "", sep="\n")

Indicate the sentiment for the following line of text. The sentiment shall be either 'Positive' or 'Negative.'

Text: one long string of cliches .
Sentiment: 
--------------------------------------------------

Indicate the sentiment for each of the 5 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'

Texts, one per line:

1. generally , clockstoppers will fulfill your wildest fantasies about being a different kind of time traveler , while happily killing 94 minutes .
2. it 's so mediocre , despite the dynamic duo on the marquee , that we just ca n't get no satisfaction .
3. the only excitement comes when the credits finally roll and you get to leave the theater .
4. a delightful coming-of-age story .
5. ... designed to provide a mix of smiles and tears , `` crossroads '' instead provokes a handful of unintentional howlers and numerous yawns .

The sentiments for each of the 5 lines of text, one per line:

-------------------------------------------------

In [9]:
# Call get_completion (from scripts.llm) once per prompt, in row order, and keep the
# returned value next to its prompt in a "preds" column.
dev["preds"] = [get_completion(prompt_text) for prompt_text in dev["prompt"]]
dev

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,preds
0,1,Indicate the sentiment for the following line of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nText: one long string of cliches...,Negative,,SingleClf,0,False,1,Negative
1,2,Indicate the sentiment for the following line of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nText: if you 've ever entertaine...,Negative,,SingleClf,0,False,1,Negative
2,1,"Indicate the sentiment for each of the 5 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:\...","[Positive, Negative, Negative, Positive, Negative]",,BatchClf,0,False,5,Positive\nNegative\nNegative\nPositive\nNegative
3,2,"Indicate the sentiment for each of the 5 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:\...","[Negative, Negative, Negative, Negative, Negative]",,BatchClf,0,False,5,1. Negative\n2. Negative\n3. Negative\n4. Negative\n5. Negative
4,1,"Indicate the sentiment for each of the 10 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:...","[Positive, Negative, Negative, Positive, Negative, Negative, Negative, Positive, Positive, Positive]",,BatchClf,0,False,10,1. Positive\n2. Negative\n3. Negative\n4. Positive\n5. Negative\n6. Negative\n7. Negative\n8. Positive\n9. Positive\n10. Negative
5,2,"Indicate the sentiment for each of the 10 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:...","[Negative, Negative, Negative, Negative, Negative, Positive, Negative, Negative, Positive, Negative]",,BatchClf,0,False,10,Negative\nNegative\nNegative\nNegative\nNegative\nPositive\nNegative\nNegative\nPositive\nNegative
6,1,Go over the 5 lines of text below and list the index numbers of the lines with negative sentiment according to the following instructions:\nIf non...,"{2, 3, 5}",Negative,SelectOne,0,False,5,"{'negative': [2, 5]}"
7,1,Go over the 5 lines of text below and list the index numbers of the lines with positive sentiment according to the following instructions:\nIf non...,"{1, 4}",Positive,SelectOne,0,False,5,{'positive': [4]}
8,2,Go over the 5 lines of text below and list the index numbers of the lines with negative sentiment according to the following instructions:\nIf non...,{All},Negative,SelectOne,0,False,5,"{'negative': [2, 3, 4]}"
9,2,Go over the 5 lines of text below and list the index numbers of the lines with positive sentiment according to the following instructions:\nIf non...,{None},Positive,SelectOne,0,False,5,{'positive': [None]}


### Make prompts

In [10]:
# Reload the database from the pickle written earlier in this notebook.
database_path = f"data/databases/text classification/{clf_task}.pkl"
database = read_obj_from_pickle(database_path)

Read object from data/databases/text classification/SST-2.pkl


In [11]:
# Build the full prompt set from the test split: every SingleClf prompt, plus
# `num_instance` instances per task size for each multi-problem task.
num_instance = 100
taskSizes = [5, 10, 20, 50, 100]

test_frames = []
for prompt_mode in ["0-shot"]:
    for task in tasks:
        if task == "SingleClf":
            # Single-text prompts take no task size or instance count.
            test_frames.append(make_prompts_for_clf(database, task, "test", prompt_mode))
        else:
            for taskSize in taskSizes:
                test_frames.append(
                    make_prompts_for_clf(
                        database, task, "test", prompt_mode, taskSize,
                        attr="polarity",
                        label_attr_converter=None,
                        num_instance=num_instance,
                    )
                )

# Stack everything into one frame with a fresh 0..n-1 index.
out = pd.concat(test_frames).reset_index(drop=True)

# Write one JSON record per line.
results_dir = "results/text classification/"
os.makedirs(results_dir, exist_ok=True)
out.to_json(f"{results_dir}{clf_task}.json", orient="records", lines=True)

In [12]:
# Number of prompts generated per task type.
out["task"].value_counts()

SingleClf    1821
SelectOne    1000
BatchClf      500
SelectAll     500
Name: task, dtype: int64

In [13]:
# Preview the first instance (taskIndex == 1) of every task with at most 5 problems.
# Boolean-mask selection already returns a new frame, so the whole of `out` does not
# need to be copied first.
first_small = (out["taskIndex"] == 1) & (out["taskSize"] <= 5)
out.loc[first_small]

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize
0,1,"Indicate the sentiment for the following line of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nText: no movement , no yuks , no...",Negative,,SingleClf,0,False,1
1821,1,"Indicate the sentiment for each of the 5 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:\...","[Positive, Negative, Negative, Negative, Positive]",,BatchClf,0,False,5
2321,1,Go over the 5 lines of text below and list the index numbers of the lines with negative sentiment according to the following instructions:\nIf non...,"{2, 3, 4}",Negative,SelectOne,0,False,5
2322,1,Go over the 5 lines of text below and list the index numbers of the lines with positive sentiment according to the following instructions:\nIf non...,"{1, 5}",Positive,SelectOne,0,False,5
3321,1,"Go over the 5 lines of text below. First, list the index numbers of the lines with positive sentiment. Then, list the index numbers of the lines w...","{'negative': {2, 3, 4}, 'positive': {1, 5}}",,SelectAll,0,False,5
