In [1]:
import os
import pandas as pd

# The relative `data/...` and `results/...` paths used throughout this notebook (and,
# presumably, the project-local `scripts` package imported below) are resolved from the
# repository root, two levels above this notebook.
# Guard the chdir so that re-running this cell in a live kernel does not climb two
# further levels and break every relative path that follows.
if not os.path.isdir("scripts"):
    os.chdir("../../")
from scripts.llm import get_num_of_tokens, get_completion
from scripts.utils import save_obj_as_pickle, read_obj_from_pickle
from scripts.data import make_database, make_prompts_for_clf

# Widen text columns so prompts and completions are readable in displayed frames.
pd.set_option("display.max_colwidth", 150)

In [2]:
def loadData(fp):
    """Read a header-less, two-column tab-separated file into a DataFrame.

    The columns are named ``label`` and ``text``. Integer labels 0 and 1 are
    replaced by the strings "Negative" and "Positive"; any other label value
    maps to NaN.
    """
    label_names = {0: "Negative", 1: "Positive"}
    frame = pd.read_table(fp, names=["label", "text"], header=None)
    frame["label"] = frame["label"].map(label_names)
    return frame

# Name of the classification task; it selects the raw-data folder below.
clf_task = "SST-2"

# Resolve both split paths first, then load them with the same helper.
test_fp = f"data/raw/text classification/{clf_task}/test.tsv"
dev_fp = f"data/raw/text classification/{clf_task}/dev.tsv"

test_df, dev_df = loadData(test_fp), loadData(dev_fp)

test_df.head()

Unnamed: 0,label,text
0,Negative,"no movement , no yuks , not much of anything ."
1,Negative,"a gob of drivel so sickly sweet , even the eager consumers of moore 's pasteurized ditties will retch it up like rancid crème brûlée ."
2,Negative,"gangs of new york is an unapologetic mess , whose only saving grace is that it ends by blowing just about everything up ."
3,Negative,"we never really feel involved with the story , as all of its ideas remain just that : abstract ideas ."
4,Positive,this is one of polanski 's best films .


In [3]:
# Class balance of the test split.
test_df["label"].value_counts()

Negative    912
Positive    909
Name: label, dtype: int64

In [4]:
# Distribution of per-sentence token counts in the test split.
test_df["text"].apply(get_num_of_tokens).describe()

count    1821.000000
mean       22.698517
std        10.682467
min         3.000000
25%        14.000000
50%        22.000000
75%        30.000000
max        63.000000
Name: text, dtype: float64

In [5]:
# Class balance of the dev split.
dev_df["label"].value_counts()

Positive    444
Negative    428
Name: label, dtype: int64

In [6]:
# Distribution of per-sentence token counts in the dev split.
dev_df["text"].apply(get_num_of_tokens).describe()

count    872.000000
mean      23.001147
std       10.516129
min        2.000000
25%       15.000000
50%       22.000000
75%       30.000000
max       58.000000
Name: text, dtype: float64

### Database

Sources for the prompt data

In [8]:
# Build the prompt "database" dict from the test and dev frames; its keys are shown in
# the cell output.
# NOTE(review): the exact meaning of num_instance / max_instance_size is defined in
# scripts.data.make_database — confirm there before changing these values.
database = make_database(test_df, dev_df, num_instance=500, max_instance_size=500)
database.keys()

dict_keys(['num_instance', 'max_instance_size', 'labels', 'testData', 'testInstances', 'devData', 'devInstances'])

#### Zero-shot

In [4]:
# Zero-shot prompt templates for every task variant.
# `$name` tokens are placeholders that are filled in when prompts are built
# (presumably string.Template-style substitution inside make_prompts_for_clf — confirm).
#
# NOTE(review): the "sentimen" -> "sentiment" typo is fixed in the two one-category
# templates below. Completions that were already collected used the old wording, so
# regenerate prompts (or keep the old results clearly separated) if exact prompt
# parity with earlier runs matters.

single_clf = (
    "Indicate the sentiment for the following line of text. "
    "The sentiment shall be either 'Positive' or 'Negative.'\n\n"
    "Text: $text\nSentiment: "
)

batch_clf = (
    "Indicate the sentiment for each of the $num following lines of text. "
    "The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:\n\n"
    "$texts\n\nThe sentiments for each of the $num lines of text, one per line:\n"
)

index_selection_one_cat_a_time = (
    "Go over the $num lines of text below and list the index numbers of the lines with $polarity sentiment according to the following instructions:\n"
    "If none of the texts show $polarity sentiment, write 'None.'\n"
    "If all the texts show $polarity sentiment, write 'All.'\n"
    "Otherwise, provide the index numbers for each text with $polarity sentiment, each on a separate line.\n\n"
    "Texts, one per line:\n\n$texts\n\n"
    "'None,' 'All,' or the index numbers for the texts with $polarity sentiment, one per line:\n"
)

index_selection_one_cat_a_time_json = (
    "Go over the $num lines of text below and list the index numbers of the lines with $polarity sentiment according to the following instructions:\n"
    "If none of the texts show $polarity sentiment, write 'None.'\n"
    "If all the texts show $polarity sentiment, write 'All.'\n"
    "Otherwise, provide the index numbers for each text with $polarity sentiment.\n\n"
    "Output your responses in JSON format with the key '$polarity'.\nA formatted example output is provided below.\n"
    "{'$polarity': [None/All or index numbers for the texts with $polarity sentiment]}\n\n"
    "Texts, one per line:\n\n$texts\n\n"
    "JSON output:\n"
)

index_selection_all_cat_at_once = (
    "Go over the $num lines of text below. First, list the index numbers of the lines with positive sentiment. "
    "Then, list the index numbers of the lines with negative sentiment.\n"
    "If none of the texts show a particular sentiment, write 'None.'\n"
    "If all the texts show a particular sentiment, write 'All.'\n"
    "Otherwise, provide the index numbers of the texts that fit a particular category.\n\n"
    "Texts, one per line:\n\n$texts\n\n"
    "Output your responses in JSON format with two keys: 'positive' and 'negative.'\nA formatted example output is provided below.\n"
    "{'positive': [None/All or index numbers of positive sentences], 'negative': [None/All or index numbers of negative sentences]}"
)

# Same instructions as above, but the output-format block comes before the texts and
# the prompt ends with an explicit "JSON output:" cue.
index_selection_all_cat_at_once_adjusted = (
    "Go over the $num lines of text below. First, list the index numbers of the lines with positive sentiment. "
    "Then, list the index numbers of the lines with negative sentiment.\n"
    "If none of the texts show a particular sentiment, write 'None.'\n"
    "If all the texts show a particular sentiment, write 'All.'\n"
    "Otherwise, provide the index numbers of the texts that fit a particular category.\n\n"
    "Output your responses in JSON format with two keys: 'positive' and 'negative.'\nA formatted example output is provided below.\n"
    "{'positive': [None/All or index numbers of positive sentences], 'negative': [None/All or index numbers of negative sentences]}\n\n"
    "Texts, one per line:\n\n$texts\n\n"
    "JSON output:\n"
)

# Single source of truth for the task -> template pairing. Keeping one ordered dict
# (instead of two parallel lists joined with zip) means a name and its template cannot
# drift apart or be silently truncated. Insertion order is significant: later cells
# select the JSON variant positionally via tasks[-1].
zero_shot_templates = {
    "single_clf": single_clf,
    "batch_clf": batch_clf,
    "index_selection_one_cat_a_time": index_selection_one_cat_a_time,
    "index_selection_all_cat_at_once": index_selection_all_cat_at_once,
    "index_selection_all_cat_at_once_adjusted": index_selection_all_cat_at_once_adjusted,
    "index_selection_one_cat_a_time_json": index_selection_one_cat_a_time_json,
}
tasks = list(zero_shot_templates)
promptTemplates = list(zero_shot_templates.values())

# Replaces any previously stored templates with the zero-shot set defined above.
database["promptTemplates"] = {"zero-shot": dict(zero_shot_templates)}

In [5]:
# Persist the database (now including the zero-shot prompt templates) as a pickle.
db_fp = f"data/databases/text classification/{clf_task}.pkl"
os.makedirs(os.path.dirname(db_fp), exist_ok=True)
save_obj_as_pickle(database, db_fp)

Saved object to data/databases/text classification/SST-2.pkl


#### Test Prompts

- The main purpose is to check whether the LLMs can produce output in the desired format for each prompt.

In [8]:
# Small dev sample for a quick output-format check: num_instance prompts per
# configuration, for every task and both batch sizes.
num_instance = 2
taskSizes = [5, 10]

dev = []
for prompt_mode in ["zero-shot"]:
    for task in tasks:
        if task != "single_clf":
            # Batched tasks: one frame of prompts per batch size.
            for taskSize in taskSizes:
                batch_prompts = make_prompts_for_clf(
                    database, task, "dev", prompt_mode, taskSize, attr="polarity",
                    label_attr_converter=None, num_instance=num_instance,
                )
                dev.append(batch_prompts)
        else:
            # Single-sentence task: build all prompts, keep only the first few.
            single_prompts = make_prompts_for_clf(database, task, "dev", prompt_mode)
            dev.append(single_prompts[:num_instance])

dev = pd.concat(dev).reset_index(drop=True)

In [9]:
# Show the first prompt of every small (taskSize <= 5) configuration.
first_small = dev[(dev["taskIndex"] == 1) & (dev["taskSize"] <= 5)]
for prompt_text in first_small["prompt"]:
    print(prompt_text)
    print("-" * 50 + "\n")

Indicate the sentiment for the following line of text. The sentiment shall be either 'Positive' or 'Negative.'

Text: one long string of cliches .
Sentiment: 
--------------------------------------------------

Indicate the sentiment for each of the 5 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'

Texts, one per line:

1. it 's too self-important and plodding to be funny , and too clipped and abbreviated to be an epic .
2. determined to be fun , and bouncy , with energetic musicals , the humor did n't quite engage this adult .
3. this film seems thirsty for reflection , itself taking on adolescent qualities .
4. if the first men in black was money , the second is small change .
5. the film makes a fatal mistake : it asks us to care about a young man whose only apparent virtue is that he is not quite as unpleasant as some of the people in his life .

The sentiments for each of the 5 lines of text, one per line:

----------------------------------------

In [23]:
# One completion per prompt; no model argument is passed, so get_completion's
# default model applies.
dev["preds"] = dev["prompt"].apply(get_completion)
dev

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,preds
0,1,Indicate the sentiment for the following line of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nText: one long string of cliches...,Negative,,single_clf,0,False,1,Negative
1,2,Indicate the sentiment for the following line of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nText: if you 've ever entertaine...,Negative,,single_clf,0,False,1,Negative
2,1,"Indicate the sentiment for each of the 5 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:\...","[Negative, Negative, Negative, Negative, Negative]",,batch_clf,0,False,5,1. Negative\n2. Negative\n3. Negative\n4. Negative\n5. Negative
3,2,"Indicate the sentiment for each of the 5 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:\...","[Positive, Positive, Negative, Negative, Positive]",,batch_clf,0,False,5,1. Positive\n2. Negative\n3. Negative\n4. Negative\n5. Positive
4,1,"Indicate the sentiment for each of the 10 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:...","[Negative, Negative, Negative, Negative, Negative, Positive, Negative, Positive, Positive, Negative]",,batch_clf,0,False,10,1. Negative\n2. Negative\n3. Negative\n4. Negative\n5. Negative\n6. Positive\n7. Negative\n8. Positive\n9. Positive\n10. Negative
5,2,"Indicate the sentiment for each of the 10 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:...","[Positive, Positive, Negative, Negative, Positive, Negative, Positive, Negative, Negative, Negative]",,batch_clf,0,False,10,1. Positive\n2. Negative\n3. Negative\n4. Negative\n5. Positive\n6. Negative\n7. Positive\n8. Negative\n9. Negative\n10. Negative
6,1,"Go over the 5 lines of text below and list the index numbers of the lines with negative sentiment.\nIf none of the texts show negative sentiment, ...",{All},Negative,index_selection_one_cat_a_time,0,False,5,1\n5
7,1,"Go over the 5 lines of text below and list the index numbers of the lines with positive sentiment.\nIf none of the texts show positive sentiment, ...",{None},Positive,index_selection_one_cat_a_time,0,False,5,None.
8,2,"Go over the 5 lines of text below and list the index numbers of the lines with negative sentiment.\nIf none of the texts show negative sentiment, ...","{3, 4}",Negative,index_selection_one_cat_a_time,0,False,5,2\n3
9,2,"Go over the 5 lines of text below and list the index numbers of the lines with positive sentiment.\nIf none of the texts show positive sentiment, ...","{1, 2, 5}",Positive,index_selection_one_cat_a_time,0,False,5,1\n5


### Adjusted

In [6]:
# Re-run the format check for the JSON variant of the one-category-at-a-time task only.
# The task is named explicitly instead of being picked with tasks[-1:], so this cell
# keeps targeting the same template if `tasks` is ever reordered or extended. The old
# `single_clf` branch was unreachable here and has been removed.
num_instance = 2
taskSizes = [5, 10]
adjusted_task = "index_selection_one_cat_a_time_json"

dev = pd.concat(
    [
        make_prompts_for_clf(database, adjusted_task, "dev", prompt_mode, taskSize, attr="polarity",
                             label_attr_converter=None, num_instance=num_instance)
        for prompt_mode in ["zero-shot"]
        for taskSize in taskSizes
    ]
).reset_index(drop=True)

In [7]:
# Print the first prompt of each small (taskSize <= 5) configuration, followed by a
# separator line and a blank line.
is_first_small = (dev["taskSize"] <= 5) & (dev["taskIndex"] == 1)
separator = "-" * 50
for prompt_text in dev.loc[is_first_small, "prompt"]:
    print(prompt_text, separator, "", sep="\n")

Go over the 5 lines of text below and list the index numbers of the lines with negative sentiment according to the following instructions:
If none of the texts show negative sentiment, write 'None.'
If all the texts show negative sentiment, write 'All.'
Otherwise, provide the index numbers for each text with negative sentimen.

Output your responses in JSON format with the key 'negative'.
A formatted example output is provided below.
{'negative': [None/All or index numbers for the texts with negative sentiment]}

Texts, one per line:

1. it 's too self-important and plodding to be funny , and too clipped and abbreviated to be an epic .
2. determined to be fun , and bouncy , with energetic musicals , the humor did n't quite engage this adult .
3. this film seems thirsty for reflection , itself taking on adolescent qualities .
4. if the first men in black was money , the second is small change .
5. the film makes a fatal mistake : it asks us to care about a young man whose only apparent 

In [8]:
# Same format check, this time against an explicitly chosen model.
model = "meta-llama/Llama-3-8b-chat-hf"
dev["preds"] = dev["prompt"].apply(get_completion, args=(model,))
dev

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,preds
0,1,Go over the 5 lines of text below and list the index numbers of the lines with negative sentiment according to the following instructions:\nIf non...,{All},Negative,index_selection_one_cat_a_time_json,0,False,5,"After analyzing the texts, I found that the following lines have negative sentiment:\n\n{'negative': [1, 4, 5]}\n\nThe line with index number 1 ex..."
1,1,Go over the 5 lines of text below and list the index numbers of the lines with positive sentiment according to the following instructions:\nIf non...,{None},Positive,index_selection_one_cat_a_time_json,0,False,5,"After analyzing the texts, I found that none of them show positive sentiment. Therefore, the output is:\n\n{'positive': 'None'}\n\nNote that if an..."
2,2,Go over the 5 lines of text below and list the index numbers of the lines with negative sentiment according to the following instructions:\nIf non...,"{3, 4}",Negative,index_selection_one_cat_a_time_json,0,False,5,"After analyzing the texts, I found that lines 2 and 3 have negative sentiment. Here is the output in JSON format:\n\n{\n""negative"": [2, 3]\n}"
3,2,Go over the 5 lines of text below and list the index numbers of the lines with positive sentiment according to the following instructions:\nIf non...,"{1, 2, 5}",Positive,index_selection_one_cat_a_time_json,0,False,5,"After analyzing the texts, I found that only one text shows positive sentiment. Here is the output in JSON format:\n\n{\n""positive"": [5]\n}\n\nThe..."
4,1,Go over the 10 lines of text below and list the index numbers of the lines with negative sentiment according to the following instructions:\nIf no...,"{1, 2, 3, 4, 5, 7, 10}",Negative,index_selection_one_cat_a_time_json,0,False,10,"After analyzing the texts, I found that the following lines have negative sentiment:\n\n{'negative': [1, 4, 5, 10]}\n\nThe lines with negative sen..."
5,1,Go over the 10 lines of text below and list the index numbers of the lines with positive sentiment according to the following instructions:\nIf no...,"{8, 9, 6}",Positive,index_selection_one_cat_a_time_json,0,False,10,"After analyzing the text, I found that the lines with positive sentiment are:\n\n{'positive': [6, 7, 8, 9]}\n\nThese lines have positive sentiment..."
6,2,Go over the 10 lines of text below and list the index numbers of the lines with negative sentiment according to the following instructions:\nIf no...,"{3, 4, 6, 8, 9, 10}",Negative,index_selection_one_cat_a_time_json,0,False,10,"After analyzing the texts, I found that the following lines have negative sentiment:\n\n{'negative': [2, 3, 4, 6, 8, 9, 10]}\n\nThese lines expres..."
7,2,Go over the 10 lines of text below and list the index numbers of the lines with positive sentiment according to the following instructions:\nIf no...,"{1, 2, 5, 7}",Positive,index_selection_one_cat_a_time_json,0,False,10,"After analyzing the texts, I found that only one line has positive sentiment. Here is the output in JSON format:\n\n{'positive': [5]}"


### Make prompts

In [9]:
# Reload the saved database so the cells below work from the persisted version.
db_fp = f"data/databases/text classification/{clf_task}.pkl"
database = read_obj_from_pickle(db_fp)

Read object from data/databases/text classification/SST-2.pkl


In [11]:
# Disabled: builds the test prompts for ALL tasks from scratch. The next cell instead
# loads the saved results file and appends prompts for the most recently added task only.
# out = []
# num_instance = 100

# taskSizes = [5, 10, 20, 50, 100]
# for propmtMode in ["zero-shot"]:
#     for task in tasks:

#         if task == "single_clf":
#             out.append(make_prompts_for_clf(database, task, "test", propmtMode))
#             continue

#         for taskSize in taskSizes:
#             out.append(make_prompts_for_clf(database, task, "test", propmtMode, taskSize, attr="polarity", 
#                                             label_attr_converter=None, num_instance=num_instance))

# out = pd.concat(out)
# out.reset_index(drop=True, inplace=True)

In [13]:
# Extend the saved results file with test prompts for the JSON variant of the
# one-category-at-a-time task.
fp = f"results/text classification/{clf_task}.json"
new_task = "index_selection_one_cat_a_time_json"  # the entry previously picked with tasks[-1]

num_instance = 100
taskSizes = [5, 10, 20, 50, 100]

# Existing prompts plus whatever model completions have already been collected.
out = pd.read_json(fp, lines=True)

# This cell reads and writes the same file, so appending unconditionally would add a
# duplicate copy of the new task's prompts on every re-run. Only append (and rewrite
# the file) when the task is not in the results yet; the first run is unchanged.
if not (out["task"] == new_task).any():
    new_prompts = [
        make_prompts_for_clf(database, new_task, "test", prompt_mode, taskSize, attr="polarity",
                             label_attr_converter=None, num_instance=num_instance)
        for prompt_mode in ["zero-shot"]
        for taskSize in taskSizes
    ]
    out = pd.concat([out, *new_prompts], ignore_index=True)
    # The directory already exists because the read above succeeded.
    out.to_json(fp, orient="records", lines=True)

In [14]:
# Number of prompts per task in the combined results frame.
out["task"].value_counts()

single_clf                                  1821
index_selection_one_cat_a_time              1000
index_selection_one_cat_a_time_json         1000
batch_clf                                    500
index_selection_all_cat_at_once              500
index_selection_all_cat_at_once_adjusted     500
Name: task, dtype: int64

In [15]:
# Distribution of prompt lengths, in tokens, across all tasks.
out["prompt"].apply(get_num_of_tokens).describe()

count    5321.000000
mean      687.713776
std       839.563717
min        31.000000
25%        58.000000
50%       320.000000
75%      1224.000000
max      2908.000000
Name: prompt, dtype: float64

In [16]:
# First instance (taskIndex == 1) of every configuration with at most 5 texts per
# prompt, as an independent copy of those rows.
is_first_small = (out["taskIndex"] == 1) & (out["taskSize"] <= 5)
sub = out[is_first_small].copy()
sub

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,gpt-3.5-turbo-0125-completion,meta-llama/Llama-3-70b-chat-hf-completion,gpt-4-turbo-2024-04-09-completion,mistralai/Mixtral-8x7B-Instruct-v0.1-completion,meta-llama/Llama-3-8b-chat-hf-completion,lmsys/vicuna-13b-v1.5-completion,mistralai/Mistral-7B-Instruct-v0.2-completion
0,1,"Indicate the sentiment for the following line of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nText: no movement , no yuks , no...",Negative,,single_clf,0,False,1,Negative,Sentiment: Negative,Negative,Negative,Sentiment: Negative,Sentiment: Negative,"Negative. The text expresses a lack of excitement or positivity, using phrases like ""no movement"" and ""not much of anything."""
1821,1,"Indicate the sentiment for each of the 5 following lines of text. The sentiment shall be either 'Positive' or 'Negative.'\n\nTexts, one per line:\...","[Negative, Positive, Positive, Positive, Positive]",,batch_clf,0,False,5,1. Negative\n2. Positive\n3. Positive\n4. Negative\n5. Positive,Here are the sentiments for each of the 5 lines of text:\n\n1. Negative\n2. Positive\n3. Positive\n4. Neutral (but leaning towards Positive)\n5. P...,1. Negative\n2. Positive\n3. Positive\n4. Positive\n5. Positive,1. Negative\n2. Positive\n3. Positive\n4. Positive\n5. Positive,Here are the sentiments for each line:\n\n1. Negative\n2. Positive\n3. Positive\n4. Neutral (Note: The sentiment is not strongly positive or negat...,1. Negative\n2. Positive\n3. Positive\n4. Negative\n5. Positive,"1. Neutral: The sentence is stating a fact about the person's behavior, which is neither positive nor negative.\n2. Positive: The sentence expres..."
2321,1,Go over the 5 lines of text below and list the index numbers of the lines with negative sentiment according to the following instructions:\nIf non...,[1],Negative,index_selection_one_cat_a_time,0,False,5,None.,1,1,None.,None.,None.,None.
2322,1,Go over the 5 lines of text below and list the index numbers of the lines with positive sentiment according to the following instructions:\nIf non...,"[2, 3, 4, 5]",Positive,index_selection_one_cat_a_time,0,False,5,2\n3\n5,2\n3\n5,2\n3\n5,2\n3\n5,Here is the list of index numbers for the texts with positive sentiment:\n\n2\n3\n5\n\nThese three lines have positive sentiment.,None.,3. fans of nijinsky will savor every minute of cox 's work.\n5. westfeldt and juergensen exude a chemistry and comfort level that 's both saucy a...
3321,1,"Go over the 5 lines of text below. First, list the index numbers of the lines with positive sentiment. Then, list the index numbers of the lines w...","{'negative': [1], 'positive': [2, 3, 4, 5]}",,index_selection_all_cat_at_once,0,False,5,"{\n ""positive"": [2, 3, 5],\n ""negative"": [1, 4]\n}","Here is the output:\n\n{'positive': [2, 3, 5], 'negative': [1, 4]}\n\nExplanation:\n\n* Lines 2, 3, and 5 have positive sentiment, so their index ...","```json\n{\n ""positive"": [2, 3, 5],\n ""negative"": [1, 4]\n}\n```","{\n""positive"": [2, 5],\n""negative"": [1]\n}","Here is the output in JSON format:\n\n{\n""positive"": [2, 3, 5],\n""negative"": [1]\n}\n\nExplanation:\n\n* Line 1 has a negative sentiment, as it im...","{\n""positive"": [2, 4],\n""negative"": [1, 3, 5]\n}","{'positive': [3, 5], 'negative': [1]}"
3821,1,"Go over the 5 lines of text below. First, list the index numbers of the lines with positive sentiment. Then, list the index numbers of the lines w...","{'negative': [1], 'positive': [2, 3, 4, 5]}",,index_selection_all_cat_at_once_adjusted,0,False,5,,,,,,,
4321,1,Go over the 5 lines of text below and list the index numbers of the lines with negative sentiment according to the following instructions:\nIf non...,{1},Negative,index_selection_one_cat_a_time_json,0,False,5,,,,,,,
4322,1,Go over the 5 lines of text below and list the index numbers of the lines with positive sentiment according to the following instructions:\nIf non...,"{2, 3, 4, 5}",Positive,index_selection_one_cat_a_time_json,0,False,5,,,,,,,


In [17]:
# out["lmsys/vicuna-13b-v1.5-completion"].str.contains("TOO_").sum()

0