In [1]:
import os
import pandas as pd

os.chdir("../../")
from scripts.llm import get_num_of_tokens, get_completion
from scripts.utils import save_obj_as_pickle, read_obj_from_pickle
from scripts.data import make_database, make_prompts_for_clf

pd.set_option("display.max_colwidth", 150)

In [2]:
# both sets have 10,000 text pairs, which is too large. 
def loadData(fp, sample_size=1000):
    df = pd.read_table(fp, header=1, names=["label", "premise", "hypothesis"])
    df = df[df.label != "-"]
    df = df.sample(sample_size, random_state=234)
    df.reset_index(drop=True, inplace=True)
    df.label = df.label.apply(str.capitalize)
    df["text"] = "Premise: " + df.premise + "\nHypothesis: " + df.hypothesis + "\n"
    df = df[['label', 'text']]
    return df

clf_task = "SNLI"
test_fp = f"data/raw/text classification/{clf_task}/test.txt"
test_df = loadData(test_fp)

dev_fp = f"data/raw/text classification/{clf_task}/dev.txt"
dev_df = loadData(dev_fp)

test_df.head()

Unnamed: 0,label,text
0,Neutral,Premise: a matador in white and gold clothing and pink sock is getting hit by a raging bull\nHypothesis: The angry bull is attacking the matador a...
1,Entailment,"Premise: A man wearing a black sweater and a knit cap sits in front of mountain scenery , with a cloudy sky overhead .\nHypothesis: A man is outdo..."
2,Entailment,Premise: Two men are making hand gestures and posing for a picture .\nHypothesis: Two men are ready to have their picture taken .\n
3,Entailment,Premise: Workers are removing ice from a walkway .\nHypothesis: Workers outside on a slippery walkway\n
4,Entailment,Premise: A man riding a motorcycle on a dirt road passing several junk or abandoned cars on the side of the road .\nHypothesis: A man is riding a ...


In [4]:
test_df.label.value_counts()

Entailment       351
Neutral          338
Contradiction    311
Name: label, dtype: int64

In [5]:
test_df.text.apply(get_num_of_tokens).describe()

count    1000.000000
mean       31.753000
std         7.934037
min        16.000000
25%        26.000000
50%        31.000000
75%        36.000000
max        70.000000
Name: text, dtype: float64

In [7]:
dev_df.label.value_counts()

Neutral          362
Entailment       333
Contradiction    305
Name: label, dtype: int64

In [8]:
dev_df.text.apply(get_num_of_tokens).describe()

count    1000.000000
mean       31.996000
std         8.556939
min        15.000000
25%        26.000000
50%        30.000000
75%        37.000000
max        75.000000
Name: text, dtype: float64

### Database

Sources for the prompt data

In [9]:
database = make_database(test_df, dev_df, num_instance=500, max_instance_size=500)
database.keys()

dict_keys(['num_instance', 'max_instance_size', 'labels', 'testData', 'testInstances', 'devData', 'devInstances'])

#### Zero-shot

In [4]:
database["promptTemplates"] = dict()
database["promptTemplates"]["zero-shot"] = dict()


single_clf = "Given the following premise and hypothesis, determine the inference relation between them. " \
             "Respond with 'Entailment' if the hypothesis logically follows from the premise, " \
             "'Contradiction' if they are in direct opposition, and 'Neutral' if neither applies. \n\n" \
             "$text\nInference relation:"

batch_clf = "Given the following $num pairs of premises and hypotheses, determine the inference relation for each pair line by line. " \
            "Respond with 'Entailment' if the hypothesis entails the premise, and 'Contradiction' if they contradict. " \
            "If neither is the case, respond with 'Neutral.' Provide your answers line by line.\n\n" \
            "$texts\nInference relations for the $num text pairs provided above:\n"

index_selection_one_cat_a_time = "Go over the $num text pairs below and list the index numbers of the text pairs where " \
                                 "the inference relation between the premise and the hypothesis is $relationship according to the following instructions:\n" \
                                 "If none of the text pairs contain $relationship inference relation, write 'None.'\n" \
                                 "If all text pairs contain $relationship inference relation, write 'All.'\n" \
                                 "Otherwise, provide the index numbers of the text pairs where the inference relation between the premise " \
                                 "and the hypothesis is $relationship, each on a separate line.\n\n" \
                                 "Here are the text pairs:\n\n$texts\n'None,' 'All,' or the index numbers of the text pairs where the inference relation " \
                                 "between the premise and the hypothesis is $relationship:\n"

index_selection_all_cat_at_once = "Go over the $num text pairs below. " \
                                  "First, list the index numbers of the text pairs that contain entailment inference relation. " \
                                  "Then, select all text pairs that contain contradiction inference relation. " \
                                  "Finally, select all text pairs that contain neutral inference relation.\n" \
                                  "If none of the text pairs satisfy a condition, write 'None.'\n" \
                                  "If all the text pairs belong satisfy a condition, write 'All.'\n" \
                                  "Otherwise, provide the index numbers of the text pairs that satisfy each condition.\n\n" \
                                  "Here are the text pairs:\n\n$texts\n" \
                                  "Output your responses in JSON format with three keys: 'entailment', 'contradiction', and 'neutral'." \
                                  "\nA formatted example output is provided below.\n" \
                                  "{'entailment': [None/All or index numbers of text pairs that contain entailment inference relation], " \
                                  "'contradiction': [None/All or index numbers of text pairs that contain contradiction inference relation], " \
                                  "'neutral': [None/All or index numbers of text pairs that contain neutral inference relation]}"

index_selection_all_cat_at_once_adjusted =  "Go over the $num text pairs below. " \
                                            "First, list the index numbers of the text pairs that contain entailment inference relation. " \
                                            "Then, select all text pairs that contain contradiction inference relation. " \
                                            "Finally, select all text pairs that contain neutral inference relation.\n" \
                                            "If none of the text pairs satisfy a condition, write 'None.'\n" \
                                            "If all the text pairs belong satisfy a condition, write 'All.'\n" \
                                            "Otherwise, provide the index numbers of the text pairs that satisfy each condition.\n\n" \
                                            "Output your responses in JSON format with three keys: 'entailment', 'contradiction', and 'neutral'." \
                                            "\nA formatted example output is provided below.\n" \
                                            "{'entailment': [None/All or index numbers of text pairs that contain entailment inference relation], " \
                                            "'contradiction': [None/All or index numbers of text pairs that contain contradiction inference relation], " \
                                            "'neutral': [None/All or index numbers of text pairs that contain neutral inference relation]}\n\n" \
                                            "Here are the text pairs:\n\n$texts\n" \
                                            "JSON output:\n"
                                            
tasks = ["single_clf", "batch_clf", "index_selection_one_cat_a_time", 
         "index_selection_all_cat_at_once", "index_selection_all_cat_at_once_adjusted"]
promptTemplates = [single_clf, batch_clf, index_selection_one_cat_a_time, 
                   index_selection_all_cat_at_once, index_selection_all_cat_at_once_adjusted]

for task, tmp in zip(tasks, promptTemplates):
    database["promptTemplates"]["zero-shot"][task] = tmp

In [5]:
os.makedirs("data/databases/text classification/", exist_ok=True)
save_obj_as_pickle(database, f"data/databases/text classification/{clf_task}.pkl")

Saved object to data/databases/text classification/SNLI.pkl


#### Test Prompts

- The main purpose is to check if LLMs can output the desired formats given the prompts 

In [25]:
dev = []
num_instance = 2
taskSizes = [3, 5]
for propmtMode in ["zero-shot"]:
    for task in tasks:
        if task == "single_clf":
            dev.append(make_prompts_for_clf(database, task, "dev", propmtMode)[:num_instance])
            continue

        for taskSize in taskSizes:  
            dev.append(make_prompts_for_clf(database, task, "dev", propmtMode, taskSize, attr="relationship", 
                                            label_attr_converter=None, num_instance=num_instance))

dev = pd.concat(dev).reset_index(drop=True)


In [27]:
for p in dev[(dev["taskSize"] <= 3) & (dev["taskIndex"] == 1)].prompt:
    print(p)
    print("-"*50)
    print()

Given the following premise and hypothesis, determine the inference relation between them. Respond with 'Entailment' if the hypothesis logically follows from the premise, 'Contradiction' if they are in direct opposition, and 'Neutral' if neither applies. 

Premise: A man wearing blue jeans and a black shirt is talking to a man wearing white shorts while they are sitting outside in folding chairs on a patio .
Hypothesis: the men are inside the church

Inference relation:
--------------------------------------------------

Given the following 3 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if the hypothesis entails the premise, and 'Contradiction' if they contradict. If neither is the case, respond with 'Neutral.' Provide your answers line by line.

1. Premise: A man dressed all in white throws the first pitch at a baseball game .
Hypothesis: A Senator starts a baseball game .

2. Premise: A woman with dark hair a

In [26]:
dev["preds"] = dev.prompt.apply(get_completion)
dev

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,preds
0,1,"Given the following premise and hypothesis, determine the inference relation between them. Respond with 'Entailment' if the hypothesis logically f...",Contradiction,,single_clf,0,False,1,Neutral
1,2,"Given the following premise and hypothesis, determine the inference relation between them. Respond with 'Entailment' if the hypothesis logically f...",Entailment,,single_clf,0,False,1,Entailment
2,1,"Given the following 3 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if ...","[Neutral, Neutral, Neutral]",,batch_clf,0,False,3,1. Neutral\n2. Neutral\n3. Neutral
3,2,"Given the following 3 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if ...","[Neutral, Neutral, Neutral]",,batch_clf,0,False,3,1. Neutral\n2. Neutral\n3. Contradiction
4,1,"Given the following 5 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if ...","[Neutral, Neutral, Neutral, Entailment, Neutral]",,batch_clf,0,False,5,1. Neutral\n2. Neutral\n3. Neutral\n4. Entailment\n5. Neutral
5,2,"Given the following 5 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if ...","[Neutral, Neutral, Neutral, Entailment, Entailment]",,batch_clf,0,False,5,1. Neutral\n2. Neutral\n3. Neutral\n4. Entailment\n5. Entailment
6,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,{None},Contradiction,index_selection_one_cat_a_time,0,False,3,None.
7,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,{None},Entailment,index_selection_one_cat_a_time,0,False,3,None.
8,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,{All},Neutral,index_selection_one_cat_a_time,0,False,3,None.
9,2,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,{None},Contradiction,index_selection_one_cat_a_time,0,False,3,None.


### Adjusted

In [18]:
dev = []
num_instance = 2

taskSizes = [3, 5]
for propmtMode in ["zero-shot"]:
    for task in tasks[-1:]:
        if task == "single_clf":
            dev.append(make_prompts_for_clf(database, task, "dev", propmtMode)[:num_instance])
            continue

        for taskSize in taskSizes:  
            dev.append(make_prompts_for_clf(database, task, "dev", propmtMode, taskSize, attr="relationship", 
                                            label_attr_converter=None, num_instance=num_instance))

dev = pd.concat(dev).reset_index(drop=True)

In [19]:
for p in dev[(dev["taskSize"] <= 5) & (dev["taskIndex"] == 1)].prompt:
    print(p)
    print("-"*50)
    print()

Go over the 3 text pairs below. First, list the index numbers of the text pairs that contain entailment inference relation. Then, select all text pairs that contain contradiction inference relation. Finally, select all text pairs that contain neutral inference relation.
If none of the text pairs satisfy a condition, write 'None.'
If all the text pairs belong satisfy a condition, write 'All.'
Otherwise, provide the index numbers of the text pairs that satisfy each condition.

Output your responses in JSON format with three keys: 'entailment', 'contradiction', and 'neutral'.
A formatted example output is provided below.
{'entailment': [None/All or index numbers of text pairs that contain entailment inference relation], 'contradiction': [None/All or index numbers of text pairs that contain contradiction inference relation], 'neutral': [None/All or index numbers of text pairs that contain neutral inference relation]}

Here are the text pairs:

1. Premise: A man dressed all in white throws 

In [8]:
model = "meta-llama/Llama-3-8b-chat-hf"
dev["preds"] = dev.prompt.apply(lambda p: get_completion(p, model))
dev

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,preds
0,1,"Go over the 5 text pairs below. First, list the index numbers of the text pairs that contain entailment inference relation. Then, select all text ...","{'contradiction': {'None'}, 'entailment': {4}, 'neutral': {1, 2, 3, 5}}",,index_selection_all_cat_at_once_adjusted,0,False,5,"After analyzing the text pairs, I found the following results:\n\n{'entailment': [1, 4], 'contradiction': [2], 'neutral': [3, 5]}\n\nHere's a brea..."
1,2,"Go over the 5 text pairs below. First, list the index numbers of the text pairs that contain entailment inference relation. Then, select all text ...","{'contradiction': {'None'}, 'entailment': {4, 5}, 'neutral': {1, 2, 3}}",,index_selection_all_cat_at_once_adjusted,0,False,5,"After analyzing the text pairs, I found the following results:\n\n{'entailment': [2, 3], 'contradiction': None, 'neutral': [1, 4, 5]}\n\nHere's a ..."
2,1,"Go over the 10 text pairs below. First, list the index numbers of the text pairs that contain entailment inference relation. Then, select all text...","{'contradiction': {9, 10}, 'entailment': {8, 4, 7}, 'neutral': {1, 2, 3, 5, 6}}",,index_selection_all_cat_at_once_adjusted,0,False,10,"After analyzing the text pairs, I found the following results:\n\n{\n""entailment"": [1, 4, 7], \n""contradiction"": [2, 6, 9], \n""neutral"": [3, 5, 8,..."
3,2,"Go over the 10 text pairs below. First, list the index numbers of the text pairs that contain entailment inference relation. Then, select all text...","{'contradiction': {9}, 'entailment': {4, 5, 7}, 'neutral': {1, 2, 3, 6, 8, 10}}",,index_selection_all_cat_at_once_adjusted,0,False,10,"After analyzing the text pairs, I found the following results:\n\n{'entailment': [2, 3, 5, 6, 7, 8, 10], 'contradiction': [1, 9], 'neutral': [4]}\..."


### Make prompts

In [9]:
database = read_obj_from_pickle(f"data/databases/text classification/{clf_task}.pkl")

Read object from data/databases/text classification/SNLI.pkl


In [29]:
# out = []
# num_instance = 100
# taskSizes = [3, 5, 10, 20, 50]
# for propmtMode in ["zero-shot"]:
#     for task in tasks:

#         if task == "single_clf":
#             out.append(make_prompts_for_clf(database, task, "test", propmtMode))
#             continue

#         for taskSize in taskSizes:
#             out.append(make_prompts_for_clf(database, task, "test", propmtMode, taskSize, attr="relationship", 
#                                             label_attr_converter=None, num_instance=num_instance))

# out = pd.concat(out)
# out.reset_index(drop=True, inplace=True)

In [34]:
# os.makedirs("results/text classification/", exist_ok=True)
# out.to_json(f"results/text classification/{clf_task}.json", orient="records", lines=True)

In [13]:
fp = f"results/text classification/{clf_task}.json"
out = [pd.read_json(fp, lines=True)]

num_instance = 100
taskSizes = [3, 5, 10, 20, 50]
for propmtMode in ["zero-shot"]:
    for taskSize in taskSizes:
        out.append(make_prompts_for_clf(database, tasks[-1], "test", propmtMode, taskSize, attr="relationship", 
                                        label_attr_converter=None, num_instance=num_instance))

out = pd.concat(out)
out.reset_index(drop=True, inplace=True)

os.makedirs("results/text classification/", exist_ok=True)
out.to_json(f"results/text classification/{clf_task}.json", orient="records", lines=True)                                             

In [14]:
out.task.value_counts()

index_selection_one_cat_a_time              1500
index_selection_one_cat_a_time_json         1500
single_clf                                  1000
batch_clf                                    500
index_selection_all_cat_at_once              500
index_selection_all_cat_at_once_adjusted     500
Name: task, dtype: int64

In [15]:
out.prompt.apply(get_num_of_tokens).describe()

count    5500.000000
mean      614.437818
std       581.198469
min        71.000000
25%       229.000000
50%       376.000000
75%       807.000000
max      2008.000000
Name: prompt, dtype: float64

In [16]:
sub = out.copy()[(out.taskIndex == 1) & (out.taskSize <= 3)]
sub

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,gpt-3.5-turbo-0125-completion,meta-llama/Llama-3-70b-chat-hf-completion,meta-llama/Llama-3-8b-chat-hf-completion,mistralai/Mixtral-8x7B-Instruct-v0.1-completion,gpt-4-turbo-2024-04-09-completion,lmsys/vicuna-13b-v1.5-completion,mistralai/Mistral-7B-Instruct-v0.2-completion
0,1,"Given the following premise and hypothesis, determine the inference relation between them. Respond with 'Entailment' if the hypothesis logically f...",Neutral,,single_clf,0,False,1,Entailment,Inference relation: Entailment,"Entailment\n\nThe premise sets the scene for a dramatic and intense situation, where the matador is being attacked by a raging bull. The hypothesi...",Neutral. The hypothesis is not directly entailed or contradicted by the premise. The premise does not provide information about the bull's reason...,"Inference relation: Neutral\n\nExplanation: The premise describes a scene where a matador is getting hit by a raging bull, specifying the attire o...",Entailment,"Neutral\n\nThe premise describes an action taking place between a matador and a bull, while the hypothesis makes a judgment about the justifiabil..."
1000,1,"Given the following 3 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if ...","[Contradiction, Contradiction, Contradiction]",,batch_clf,0,False,3,1. Neutral\n2. Neutral\n3. Contradiction,Here are the inference relations for each pair:\n\n1. Premise: This church choir sings to the masses as they sing joyous songs from the book at a ...,Here are the inference relations for each pair:\n\n1. Premise: This church choir sings to the masses as they sing joyous songs from the book at a ...,1. Neutral - The hypothesis of a choir singing at a baseball game is not directly related to the premise about a church choir singing joyous song...,"1. Neutral - The premise describes a choir singing at a church, while the hypothesis mentions a choir singing at a baseball game. These are differ...",1. Entailment\n2. Contradiction\n3. Contradiction,"1. Neutral: The premise is about a church choir singing to the masses, while the hypothesis is about a choir singing at a baseball game. These tw..."
1500,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,[All],Contradiction,index_selection_one_cat_a_time,0,False,3,None.,2,"After analyzing the text pairs, I found that:\n\n* Text pair 1: The premise describes a church choir singing at a church, while the hypothesis des...",1\n3,1\n2,None.,1. Index number: 1.\n\nExplanation: The premise describes a church choir singing joyous songs to the masses at a church. The hypothesis describes...
1501,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,[None],Entailment,index_selection_one_cat_a_time,0,False,3,None.,None.,"After analyzing the text pairs, I found that:\n\n* Text pair 1 does not contain an entailment inference relation. The premise describes a church c...","None.\n\nNone of the text pairs have an entailment relationship between the premise and the hypothesis. In each case, the hypothesis goes beyond ...",,None.,"None.\n\nExplanation:\n\n1. The premise describes a choir singing at a church, while the hypothesis describes a choir singing at a baseball game...."
1502,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,[None],Neutral,index_selection_one_cat_a_time,0,False,3,None.,"2.\n\nThe inference relation between the premise and the hypothesis is neutral in text pair 2, as the premise describes a woman preparing for dinn...","After analyzing the text pairs, I found that the inference relation between the premise and the hypothesis is neutral in the following index numbe...","None.\n\nIn the first text pair, the premise describes the church choir singing in a church, while the hypothesis describes a choir singing at a ...",1\n2\n3,None.,"None.\n\nThe inference relations in all three text pairs are not neutral. In the first text pair, there is no inference relation as the premise a..."
3000,1,"Go over the 3 text pairs below. First, list the index numbers of the text pairs that contain entailment inference relation. Then, select all text ...","{'contradiction': ['All'], 'entailment': ['None'], 'neutral': ['None']}",,index_selection_all_cat_at_once,0,False,3,"{\n ""entailment"": [\n 3\n ],\n ""contradiction"": [\n 1,\n 2\n ],\n ""neutral"": [\n None\n ]\n}","Here is the output in JSON format:\n\n{'entailment': [], 'contradiction': [2], 'neutral': [1, 3]}\n\nExplanation:\n\n* There are no text pairs tha...","Here is the output in JSON format:\n\n{\n""entailment"": [1],\n""contradiction"": [2],\n""neutral"": [3]\n}\n\nExplanation:\n\n* Index 1 contains entail...","{\n'entailment': [None],\n'contradiction': [1, 2],\n'neutral': [3]\n}","```json\n{\n ""entailment"": ""None"",\n ""contradiction"": [1, 2, 3],\n ""neutral"": ""None""\n}\n```","{\n""entailment"": [\n""1"",\n""3""\n],\n""contradiction"": [\n""2""\n],\n""neutral"": [\n""2""\n]}","{'entailment': [None], 'contradiction': [1, 2], 'neutral': [3]}\n\nExplanation:\n\n1. The hypothesis ""A choir singing at a baseball game"" is not ..."
3500,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,[All],Contradiction,index_selection_one_cat_a_time_json,0,False,3,"{'contradiction': [1, 2, 3]}",,"Here is the output in JSON format:\n\n{\n""contradiction"": [1]\n}\n\nExplanation:\n\n* Text pair 1: The premise describes a church choir singing at...",,,,
3501,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,[None],Entailment,index_selection_one_cat_a_time_json,0,False,3,{'entailment': [None]},,"Here is the output in JSON format:\n\n{\n""entailment"": [1]\n}\n\nExplanation:\n\n* Text pair 1: The premise describes a church choir singing at a ...",,,,
3502,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,[None],Neutral,index_selection_one_cat_a_time_json,0,False,3,{'neutral': [None]},,"After analyzing the text pairs, I found that none of them contain a neutral inference relation. Therefore, the output is:\n\n{'neutral': 'None'}\n...",,,,
5000,1,"Go over the 3 text pairs below. First, list the index numbers of the text pairs that contain entailment inference relation. Then, select all text ...","{'contradiction': {'All'}, 'entailment': {'None'}, 'neutral': {'None'}}",,index_selection_all_cat_at_once_adjusted,0,False,3,,,,,,,


In [17]:
# out["lmsys/vicuna-13b-v1.5-completion"].str.contains("TOO_").sum()

0