In [1]:
import os
import pandas as pd

os.chdir("../../")
from scripts.llm import get_completion
from scripts.utils import save_obj_as_pickle, read_obj_from_pickle
from scripts.data import make_database, make_prompts_for_clf

pd.set_option("display.max_colwidth", 150)

In [2]:
# both sets have 10,000 text pairs, which is too large. 
def loadData(fp, sample_size=1000):
    df = pd.read_table(fp, header=1, names=["label", "premise", "hypothesis"])
    df = df[df.label != "-"]
    df = df.sample(sample_size, random_state=234)
    df.reset_index(drop=True, inplace=True)
    df.label = df.label.apply(str.capitalize)
    df["text"] = "Premise: " + df.premise + "\nHypothesis: " + df.hypothesis + "\n"
    df = df[['label', 'text']]
    return df

clf_task = "SNLI"
test_fp = f"data/raw/text classification/{clf_task}/test.txt"
test_df = loadData(test_fp)

dev_fp = f"data/raw/text classification/{clf_task}/dev.txt"
dev_df = loadData(dev_fp)

test_df.head()

Unnamed: 0,label,text
0,Neutral,Premise: a matador in white and gold clothing and pink sock is getting hit by a raging bull\nHypothesis: The angry bull is attacking the matador a...
1,Entailment,"Premise: A man wearing a black sweater and a knit cap sits in front of mountain scenery , with a cloudy sky overhead .\nHypothesis: A man is outdo..."
2,Entailment,Premise: Two men are making hand gestures and posing for a picture .\nHypothesis: Two men are ready to have their picture taken .\n
3,Entailment,Premise: Workers are removing ice from a walkway .\nHypothesis: Workers outside on a slippery walkway\n
4,Entailment,Premise: A man riding a motorcycle on a dirt road passing several junk or abandoned cars on the side of the road .\nHypothesis: A man is riding a ...


### Database

Sources for the prompt data

In [3]:
# num_instance: Number of instances to compose multi-problem prompts. Each instance contains multiple problems
# max_instance_size: Maximum number of problems sampled from the benchmark dataset to compose an instance 
# dev_df can be used for purposes such as testing the prompts or for generating exemplars 
database = make_database(test_df, dev_df, num_instance=100, max_instance_size=100)
database.keys()

dict_keys(['num_instance', 'max_instance_size', 'labels', 'testData', 'testInstances', 'devData', 'devInstances'])

#### 0-shot

In [4]:
database["promptTemplates"] = dict()
database["promptTemplates"]["0-shot"] = dict()


SingleClf = "Given the following premise and hypothesis, determine the inference relation between them. " \
            "Respond with 'Entailment' if the hypothesis logically follows from the premise, " \
            "'Contradiction' if they are in direct opposition, and 'Neutral' if neither applies. \n\n" \
            "$text\nInference relation:"

BatchClf = "Given the following $num pairs of premises and hypotheses, determine the inference relation for each pair line by line. " \
           "Respond with 'Entailment' if the hypothesis entails the premise, and 'Contradiction' if they contradict. " \
           "If neither is the case, respond with 'Neutral.' Provide your answers line by line.\n\n" \
           "$texts\nInference relations for the $num text pairs provided above:\n"

SelectOne = "Go over the $num text pairs below and list the index numbers of the text pairs where " \
            "the inference relation between the premise and the hypothesis is $relationship according to the following instructions:\n" \
            "If none of the text pairs contain $relationship inference relation, write 'None.'\n" \
            "If all text pairs contain $relationship inference relation, write 'All.'\n" \
            "Otherwise, provide the index numbers of the text pairs where the inference relation between the premise and the hypothesis is $relationship.\n\n" \
            "Output your responses in JSON format with the key '$relationship'.\nA formatted example output is provided below.\n" \
            "{'$relationship': [None/All or index numbers of text pairs that contain $relationship inference relation]}\n\n" \
            "Here are the text pairs:\n\n$texts\nJSON output:\n"

SelectAll =  "Go over the $num text pairs below. " \
             "First, list the index numbers of the text pairs that contain entailment inference relation. " \
             "Then, select all text pairs that contain contradiction inference relation. " \
             "Finally, select all text pairs that contain neutral inference relation.\n" \
             "If none of the text pairs satisfy a condition, write 'None.'\n" \
             "If all the text pairs belong satisfy a condition, write 'All.'\n" \
             "Otherwise, provide the index numbers of the text pairs that satisfy each condition.\n\n" \
             "Output your responses in JSON format with three keys: 'entailment', 'contradiction', and 'neutral'." \
             "\nA formatted example output is provided below.\n" \
             "{'entailment': [None/All or index numbers of text pairs that contain entailment inference relation], " \
             "'contradiction': [None/All or index numbers of text pairs that contain contradiction inference relation], " \
             "'neutral': [None/All or index numbers of text pairs that contain neutral inference relation]}\n\n" \
             "Here are the text pairs:\n\n$texts\n" \
             "JSON output:\n"
                                            
tasks = ["SingleClf", "BatchClf", "SelectOne", "SelectAll"]
promptTemplates = [SingleClf, BatchClf, SelectOne, SelectAll]

for task, tmp in zip(tasks, promptTemplates):
    database["promptTemplates"]["0-shot"][task] = tmp

In [5]:
os.makedirs("data/databases/text classification/", exist_ok=True)
save_obj_as_pickle(database, f"data/databases/text classification/{clf_task}.pkl")

Saved object to data/databases/text classification/SNLI.pkl


#### Test Prompts

- The main purpose is to check if LLMs can output the desired formats given the prompts 

In [6]:
dev = []
num_instance = 2
taskSizes = [3, 5]
for propmtMode in ["0-shot"]:
    for task in tasks:
        if task == "SingleClf":
            dev.append(make_prompts_for_clf(database, task, "dev", propmtMode)[:num_instance])
            continue

        for taskSize in taskSizes:  
            dev.append(make_prompts_for_clf(database, task, "dev", propmtMode, taskSize, attr="relationship", 
                                            label_attr_converter=None, num_instance=num_instance))

dev = pd.concat(dev).reset_index(drop=True)


In [7]:
for p in dev[(dev["taskSize"] <= 3) & (dev["taskIndex"] == 1)].prompt:
    print(p)
    print("-"*50)
    print()

Given the following premise and hypothesis, determine the inference relation between them. Respond with 'Entailment' if the hypothesis logically follows from the premise, 'Contradiction' if they are in direct opposition, and 'Neutral' if neither applies. 

Premise: A man wearing blue jeans and a black shirt is talking to a man wearing white shorts while they are sitting outside in folding chairs on a patio .
Hypothesis: the men are inside the church

Inference relation:
--------------------------------------------------

Given the following 3 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if the hypothesis entails the premise, and 'Contradiction' if they contradict. If neither is the case, respond with 'Neutral.' Provide your answers line by line.

1. Premise: A young man wearing a gray shirt standing outside a wooden structure smiling and making a fist with his right hand .
Hypothesis: A young man is standing ou

In [8]:
dev["preds"] = dev.prompt.apply(get_completion)
dev

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize,preds
0,1,"Given the following premise and hypothesis, determine the inference relation between them. Respond with 'Entailment' if the hypothesis logically f...",Contradiction,,SingleClf,0,False,1,Neutral
1,2,"Given the following premise and hypothesis, determine the inference relation between them. Respond with 'Entailment' if the hypothesis logically f...",Entailment,,SingleClf,0,False,1,Entailment
2,1,"Given the following 3 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if ...","[Entailment, Entailment, Neutral]",,BatchClf,0,False,3,1. Neutral\n2. Neutral\n3. Contradiction
3,2,"Given the following 3 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if ...","[Neutral, Contradiction, Contradiction]",,BatchClf,0,False,3,1. Neutral\n2. Neutral\n3. Contradiction
4,1,"Given the following 5 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if ...","[Entailment, Entailment, Neutral, Neutral, Contradiction]",,BatchClf,0,False,5,1. Neutral\n2. Neutral\n3. Neutral\n4. Neutral\n5. Contradiction
5,2,"Given the following 5 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if ...","[Neutral, Contradiction, Contradiction, Neutral, Entailment]",,BatchClf,0,False,5,1. Neutral\n2. Neutral\n3. Contradiction\n4. Neutral\n5. Entailment
6,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,{None},Contradiction,SelectOne,0,False,3,"{'contradiction': [1, 2]}"
7,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,"{1, 2}",Entailment,SelectOne,0,False,3,{'entailment': [1]}
8,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,{3},Neutral,SelectOne,0,False,3,{'neutral': [None]}
9,2,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,"{2, 3}",Contradiction,SelectOne,0,False,3,"{'contradiction': [1, 3]}"


### Make prompts

In [9]:
database = read_obj_from_pickle(f"data/databases/text classification/{clf_task}.pkl")

Read object from data/databases/text classification/SNLI.pkl


In [10]:
out = []
num_instance = 100
taskSizes = [3, 5, 10, 20, 50]
for propmtMode in ["0-shot"]:
    for task in tasks:

        if task == "SingleClf":
            out.append(make_prompts_for_clf(database, task, "test", propmtMode))
            continue

        for taskSize in taskSizes:
            out.append(make_prompts_for_clf(database, task, "test", propmtMode, taskSize, attr="relationship", 
                                            label_attr_converter=None, num_instance=num_instance))

out = pd.concat(out)
out.reset_index(drop=True, inplace=True)


os.makedirs("results/text classification/", exist_ok=True)
out.to_json(f"results/text classification/{clf_task}.json", orient="records", lines=True)

In [11]:
out.task.value_counts()

SelectOne    1500
SingleClf    1000
BatchClf      500
SelectAll     500
Name: task, dtype: int64

In [12]:
out.copy()[(out.taskIndex == 1) & (out.taskSize <= 3)]

Unnamed: 0,taskIndex,prompt,answer,targetLabel,task,#shot,CoT,taskSize
0,1,"Given the following premise and hypothesis, determine the inference relation between them. Respond with 'Entailment' if the hypothesis logically f...",Neutral,,SingleClf,0,False,1
1000,1,"Given the following 3 pairs of premises and hypotheses, determine the inference relation for each pair line by line. Respond with 'Entailment' if ...","[Neutral, Neutral, Neutral]",,BatchClf,0,False,3
1500,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,{None},Contradiction,SelectOne,0,False,3
1501,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,{None},Entailment,SelectOne,0,False,3
1502,1,Go over the 3 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is...,{All},Neutral,SelectOne,0,False,3
3000,1,"Go over the 3 text pairs below. First, list the index numbers of the text pairs that contain entailment inference relation. Then, select all text ...","{'contradiction': {'None'}, 'entailment': {'None'}, 'neutral': {'All'}}",,SelectAll,0,False,3
