In [1]:
import os
import pandas as pd

# Hop three levels up from the notebook folder so that the relative "data/..."
# and "results/..." paths used below (and, presumably, the local `scripts`
# package) resolve. The guard makes the cell idempotent: re-running it in the
# same kernel must not climb another three levels.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
    os.chdir("../../../")

import numpy as np
from scripts.utils import save_obj_as_pickle, read_obj_from_pickle
from scripts.data import make_prompts_for_clf, get_indexed_texts
from scripts.parsing import read_benchmark_results

# Widen text columns so prompts are readable when a frame is displayed.
pd.set_option("display.max_colwidth", 150)

### SST-2

In [2]:
# Pick the benchmark, then load whatever read_benchmark_results returns for it
# (concatenated with the new prompts further down) and its pickled database.
clf_task = "SST-2"
data = read_benchmark_results(clf_task)
# NOTE(review): presumably this unpickles the file — only load pickles from a trusted source.
database = read_obj_from_pickle(f"data/databases/text classification/{clf_task}.pkl")

Read object from data/databases/text classification/SST-2.pkl


In [3]:
# Prompt template for the single-label task. The exemplar builder below fills
# $num, $polarity and $texts via str.replace; the raw template (placeholders
# intact) is also appended after the exemplar to form the 1-shot template.
SelectOne = "Go over the $num lines of text below and list the index numbers of the lines with $polarity sentiment according to the following instructions:\n" \
            "If none of the texts show $polarity sentiment, write 'None.'\n" \
            "If all the texts show $polarity sentiment, write 'All.'\n" \
            "Otherwise, provide the index numbers for each text with $polarity sentiment.\n\n" \
            "Output your responses in JSON format with the key '$polarity'.\nA formatted example output is provided below.\n" \
            "{'$polarity': [None/All or index numbers for the texts with $polarity sentiment]}\n\n" \
            "Texts, one per line:\n\n$texts\n\n" \
            "JSON output:\n"

# Prompt template for the all-labels task; only $num and $texts are placeholders here.
SelectAll = "Go over the $num lines of text below. First, list the index numbers of the lines with positive sentiment. " \
            "Then, list the index numbers of the lines with negative sentiment.\n" \
            "If none of the texts show a particular sentiment, write 'None.'\n" \
            "If all the texts show a particular sentiment, write 'All.'\n" \
            "Otherwise, provide the index numbers of the texts that fit a particular category.\n\n" \
            "Output your responses in JSON format with two keys: 'positive' and 'negative.'\nA formatted example output is provided below.\n" \
            "{'positive': [None/All or index numbers of positive sentences], 'negative': [None/All or index numbers of negative sentences]}\n\n" \
            "Texts, one per line:\n\n$texts\n\n" \
            "JSON output:\n"

In [4]:
def get_10_examples_and_answers(database, polarity="positive", top_k=10):
    """Return the first ``top_k`` dev examples plus the gold answer for one polarity.

    Args:
        database: mapping with a ``"devData"`` entry that holds ``"texts"`` and ``"labels"``.
        polarity: label to look for; it is capitalized before being compared to the labels.
        top_k: number of leading dev examples to use.

    Returns:
        A 3-tuple ``(texts, labels, answers)``: the first two are whatever
        ``get_indexed_texts`` returns for the sliced texts / labels, and
        ``answers`` is a ``", "``-joined string of the 1-based positions whose
        label equals the requested polarity.
    """
    dev = database["devData"]
    polarity = polarity.capitalize()
    texts = get_indexed_texts(dev["texts"][:top_k])
    labels = dev["labels"][:top_k]
    # Compare against the same slice that is shown in the prompt; the answers
    # used to be taken from a hard-coded [:10] slice, which disagreed with the
    # returned texts/labels whenever top_k != 10.
    answers = (np.where(np.asarray(labels) == polarity)[0] + 1).tolist()
    return texts, get_indexed_texts(labels), ", ".join(map(str, answers))

def make_SelectOne_exemplar(polarity="positive"):
    """Build the worked SelectOne example (filled prompt + reasoning + JSON answer).

    Args:
        polarity: sentiment the example asks about; substituted for ``$polarity``.

    Returns:
        The SelectOne template filled with 10 dev texts, followed by a
        line-by-line classification and the final JSON answer.
    """
    num = "10"
    texts, labels, answers = get_10_examples_and_answers(database, polarity)
    prompt = SelectOne.replace("$num", num).replace("$polarity", polarity)
    prompt = prompt.replace("$texts", texts)

    # The last literal is deliberately not an f-string so the JSON braces stay
    # literal. The rationale now names the requested polarity instead of a
    # hard-coded "positive", which was wrong for polarity="negative".
    reasoning = (
        f"To solve this task, let's first classify the {num} lines of text above, one per line:\n\n"
        f"{labels}\n\nFrom here, we see that lines $answers show $polarity sentiment. "
        "Therefore, the answer in JSON format is as follows:\n"
        "{'$polarity': [$answers]}"
    )
    return prompt + reasoning.replace("$answers", answers).replace("$polarity", polarity)


def make_SelectAll_exemplar():
    """Build the worked SelectAll example covering both sentiment classes.

    Returns:
        The SelectAll template filled with 10 dev texts, followed by the
        per-line classification and the JSON answer for both keys.
    """
    num = "10"
    texts, labels, positives = get_10_examples_and_answers(database, "positive")
    negatives = get_10_examples_and_answers(database, "negative")[2]

    filled = SelectAll.replace("$num", num).replace("$texts", texts)

    # Only the first two pieces interpolate; the JSON line keeps literal braces.
    reasoning = (
        f"To solve this task, let's first classify the {num} lines of text above, one per line:\n\n"
        f"{labels}\n\nFrom here, we see that lines $posAnswers show positive sentiment, and lines $negAnswers show negative sentiment. "
        "Therefore, the answer in JSON format is as follows:\n"
        "{'positive': [$posAnswers], 'negative': [$negAnswers]}"
    )
    reasoning = reasoning.replace("$posAnswers", positives)
    reasoning = reasoning.replace("$negAnswers", negatives)
    return filled + reasoning

In [5]:
# 1-shot CoT template = worked exemplar, a blank line, then the unfilled task template.
selectOneExemplar = make_SelectOne_exemplar()
selectAllExemplar = make_SelectAll_exemplar()
SelectOne_CoT = f"{selectOneExemplar}\n\n{SelectOne}"
SelectAll_CoT = f"{selectAllExemplar}\n\n{SelectAll}"

In [6]:
# Register the 1-shot CoT templates under their task names and persist the database.
# setdefault (instead of assigning a fresh dict) keeps any templates already stored
# for other prompt modes; they used to be wiped right before the pickle was overwritten.
database.setdefault("promptTemplates", dict())

tasks = ["SelectOne", "SelectAll"]
promptTemplates = [SelectOne_CoT, SelectAll_CoT]
database["promptTemplates"]["1-shot-CoT"] = dict(zip(tasks, promptTemplates))

save_obj_as_pickle(database, f"data/databases/text classification/{clf_task}.pkl")

Saved object to data/databases/text classification/SST-2.pkl


In [7]:
# Generate prompts for every task with the 1-shot CoT templates ("test" is
# presumably the data split — confirm in make_prompts_for_clf) and stack them.
num_instance = 100
taskSize = 10
promptMode = "1-shot-CoT"
out = pd.concat([
    make_prompts_for_clf(database, task, "test", promptMode, taskSize, attr="polarity",
                         label_attr_converter=None, num_instance=num_instance)
    for task in tasks
]).reset_index(drop=True)

In [8]:
# Print every prompt whose taskIndex equals 1, separated by a row of asterisks.
for prompt_text in out.loc[out.taskIndex == 1, "prompt"]:
    print(prompt_text)
    print("\n" + "*" * 100 + "\n")

Go over the 10 lines of text below and list the index numbers of the lines with positive sentiment according to the following instructions:
If none of the texts show positive sentiment, write 'None.'
If all the texts show positive sentiment, write 'All.'
Otherwise, provide the index numbers for each text with positive sentiment.

Output your responses in JSON format with the key 'positive'.
A formatted example output is provided below.
{'positive': [None/All or index numbers for the texts with positive sentiment]}

Texts, one per line:

1. one long string of cliches .
2. if you 've ever entertained the notion of doing what the title of this film implies , what sex with strangers actually shows may put you off the idea forever .
3. k-19 exploits our substantial collective fear of nuclear holocaust to generate cheap hollywood tension .
4. it 's played in the most straight-faced fashion , with little humor to lighten things up .
5. there is a fabric of complex ideas here , and feelings th

In [9]:
# Stack the new prompts under the previously loaded rows and write JSON Lines.
# NOTE(review): if `data` is read from this same file, re-running appends duplicates — confirm.
data_ = pd.concat([data, out], axis=0, ignore_index=True)
data_.to_json(f"results/text classification/{clf_task}.json", orient="records", lines=True)

### CoLA

In [10]:
# Prompt template for the single-label task. The exemplar builder below fills
# $num, $acceptability and $texts via str.replace.
SelectOne = "Go over the $num lines of text below and list the index numbers of the lines that are grammatically $acceptability according to the following instructions:\n" \
            "If none of the texts are grammatically $acceptability, write 'None.'\n" \
            "If all the texts are grammatically $acceptability, write 'All.'\n" \
            "Otherwise, provide the index numbers for each grammatically $acceptability text.\n\n" \
            "Output your responses in JSON format with the key '$acceptability'.\nA formatted example output is provided below.\n" \
            "{'$acceptability': [None/All or index numbers of $acceptability sentences]}\n\n" \
            "Texts, one per line:\n\n$texts\n\n" \
            "JSON output:\n"

# Prompt template for the all-labels task; only $num and $texts are placeholders here.
SelectAll = "Go over the $num lines of text below. First, list the index numbers of the lines that are grammatically acceptable. " \
            "Then, list the index numbers of the lines that are grammatically unacceptable.\n" \
            "If none of the sentences show a particular acceptability, write 'None.'\n" \
            "If all the sentences show a particular acceptability, write 'All.'\n" \
            "Otherwise, provide the index numbers of the texts that fit a particular category.\n" \
            "Output your responses in JSON format with two keys 'acceptable' and 'unacceptable.'\nA formatted example output is provided below. \n" \
            "{'acceptable': [None/All or index numbers of acceptable texts], 'unacceptable': [None/All or index numbers of unacceptable texts]}" \
            "\n\nTexts, one per line:\n\n$texts\n\n" \
            "JSON output:\n" 

In [11]:
# Pick the benchmark, then load whatever read_benchmark_results returns for it
# (concatenated with the new prompts further down) and its pickled database.
clf_task = "CoLA"
data = read_benchmark_results(clf_task)
# NOTE(review): presumably this unpickles the file — only load pickles from a trusted source.
database = read_obj_from_pickle(f"data/databases/text classification/{clf_task}.pkl")

Read object from data/databases/text classification/CoLA.pkl


In [12]:
def get_10_examples_and_answers(database, attr="unacceptable", top_k=10):
    """Return the last ``top_k`` dev examples plus the gold answer for one label.

    Args:
        database: mapping with a ``"devData"`` entry that holds ``"texts"`` and ``"labels"``.
        attr: label to look for; it is capitalized before the comparison.
        top_k: number of trailing dev examples to use.

    Returns:
        ``(texts, labels, answers)`` where the first two come from
        ``get_indexed_texts`` and ``answers`` is a ``", "``-joined string of
        the 1-based positions whose label equals the requested one.
    """
    dev_split = database["devData"]
    wanted_label = attr.capitalize()
    tail = slice(-top_k, None)
    tail_labels = dev_split["labels"][tail]
    indexed_texts = get_indexed_texts(dev_split["texts"][tail])
    # assumes labels support elementwise == (e.g. a NumPy array) — TODO confirm
    hit_positions = np.where(tail_labels == wanted_label)[0]
    answer_str = ", ".join(str(pos + 1) for pos in hit_positions)
    return indexed_texts, get_indexed_texts(tail_labels), answer_str

def make_SelectOne_exemplar(attr="unacceptable"):
    """Build the worked SelectOne example (filled prompt + reasoning + JSON answer).

    Args:
        attr: acceptability value the example asks about; substituted for ``$acceptability``.

    Returns:
        The SelectOne template filled with 10 dev texts, followed by a
        line-by-line classification and the final JSON answer.
    """
    num = "10"
    texts, labels, answers = get_10_examples_and_answers(database, attr)
    prompt = SelectOne.replace("$num", num).replace("$acceptability", attr)
    prompt = prompt.replace("$texts", texts)

    # The rationale names the requested label; it used to say "unacceptable"
    # even when attr="acceptable". The JSON line is a plain string so its
    # braces stay literal.
    reasoning = (
        f"To solve this task, let's first classify the {num} lines of text above, one per line:\n\n"
        f"{labels}\n\nFrom here, we see that texts in lines $answers are $acceptability. "
        "Therefore, the answer in JSON format is as follows:\n"
        "{'$acceptability': [$answers]}"
    )
    return prompt + reasoning.replace("$answers", answers).replace("$acceptability", attr)


def make_SelectAll_exemplar():
    """Build the worked SelectAll example covering both acceptability classes.

    Returns:
        The SelectAll template filled with 10 dev texts, followed by the
        per-line classification and the JSON answer for both keys.
    """
    num = "10"
    texts, labels, acceptable_ids = get_10_examples_and_answers(database, "acceptable")
    unacceptable_ids = get_10_examples_and_answers(database, "unacceptable")[2]

    filled = SelectAll.replace("$num", num).replace("$texts", texts)

    # Only the first two pieces interpolate; the JSON line keeps literal braces.
    reasoning = (
        f"To solve this task, let's first classify the {num} lines of text above, one per line:\n\n"
        f"{labels}\n\nFrom here, we see that texts in lines $posAnswers are acceptable, and texts in lines $negAnswers are unacceptable. "
        "Therefore, the answer in JSON format is as follows:\n"
        "{'acceptable': [$posAnswers], 'unacceptable': [$negAnswers]}"
    )
    reasoning = reasoning.replace("$posAnswers", acceptable_ids)
    reasoning = reasoning.replace("$negAnswers", unacceptable_ids)
    return filled + reasoning

In [13]:
# 1-shot CoT template = worked exemplar, a blank line, then the unfilled task template.
selectOneExemplar = make_SelectOne_exemplar()
selectAllExemplar = make_SelectAll_exemplar()
SelectOne_CoT = f"{selectOneExemplar}\n\n{SelectOne}"
SelectAll_CoT = f"{selectAllExemplar}\n\n{SelectAll}"

In [14]:
# Register the 1-shot CoT templates under their task names and persist the database.
# setdefault (instead of assigning a fresh dict) keeps any templates already stored
# for other prompt modes; they used to be wiped right before the pickle was overwritten.
database.setdefault("promptTemplates", dict())

tasks = ["SelectOne", "SelectAll"]
promptTemplates = [SelectOne_CoT, SelectAll_CoT]
database["promptTemplates"]["1-shot-CoT"] = dict(zip(tasks, promptTemplates))

save_obj_as_pickle(database, f"data/databases/text classification/{clf_task}.pkl")

Saved object to data/databases/text classification/CoLA.pkl


In [15]:
# Generate prompts for every task with the 1-shot CoT templates ("test" is
# presumably the data split — confirm in make_prompts_for_clf) and stack them.
num_instance = 100
taskSize = 10
promptMode = "1-shot-CoT"
out = pd.concat([
    make_prompts_for_clf(database, task, "test", promptMode, taskSize, attr="acceptability",
                         label_attr_converter=None, num_instance=num_instance)
    for task in tasks
]).reset_index(drop=True)

In [16]:
# Print every prompt whose taskIndex equals 1, separated by a row of asterisks.
for prompt_text in out.loc[out.taskIndex == 1, "prompt"]:
    print(prompt_text)
    print("\n" + "*" * 100 + "\n")

Go over the 10 lines of text below and list the index numbers of the lines that are grammatically unacceptable according to the following instructions:
If none of the texts are grammatically unacceptable, write 'None.'
If all the texts are grammatically unacceptable, write 'All.'
Otherwise, provide the index numbers for each grammatically unacceptable text.

Output your responses in JSON format with the key 'unacceptable'.
A formatted example output is provided below.
{'unacceptable': [None/All or index numbers of unacceptable sentences]}

Texts, one per line:

1. Where has he put the cake?
2. Jason persuaded Medea to desert her family
3. Gilgamesh perhaps should be leaving.
4. Gilgamesh hasn't kissed Ishtar.
5. Anson thought that himself was going to the club.
6. Poseidon appears to own a dragon
7. Digitize is my happiest memory
8. It is easy to slay the Gorgon.
9. I had the strangest feeling that I knew you.
10. What all did you get for Christmas?

JSON output:
To solve this task, le

In [17]:
# Stack the new prompts under the previously loaded rows and write JSON Lines.
# NOTE(review): if `data` is read from this same file, re-running appends duplicates — confirm.
data_ = pd.concat([data, out], axis=0, ignore_index=True)
data_.to_json(f"results/text classification/{clf_task}.json", orient="records", lines=True)

### AGNews

In [18]:
# Prompt template for the single-category task. The exemplar builder below fills
# $num, $category and $texts via str.replace.
SelectOne = "This is a news classification task in which each line of text belongs to one of four categories 'Business,' 'Sports,' 'World,' and 'Sci/Tech.'\n\n" \
            "Go over the $num lines of text below and list the index numbers of the lines that can be classified as $category according to the following instructions:\n" \
            "If none of the texts can be classified as $category, write 'None.'\n" \
            "If all the texts can be classified as $category, write 'All.'\n" \
            "Otherwise, provide the index numbers of the texts that can be classified as $category.\n\n" \
            "Output your responses in JSON format with the key '$category'.\nA formatted example output is provided below.\n" \
            "{'$category': [None/All or index numbers of the texts that can be classified as $category]}\n\n" \
            "Texts, one per line:\n\n$texts\n\n" \
            "JSON output:\n"

# Prompt template for the all-categories task; only $num and $texts are placeholders here.
# Note the JSON keys are lowercase in this template.
SelectAll = "This is a news classification task in which each line of text belongs to one of four categories 'Business,' 'Sports,' 'World,' and 'Sci/Tech.'\n\n" \
            "Go over the $num lines of text below and list the index numbers of the lines that belong to each category according to the following instructions:\n" \
            "If none of the texts can be classified as a particular category, write 'None.'\n" \
            "If all the texts can be classified as a particular category, write 'All.'\n" \
            "Otherwise, provide the index numbers of the texts that can be classified as the category.\n" \
            "Output your responses in JSON format with the following keys: 'business,' 'sports,' 'world,' and 'sci/tech.'\n" \
            "A formatted example output is provided below.\n" \
            "{'business': [None/All or index numbers of texts in 'business' category], 'sports': [None/All or index numbers of texts in 'sports' category], " \
            "'world': [None/All or index numbers of texts in 'world' category], 'sci/tech': [None/All or index numbers of texts in sci/tech category]}\n\n" \
            "Texts, one per line:\n\n$texts\n\n" \
            "JSON output:\n" 

In [19]:
# Pick the benchmark, then load whatever read_benchmark_results returns for it
# (concatenated with the new prompts further down) and its pickled database.
clf_task = "AGNews"
data = read_benchmark_results(clf_task)
# NOTE(review): presumably this unpickles the file — only load pickles from a trusted source.
database = read_obj_from_pickle(f"data/databases/text classification/{clf_task}.pkl")

Read object from data/databases/text classification/AGNews.pkl


In [20]:
def get_10_examples_and_answers(database, attr="sports", top_k=10):
    """Return the first ``top_k`` dev examples plus the gold answer for one category.

    Args:
        database: mapping with a ``"devData"`` entry that holds ``"texts"`` and ``"labels"``.
        attr: category to look for; compared to the labels exactly as given
            (no case normalisation).
        top_k: number of leading dev examples to use.

    Returns:
        ``(texts, labels, answers)`` where the first two come from
        ``get_indexed_texts`` and ``answers`` is a ``", "``-joined string of
        the 1-based positions whose label equals ``attr``.
    """
    # NOTE(review): callers in this notebook pass capitalised names ("Sports",
    # "Sci/Tech"); the lowercase default presumably matches no label — confirm.
    dev_split = database["devData"]
    head = slice(None, top_k)
    head_labels = dev_split["labels"][head]
    indexed_texts = get_indexed_texts(dev_split["texts"][head])
    hit_positions = np.where(head_labels == attr)[0]
    answer_str = ", ".join(str(pos + 1) for pos in hit_positions)
    return indexed_texts, get_indexed_texts(head_labels), answer_str

def make_SelectOne_exemplar(attr="Sports"):
    """Build the worked SelectOne example (filled prompt + reasoning + JSON answer).

    Args:
        attr: news category the example asks about; substituted for ``$category``.

    Returns:
        The SelectOne template filled with 10 dev texts, followed by a
        line-by-line classification and the final JSON answer.
    """
    num = "10"
    texts, labels, answers = get_10_examples_and_answers(database, attr)
    prompt = SelectOne.replace("$num", num).replace("$category", attr)
    prompt = prompt.replace("$texts", texts)

    # Both the rationale and the JSON key now follow `attr`. Previously they
    # were hard-coded to 'Sports.' / 'sports', so the trailing
    # .replace("$category", ...) was a no-op and the answer key did not match
    # the key the instructions ask for ('$category' -> e.g. 'Sports').
    reasoning = (
        f"To solve this task, let's first classify the {num} lines of text above, one per line:\n\n"
        f"{labels}\n\nFrom here, we see that texts in lines $answers can be classified as '$category.' "
        "Therefore, the answer in JSON format is as follows:\n"
        "{'$category': [$answers]}"
    )
    return prompt + reasoning.replace("$answers", answers).replace("$category", attr)

def make_SelectAll_exemplar():
    """Build the worked SelectAll example covering all four news categories.

    Returns:
        The SelectAll template filled with 10 dev texts, followed by the
        per-line classification and the JSON answer with one key per category.
    """
    num = "10"
    texts, labels, cat1_answers = get_10_examples_and_answers(database, "Business")
    _, _, cat2_answers = get_10_examples_and_answers(database, "Sports")
    _, _, cat3_answers = get_10_examples_and_answers(database, "World")
    _, _, cat4_answers = get_10_examples_and_answers(database, "Sci/Tech")

    prompt = SelectAll.replace("$num", num)
    prompt = prompt.replace("$texts", texts)

    # The JSON line is a plain string so its braces stay literal. The Sci/Tech
    # clause now reads "and texts in lines ..." like the other three clauses
    # (the word "texts" was missing).
    prompt += f"To solve this task, let's first classify the {num} lines of text above, one per line:\n\n" \
              f"{labels}\n\nFrom here, we see that texts in lines $cat1_answers can be classified as 'Business,' " \
              f"texts in lines $cat2_answers can be classified as 'Sports,' texts in lines $cat3_answers can be classified as 'World,' " \
              f"and texts in lines $cat4_answers can be classified as 'Sci/Tech.' " \
              f"Therefore, the answer in JSON format is as follows:\n" \
              "{'business': [$cat1_answers], 'sports': [$cat2_answers], 'world': [$cat3_answers], 'sci/tech': [$cat4_answers]}"
    # Placeholders are substituted over the whole prompt, as before.
    prompt = prompt.replace("$cat1_answers", cat1_answers).replace("$cat2_answers", cat2_answers)
    prompt = prompt.replace("$cat3_answers", cat3_answers).replace("$cat4_answers", cat4_answers)
    return prompt

In [21]:
# 1-shot CoT template = worked exemplar, a blank line, then the unfilled task template.
selectOneExemplar = make_SelectOne_exemplar()
selectAllExemplar = make_SelectAll_exemplar()
SelectOne_CoT = f"{selectOneExemplar}\n\n{SelectOne}"
SelectAll_CoT = f"{selectAllExemplar}\n\n{SelectAll}"

In [22]:
# Register the 1-shot CoT templates under their task names and persist the database.
# setdefault (instead of assigning a fresh dict) keeps any templates already stored
# for other prompt modes; they used to be wiped right before the pickle was overwritten.
database.setdefault("promptTemplates", dict())

tasks = ["SelectOne", "SelectAll"]
promptTemplates = [SelectOne_CoT, SelectAll_CoT]
database["promptTemplates"]["1-shot-CoT"] = dict(zip(tasks, promptTemplates))

save_obj_as_pickle(database, f"data/databases/text classification/{clf_task}.pkl")

Saved object to data/databases/text classification/AGNews.pkl


In [23]:
# Generate prompts for every task with the 1-shot CoT templates ("test" is
# presumably the data split — confirm in make_prompts_for_clf) and stack them.
num_instance = 100
taskSize = 10
promptMode = "1-shot-CoT"
out = pd.concat([
    make_prompts_for_clf(database, task, "test", promptMode, taskSize, attr="category",
                         label_attr_converter=None, num_instance=num_instance)
    for task in tasks
]).reset_index(drop=True)

In [24]:
# Print every prompt whose taskIndex equals 1, separated by a row of asterisks.
for prompt_text in out.loc[out.taskIndex == 1, "prompt"]:
    print(prompt_text)
    print("\n" + "*" * 100 + "\n")

This is a news classification task in which each line of text belongs to one of four categories 'Business,' 'Sports,' 'World,' and 'Sci/Tech.'

Go over the 10 lines of text below and list the index numbers of the lines that can be classified as Sports according to the following instructions:
If none of the texts can be classified as Sports, write 'None.'
If all the texts can be classified as Sports, write 'All.'
Otherwise, provide the index numbers of the texts that can be classified as Sports.

Output your responses in JSON format with the key 'Sports'.
A formatted example output is provided below.
{'Sports': [None/All or index numbers of the texts that can be classified as Sports]}

Texts, one per line:

1. The last time they saw each other, John Kerry was sitting in the Red Sox owner #39;s box on the eve of the Democratic National Convention and the front 
2. Frank Arnesen, the Tottenham sporting director, has refuted suggestions that Jacques Santini #39;s surprise resignation was f

In [25]:
# Stack the new prompts under the previously loaded rows and write JSON Lines.
# NOTE(review): if `data` is read from this same file, re-running appends duplicates — confirm.
data_ = pd.concat([data, out], axis=0, ignore_index=True)
data_.to_json(f"results/text classification/{clf_task}.json", orient="records", lines=True)

### MRPC

In [26]:
# Prompt template for the single-condition task. The exemplar builder below fills
# $num, $be ("is" / "isn't" via label_attr_converter) and $texts via str.replace.
SelectOne = "Go over the $num text pairs below and list the index numbers of the text pairs where text A $be a paraphrase of text B according to the following instructions:\n" \
            "If none of the text pairs satisfy this condition, write 'None.'\n" \
            "If all the text pairs satisfy this condition, write 'All.'\n" \
            "Otherwise, provide the index numbers of the text pairs where text A $be a paraphrase of text B.\n\n" \
            "Output your responses in JSON format with the key 'answer'.\nA formatted example output is provided below.\n" \
            "{'answer': [None/All or index numbers of the text pairs where text A $be a paraphrase of text B]}\n\n" \
            "Here are the text pairs:\n\n$texts\n" \
            "JSON output:\n"

# Prompt template for the both-conditions task; only $num and $texts are placeholders here.
SelectAll = "Go over the $num text pairs below. First, list the index numbers of the text pairs that contain paraphrases. " \
            "Then, list the index numbers of the text pairs that contain non-paraphrases.\n" \
            "If none of the text pairs satisfy a condition, write 'None.'\n" \
            "If all the text pairs satisfy a condition, write 'All.'\n" \
            "Otherwise, provide the index numbers of the text pairs that satisfy each condition.\n\n" \
            "Output your responses in JSON format with two keys: 'yes' for paraphrases and 'no' for non-paraphrases." \
            "\nA formatted example output is provided below.\n" \
            "{'yes': [None/All or index numbers of text pairs that contain paraphrases], " \
            "'no': [None/All or index numbers of text pairs that contain non-paraphrases]}\n\n" \
            "Here are the text pairs:\n\n$texts\n" \
            "JSON output:\n"

In [27]:
# Pick the benchmark, then load whatever read_benchmark_results returns for it
# (concatenated with the new prompts further down) and its pickled database.
clf_task = "MRPC"
data = read_benchmark_results(clf_task)
# NOTE(review): presumably this unpickles the file — only load pickles from a trusted source.
database = read_obj_from_pickle(f"data/databases/text classification/{clf_task}.pkl")

Read object from data/databases/text classification/MRPC.pkl


In [28]:
def label_attr_converter(t):
    """Map a "Yes"/"No" label to the verb that fills ``$be``; any other value raises KeyError."""
    verb_for_label = {"Yes": "is", "No": "isn't"}
    return verb_for_label[t]


def get_10_examples_and_answers(database, attr="Yes", top_k=10):
    """Return the last ``top_k`` dev examples plus the gold answer for one label.

    Args:
        database: mapping with a ``"devData"`` entry that holds ``"texts"`` and ``"labels"``.
        attr: label to look for; it is capitalized before the comparison.
        top_k: number of trailing dev examples to use.

    Returns:
        ``(texts, labels, answers)`` where the first two come from
        ``get_indexed_texts`` and ``answers`` is a ``", "``-joined string of
        the 1-based positions whose label equals the requested one.
    """
    dev_split = database["devData"]
    wanted_label = attr.capitalize()
    tail = slice(-top_k, None)
    tail_labels = dev_split["labels"][tail]
    indexed_texts = get_indexed_texts(dev_split["texts"][tail])
    # assumes labels support elementwise == (e.g. a NumPy array) — TODO confirm
    hit_positions = np.where(tail_labels == wanted_label)[0]
    answer_str = ", ".join(str(pos + 1) for pos in hit_positions)
    return indexed_texts, get_indexed_texts(tail_labels), answer_str

def make_SelectOne_exemplar(attr="Yes"):
    """Build the worked SelectOne example (filled prompt + reasoning + JSON answer).

    Args:
        attr: label the example asks about; ``label_attr_converter`` turns it
            into the verb substituted for ``$be``.

    Returns:
        The SelectOne template filled with 10 dev text pairs, followed by a
        line-by-line judgement and the final JSON answer.
    """
    num = "10"
    texts, labels, answers = get_10_examples_and_answers(database, attr)
    prompt = SelectOne.replace("$num", num).replace("$be", label_attr_converter(attr))
    prompt = prompt.replace("$texts", texts)

    # The conclusion must match the requested label: it used to say
    # "are paraphrases" even when the selected lines were the "No" pairs.
    verdict = "are paraphrases" if attr.capitalize() == "Yes" else "are not paraphrases"

    # The JSON line is a plain string so its braces stay literal.
    reasoning = (
        f"To solve this task, let's first determine if text A  is a paraphrase of text B for the {num} lines of text above, one per line:\n\n"
        f"{labels}\n\nFrom here, we see that text pairs in lines $answers {verdict}. "
        "Therefore, the answer in JSON format is as follows:\n"
        "{'answer': [$answers]}"
    )
    return prompt + reasoning.replace("$answers", answers)


def make_SelectAll_exemplar():
    """Build the worked SelectAll example covering paraphrases and non-paraphrases.

    Returns:
        The SelectAll template filled with 10 dev text pairs, followed by the
        per-line judgement and the JSON answer for both keys.
    """
    num = "10"
    texts, labels, paraphrase_ids = get_10_examples_and_answers(database, "yes")
    other_ids = get_10_examples_and_answers(database, "no")[2]

    filled = SelectAll.replace("$num", num).replace("$texts", texts)

    # Only the first two pieces interpolate; the JSON line keeps literal braces.
    reasoning = (
        f"To solve this task, let's first determine if text A  is a paraphrase of text B for the {num} lines of text above, one per line:\n\n"
        f"{labels}\n\nFrom here, we see that text pairs in lines $posAnswers are paraphrases, and text pairs in lines $negAnswers are not. "
        "Therefore, the answer in JSON format is as follows:\n"
        "{'yes': [$posAnswers], 'no': [$negAnswers]}"
    )
    reasoning = reasoning.replace("$posAnswers", paraphrase_ids)
    reasoning = reasoning.replace("$negAnswers", other_ids)
    return filled + reasoning

In [29]:
# 1-shot CoT template = worked exemplar, a blank line, then the unfilled task template.
selectOneExemplar = make_SelectOne_exemplar()
selectAllExemplar = make_SelectAll_exemplar()
SelectOne_CoT = f"{selectOneExemplar}\n\n{SelectOne}"
SelectAll_CoT = f"{selectAllExemplar}\n\n{SelectAll}"

In [30]:
# Register the 1-shot CoT templates under their task names and persist the database.
# setdefault (instead of assigning a fresh dict) keeps any templates already stored
# for other prompt modes; they used to be wiped right before the pickle was overwritten.
database.setdefault("promptTemplates", dict())

tasks = ["SelectOne", "SelectAll"]
promptTemplates = [SelectOne_CoT, SelectAll_CoT]
database["promptTemplates"]["1-shot-CoT"] = dict(zip(tasks, promptTemplates))

save_obj_as_pickle(database, f"data/databases/text classification/{clf_task}.pkl")

Saved object to data/databases/text classification/MRPC.pkl


In [31]:
# Generate prompts for every task with the 1-shot CoT templates ("test" is
# presumably the data split — confirm in make_prompts_for_clf) and stack them.
num_instance = 100
taskSize = 10
promptMode = "1-shot-CoT"
out = pd.concat([
    make_prompts_for_clf(database, task, "test", promptMode, taskSize, attr="be",
                         label_attr_converter=label_attr_converter, num_instance=num_instance)
    for task in tasks
]).reset_index(drop=True)

In [32]:
# Print every prompt whose taskIndex equals 1, separated by a row of asterisks.
for prompt_text in out.loc[out.taskIndex == 1, "prompt"]:
    print(prompt_text)
    print("\n" + "*" * 100 + "\n")

Go over the 10 text pairs below and list the index numbers of the text pairs where text A is a paraphrase of text B according to the following instructions:
If none of the text pairs satisfy this condition, write 'None.'
If all the text pairs satisfy this condition, write 'All.'
Otherwise, provide the index numbers of the text pairs where text A is a paraphrase of text B.

Output your responses in JSON format with the key 'answer'.
A formatted example output is provided below.
{'answer': [None/All or index numbers of the text pairs where text A is a paraphrase of text B]}

Here are the text pairs:

1. Text A: "Nobody wants to go to war with anybody about anything ... it's always very much a last resort thing and one to be avoided," Mr Howard told Sydney radio.
Text B: "We don't want to go to war with anybody . . . it's always very much a last resort, and one to be avoided.

2. Text A: "The message is: If an individual is thinking of getting a flu shot, they shouldn't wait.
Text B: "The

In [33]:
# Stack the new prompts under the previously loaded rows and write JSON Lines.
# NOTE(review): if `data` is read from this same file, re-running appends duplicates — confirm.
data_ = pd.concat([data, out], axis=0, ignore_index=True)
data_.to_json(f"results/text classification/{clf_task}.json", orient="records", lines=True)

### SNLI

In [34]:
# Prompt template for the single-relation task. The exemplar builder below fills
# $num, $relationship and $texts via str.replace.
SelectOne = "Go over the $num text pairs below and list the index numbers of the text pairs where " \
            "the inference relation between the premise and the hypothesis is $relationship according to the following instructions:\n" \
            "If none of the text pairs contain $relationship inference relation, write 'None.'\n" \
            "If all text pairs contain $relationship inference relation, write 'All.'\n" \
            "Otherwise, provide the index numbers of the text pairs where the inference relation between the premise and the hypothesis is $relationship.\n\n" \
            "Output your responses in JSON format with the key '$relationship'.\nA formatted example output is provided below.\n" \
            "{'$relationship': [None/All or index numbers of text pairs that contain $relationship inference relation]}\n\n" \
            "Here are the text pairs:\n\n$texts\nJSON output:\n"

# Prompt template for the all-relations task; only $num and $texts are placeholders here.
# The 'All.' instruction used to read "belong satisfy a condition" (leftover word).
SelectAll =  "Go over the $num text pairs below. " \
             "First, list the index numbers of the text pairs that contain entailment inference relation. " \
             "Then, select all text pairs that contain contradiction inference relation. " \
             "Finally, select all text pairs that contain neutral inference relation.\n" \
             "If none of the text pairs satisfy a condition, write 'None.'\n" \
             "If all the text pairs satisfy a condition, write 'All.'\n" \
             "Otherwise, provide the index numbers of the text pairs that satisfy each condition.\n\n" \
             "Output your responses in JSON format with three keys: 'entailment', 'contradiction', and 'neutral'." \
             "\nA formatted example output is provided below.\n" \
             "{'entailment': [None/All or index numbers of text pairs that contain entailment inference relation], " \
             "'contradiction': [None/All or index numbers of text pairs that contain contradiction inference relation], " \
             "'neutral': [None/All or index numbers of text pairs that contain neutral inference relation]}\n\n" \
             "Here are the text pairs:\n\n$texts\n" \
             "JSON output:\n"

In [35]:
# Pick the benchmark, then load whatever read_benchmark_results returns for it
# (concatenated with the new prompts further down) and its pickled database.
clf_task = "SNLI"
data = read_benchmark_results(clf_task)
# NOTE(review): presumably this unpickles the file — only load pickles from a trusted source.
database = read_obj_from_pickle(f"data/databases/text classification/{clf_task}.pkl")

Read object from data/databases/text classification/SNLI.pkl


In [36]:
def label_attr_converter(t):
    """Identity mapping: the label itself is what fills ``$relationship``."""
    return t


def get_10_examples_and_answers(database, attr="entailment", top_k=10):
    """Return the last ``top_k`` dev examples plus the gold answer for one relation.

    Args:
        database: mapping with a ``"devData"`` entry that holds ``"texts"`` and ``"labels"``.
        attr: relation to look for; it is capitalized before the comparison.
        top_k: number of trailing dev examples to use.

    Returns:
        ``(texts, labels, answers)`` where the first two come from
        ``get_indexed_texts`` and ``answers`` is a ``", "``-joined string of
        the 1-based positions whose label equals the requested one.
    """
    dev_split = database["devData"]
    wanted_label = attr.capitalize()
    tail = slice(-top_k, None)
    tail_labels = dev_split["labels"][tail]
    indexed_texts = get_indexed_texts(dev_split["texts"][tail])
    # assumes labels support elementwise == (e.g. a NumPy array) — TODO confirm
    hit_positions = np.where(tail_labels == wanted_label)[0]
    answer_str = ", ".join(str(pos + 1) for pos in hit_positions)
    return indexed_texts, get_indexed_texts(tail_labels), answer_str

def make_SelectOne_exemplar(attr="entailment"):
    """Build the worked SelectOne example (filled prompt + reasoning + JSON answer).

    Args:
        attr: inference relation the example asks about; passed through
            ``label_attr_converter`` and substituted for ``$relationship``.

    Returns:
        The SelectOne template filled with 10 dev text pairs, followed by a
        line-by-line judgement and the final JSON answer.
    """
    num = "10"
    texts, labels, answers = get_10_examples_and_answers(database, attr)
    relation = label_attr_converter(attr)
    prompt = SelectOne.replace("$num", num).replace("$relationship", relation)
    prompt = prompt.replace("$texts", texts)

    # Rationale and JSON key follow the requested relation; both used to be
    # hard-coded to "entailment" regardless of `attr`. The JSON line is a
    # plain string so its braces stay literal.
    reasoning = (
        f"To solve this task, let's first determine the inference relation between the premise and the hypothesis for the {num} lines of text above, one per line:\n\n"
        f"{labels}\n\nFrom here, we see that text pairs in lines $answers contain $relationship inference relation. "
        "Therefore, the answer in JSON format is as follows:\n"
        "{'$relationship': [$answers]}"
    )
    return prompt + reasoning.replace("$answers", answers).replace("$relationship", relation)


def make_SelectAll_exemplar():
    """Build the worked SelectAll example covering all three inference relations.

    Returns:
        The SelectAll template filled with 10 dev text pairs, followed by the
        per-line judgement and the JSON answer with one key per relation.
    """
    num = "10"
    texts, labels, pos_answers = get_10_examples_and_answers(database, "entailment")
    _, _, neg_answers = get_10_examples_and_answers(database, "contradiction")
    _, _, net_answers = get_10_examples_and_answers(database, "neutral")

    prompt = SelectAll.replace("$num", num)
    prompt = prompt.replace("$texts", texts)

    # The JSON line is a plain string so its braces stay literal. The neutral
    # clause now reads "text pairs in lines ..." like the other two clauses
    # (the word "lines" was missing).
    prompt += f"To solve this task, let's first determine the inference relation between the premise and the hypothesis for the {num} lines of text above, one per line:\n\n" \
              f"{labels}\n\nFrom here, we see that text pairs in lines $posAnswers contain entailment inference relation, " \
              f"text pairs in lines $negAnswers contain contradiction inference relation, and text pairs in lines $netAnswers contain neutral inference relation. " \
              f"Therefore, the answer in JSON format is as follows:\n" \
              "{'entailment': [$posAnswers], 'contradiction': [$negAnswers], 'neutral': [$netAnswers]}"
    # Placeholders are substituted over the whole prompt, as before.
    prompt = prompt.replace("$posAnswers", pos_answers).replace("$negAnswers", neg_answers).replace("$netAnswers", net_answers)
    return prompt

In [37]:
# 1-shot CoT template = worked exemplar, a blank line, then the unfilled task template.
selectOneExemplar = make_SelectOne_exemplar()
selectAllExemplar = make_SelectAll_exemplar()
SelectOne_CoT = f"{selectOneExemplar}\n\n{SelectOne}"
SelectAll_CoT = f"{selectAllExemplar}\n\n{SelectAll}"

In [38]:
# Register the 1-shot CoT templates under their task names and persist the database.
# setdefault (instead of assigning a fresh dict) keeps any templates already stored
# for other prompt modes; they used to be wiped right before the pickle was overwritten.
database.setdefault("promptTemplates", dict())

tasks = ["SelectOne", "SelectAll"]
promptTemplates = [SelectOne_CoT, SelectAll_CoT]
database["promptTemplates"]["1-shot-CoT"] = dict(zip(tasks, promptTemplates))

save_obj_as_pickle(database, f"data/databases/text classification/{clf_task}.pkl")

Saved object to data/databases/text classification/SNLI.pkl


In [39]:
# Generate prompts for every task with the 1-shot CoT templates ("test" is
# presumably the data split — confirm in make_prompts_for_clf) and stack them.
num_instance = 100
taskSize = 10
promptMode = "1-shot-CoT"
out = pd.concat([
    make_prompts_for_clf(database, task, "test", promptMode, taskSize, attr="relationship",
                         label_attr_converter=label_attr_converter, num_instance=num_instance)
    for task in tasks
])
# NOTE(review): only this benchmark's cell adds a "#shot" column — confirm whether the others should too.
out["#shot"] = 1
out = out.reset_index(drop=True)

In [40]:
# Print every prompt whose taskIndex equals 1, separated by a row of asterisks.
for prompt_text in out.loc[out.taskIndex == 1, "prompt"]:
    print(prompt_text)
    print("\n" + "*" * 100 + "\n")

Go over the 10 text pairs below and list the index numbers of the text pairs where the inference relation between the premise and the hypothesis is entailment according to the following instructions:
If none of the text pairs contain entailment inference relation, write 'None.'
If all text pairs contain entailment inference relation, write 'All.'
Otherwise, provide the index numbers of the text pairs where the inference relation between the premise and the hypothesis is entailment.

Output your responses in JSON format with the key 'entailment'.
A formatted example output is provided below.
{'entailment': [None/All or index numbers of text pairs that contain entailment inference relation]}

Here are the text pairs:

1. Premise: A man selling donuts to a customer during a world exhibition event held in the city of Angeles
Hypothesis: A man selling donuts to a customer .

2. Premise: There are four US military soldiers in this photo , only two have their faces in the picture , there is a

In [41]:
# Stack the new prompts under the previously loaded rows and write JSON Lines.
# NOTE(review): if `data` is read from this same file, re-running appends duplicates — confirm.
data_ = pd.concat([data, out], axis=0, ignore_index=True)
data_.to_json(f"results/text classification/{clf_task}.json", orient="records", lines=True)

### WiC

In [42]:
# Prompt template for the single-condition task. The exemplar builder below fills
# $num, $be ("is" / "isn't" via label_attr_converter) and $texts via str.replace.
# The 'None.' instruction used to end with a stray second period ("'None.'.").
SelectOne = "Analyze the following $num target words and determine the index numbers of the target words where " \
            "the same meaning $be maintained across the two contexts that immediately follow them. " \
            "These target words may appear in different grammatical forms in each context.\n" \
            "If none of the target words satisfy this condition, write 'None.'\n" \
            "If all the target words satisfy this condition, write 'All.'\n" \
            "Otherwise, provide the index numbers.\n\n" \
            "Output your responses in JSON format with the key 'answer'.\nA formatted example output is provided below.\n" \
            "{'answer': [None/All or index numbers of the target words where the same meaning $be maintained in the two subsequent contexts]}\n\n" \
            "Here are the target words along with their contexts:\n\n$texts\n" \
            "JSON output:\n"

# Prompt template for the both-conditions task; only $num and $texts are placeholders here.
SelectAll = "Analyze the following $num target words, which may appear in different grammatical forms in the two subsequent contexts. " \
            "First, list the index numbers of target words that maintain the same meaning in the two subsequent contexts. " \
            "Then, list the index numbers of target words that do not maintain the same meaning in the two subsequent contexts.\n" \
            "If none of the target words satisfy a condition, write 'None.'\nIf all the target words satisfy a condition, write 'All.'\n" \
            "Otherwise, provide the index numbers of the target words that satisfy each condition.\n\n" \
            "Output your responses in JSON format with two keys: 'yes' for target words used with consistent meanings and " \
            "'no' for those used with inconsistent meanings.\nA formatted example output is provided below.\n" \
            "{'yes': [None/All or index numbers of target words used with consistent meanings], " \
            "'no': [None/All or index numbers of target words used with inconsistent meanings]}\n\n" \
            "Here are the target words along with their contexts:\n\n$texts\n" \
            "JSON output:\n"

In [43]:
# Pick the benchmark, then load whatever read_benchmark_results returns for it
# and its pickled database.
clf_task = "WiC"
data = read_benchmark_results(clf_task)
# NOTE(review): presumably this unpickles the file — only load pickles from a trusted source.
database = read_obj_from_pickle(f"data/databases/text classification/{clf_task}.pkl")

Read object from data/databases/text classification/WiC.pkl


In [44]:
def label_attr_converter(t):
    """Map a "Yes"/"No" label to the verb that fills ``$be``; any other value raises KeyError."""
    verb_for_label = {"Yes": "is", "No": "isn't"}
    return verb_for_label[t]


def get_10_examples_and_answers(database, attr="No", top_k=10):
    """Pick the last `top_k` dev examples and the positions whose label equals `attr`.

    Args:
        database: mapping with a "devData" entry holding parallel "texts" and
            "labels" sequences.
        attr: label to look for; it is capitalised before comparison, so "no" and
            "No" are equivalent.
        top_k: number of trailing dev examples to use.

    Returns:
        A 3-tuple of
        - the selected texts formatted by `get_indexed_texts`,
        - the selected labels formatted by `get_indexed_texts`,
        - the 1-based positions of the examples labelled `attr`, joined by ", "
          (an empty string when none match).
    """
    dev = database["devData"]
    attr = attr.capitalize()
    texts = get_indexed_texts(dev["texts"][-top_k:])
    labels = dev["labels"][-top_k:]
    # Coerce to an array so the comparison is element-wise even when the labels are
    # stored as a plain list; `list == str` is just False, which made np.where
    # silently report no matches.
    is_match = np.asarray(labels) == attr
    answers = (np.where(is_match)[0] + 1).tolist()  # +1: prompt lines are numbered from 1
    return texts, get_indexed_texts(labels), ", ".join(map(str, answers))

def make_SelectOne_exemplar(attr="No", top_k=10):
    """Build the worked (chain-of-thought) exemplar for the SelectOne prompt.

    The SelectOne template is filled with the last `top_k` dev examples, followed by
    a reasoning paragraph that lists each example's label and the final JSON answer.

    Args:
        attr: label the exemplar selects, "Yes" (same meaning) or "No" (different
            meaning); case-insensitive.
        top_k: number of dev examples shown in the exemplar.

    Returns:
        The complete exemplar prompt as a string.

    Raises:
        KeyError: if `attr` is neither "Yes" nor "No" after capitalisation.
    """
    # Normalise once so the template verb, the reasoning text and the answers all
    # refer to the same label.
    attr = attr.capitalize()
    num = str(top_k)
    texts, labels, answers = get_10_examples_and_answers(database, attr, top_k)
    prompt = SelectOne.replace("$num", num).replace("$be", label_attr_converter(attr))
    prompt = prompt.replace("$texts", texts)

    # The reasoning sentence has to describe the label that was actually selected.
    consistency = {"Yes": "consistent", "No": "inconsistent"}[attr]
    prompt += (
        f"To solve this task, let's first determine if the target words is used with consistent meanings "
        f"in the two subsequent contexts for the {num} lines of text above, one per line:\n\n"
        f"{labels}\n\nFrom here, we see that text pairs in lines {answers} use the target words with {consistency} meanings. "
        "Therefore, the answer in JSON format is as follows:\n"
        f"{{'answer': [{answers}]}}"
    )
    return prompt


def make_SelectAll_exemplar(top_k=10):
    """Build the worked (chain-of-thought) exemplar for the SelectAll prompt.

    The SelectAll template is filled with the last `top_k` dev examples, followed by
    a reasoning paragraph that lists each example's label and the final JSON answer
    with both the 'yes' and the 'no' line numbers.

    Args:
        top_k: number of dev examples shown in the exemplar.

    Returns:
        The complete exemplar prompt as a string.
    """
    num = str(top_k)
    texts, labels, pos_answers = get_10_examples_and_answers(database, "Yes", top_k)
    _, _, neg_answers = get_10_examples_and_answers(database, "No", top_k)

    prompt = SelectAll.replace("$num", num)
    prompt = prompt.replace("$texts", texts)

    # `pos_answers` are the "Yes" lines, i.e. the target word keeps the SAME meaning,
    # so they must be described as consistent to agree with the 'yes' key below.
    prompt += (
        f"To solve this task, let's first determine if the target words is used with consistent meanings "
        f"in the two subsequent contexts for the {num} lines of text above, one per line:\n\n"
        f"{labels}\n\nFrom here, we see that text pairs in lines {pos_answers} use the target words with consistent meanings, "
        f"and text pairs in lines {neg_answers} do not. "
        "Therefore, the answer in JSON format is as follows:\n"
        f"{{'yes': [{pos_answers}], 'no': [{neg_answers}]}}"
    )
    return prompt

In [45]:
# Build each worked exemplar, then put it in front of the matching blank template
# (separated by an empty line) to form the 1-shot chain-of-thought templates.
selectOneExemplar = make_SelectOne_exemplar()
selectAllExemplar = make_SelectAll_exemplar()

SelectOne_CoT = "\n\n".join([selectOneExemplar, SelectOne])
SelectAll_CoT = "\n\n".join([selectAllExemplar, SelectAll])

In [46]:
# Keep whatever prompt templates the loaded database already holds for other prompt
# modes; replacing the whole "promptTemplates" mapping would erase them from the
# pickle when it is saved below. Only the "1-shot-CoT" entry is rebuilt here.
database.setdefault("promptTemplates", dict())
database["promptTemplates"]["1-shot-CoT"] = dict()

tasks = ["SelectOne", "SelectAll"]
promptTemplates = [SelectOne_CoT, SelectAll_CoT]

# Register one template per task under the "1-shot-CoT" prompt mode.
for task, tmp in zip(tasks, promptTemplates):
    database["promptTemplates"]["1-shot-CoT"][task] = tmp

# Persist the updated database back to the file it was loaded from.
save_obj_as_pickle(database, f"data/databases/text classification/{clf_task}.pkl")

Saved object to data/databases/text classification/WiC.pkl


In [47]:
# Settings for generating the 1-shot chain-of-thought test prompts.
num_instance = 100
taskSize = 10
promptMode = "1-shot-CoT"

# Generate the prompts for every task, stack them into one table, tag each row as
# 1-shot and renumber the rows from 0.
out = (
    pd.concat([
        make_prompts_for_clf(database, task, "test", promptMode, taskSize, attr="be",
                             label_attr_converter=label_attr_converter, num_instance=num_instance)
        for task in tasks
    ])
    .assign(**{"#shot": 1})
    .reset_index(drop=True)
)

In [48]:
# Show every generated prompt whose taskIndex is 1, each followed by a row of asterisks.
separator = "\n" + "*" * 100 + "\n"
for prompt in out.loc[out["taskIndex"] == 1, "prompt"]:
    print(prompt)
    print(separator)

Analyze the following 10 target words and determine the index numbers of the target words where the same meaning isn't maintained across the two contexts that immediately follow them. These target words may appear in different grammatical forms in each context.
If none of the target words satisfy this condition, write 'None.'.
If all the target words satisfy this condition, write 'All.'
Otherwise, provide the index numbers.

Output your responses in JSON format with the key 'answer'.
A formatted example output is provided below.
{'answer': [None/All or index numbers of the target words where the same meaning isn't maintained in the two subsequent contexts]}

Here are the target words along with their contexts:

1. Target word: instruction
Context 1: Students receive instruction in the arts and sciences .
Context 2: Instruction will be provided on how to handle difficult customers .

2. Target word: extension
Context 1: The dancer was praised for her uncanny extension .
Context 2: Good 

In [49]:
# Append the new prompts to the previously loaded results and write everything out
# as JSON Lines (one record per line).
# NOTE(review): if read_benchmark_results reads this same file, re-running the
# notebook would append the 1-shot-CoT rows again — confirm before re-executing.
results_path = f"results/text classification/{clf_task}.json"
data_ = pd.concat([data, out], axis=0, ignore_index=True)
data_.to_json(results_path, orient="records", lines=True)