# Boolean Prompting on Liar Dataset

## Dataset Setup

In [2]:
import datasets

liar = datasets.load_dataset("liar")
liar

DatasetDict({
    train: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 10269
    })
    test: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 1283
    })
    validation: Dataset({
        features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
        num_rows: 1284
    })
})

In [3]:
train = liar["train"]
test = liar["test"]
val = liar["validation"]

In [4]:
full_liar = datasets.concatenate_datasets([train, test, val])
full_liar

  table = cls._concat_blocks(blocks, axis=0)


Dataset({
    features: ['id', 'label', 'statement', 'subject', 'speaker', 'job_title', 'state_info', 'party_affiliation', 'barely_true_counts', 'false_counts', 'half_true_counts', 'mostly_true_counts', 'pants_on_fire_counts', 'context'],
    num_rows: 12836
})

## Model Loading

In [5]:
falcon = "tiiuae/falcon-7b-instruct"
llama = "meta-llama/Llama-2-7b-chat-hf"
mistral = "mistralai/Mistral-7B-Instruct-v0.2"
orca = "microsoft/Orca-2-7b"

In [6]:
# change this depending on experiment
model_name = mistral

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, quantization_config=config, 
)

## Experiment

### Experiment Utils

In [8]:
from transformers import PreTrainedTokenizer
from typing import Dict

# split into True/False
LABEL_MAP = {
    0:"A",
    1:"B"
}

# 0 : False
# 1 : Half True
# 2 : Mostly True
# 3 : True
# 4 : Barely True
# 5 : Pants on Fire
def to_binary_label(entry):
    if entry['label'] in [3, 2, 1]:
        entry['label'] = 0
    else:
        entry['label'] = 1
    return entry

def was_correct(
    decoded:str, entry: Dict[str, int]
) -> bool:
    return LABEL_MAP[entry["label"]] in decoded

In [9]:
full_liar = full_liar.map(to_binary_label)

In [10]:
n_examples = 0

In [11]:
import random
random.seed(1770)
entries = random.choices(list(range(len(full_liar))), k=n_examples)

def to_zero_shot_prompt(entry: Dict[str, str]) -> str:
    speaker = entry["speaker"].replace("-", " ").title()
    statement = entry["statement"].lstrip("Says ")

    prompt = f"""Please select the option that most closely describes the following claim by {speaker}:\n{statement}\n\nA) True\nB) False\n\nChoice: ("""
    return prompt

def to_n_shot_prompt(n: int, entry: Dict[str, str]) -> str:
    examples = ""
    for i in range(n):
        examples += to_zero_shot_prompt(full_liar[entries[i]]) + LABEL_MAP[full_liar[entries[i]]['label']] + "\n\n"
    prompt = to_zero_shot_prompt(entry)
    return examples + prompt


In [12]:
responses: Dict[str, list] = {}
def workflow(idx, entry: dict, model, k:int=0, verbose: bool = False) -> bool:
    prompt = to_n_shot_prompt(k, entry)

    # encode input, move it to cuda, then generate
    encoded_input = tokenizer(prompt, return_tensors="pt")
    encoded_input = {item: val.cuda() for item, val in encoded_input.items()}
    generation = model.generate(
        **encoded_input,
        max_new_tokens=1,
        do_sample=False,
        pad_token_id=tokenizer.eos_token_id
    )

    # log the prompt and response if verbose
    if verbose:
        print(tokenizer.batch_decode(generation)[0])

    decoded = tokenizer.decode(generation[0, -1])
    correct = was_correct(decoded, entry)

    if decoded not in responses:
        responses[decoded] = []
    responses[decoded].append(idx)

    if verbose:
        print(
            "The model was",
            "correct" if correct else "incorrect",
            " - responded",
            tokenizer.decode(generation[0, -1]),
            "and answer should have been",
            LABEL_MAP[entry["label"]],
        )
    return correct

### Verify it works

In [13]:
import random
workflow(0, train[random.randint(0, len(train) - 1)], model, verbose=True, k=n_examples)

<s> Please select the option that most closely describes the following claim by Jorge Elorza:
The reality is that we have roughly 15,000 undocumented immigrants living in the state...

A) True
B) False

Choice: (A
The model was correct  - responded A and answer should have been A


True

### Run Experiment

Results of zero-shot prompting the models

In [14]:
from tqdm import tqdm

num_correct = 0
responses = {}
for idx, entry in enumerate((prog := tqdm(full_liar))):
    if idx in entries:
        continue  # don't include items that were in the examples
    
    correct = workflow(idx, entry, model, k=n_examples)
    if correct:
        num_correct += 1
    prog.set_postfix_str(f"acc: {num_correct/(idx+1):.3f}")

100%|█████████████████████████| 12836/12836 [31:06<00:00,  6.88it/s, acc: 0.569]


In [15]:
# log results
with open(f"{n_examples}_shot_binary.txt", "a") as file:
    file.write(f"{model_name} : {num_correct}/{len(full_liar)-len(entries)}\n")

In [None]:
# print results up till now
with open(f"{n_examples}_shot_binary.txt", "r") as file:
    print(file.read())

In [17]:
import pickle
pickle.dump(responses, open(f"{model_name[model_name.index('/')+1:]}_responses_bool.pk", "wb"))