#### Works on Nvidia Devices

In [1]:
!pip install  --upgrade transformers datasets accelerate evaluate bitsandbytes trl peft torch
import torch; assert torch.cuda.get_device_capability()[0] >= 8, 'Hardware not supported for Flash Attention'
!pip install ninja packaging tensorboardX tensorboard
# !MAX_JOBS=4 pip install flash-attn --no-build-isolation
!pip install flash-attn

Collecting transformers
  Downloading transformers-4.40.1-py3-none-any.whl.metadata (137 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m138.0/138.0 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datasets
  Downloading datasets-2.19.0-py3-none-any.whl.metadata (19 kB)
Collecting accelerate
  Downloading accelerate-0.29.3-py3-none-any.whl.metadata (18 kB)
Collecting evaluate
  Downloading evaluate-0.4.2-py3-none-any.whl.metadata (9.3 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.43.1-py3-none-manylinux_2_24_x86_64.whl.metadata (2.2 kB)
Collecting trl
  Downloading trl-0.8.6-py3-none-any.whl.metadata (11 kB)
Collecting peft
  Downloading peft-0.10.0-py3-none-any.whl.metadata (13 kB)
Collecting torch
  Downloading torch-2.3.0-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting huggingface-hub<1.0,>=0.19.3 (from transformers)
  Downloading huggingface_hub-0.22.2-py3-none-any.whl.metadata (12 kB)
Collecting regex!=2019.12.17 (from tran

In [1]:
HF_TOKEN = "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"
from huggingface_hub import login
login(
  token=HF_TOKEN, # ADD YOUR TOKEN HERE
  add_to_git_credential=True
)

Token is valid (permission: write).
[1m[31mCannot authenticate through git-credential as no helper is defined on your machine.
You might have to re-authenticate when pushing to the Hugging Face Hub.
Run the following command in your terminal in case you want to set the 'store' credential helper as default.

git config --global credential.helper store

Read https://git-scm.com/book/en/v2/Git-Tools-Credential-Storage for more details.[0m
Token has not been saved to git credential helper.
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [2]:
import peft
import trl
from peft import LoraConfig
from datasets import load_dataset

dataset = load_dataset("Ksgk-fy/alignment-sft-test2-mode-1", split="train")

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import setup_chat_format

# Hugging Face model id
model_id = "HuggingFaceH4/zephyr-7b-beta"
# model_id = "meta-llama/Meta-Llama-3-8B"

# BitsAndBytesConfig int-4 config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True, bnb_4bit_use_double_quant=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16
)

# Load model and tokenizer
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
    quantization_config=bnb_config
)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.padding_side = 'right' # to prevent warnings

Loading checkpoint shards:   0%|          | 0/8 [00:00<?, ?it/s]

In [4]:
from peft import LoraConfig
from transformers import TrainingArguments

# LoRA config based on QLoRA paper & Sebastian Raschka experiment
peft_config = LoraConfig(
        lora_alpha=128,
        lora_dropout=0.05,
        r=256,
        bias="none",
        target_modules="all-linear",
        task_type="CAUSAL_LM",
)

args = TrainingArguments(
    output_dir="alignment-adaptor-test06", # directory to save and repository id
    num_train_epochs=5,                     # number of training epochs
    per_device_train_batch_size=16,          # batch size per device during training
    gradient_accumulation_steps=2,          # number of steps before performing a backward/update pass
    gradient_checkpointing=True,            # use gradient checkpointing to save memory
    optim="adamw_torch_fused",              # use fused adamw optimizer
    logging_steps=10,                       # log every 10 steps
    save_strategy="epoch",                  # save checkpoint every epoch
    learning_rate=2e-4,                     # learning rate, based on QLoRA paper
    bf16=True,                              # use bfloat16 precision
    tf32=False,                              # use tf32 precision
    max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper
    warmup_ratio=0.03,                      # warmup ratio based on QLoRA paper
    lr_scheduler_type="constant",           # use constant learning rate scheduler
    push_to_hub=True,                       # push model to hub
    report_to="tensorboard",                # report metrics to tensorboard
)

In [5]:
import peft
import trl
from peft import LoraConfig
from datasets import load_dataset

dataset = load_dataset("Ksgk-fy/alignment-sft-test2-mode-1", split="train")

In [6]:
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

def map_label(label):
    if label == "Yes":
        return "Yes"
    if label == "No":
        return "No"
    if label == "Unknown":
        return "Hmm"

def formatting_prompts_func(example):
    output_texts = []
    for i in range(len(example['prompt'])):
        text = f"### Question: {example['prompt'][i]}\n ### Answer: {map_label(example['completion'][i])}"
        output_texts.append(text)
    return output_texts


def formatting_prompt_func(example):
    output_texts = []
    for i in range(1):
        text = f"### Question: {example['prompt']}\n ### Answer: {map_label(example['completion'])}"
        output_texts.append(text)
    return output_texts

response_template = "### Answer:"

collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)


def formatting_query_prompt_func(example):
  """
  Used to let LLM generate predicted completion to a prompt
  """
  query_text = f"### Question: {example['prompt']}\n ### Answer: "
  return query_text

In [7]:
from trl import SFTTrainer

max_seq_length = 512 # max sequence length for model and packing of the dataset

trainer = SFTTrainer(
    model=model,
    args=args,
    train_dataset=dataset,
    peft_config=peft_config,
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    formatting_func=formatting_prompts_func,
    data_collator=collator,
    packing=False,
    dataset_kwargs={
        "add_special_tokens": False,  # We template with special tokens
        "append_concat_token": False, # No need to add additional separator token
    }
)


Map:   0%|          | 0/460 [00:00<?, ? examples/s]

In [8]:
# start training, the model will be automatically saved to the hub and the output directory
trainer.train()

# save model
# trainer.push_to_hub()
# trainer.save_model('Ksgk-fy/alignment-adapter-test05')

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...
The input hidden states seems to be silently casted in float32, this might be related to the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in torch.bfloat16.


Step,Training Loss
10,2.7179
20,1.416
30,0.9533
40,0.7638
50,0.3011
60,2.2833
70,0.1237




TrainOutput(global_step=70, training_loss=1.222718460219247, metrics={'train_runtime': 281.6238, 'train_samples_per_second': 8.167, 'train_steps_per_second': 0.249, 'total_flos': 2.062356402408653e+16, 'train_loss': 1.222718460219247, 'epoch': 4.827586206896552})

In [13]:
# now I need to upload model to huggging-face and not to just watch it like a football game
# Attempt 1: 
trainer.push_to_hub('Ksgk-fy/alignment-adapter-test05')

CommitInfo(commit_url='https://huggingface.co/Ksgk-fy/alignment-adaptor-test04/commit/aa7f65cb50b6bdcc65738e6e1d9e69f566249c52', commit_message='Ksgk-fy/alignment-adapter-test05', commit_description='', oid='aa7f65cb50b6bdcc65738e6e1d9e69f566249c52', pr_url=None, pr_revision=None, pr_num=None)

In [12]:
from tqdm import tqdm as tqdm
import numpy

def check_performance(dataset, model, tokenizer):
    n_correct, n_wrong = 0, 0
    pb = tqdm(total=len(dataset), desc="Calculating perplexity")
    for data in dataset:
        is_correct = eval_data(data, model, tokenizer)
        n_correct += int(is_correct)
        n_wrong += (1 - int(is_correct))
        pb.update(1)
    print("Success Rate: ", numpy.round(n_correct / (n_correct + n_wrong), 2))
    return n_correct / (n_correct + n_wrong)

def eval_data(data, model, tokenizer, scale = 1.0):

    get_text = lambda example: f"### Question: {example['prompt']}\n ### Answer: {map_label(example['completion'])}"
    chosen_str = get_text(data)
    answer_index = chosen_str.find("Answer: ") + 8
    answer = chosen_str[answer_index:].split("<|im_end|>")[0]
    start_index_sequence, end_index_sequence = answer_index, answer_index + len(answer)
    
    # When encoding happens we count the token and not the others
    query_ids = tokenizer.encode(chosen_str[:start_index_sequence])
    query_answer_ids = tokenizer.encode(chosen_str[:end_index_sequence])
    
    start_index = len(query_ids)
    end_index = len(query_answer_ids)
    if start_index == end_index:
        start_index -= 1
    
    # Run inference and calculate next-token prediction loss
    sequence_ids = tokenizer.encode(chosen_str, return_tensors="pt").to("cuda")
    with torch.no_grad():
        sequence_logits = model(sequence_ids).logits 
        target_logits = sequence_logits[:, start_index:end_index]
        # target_ids = sequence_ids[:, start_index:end_index].view(-1)
    
    # Process separately for each prediction answer
    def process_possible_answer(pos):
        pos_str = chosen_str[:start_index_sequence] + pos
        query_pos_ids = tokenizer.encode(pos_str)
        
        pos_start_index = len(query_ids)
        pos_end_index = len(query_pos_ids)
        if pos_start_index == pos_end_index:
            pos_start_index -= 1
    
        id = query_pos_ids[pos_start_index:pos_start_index+1]
        return id
    
    id_1 = process_possible_answer("Yes")
    id_2 = process_possible_answer("Hmm")
    id_3 = process_possible_answer("No")
    
    # print("Prefix: ", chosen_str[:start_index_sequence])
    # print(id_1, id_2, id_3)
    
    # Get that logits (relative logits)
    pred_logits = target_logits.view(-1)
    pred_probs = torch.softmax(pred_logits, dim=0)
    # return pred_probs, id_2

    prob_1, prob_2, prob_3 = pred_probs[id_1], pred_probs[id_2], pred_probs[id_3]

    # print(prob_1, prob_2, prob_3)
    
    prob_2 *= scale
    
    norm_1 = prob_1 / (prob_1 + prob_2 + prob_3)
    norm_2 = prob_2 / (prob_1 + prob_2 + prob_3)
    norm_3 = prob_3 / (prob_1 + prob_2 + prob_3)
    
    prob_dict = {"Yes": norm_1, "Hmm": norm_2, "No": norm_3}
    
    pred = max(prob_dict, key=prob_dict.get)
    
    print(f"Prediction: {pred}, Answer: {answer}")
    
    return pred == answer

In [13]:
data = dataset[0]
eval_data(data, trainer.model, trainer.tokenizer)

Prediction: Yes, Answer: Yes


True

In [11]:
testset = load_dataset("Ksgk-fy/alignment-sft-test2-mode-1", split="test")
check_performance(testset, trainer.model, trainer.tokenizer)

Calculating perplexity:   2%|▏         | 2/116 [00:00<00:23,  4.85it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:   3%|▎         | 4/116 [00:00<00:21,  5.27it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:   5%|▌         | 6/116 [00:01<00:20,  5.49it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:   7%|▋         | 8/116 [00:01<00:19,  5.61it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:   9%|▊         | 10/116 [00:01<00:18,  5.71it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  10%|█         | 12/116 [00:02<00:18,  5.71it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  12%|█▏        | 14/116 [00:02<00:18,  5.66it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  14%|█▍        | 16/116 [00:02<00:17,  5.65it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  16%|█▌        | 18/116 [00:03<00:17,  5.65it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  17%|█▋        | 20/116 [00:03<00:17,  5.60it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  19%|█▉        | 22/116 [00:03<00:16,  5.59it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  21%|██        | 24/116 [00:04<00:16,  5.58it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  22%|██▏       | 26/116 [00:04<00:16,  5.56it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  24%|██▍       | 28/116 [00:05<00:15,  5.57it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  26%|██▌       | 30/116 [00:05<00:15,  5.58it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  28%|██▊       | 32/116 [00:05<00:15,  5.59it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  29%|██▉       | 34/116 [00:06<00:14,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  31%|███       | 36/116 [00:06<00:14,  5.63it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  33%|███▎      | 38/116 [00:06<00:13,  5.66it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  34%|███▍      | 40/116 [00:07<00:13,  5.67it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  36%|███▌      | 42/116 [00:07<00:13,  5.64it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  38%|███▊      | 44/116 [00:07<00:12,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  40%|███▉      | 46/116 [00:08<00:12,  5.59it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  41%|████▏     | 48/116 [00:08<00:12,  5.57it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  43%|████▎     | 50/116 [00:08<00:11,  5.57it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  45%|████▍     | 52/116 [00:09<00:11,  5.61it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  47%|████▋     | 54/116 [00:09<00:10,  5.67it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  48%|████▊     | 56/116 [00:10<00:10,  5.68it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  50%|█████     | 58/116 [00:10<00:10,  5.68it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  52%|█████▏    | 60/116 [00:10<00:09,  5.67it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  53%|█████▎    | 62/116 [00:11<00:09,  5.65it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  55%|█████▌    | 64/116 [00:11<00:09,  5.67it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  57%|█████▋    | 66/116 [00:11<00:08,  5.66it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  59%|█████▊    | 68/116 [00:12<00:08,  5.61it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  60%|██████    | 70/116 [00:12<00:08,  5.64it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  62%|██████▏   | 72/116 [00:12<00:07,  5.61it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  64%|██████▍   | 74/116 [00:13<00:07,  5.60it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  66%|██████▌   | 76/116 [00:13<00:07,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  67%|██████▋   | 78/116 [00:13<00:06,  5.65it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  69%|██████▉   | 80/116 [00:14<00:06,  5.68it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  71%|███████   | 82/116 [00:14<00:06,  5.67it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  72%|███████▏  | 84/116 [00:14<00:05,  5.68it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  74%|███████▍  | 86/116 [00:15<00:05,  5.72it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  76%|███████▌  | 88/116 [00:15<00:04,  5.68it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  78%|███████▊  | 90/116 [00:16<00:04,  5.65it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  79%|███████▉  | 92/116 [00:16<00:04,  5.66it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  81%|████████  | 94/116 [00:16<00:03,  5.66it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  83%|████████▎ | 96/116 [00:17<00:03,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  84%|████████▍ | 98/116 [00:17<00:03,  5.66it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  86%|████████▌ | 100/116 [00:17<00:02,  5.71it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  88%|████████▊ | 102/116 [00:18<00:02,  5.72it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  90%|████████▉ | 104/116 [00:18<00:02,  5.67it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  91%|█████████▏| 106/116 [00:18<00:01,  5.71it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  93%|█████████▎| 108/116 [00:19<00:01,  5.72it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  95%|█████████▍| 110/116 [00:19<00:01,  5.69it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  97%|█████████▋| 112/116 [00:19<00:00,  5.66it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  98%|█████████▊| 114/116 [00:20<00:00,  5.69it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity: 100%|██████████| 116/116 [00:20<00:00,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes
Success Rate:  0.74





0.7413793103448276

In [14]:
trainset = load_dataset("Ksgk-fy/alignment-sft-test2-mode-1", split="train")
check_performance(trainset, trainer.model, trainer.tokenizer)

Calculating perplexity:   0%|          | 2/460 [00:00<01:25,  5.35it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:   1%|          | 4/460 [00:00<01:24,  5.39it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:   1%|▏         | 6/460 [00:01<01:23,  5.43it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:   2%|▏         | 8/460 [00:01<01:22,  5.50it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:   2%|▏         | 10/460 [00:01<01:21,  5.54it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:   3%|▎         | 12/460 [00:02<01:20,  5.56it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:   3%|▎         | 14/460 [00:02<01:19,  5.59it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:   3%|▎         | 16/460 [00:02<01:19,  5.59it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:   4%|▍         | 18/460 [00:03<01:19,  5.57it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:   4%|▍         | 20/460 [00:03<01:18,  5.61it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:   5%|▍         | 22/460 [00:03<01:18,  5.60it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:   5%|▌         | 24/460 [00:04<01:18,  5.55it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:   6%|▌         | 26/460 [00:04<01:17,  5.56it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:   6%|▌         | 28/460 [00:05<01:17,  5.58it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:   7%|▋         | 30/460 [00:05<01:17,  5.57it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:   7%|▋         | 32/460 [00:05<01:16,  5.57it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:   7%|▋         | 34/460 [00:06<01:15,  5.62it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:   8%|▊         | 36/460 [00:06<01:15,  5.63it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:   8%|▊         | 38/460 [00:06<01:14,  5.65it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:   9%|▊         | 40/460 [00:07<01:15,  5.59it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:   9%|▉         | 42/460 [00:07<01:14,  5.60it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  10%|▉         | 44/460 [00:07<01:13,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  10%|█         | 46/460 [00:08<01:13,  5.62it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  10%|█         | 48/460 [00:08<01:13,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  11%|█         | 50/460 [00:08<01:12,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  11%|█▏        | 52/460 [00:09<01:12,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  12%|█▏        | 54/460 [00:09<01:12,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  12%|█▏        | 56/460 [00:10<01:12,  5.61it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  13%|█▎        | 58/460 [00:10<01:11,  5.62it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  13%|█▎        | 60/460 [00:10<01:11,  5.63it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  13%|█▎        | 62/460 [00:11<01:10,  5.65it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  14%|█▍        | 64/460 [00:11<01:10,  5.65it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  14%|█▍        | 66/460 [00:11<01:10,  5.62it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  15%|█▍        | 68/460 [00:12<01:09,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  15%|█▌        | 70/460 [00:12<01:09,  5.62it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  16%|█▌        | 72/460 [00:12<01:09,  5.59it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  16%|█▌        | 74/460 [00:13<01:08,  5.60it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  17%|█▋        | 76/460 [00:13<01:08,  5.60it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  17%|█▋        | 78/460 [00:13<01:08,  5.57it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  17%|█▋        | 80/460 [00:14<01:07,  5.61it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  18%|█▊        | 82/460 [00:14<01:07,  5.63it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  18%|█▊        | 84/460 [00:15<01:07,  5.59it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  19%|█▊        | 86/460 [00:15<01:06,  5.61it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  19%|█▉        | 88/460 [00:15<01:06,  5.63it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  20%|█▉        | 90/460 [00:16<01:05,  5.64it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  20%|██        | 92/460 [00:16<01:05,  5.64it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  20%|██        | 94/460 [00:16<01:04,  5.64it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  21%|██        | 96/460 [00:17<01:04,  5.60it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  21%|██▏       | 98/460 [00:17<01:04,  5.61it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  22%|██▏       | 100/460 [00:17<01:04,  5.60it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  22%|██▏       | 102/460 [00:18<01:04,  5.57it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  23%|██▎       | 104/460 [00:18<01:03,  5.60it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  23%|██▎       | 106/460 [00:18<01:03,  5.59it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  23%|██▎       | 108/460 [00:19<01:02,  5.62it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  24%|██▍       | 110/460 [00:19<01:01,  5.66it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  24%|██▍       | 112/460 [00:20<01:02,  5.59it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  25%|██▍       | 114/460 [00:20<01:01,  5.62it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  25%|██▌       | 116/460 [00:20<01:01,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  26%|██▌       | 118/460 [00:21<01:00,  5.64it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  26%|██▌       | 120/460 [00:21<01:00,  5.58it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  27%|██▋       | 122/460 [00:21<01:00,  5.56it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  27%|██▋       | 124/460 [00:22<01:00,  5.57it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  27%|██▋       | 126/460 [00:22<00:59,  5.60it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  28%|██▊       | 128/460 [00:22<00:59,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  28%|██▊       | 130/460 [00:23<00:58,  5.61it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  29%|██▊       | 132/460 [00:23<00:58,  5.63it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  29%|██▉       | 134/460 [00:23<00:59,  5.50it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  30%|██▉       | 136/460 [00:24<00:58,  5.56it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  30%|███       | 138/460 [00:24<00:57,  5.57it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  30%|███       | 140/460 [00:25<00:57,  5.56it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  31%|███       | 142/460 [00:25<00:57,  5.58it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  31%|███▏      | 144/460 [00:25<00:56,  5.60it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  32%|███▏      | 146/460 [00:26<00:55,  5.62it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  32%|███▏      | 148/460 [00:26<00:55,  5.64it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  33%|███▎      | 150/460 [00:26<00:55,  5.63it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  33%|███▎      | 152/460 [00:27<00:54,  5.65it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  33%|███▎      | 154/460 [00:27<00:54,  5.64it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  34%|███▍      | 156/460 [00:27<00:53,  5.64it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  34%|███▍      | 158/460 [00:28<00:53,  5.61it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  35%|███▍      | 160/460 [00:28<00:53,  5.62it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  35%|███▌      | 162/460 [00:28<00:53,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  36%|███▌      | 164/460 [00:29<00:52,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  36%|███▌      | 166/460 [00:29<00:52,  5.60it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  37%|███▋      | 168/460 [00:30<00:52,  5.61it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  37%|███▋      | 170/460 [00:30<00:51,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  37%|███▋      | 172/460 [00:30<00:51,  5.64it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  38%|███▊      | 174/460 [00:31<00:50,  5.65it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  38%|███▊      | 176/460 [00:31<00:50,  5.65it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  39%|███▊      | 178/460 [00:31<00:49,  5.65it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  39%|███▉      | 180/460 [00:32<00:49,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  40%|███▉      | 182/460 [00:32<00:49,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  40%|████      | 184/460 [00:32<00:49,  5.61it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  40%|████      | 186/460 [00:33<00:48,  5.63it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  41%|████      | 188/460 [00:33<00:48,  5.64it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  41%|████▏     | 190/460 [00:33<00:48,  5.61it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  42%|████▏     | 192/460 [00:34<00:47,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  42%|████▏     | 194/460 [00:34<00:47,  5.63it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  43%|████▎     | 196/460 [00:34<00:46,  5.64it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  43%|████▎     | 198/460 [00:35<00:46,  5.64it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  43%|████▎     | 200/460 [00:35<00:45,  5.65it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  44%|████▍     | 202/460 [00:36<00:45,  5.64it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  44%|████▍     | 204/460 [00:36<00:45,  5.63it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  45%|████▍     | 206/460 [00:36<00:45,  5.61it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  45%|████▌     | 208/460 [00:37<00:45,  5.60it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  46%|████▌     | 210/460 [00:37<00:44,  5.60it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: Hmm


Calculating perplexity:  46%|████▌     | 212/460 [00:37<00:44,  5.62it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  47%|████▋     | 214/460 [00:38<00:43,  5.64it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  47%|████▋     | 216/460 [00:38<00:43,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  47%|████▋     | 218/460 [00:38<00:43,  5.63it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  48%|████▊     | 220/460 [00:39<00:42,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  48%|████▊     | 222/460 [00:39<00:42,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  49%|████▊     | 224/460 [00:39<00:41,  5.64it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  49%|████▉     | 226/460 [00:40<00:41,  5.63it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  50%|████▉     | 228/460 [00:40<00:41,  5.62it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  50%|█████     | 230/460 [00:41<00:40,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  50%|█████     | 232/460 [00:41<00:40,  5.64it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  51%|█████     | 234/460 [00:41<00:40,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  51%|█████▏    | 236/460 [00:42<00:39,  5.61it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  52%|█████▏    | 238/460 [00:42<00:39,  5.61it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  52%|█████▏    | 240/460 [00:42<00:39,  5.58it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  53%|█████▎    | 242/460 [00:43<00:38,  5.60it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  53%|█████▎    | 244/460 [00:43<00:38,  5.59it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  53%|█████▎    | 246/460 [00:43<00:38,  5.58it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  54%|█████▍    | 248/460 [00:44<00:37,  5.61it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  54%|█████▍    | 250/460 [00:44<00:37,  5.62it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  55%|█████▍    | 252/460 [00:44<00:36,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  55%|█████▌    | 254/460 [00:45<00:36,  5.60it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  56%|█████▌    | 256/460 [00:45<00:36,  5.61it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  56%|█████▌    | 258/460 [00:46<00:36,  5.60it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  57%|█████▋    | 260/460 [00:46<00:35,  5.63it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  57%|█████▋    | 262/460 [00:46<00:35,  5.65it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  57%|█████▋    | 264/460 [00:47<00:34,  5.63it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  58%|█████▊    | 266/460 [00:47<00:34,  5.65it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  58%|█████▊    | 268/460 [00:47<00:34,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  59%|█████▊    | 270/460 [00:48<00:33,  5.62it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  59%|█████▉    | 272/460 [00:48<00:33,  5.64it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  60%|█████▉    | 274/460 [00:48<00:32,  5.64it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  60%|██████    | 276/460 [00:49<00:32,  5.65it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  60%|██████    | 278/460 [00:49<00:32,  5.66it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  61%|██████    | 280/460 [00:49<00:31,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  61%|██████▏   | 282/460 [00:50<00:31,  5.63it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  62%|██████▏   | 284/460 [00:50<00:31,  5.63it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  62%|██████▏   | 286/460 [00:50<00:30,  5.62it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  63%|██████▎   | 288/460 [00:51<00:30,  5.61it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  63%|██████▎   | 290/460 [00:51<00:30,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  63%|██████▎   | 292/460 [00:52<00:30,  5.59it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  64%|██████▍   | 294/460 [00:52<00:29,  5.57it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  64%|██████▍   | 296/460 [00:52<00:29,  5.56it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  65%|██████▍   | 298/460 [00:53<00:29,  5.58it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  65%|██████▌   | 300/460 [00:53<00:28,  5.53it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  66%|██████▌   | 302/460 [00:53<00:28,  5.48it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  66%|██████▌   | 304/460 [00:54<00:28,  5.54it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  67%|██████▋   | 306/460 [00:54<00:27,  5.57it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  67%|██████▋   | 308/460 [00:54<00:27,  5.60it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  67%|██████▋   | 310/460 [00:55<00:26,  5.63it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  68%|██████▊   | 312/460 [00:55<00:26,  5.60it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  68%|██████▊   | 314/460 [00:56<00:25,  5.62it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  69%|██████▊   | 316/460 [00:56<00:25,  5.62it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  69%|██████▉   | 318/460 [00:56<00:25,  5.62it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  70%|██████▉   | 320/460 [00:57<00:24,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  70%|███████   | 322/460 [00:57<00:24,  5.56it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  70%|███████   | 324/460 [00:57<00:25,  5.39it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  71%|███████   | 326/460 [00:58<00:24,  5.46it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  71%|███████▏  | 328/460 [00:58<00:23,  5.56it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  72%|███████▏  | 330/460 [00:58<00:23,  5.58it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  72%|███████▏  | 332/460 [00:59<00:22,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  73%|███████▎  | 334/460 [00:59<00:22,  5.65it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  73%|███████▎  | 336/460 [00:59<00:21,  5.66it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  73%|███████▎  | 338/460 [01:00<00:21,  5.63it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  74%|███████▍  | 340/460 [01:00<00:21,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  74%|███████▍  | 342/460 [01:01<00:20,  5.62it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  75%|███████▍  | 344/460 [01:01<00:20,  5.61it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  75%|███████▌  | 346/460 [01:01<00:20,  5.62it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  76%|███████▌  | 348/460 [01:02<00:20,  5.58it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  76%|███████▌  | 350/460 [01:02<00:19,  5.59it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  77%|███████▋  | 352/460 [01:02<00:19,  5.61it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  77%|███████▋  | 354/460 [01:03<00:18,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  77%|███████▋  | 356/460 [01:03<00:18,  5.63it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  78%|███████▊  | 358/460 [01:03<00:18,  5.62it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  78%|███████▊  | 360/460 [01:04<00:17,  5.65it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  79%|███████▊  | 362/460 [01:04<00:17,  5.63it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  79%|███████▉  | 364/460 [01:04<00:17,  5.64it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  80%|███████▉  | 366/460 [01:05<00:16,  5.66it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  80%|████████  | 368/460 [01:05<00:16,  5.64it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  80%|████████  | 370/460 [01:06<00:15,  5.64it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  81%|████████  | 372/460 [01:06<00:15,  5.65it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  81%|████████▏ | 374/460 [01:06<00:15,  5.64it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  82%|████████▏ | 376/460 [01:07<00:14,  5.64it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  82%|████████▏ | 378/460 [01:07<00:14,  5.68it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  83%|████████▎ | 380/460 [01:07<00:13,  5.72it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  83%|████████▎ | 382/460 [01:08<00:13,  5.71it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  83%|████████▎ | 384/460 [01:08<00:13,  5.69it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  84%|████████▍ | 386/460 [01:08<00:12,  5.73it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity:  84%|████████▍ | 388/460 [01:09<00:12,  5.67it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  85%|████████▍ | 390/460 [01:09<00:12,  5.66it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  85%|████████▌ | 392/460 [01:09<00:11,  5.68it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  86%|████████▌ | 394/460 [01:10<00:11,  5.74it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  86%|████████▌ | 396/460 [01:10<00:11,  5.76it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  87%|████████▋ | 398/460 [01:10<00:10,  5.77it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  87%|████████▋ | 400/460 [01:11<00:10,  5.75it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  87%|████████▋ | 402/460 [01:11<00:10,  5.74it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  88%|████████▊ | 404/460 [01:11<00:09,  5.71it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  88%|████████▊ | 406/460 [01:12<00:09,  5.73it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  89%|████████▊ | 408/460 [01:12<00:09,  5.75it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  89%|████████▉ | 410/460 [01:13<00:08,  5.75it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  90%|████████▉ | 412/460 [01:13<00:08,  5.76it/s]

Prediction: No, Answer: No
Prediction: No, Answer: No


Calculating perplexity:  90%|█████████ | 414/460 [01:13<00:07,  5.75it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  90%|█████████ | 416/460 [01:14<00:07,  5.76it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  91%|█████████ | 418/460 [01:14<00:07,  5.74it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  91%|█████████▏| 420/460 [01:14<00:06,  5.74it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  92%|█████████▏| 422/460 [01:15<00:06,  5.71it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  92%|█████████▏| 424/460 [01:15<00:06,  5.72it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  93%|█████████▎| 426/460 [01:15<00:05,  5.73it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  93%|█████████▎| 428/460 [01:16<00:05,  5.74it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  93%|█████████▎| 430/460 [01:16<00:05,  5.75it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Hmm


Calculating perplexity:  94%|█████████▍| 432/460 [01:16<00:04,  5.74it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity:  94%|█████████▍| 434/460 [01:17<00:04,  5.74it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  95%|█████████▍| 436/460 [01:17<00:04,  5.77it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  95%|█████████▌| 438/460 [01:17<00:03,  5.73it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Hmm


Calculating perplexity:  96%|█████████▌| 440/460 [01:18<00:03,  5.74it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  96%|█████████▌| 442/460 [01:18<00:03,  5.77it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  97%|█████████▋| 444/460 [01:18<00:02,  5.78it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  97%|█████████▋| 446/460 [01:19<00:02,  5.75it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  97%|█████████▋| 448/460 [01:19<00:02,  5.78it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes


Calculating perplexity:  98%|█████████▊| 450/460 [01:19<00:01,  5.79it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Hmm


Calculating perplexity:  98%|█████████▊| 452/460 [01:20<00:01,  5.77it/s]

Prediction: Yes, Answer: Hmm
Prediction: Yes, Answer: Yes


Calculating perplexity:  99%|█████████▊| 454/460 [01:20<00:01,  5.77it/s]

Prediction: Yes, Answer: Hmm
Prediction: No, Answer: No


Calculating perplexity:  99%|█████████▉| 456/460 [01:20<00:00,  5.76it/s]

Prediction: No, Answer: No
Prediction: Yes, Answer: Yes


Calculating perplexity: 100%|█████████▉| 458/460 [01:21<00:00,  5.74it/s]

Prediction: Yes, Answer: Yes
Prediction: No, Answer: No


Calculating perplexity: 100%|██████████| 460/460 [01:21<00:00,  5.63it/s]

Prediction: Yes, Answer: Yes
Prediction: Yes, Answer: Yes
Success Rate:  0.72





0.717391304347826