In [5]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from unsloth import FastLanguageModel
from unsloth import is_bfloat16_supported
import torch
max_seq_length = 512
lora_rank = 32 


model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "peytonnash/piqa_10",
    max_seq_length = max_seq_length,
    load_in_4bit = True,
    fast_inference = True,
    max_lora_rank = lora_rank,
    gpu_memory_utilization = 1
)

==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    GPU: NVIDIA L4. Max memory: 21.951 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Your GPU cannot handle sequence lengths of 256 due to limited GPU memory.
Unsloth: Your GPU can only handle approximately the maximum sequence length of 256.
Unsloth: vLLM loading unsloth/meta-llama-3.1-8b-instruct-unsloth-bnb-4bit with actual GPU utilization = 4.81%
Unsloth: Your GPU has CUDA compute capability 8.9 with VRAM = 21.95 GB.
Unsloth: Using conservativeness = 1.0. Chunked prefill tokens = 256. Num Sequences = 128.
Unsloth: vLLM's KV Cache can use up to 0.0 GB. Also swap space = 0 GB.
INFO 03-13 14:25:03 config.py:549] 

RuntimeError: CUDA out of memory. Tried to allocate 28.00 MiB. GPU 0 has a total capacity of 21.95 GiB of which 21.88 MiB is free. Process 25238 has 18.36 GiB memory in use. Process 45753 has 1.25 GiB memory in use. Including non-PyTorch memory, this process has 2.30 GiB memory in use. Of the allocated memory 2.09 GiB is allocated by PyTorch, and 5.98 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [6]:
# Load and prep dataset
SYSTEM_PROMPT = """
You are a helpful AI answering multiple-choice science questions. and respond in the following format:
<reason>  
...
</reason>  
<answer>  
...
</answer>  
"""

text = tokenizer.apply_chat_template([
        {"role": "system", "content": SYSTEM_PROMPT},
    ], tokenize=False, add_generation_prompt=True)

print(text)

NameError: name 'tokenizer' is not defined

In [18]:
def format_question(question, choices):
    choice_text = "\n".join([f"{chr(65+i)}) {choice}" for i, choice in enumerate(choices)])
    return tokenizer.apply_chat_template([
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"{question}\n\n{choice_text}\n\nChoose the best answer."}
    ], tokenize=False, add_generation_prompt=True)


In [21]:
def get_prediction(prompt, max_seq_length=2048):
    """Generates an answer using the fine-tuned model."""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_seq_length).to(model.device)

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_length=min(inputs.input_ids.shape[1] + 256, max_seq_length),  # Ensure length limit
            temperature=0.7,
            top_p=0.9,
            do_sample=True
        )

    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

In [None]:
from datasets import load_dataset
import re
from tqdm import tqdm  # Import tqdm for progress bar

arc_dataset = load_dataset("ai2_arc", "ARC-Challenge")
arc_test = arc_dataset["test"]
predictions = []

# Initialize counters for accuracy calculation
total_questions = 0
correct_predictions = 0

# Use tqdm to show progress bar
for example in tqdm(arc_test, desc="Processing ARC-Challenge Questions", unit="question"):
    formatted_question = format_question(example["question"], example["choices"]["text"])
    model_output = get_prediction(formatted_question)
    
    # Extract the letter choice from the model output (A, B, C, or D)
    pattern = r"<answer>\s*([A-D])\)"
    match = re.search(pattern, model_output)
    extracted_letter = match.group(1) if match else "Unknown"
    
    # Update counters
    total_questions += 1
    if extracted_letter == example["answerKey"]:
        correct_predictions += 1
    
    # Calculate current accuracy
    current_accuracy = (correct_predictions / total_questions) * 100
    
    predictions.append({
        "id": example["id"],
        "predicted": model_output,
        "extracted_answer": extracted_letter,
        "answerKey": example["answerKey"],
        "is_correct": extracted_letter == example["answerKey"]
    })
    
    print(f"Prediction: {extracted_letter}, Correct: {example['answerKey']}, Running Accuracy: {current_accuracy:.2f}%")

# Final accuracy
final_accuracy = (correct_predictions / len(arc_test)) * 100
print(f"\nFinal Accuracy: {final_accuracy:.2f}% ({correct_predictions}/{len(arc_test)})")

Processing ARC-Challenge Questions:   0%|          | 1/1172 [00:06<2:13:55,  6.86s/question]

Prediction: C, Correct: C, Running Accuracy: 100.00%


Processing ARC-Challenge Questions:   0%|          | 2/1172 [00:14<2:22:01,  7.28s/question]

Prediction: B, Correct: B, Running Accuracy: 100.00%


Processing ARC-Challenge Questions:   0%|          | 3/1172 [00:23<2:37:02,  8.06s/question]

Prediction: C, Correct: C, Running Accuracy: 100.00%


Processing ARC-Challenge Questions:   0%|          | 4/1172 [00:33<2:52:50,  8.88s/question]

Prediction: D, Correct: D, Running Accuracy: 100.00%


Processing ARC-Challenge Questions:   0%|          | 5/1172 [00:46<3:18:14, 10.19s/question]

Prediction: D, Correct: D, Running Accuracy: 100.00%


Processing ARC-Challenge Questions:   1%|          | 6/1172 [00:53<3:01:53,  9.36s/question]

Prediction: B, Correct: B, Running Accuracy: 100.00%


Processing ARC-Challenge Questions:   1%|          | 7/1172 [01:02<2:56:17,  9.08s/question]

Prediction: C, Correct: C, Running Accuracy: 100.00%


Processing ARC-Challenge Questions:   1%|          | 8/1172 [01:10<2:49:29,  8.74s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 87.50%


Processing ARC-Challenge Questions:   1%|          | 9/1172 [01:20<3:00:31,  9.31s/question]

Prediction: A, Correct: B, Running Accuracy: 77.78%


Processing ARC-Challenge Questions:   1%|          | 10/1172 [01:32<3:12:45,  9.95s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 70.00%


Processing ARC-Challenge Questions:   1%|          | 11/1172 [01:46<3:37:49, 11.26s/question]

Prediction: B, Correct: B, Running Accuracy: 72.73%


Processing ARC-Challenge Questions:   1%|          | 12/1172 [01:54<3:15:51, 10.13s/question]

Prediction: A, Correct: A, Running Accuracy: 75.00%


Processing ARC-Challenge Questions:   1%|          | 13/1172 [02:00<2:53:37,  8.99s/question]

Prediction: C, Correct: C, Running Accuracy: 76.92%


Processing ARC-Challenge Questions:   1%|          | 14/1172 [02:06<2:37:38,  8.17s/question]

Prediction: D, Correct: C, Running Accuracy: 71.43%


Processing ARC-Challenge Questions:   1%|▏         | 15/1172 [02:12<2:26:18,  7.59s/question]

Prediction: C, Correct: C, Running Accuracy: 73.33%


Processing ARC-Challenge Questions:   1%|▏         | 16/1172 [02:18<2:15:15,  7.02s/question]

Prediction: A, Correct: A, Running Accuracy: 75.00%


Processing ARC-Challenge Questions:   1%|▏         | 17/1172 [02:25<2:12:36,  6.89s/question]

Prediction: C, Correct: C, Running Accuracy: 76.47%


Processing ARC-Challenge Questions:   2%|▏         | 18/1172 [02:33<2:22:46,  7.42s/question]

Prediction: B, Correct: B, Running Accuracy: 77.78%


Processing ARC-Challenge Questions:   2%|▏         | 19/1172 [02:43<2:34:13,  8.03s/question]

Prediction: A, Correct: A, Running Accuracy: 78.95%


Processing ARC-Challenge Questions:   2%|▏         | 20/1172 [02:53<2:44:57,  8.59s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 75.00%


Processing ARC-Challenge Questions:   2%|▏         | 21/1172 [03:06<3:10:36,  9.94s/question]

Prediction: B, Correct: B, Running Accuracy: 76.19%


Processing ARC-Challenge Questions:   2%|▏         | 22/1172 [03:18<3:25:51, 10.74s/question]

Prediction: B, Correct: B, Running Accuracy: 77.27%


Processing ARC-Challenge Questions:   2%|▏         | 23/1172 [03:24<2:55:40,  9.17s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 73.91%


Processing ARC-Challenge Questions:   2%|▏         | 24/1172 [03:34<2:58:33,  9.33s/question]

Prediction: C, Correct: C, Running Accuracy: 75.00%


Processing ARC-Challenge Questions:   2%|▏         | 25/1172 [03:41<2:46:08,  8.69s/question]

Prediction: B, Correct: B, Running Accuracy: 76.00%


Processing ARC-Challenge Questions:   2%|▏         | 26/1172 [03:50<2:47:43,  8.78s/question]

Prediction: D, Correct: D, Running Accuracy: 76.92%


Processing ARC-Challenge Questions:   2%|▏         | 27/1172 [03:58<2:46:54,  8.75s/question]

Prediction: B, Correct: B, Running Accuracy: 77.78%


Processing ARC-Challenge Questions:   2%|▏         | 28/1172 [04:11<3:10:31,  9.99s/question]

Prediction: A, Correct: B, Running Accuracy: 75.00%


Processing ARC-Challenge Questions:   2%|▏         | 29/1172 [04:20<2:59:50,  9.44s/question]

Prediction: C, Correct: C, Running Accuracy: 75.86%


Processing ARC-Challenge Questions:   3%|▎         | 30/1172 [04:26<2:42:07,  8.52s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 73.33%


Processing ARC-Challenge Questions:   3%|▎         | 31/1172 [04:35<2:43:34,  8.60s/question]

Prediction: D, Correct: D, Running Accuracy: 74.19%


Processing ARC-Challenge Questions:   3%|▎         | 32/1172 [04:43<2:43:14,  8.59s/question]

Prediction: C, Correct: C, Running Accuracy: 75.00%


Processing ARC-Challenge Questions:   3%|▎         | 33/1172 [04:50<2:32:15,  8.02s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 72.73%


Processing ARC-Challenge Questions:   3%|▎         | 34/1172 [04:56<2:19:47,  7.37s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 70.59%


Processing ARC-Challenge Questions:   3%|▎         | 35/1172 [05:02<2:13:40,  7.05s/question]

Prediction: A, Correct: A, Running Accuracy: 71.43%


Processing ARC-Challenge Questions:   3%|▎         | 36/1172 [05:10<2:16:31,  7.21s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 69.44%


Processing ARC-Challenge Questions:   3%|▎         | 37/1172 [05:16<2:11:23,  6.95s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 67.57%


Processing ARC-Challenge Questions:   3%|▎         | 38/1172 [05:27<2:31:47,  8.03s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.79%


Processing ARC-Challenge Questions:   3%|▎         | 39/1172 [05:32<2:19:32,  7.39s/question]

Prediction: C, Correct: C, Running Accuracy: 66.67%


Processing ARC-Challenge Questions:   3%|▎         | 40/1172 [05:40<2:18:52,  7.36s/question]

Prediction: B, Correct: B, Running Accuracy: 67.50%


Processing ARC-Challenge Questions:   3%|▎         | 41/1172 [05:46<2:09:43,  6.88s/question]

Prediction: C, Correct: C, Running Accuracy: 68.29%


Processing ARC-Challenge Questions:   4%|▎         | 42/1172 [05:52<2:05:33,  6.67s/question]

Prediction: A, Correct: A, Running Accuracy: 69.05%


Processing ARC-Challenge Questions:   4%|▎         | 43/1172 [06:01<2:17:52,  7.33s/question]

Prediction: C, Correct: C, Running Accuracy: 69.77%


Processing ARC-Challenge Questions:   4%|▍         | 44/1172 [06:09<2:23:54,  7.65s/question]

Prediction: D, Correct: D, Running Accuracy: 70.45%


Processing ARC-Challenge Questions:   4%|▍         | 45/1172 [06:21<2:49:14,  9.01s/question]

Prediction: B, Correct: 2, Running Accuracy: 68.89%


Processing ARC-Challenge Questions:   4%|▍         | 46/1172 [06:27<2:33:11,  8.16s/question]

Prediction: A, Correct: B, Running Accuracy: 67.39%


Processing ARC-Challenge Questions:   4%|▍         | 47/1172 [06:36<2:37:43,  8.41s/question]

Prediction: C, Correct: C, Running Accuracy: 68.09%


Processing ARC-Challenge Questions:   4%|▍         | 48/1172 [06:43<2:30:05,  8.01s/question]

Prediction: C, Correct: C, Running Accuracy: 68.75%


Processing ARC-Challenge Questions:   4%|▍         | 49/1172 [06:54<2:41:30,  8.63s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 67.35%


Processing ARC-Challenge Questions:   4%|▍         | 50/1172 [07:02<2:38:16,  8.46s/question]

Prediction: D, Correct: A, Running Accuracy: 66.00%


Processing ARC-Challenge Questions:   4%|▍         | 51/1172 [07:14<2:59:02,  9.58s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 64.71%


Processing ARC-Challenge Questions:   4%|▍         | 52/1172 [07:20<2:40:39,  8.61s/question]

Prediction: C, Correct: C, Running Accuracy: 65.38%


Processing ARC-Challenge Questions:   5%|▍         | 53/1172 [07:32<2:58:17,  9.56s/question]

Prediction: A, Correct: A, Running Accuracy: 66.04%


Processing ARC-Challenge Questions:   5%|▍         | 54/1172 [07:38<2:39:45,  8.57s/question]

Prediction: B, Correct: C, Running Accuracy: 64.81%


Processing ARC-Challenge Questions:   5%|▍         | 55/1172 [07:44<2:22:33,  7.66s/question]

Prediction: B, Correct: C, Running Accuracy: 63.64%


Processing ARC-Challenge Questions:   5%|▍         | 56/1172 [07:52<2:24:05,  7.75s/question]

Prediction: B, Correct: B, Running Accuracy: 64.29%


Processing ARC-Challenge Questions:   5%|▍         | 57/1172 [08:02<2:40:32,  8.64s/question]

Prediction: D, Correct: C, Running Accuracy: 63.16%


Processing ARC-Challenge Questions:   5%|▍         | 58/1172 [08:12<2:45:45,  8.93s/question]

Prediction: A, Correct: A, Running Accuracy: 63.79%


Processing ARC-Challenge Questions:   5%|▌         | 59/1172 [08:21<2:47:15,  9.02s/question]

Prediction: A, Correct: A, Running Accuracy: 64.41%


Processing ARC-Challenge Questions:   5%|▌         | 60/1172 [08:39<3:36:24, 11.68s/question]

Prediction: C, Correct: C, Running Accuracy: 65.00%


Processing ARC-Challenge Questions:   5%|▌         | 61/1172 [08:48<3:19:10, 10.76s/question]

Prediction: D, Correct: D, Running Accuracy: 65.57%


Processing ARC-Challenge Questions:   5%|▌         | 62/1172 [08:55<2:58:46,  9.66s/question]

Prediction: B, Correct: B, Running Accuracy: 66.13%


Processing ARC-Challenge Questions:   5%|▌         | 63/1172 [09:03<2:50:16,  9.21s/question]

Prediction: D, Correct: D, Running Accuracy: 66.67%


Processing ARC-Challenge Questions:   5%|▌         | 64/1172 [09:10<2:37:47,  8.54s/question]

Prediction: C, Correct: C, Running Accuracy: 67.19%


Processing ARC-Challenge Questions:   6%|▌         | 65/1172 [09:19<2:37:46,  8.55s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 66.15%


Processing ARC-Challenge Questions:   6%|▌         | 66/1172 [09:26<2:31:26,  8.22s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.15%


Processing ARC-Challenge Questions:   6%|▌         | 67/1172 [09:35<2:38:15,  8.59s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.18%


Processing ARC-Challenge Questions:   6%|▌         | 68/1172 [09:43<2:31:49,  8.25s/question]

Prediction: C, Correct: C, Running Accuracy: 64.71%


Processing ARC-Challenge Questions:   6%|▌         | 69/1172 [09:50<2:25:16,  7.90s/question]

Prediction: A, Correct: B, Running Accuracy: 63.77%


Processing ARC-Challenge Questions:   6%|▌         | 70/1172 [10:03<2:52:11,  9.38s/question]

Prediction: A, Correct: A, Running Accuracy: 64.29%


Processing ARC-Challenge Questions:   6%|▌         | 71/1172 [10:10<2:40:05,  8.72s/question]

Prediction: B, Correct: A, Running Accuracy: 63.38%


Processing ARC-Challenge Questions:   6%|▌         | 72/1172 [10:18<2:37:30,  8.59s/question]

Prediction: C, Correct: C, Running Accuracy: 63.89%


Processing ARC-Challenge Questions:   6%|▌         | 73/1172 [10:26<2:31:45,  8.29s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 63.01%


Processing ARC-Challenge Questions:   6%|▋         | 74/1172 [10:35<2:38:50,  8.68s/question]

Prediction: D, Correct: D, Running Accuracy: 63.51%


Processing ARC-Challenge Questions:   6%|▋         | 75/1172 [10:41<2:23:41,  7.86s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 62.67%


Processing ARC-Challenge Questions:   6%|▋         | 76/1172 [10:49<2:20:14,  7.68s/question]

Prediction: A, Correct: A, Running Accuracy: 63.16%


Processing ARC-Challenge Questions:   7%|▋         | 77/1172 [11:02<2:50:15,  9.33s/question]

Prediction: B, Correct: B, Running Accuracy: 63.64%


Processing ARC-Challenge Questions:   7%|▋         | 78/1172 [11:10<2:45:22,  9.07s/question]

Prediction: A, Correct: B, Running Accuracy: 62.82%


Processing ARC-Challenge Questions:   7%|▋         | 79/1172 [11:17<2:30:44,  8.28s/question]

Prediction: B, Correct: B, Running Accuracy: 63.29%


Processing ARC-Challenge Questions:   7%|▋         | 80/1172 [11:26<2:38:30,  8.71s/question]

Prediction: A, Correct: A, Running Accuracy: 63.75%


Processing ARC-Challenge Questions:   7%|▋         | 81/1172 [11:39<2:57:17,  9.75s/question]

Prediction: D, Correct: D, Running Accuracy: 64.20%


Processing ARC-Challenge Questions:   7%|▋         | 82/1172 [11:46<2:47:04,  9.20s/question]

Prediction: C, Correct: C, Running Accuracy: 64.63%


Processing ARC-Challenge Questions:   7%|▋         | 83/1172 [11:54<2:39:06,  8.77s/question]

Prediction: C, Correct: C, Running Accuracy: 65.06%


Processing ARC-Challenge Questions:   7%|▋         | 84/1172 [12:03<2:37:29,  8.68s/question]

Prediction: D, Correct: D, Running Accuracy: 65.48%


Processing ARC-Challenge Questions:   7%|▋         | 85/1172 [12:15<2:55:41,  9.70s/question]

Prediction: D, Correct: D, Running Accuracy: 65.88%


Processing ARC-Challenge Questions:   7%|▋         | 86/1172 [12:23<2:49:12,  9.35s/question]

Prediction: A, Correct: A, Running Accuracy: 66.28%


Processing ARC-Challenge Questions:   7%|▋         | 87/1172 [12:30<2:32:10,  8.42s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.52%


Processing ARC-Challenge Questions:   8%|▊         | 88/1172 [12:39<2:36:02,  8.64s/question]

Prediction: D, Correct: D, Running Accuracy: 65.91%


Processing ARC-Challenge Questions:   8%|▊         | 89/1172 [12:50<2:50:42,  9.46s/question]

Prediction: A, Correct: C, Running Accuracy: 65.17%


Processing ARC-Challenge Questions:   8%|▊         | 90/1172 [12:59<2:44:50,  9.14s/question]

Prediction: C, Correct: C, Running Accuracy: 65.56%


Processing ARC-Challenge Questions:   8%|▊         | 91/1172 [13:07<2:38:40,  8.81s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.84%


Processing ARC-Challenge Questions:   8%|▊         | 92/1172 [13:15<2:35:57,  8.66s/question]

Prediction: A, Correct: D, Running Accuracy: 64.13%


Processing ARC-Challenge Questions:   8%|▊         | 93/1172 [13:24<2:36:30,  8.70s/question]

Prediction: D, Correct: D, Running Accuracy: 64.52%


Processing ARC-Challenge Questions:   8%|▊         | 94/1172 [13:31<2:30:42,  8.39s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 63.83%


Processing ARC-Challenge Questions:   8%|▊         | 95/1172 [13:38<2:21:18,  7.87s/question]

Prediction: B, Correct: B, Running Accuracy: 64.21%


Processing ARC-Challenge Questions:   8%|▊         | 96/1172 [13:46<2:23:55,  8.03s/question]

Prediction: B, Correct: C, Running Accuracy: 63.54%


Processing ARC-Challenge Questions:   8%|▊         | 97/1172 [13:54<2:20:25,  7.84s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 62.89%


Processing ARC-Challenge Questions:   8%|▊         | 98/1172 [14:00<2:13:36,  7.46s/question]

Prediction: B, Correct: B, Running Accuracy: 63.27%


Processing ARC-Challenge Questions:   8%|▊         | 99/1172 [14:05<2:00:03,  6.71s/question]

Prediction: B, Correct: B, Running Accuracy: 63.64%


Processing ARC-Challenge Questions:   9%|▊         | 100/1172 [14:12<2:00:57,  6.77s/question]

Prediction: B, Correct: B, Running Accuracy: 64.00%


Processing ARC-Challenge Questions:   9%|▊         | 101/1172 [14:18<1:54:48,  6.43s/question]

Prediction: C, Correct: C, Running Accuracy: 64.36%


Processing ARC-Challenge Questions:   9%|▊         | 102/1172 [14:27<2:07:41,  7.16s/question]

Prediction: D, Correct: D, Running Accuracy: 64.71%


Processing ARC-Challenge Questions:   9%|▉         | 103/1172 [14:33<2:00:19,  6.75s/question]

Prediction: A, Correct: A, Running Accuracy: 65.05%


Processing ARC-Challenge Questions:   9%|▉         | 104/1172 [14:41<2:11:28,  7.39s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 64.42%


Processing ARC-Challenge Questions:   9%|▉         | 105/1172 [14:52<2:28:59,  8.38s/question]

Prediction: C, Correct: C, Running Accuracy: 64.76%


Processing ARC-Challenge Questions:   9%|▉         | 106/1172 [15:00<2:27:00,  8.27s/question]

Prediction: B, Correct: B, Running Accuracy: 65.09%


Processing ARC-Challenge Questions:   9%|▉         | 107/1172 [15:08<2:26:43,  8.27s/question]

Prediction: D, Correct: D, Running Accuracy: 65.42%


Processing ARC-Challenge Questions:   9%|▉         | 108/1172 [15:17<2:25:53,  8.23s/question]

Prediction: B, Correct: B, Running Accuracy: 65.74%


Processing ARC-Challenge Questions:   9%|▉         | 109/1172 [15:25<2:27:34,  8.33s/question]

Prediction: D, Correct: D, Running Accuracy: 66.06%


Processing ARC-Challenge Questions:   9%|▉         | 110/1172 [15:31<2:13:03,  7.52s/question]

Prediction: A, Correct: A, Running Accuracy: 66.36%


Processing ARC-Challenge Questions:   9%|▉         | 111/1172 [15:41<2:27:17,  8.33s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.77%


Processing ARC-Challenge Questions:  10%|▉         | 112/1172 [15:49<2:27:17,  8.34s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.18%


Processing ARC-Challenge Questions:  10%|▉         | 113/1172 [15:56<2:21:01,  7.99s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.60%


Processing ARC-Challenge Questions:  10%|▉         | 114/1172 [16:03<2:15:34,  7.69s/question]

Prediction: A, Correct: A, Running Accuracy: 64.91%


Processing ARC-Challenge Questions:  10%|▉         | 115/1172 [16:13<2:23:28,  8.14s/question]

Prediction: B, Correct: B, Running Accuracy: 65.22%


Processing ARC-Challenge Questions:  10%|▉         | 116/1172 [16:19<2:12:18,  7.52s/question]

Prediction: D, Correct: D, Running Accuracy: 65.52%


Processing ARC-Challenge Questions:  10%|▉         | 117/1172 [16:26<2:12:56,  7.56s/question]

Prediction: B, Correct: B, Running Accuracy: 65.81%


Processing ARC-Challenge Questions:  10%|█         | 118/1172 [16:33<2:05:42,  7.16s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.25%


Processing ARC-Challenge Questions:  10%|█         | 119/1172 [16:43<2:23:34,  8.18s/question]

Prediction: A, Correct: D, Running Accuracy: 64.71%


Processing ARC-Challenge Questions:  10%|█         | 120/1172 [16:55<2:41:08,  9.19s/question]

Prediction: C, Correct: C, Running Accuracy: 65.00%


Processing ARC-Challenge Questions:  10%|█         | 121/1172 [17:04<2:41:29,  9.22s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.46%


Processing ARC-Challenge Questions:  10%|█         | 122/1172 [17:14<2:43:42,  9.35s/question]

Prediction: A, Correct: A, Running Accuracy: 64.75%


Processing ARC-Challenge Questions:  10%|█         | 123/1172 [17:20<2:29:20,  8.54s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 64.23%


Processing ARC-Challenge Questions:  11%|█         | 124/1172 [17:33<2:49:34,  9.71s/question]

Prediction: A, Correct: A, Running Accuracy: 64.52%


Processing ARC-Challenge Questions:  11%|█         | 125/1172 [17:41<2:44:03,  9.40s/question]

Prediction: A, Correct: B, Running Accuracy: 64.00%


Processing ARC-Challenge Questions:  11%|█         | 126/1172 [17:59<3:28:14, 11.94s/question]

Prediction: D, Correct: D, Running Accuracy: 64.29%


Processing ARC-Challenge Questions:  11%|█         | 127/1172 [18:06<3:02:26, 10.48s/question]

Prediction: C, Correct: A, Running Accuracy: 63.78%


Processing ARC-Challenge Questions:  11%|█         | 128/1172 [18:12<2:36:47,  9.01s/question]

Prediction: B, Correct: B, Running Accuracy: 64.06%


Processing ARC-Challenge Questions:  11%|█         | 129/1172 [18:19<2:25:52,  8.39s/question]

Prediction: D, Correct: 4, Running Accuracy: 63.57%


Processing ARC-Challenge Questions:  11%|█         | 130/1172 [18:28<2:30:46,  8.68s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 63.08%


Processing ARC-Challenge Questions:  11%|█         | 131/1172 [18:37<2:29:17,  8.60s/question]

Prediction: B, Correct: B, Running Accuracy: 63.36%


Processing ARC-Challenge Questions:  11%|█▏        | 132/1172 [18:46<2:33:07,  8.83s/question]

Prediction: A, Correct: C, Running Accuracy: 62.88%


Processing ARC-Challenge Questions:  11%|█▏        | 133/1172 [18:51<2:15:06,  7.80s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 62.41%


Processing ARC-Challenge Questions:  11%|█▏        | 134/1172 [19:04<2:40:05,  9.25s/question]

Prediction: B, Correct: B, Running Accuracy: 62.69%


Processing ARC-Challenge Questions:  12%|█▏        | 135/1172 [19:11<2:28:11,  8.57s/question]

Prediction: B, Correct: B, Running Accuracy: 62.96%


Processing ARC-Challenge Questions:  12%|█▏        | 136/1172 [19:20<2:28:00,  8.57s/question]

Prediction: B, Correct: 2, Running Accuracy: 62.50%


Processing ARC-Challenge Questions:  12%|█▏        | 137/1172 [19:29<2:33:36,  8.90s/question]

Prediction: B, Correct: B, Running Accuracy: 62.77%


Processing ARC-Challenge Questions:  12%|█▏        | 138/1172 [19:38<2:29:58,  8.70s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 62.32%


Processing ARC-Challenge Questions:  12%|█▏        | 139/1172 [19:48<2:37:54,  9.17s/question]

Prediction: A, Correct: A, Running Accuracy: 62.59%


Processing ARC-Challenge Questions:  12%|█▏        | 140/1172 [19:57<2:39:42,  9.29s/question]

Prediction: A, Correct: B, Running Accuracy: 62.14%


Processing ARC-Challenge Questions:  12%|█▏        | 141/1172 [20:04<2:28:12,  8.63s/question]

Prediction: D, Correct: D, Running Accuracy: 62.41%


Processing ARC-Challenge Questions:  12%|█▏        | 142/1172 [20:12<2:24:40,  8.43s/question]

Prediction: C, Correct: C, Running Accuracy: 62.68%


Processing ARC-Challenge Questions:  12%|█▏        | 143/1172 [20:23<2:35:34,  9.07s/question]

Prediction: D, Correct: A, Running Accuracy: 62.24%


Processing ARC-Challenge Questions:  12%|█▏        | 144/1172 [20:29<2:21:58,  8.29s/question]

Prediction: A, Correct: A, Running Accuracy: 62.50%


Processing ARC-Challenge Questions:  12%|█▏        | 145/1172 [20:37<2:19:34,  8.15s/question]

Prediction: A, Correct: A, Running Accuracy: 62.76%


Processing ARC-Challenge Questions:  12%|█▏        | 146/1172 [20:46<2:24:10,  8.43s/question]

Prediction: C, Correct: C, Running Accuracy: 63.01%


Processing ARC-Challenge Questions:  13%|█▎        | 147/1172 [20:53<2:16:05,  7.97s/question]

Prediction: B, Correct: B, Running Accuracy: 63.27%


Processing ARC-Challenge Questions:  13%|█▎        | 148/1172 [21:00<2:11:59,  7.73s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 62.84%


Processing ARC-Challenge Questions:  13%|█▎        | 149/1172 [21:10<2:19:39,  8.19s/question]

Prediction: D, Correct: C, Running Accuracy: 62.42%


Processing ARC-Challenge Questions:  13%|█▎        | 150/1172 [21:17<2:13:16,  7.82s/question]

Prediction: A, Correct: A, Running Accuracy: 62.67%


Processing ARC-Challenge Questions:  13%|█▎        | 151/1172 [21:24<2:11:52,  7.75s/question]

Prediction: C, Correct: C, Running Accuracy: 62.91%


Processing ARC-Challenge Questions:  13%|█▎        | 152/1172 [21:32<2:11:19,  7.72s/question]

Prediction: D, Correct: D, Running Accuracy: 63.16%


Processing ARC-Challenge Questions:  13%|█▎        | 153/1172 [21:42<2:23:50,  8.47s/question]

Prediction: D, Correct: D, Running Accuracy: 63.40%


Processing ARC-Challenge Questions:  13%|█▎        | 154/1172 [21:52<2:28:56,  8.78s/question]

Prediction: C, Correct: B, Running Accuracy: 62.99%


Processing ARC-Challenge Questions:  13%|█▎        | 155/1172 [22:03<2:44:35,  9.71s/question]

Prediction: B, Correct: B, Running Accuracy: 63.23%


Processing ARC-Challenge Questions:  13%|█▎        | 156/1172 [22:11<2:31:45,  8.96s/question]

Prediction: D, Correct: D, Running Accuracy: 63.46%


Processing ARC-Challenge Questions:  13%|█▎        | 157/1172 [22:19<2:28:36,  8.78s/question]

Prediction: B, Correct: C, Running Accuracy: 63.06%


Processing ARC-Challenge Questions:  13%|█▎        | 158/1172 [22:26<2:18:03,  8.17s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 62.66%


Processing ARC-Challenge Questions:  14%|█▎        | 159/1172 [22:33<2:12:35,  7.85s/question]

Prediction: C, Correct: C, Running Accuracy: 62.89%


Processing ARC-Challenge Questions:  14%|█▎        | 160/1172 [22:42<2:16:43,  8.11s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 62.50%


Processing ARC-Challenge Questions:  14%|█▎        | 161/1172 [22:48<2:06:34,  7.51s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 62.11%


Processing ARC-Challenge Questions:  14%|█▍        | 162/1172 [22:57<2:17:38,  8.18s/question]

Prediction: D, Correct: D, Running Accuracy: 62.35%


Processing ARC-Challenge Questions:  14%|█▍        | 163/1172 [23:06<2:19:49,  8.31s/question]

Prediction: A, Correct: A, Running Accuracy: 62.58%


Processing ARC-Challenge Questions:  14%|█▍        | 164/1172 [23:12<2:07:19,  7.58s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 62.20%


Processing ARC-Challenge Questions:  14%|█▍        | 165/1172 [23:19<2:06:52,  7.56s/question]

Prediction: A, Correct: A, Running Accuracy: 62.42%


Processing ARC-Challenge Questions:  14%|█▍        | 166/1172 [23:25<1:58:14,  7.05s/question]

Prediction: A, Correct: A, Running Accuracy: 62.65%


Processing ARC-Challenge Questions:  14%|█▍        | 167/1172 [23:32<1:54:31,  6.84s/question]

Prediction: C, Correct: C, Running Accuracy: 62.87%


Processing ARC-Challenge Questions:  14%|█▍        | 168/1172 [23:38<1:53:06,  6.76s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 62.50%


Processing ARC-Challenge Questions:  14%|█▍        | 169/1172 [23:45<1:50:50,  6.63s/question]

Prediction: D, Correct: A, Running Accuracy: 62.13%


Processing ARC-Challenge Questions:  15%|█▍        | 170/1172 [23:53<2:00:03,  7.19s/question]

Prediction: D, Correct: D, Running Accuracy: 62.35%


Processing ARC-Challenge Questions:  15%|█▍        | 171/1172 [24:05<2:23:18,  8.59s/question]

Prediction: A, Correct: D, Running Accuracy: 61.99%


Processing ARC-Challenge Questions:  15%|█▍        | 172/1172 [24:15<2:30:12,  9.01s/question]

Prediction: D, Correct: D, Running Accuracy: 62.21%


Processing ARC-Challenge Questions:  15%|█▍        | 173/1172 [24:22<2:21:13,  8.48s/question]

Prediction: B, Correct: B, Running Accuracy: 62.43%


Processing ARC-Challenge Questions:  15%|█▍        | 174/1172 [24:29<2:11:25,  7.90s/question]

Prediction: D, Correct: D, Running Accuracy: 62.64%


Processing ARC-Challenge Questions:  15%|█▍        | 175/1172 [24:38<2:16:58,  8.24s/question]

Prediction: C, Correct: C, Running Accuracy: 62.86%


Processing ARC-Challenge Questions:  15%|█▌        | 176/1172 [24:51<2:40:39,  9.68s/question]

Prediction: A, Correct: A, Running Accuracy: 63.07%


Processing ARC-Challenge Questions:  15%|█▌        | 177/1172 [24:58<2:28:19,  8.94s/question]

Prediction: A, Correct: A, Running Accuracy: 63.28%


Processing ARC-Challenge Questions:  15%|█▌        | 178/1172 [25:06<2:25:19,  8.77s/question]

Prediction: D, Correct: A, Running Accuracy: 62.92%


Processing ARC-Challenge Questions:  15%|█▌        | 179/1172 [25:17<2:35:43,  9.41s/question]

Prediction: B, Correct: B, Running Accuracy: 63.13%


Processing ARC-Challenge Questions:  15%|█▌        | 180/1172 [25:27<2:35:07,  9.38s/question]

Prediction: C, Correct: C, Running Accuracy: 63.33%


Processing ARC-Challenge Questions:  15%|█▌        | 181/1172 [25:34<2:26:37,  8.88s/question]

Prediction: A, Correct: A, Running Accuracy: 63.54%


Processing ARC-Challenge Questions:  16%|█▌        | 182/1172 [25:42<2:21:43,  8.59s/question]

Prediction: D, Correct: D, Running Accuracy: 63.74%


Processing ARC-Challenge Questions:  16%|█▌        | 183/1172 [25:50<2:16:27,  8.28s/question]

Prediction: C, Correct: C, Running Accuracy: 63.93%


Processing ARC-Challenge Questions:  16%|█▌        | 184/1172 [25:56<2:08:15,  7.79s/question]

Prediction: C, Correct: C, Running Accuracy: 64.13%


Processing ARC-Challenge Questions:  16%|█▌        | 185/1172 [26:07<2:23:00,  8.69s/question]

Prediction: A, Correct: A, Running Accuracy: 64.32%


Processing ARC-Challenge Questions:  16%|█▌        | 186/1172 [26:15<2:16:44,  8.32s/question]

Prediction: C, Correct: C, Running Accuracy: 64.52%


Processing ARC-Challenge Questions:  16%|█▌        | 187/1172 [26:26<2:31:58,  9.26s/question]

Prediction: D, Correct: D, Running Accuracy: 64.71%


Processing ARC-Challenge Questions:  16%|█▌        | 188/1172 [26:36<2:35:41,  9.49s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.36%


Processing ARC-Challenge Questions:  16%|█▌        | 189/1172 [26:42<2:17:11,  8.37s/question]

Prediction: C, Correct: C, Running Accuracy: 64.55%


Processing ARC-Challenge Questions:  16%|█▌        | 190/1172 [26:50<2:15:42,  8.29s/question]

Prediction: A, Correct: C, Running Accuracy: 64.21%


Processing ARC-Challenge Questions:  16%|█▋        | 191/1172 [26:58<2:12:23,  8.10s/question]

Prediction: B, Correct: C, Running Accuracy: 63.87%


Processing ARC-Challenge Questions:  16%|█▋        | 192/1172 [27:07<2:19:52,  8.56s/question]

Prediction: B, Correct: B, Running Accuracy: 64.06%


Processing ARC-Challenge Questions:  16%|█▋        | 193/1172 [27:16<2:18:05,  8.46s/question]

Prediction: A, Correct: A, Running Accuracy: 64.25%


Processing ARC-Challenge Questions:  17%|█▋        | 194/1172 [27:22<2:09:57,  7.97s/question]

Prediction: D, Correct: D, Running Accuracy: 64.43%


Processing ARC-Challenge Questions:  17%|█▋        | 195/1172 [27:33<2:24:53,  8.90s/question]

Prediction: B, Correct: B, Running Accuracy: 64.62%


Processing ARC-Challenge Questions:  17%|█▋        | 196/1172 [27:40<2:14:51,  8.29s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.29%


Processing ARC-Challenge Questions:  17%|█▋        | 197/1172 [27:52<2:29:02,  9.17s/question]

Prediction: C, Correct: B, Running Accuracy: 63.96%


Processing ARC-Challenge Questions:  17%|█▋        | 198/1172 [28:00<2:25:08,  8.94s/question]

Prediction: C, Correct: C, Running Accuracy: 64.14%


Processing ARC-Challenge Questions:  17%|█▋        | 199/1172 [28:09<2:24:45,  8.93s/question]

Prediction: B, Correct: B, Running Accuracy: 64.32%


Processing ARC-Challenge Questions:  17%|█▋        | 200/1172 [28:15<2:09:48,  8.01s/question]

Prediction: B, Correct: B, Running Accuracy: 64.50%


Processing ARC-Challenge Questions:  17%|█▋        | 201/1172 [28:22<2:05:19,  7.74s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.18%


Processing ARC-Challenge Questions:  17%|█▋        | 202/1172 [28:30<2:07:58,  7.92s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 63.86%


Processing ARC-Challenge Questions:  17%|█▋        | 203/1172 [28:38<2:06:45,  7.85s/question]

Prediction: A, Correct: A, Running Accuracy: 64.04%


Processing ARC-Challenge Questions:  17%|█▋        | 204/1172 [28:46<2:09:42,  8.04s/question]

Prediction: B, Correct: B, Running Accuracy: 64.22%


Processing ARC-Challenge Questions:  17%|█▋        | 205/1172 [28:53<2:01:58,  7.57s/question]

Prediction: C, Correct: D, Running Accuracy: 63.90%


Processing ARC-Challenge Questions:  18%|█▊        | 206/1172 [29:02<2:08:57,  8.01s/question]

Prediction: A, Correct: C, Running Accuracy: 63.59%


Processing ARC-Challenge Questions:  18%|█▊        | 207/1172 [29:11<2:16:21,  8.48s/question]

Prediction: B, Correct: B, Running Accuracy: 63.77%


Processing ARC-Challenge Questions:  18%|█▊        | 208/1172 [29:19<2:12:41,  8.26s/question]

Prediction: C, Correct: C, Running Accuracy: 63.94%


Processing ARC-Challenge Questions:  18%|█▊        | 209/1172 [29:26<2:04:20,  7.75s/question]

Prediction: C, Correct: C, Running Accuracy: 64.11%


Processing ARC-Challenge Questions:  18%|█▊        | 210/1172 [29:32<1:58:35,  7.40s/question]

Prediction: C, Correct: C, Running Accuracy: 64.29%


Processing ARC-Challenge Questions:  18%|█▊        | 211/1172 [29:42<2:09:34,  8.09s/question]

Prediction: C, Correct: C, Running Accuracy: 64.45%


Processing ARC-Challenge Questions:  18%|█▊        | 212/1172 [29:56<2:38:35,  9.91s/question]

Prediction: A, Correct: D, Running Accuracy: 64.15%


Processing ARC-Challenge Questions:  18%|█▊        | 213/1172 [30:02<2:19:37,  8.74s/question]

Prediction: C, Correct: C, Running Accuracy: 64.32%


Processing ARC-Challenge Questions:  18%|█▊        | 214/1172 [30:11<2:19:11,  8.72s/question]

Prediction: B, Correct: B, Running Accuracy: 64.49%


Processing ARC-Challenge Questions:  18%|█▊        | 215/1172 [30:20<2:19:37,  8.75s/question]

Prediction: B, Correct: B, Running Accuracy: 64.65%


Processing ARC-Challenge Questions:  18%|█▊        | 216/1172 [30:30<2:27:43,  9.27s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 64.35%


Processing ARC-Challenge Questions:  19%|█▊        | 217/1172 [30:38<2:18:23,  8.70s/question]

Prediction: D, Correct: D, Running Accuracy: 64.52%


Processing ARC-Challenge Questions:  19%|█▊        | 218/1172 [30:44<2:08:48,  8.10s/question]

Prediction: D, Correct: A, Running Accuracy: 64.22%


Processing ARC-Challenge Questions:  19%|█▊        | 219/1172 [30:51<2:02:50,  7.73s/question]

Prediction: C, Correct: C, Running Accuracy: 64.38%


Processing ARC-Challenge Questions:  19%|█▉        | 220/1172 [30:58<1:59:17,  7.52s/question]

Prediction: B, Correct: B, Running Accuracy: 64.55%


Processing ARC-Challenge Questions:  19%|█▉        | 221/1172 [31:08<2:08:11,  8.09s/question]

Prediction: D, Correct: A, Running Accuracy: 64.25%


Processing ARC-Challenge Questions:  19%|█▉        | 222/1172 [31:17<2:15:07,  8.53s/question]

Prediction: B, Correct: B, Running Accuracy: 64.41%


Processing ARC-Challenge Questions:  19%|█▉        | 223/1172 [31:27<2:21:28,  8.94s/question]

Prediction: C, Correct: A, Running Accuracy: 64.13%


Processing ARC-Challenge Questions:  19%|█▉        | 224/1172 [31:35<2:17:33,  8.71s/question]

Prediction: A, Correct: A, Running Accuracy: 64.29%


Processing ARC-Challenge Questions:  19%|█▉        | 225/1172 [31:41<2:05:23,  7.94s/question]

Prediction: A, Correct: A, Running Accuracy: 64.44%


Processing ARC-Challenge Questions:  19%|█▉        | 226/1172 [31:48<1:57:42,  7.47s/question]

Prediction: A, Correct: A, Running Accuracy: 64.60%


Processing ARC-Challenge Questions:  19%|█▉        | 227/1172 [31:59<2:16:13,  8.65s/question]

Prediction: C, Correct: C, Running Accuracy: 64.76%


Processing ARC-Challenge Questions:  19%|█▉        | 228/1172 [32:07<2:11:13,  8.34s/question]

Prediction: B, Correct: B, Running Accuracy: 64.91%


Processing ARC-Challenge Questions:  20%|█▉        | 229/1172 [32:14<2:06:14,  8.03s/question]

Prediction: B, Correct: B, Running Accuracy: 65.07%


Processing ARC-Challenge Questions:  20%|█▉        | 230/1172 [32:20<1:57:04,  7.46s/question]

Prediction: B, Correct: B, Running Accuracy: 65.22%


Processing ARC-Challenge Questions:  20%|█▉        | 231/1172 [32:30<2:08:49,  8.21s/question]

Prediction: D, Correct: D, Running Accuracy: 65.37%


Processing ARC-Challenge Questions:  20%|█▉        | 232/1172 [32:39<2:12:26,  8.45s/question]

Prediction: A, Correct: A, Running Accuracy: 65.52%


Processing ARC-Challenge Questions:  20%|█▉        | 233/1172 [32:45<2:00:30,  7.70s/question]

Prediction: A, Correct: A, Running Accuracy: 65.67%


Processing ARC-Challenge Questions:  20%|█▉        | 234/1172 [32:55<2:09:49,  8.30s/question]

Prediction: C, Correct: B, Running Accuracy: 65.38%


Processing ARC-Challenge Questions:  20%|██        | 235/1172 [33:08<2:32:29,  9.76s/question]

Prediction: B, Correct: C, Running Accuracy: 65.11%


Processing ARC-Challenge Questions:  20%|██        | 236/1172 [33:18<2:35:04,  9.94s/question]

Prediction: B, Correct: B, Running Accuracy: 65.25%


Processing ARC-Challenge Questions:  20%|██        | 237/1172 [33:25<2:17:36,  8.83s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 64.98%


Processing ARC-Challenge Questions:  20%|██        | 238/1172 [33:36<2:27:25,  9.47s/question]

Prediction: D, Correct: D, Running Accuracy: 65.13%


Processing ARC-Challenge Questions:  20%|██        | 239/1172 [33:48<2:41:09, 10.36s/question]

Prediction: B, Correct: B, Running Accuracy: 65.27%


Processing ARC-Challenge Questions:  20%|██        | 240/1172 [33:56<2:31:43,  9.77s/question]

Prediction: A, Correct: A, Running Accuracy: 65.42%


Processing ARC-Challenge Questions:  21%|██        | 241/1172 [34:04<2:22:58,  9.21s/question]

Prediction: D, Correct: D, Running Accuracy: 65.56%


Processing ARC-Challenge Questions:  21%|██        | 242/1172 [34:11<2:11:15,  8.47s/question]

Prediction: C, Correct: C, Running Accuracy: 65.70%


Processing ARC-Challenge Questions:  21%|██        | 243/1172 [34:19<2:09:47,  8.38s/question]

Prediction: B, Correct: B, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  21%|██        | 244/1172 [34:27<2:08:32,  8.31s/question]

Prediction: A, Correct: A, Running Accuracy: 65.98%


Processing ARC-Challenge Questions:  21%|██        | 245/1172 [34:38<2:20:55,  9.12s/question]

Prediction: D, Correct: B, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  21%|██        | 246/1172 [34:46<2:12:26,  8.58s/question]

Prediction: C, Correct: C, Running Accuracy: 65.85%


Processing ARC-Challenge Questions:  21%|██        | 247/1172 [34:53<2:05:06,  8.11s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.59%


Processing ARC-Challenge Questions:  21%|██        | 248/1172 [35:04<2:21:27,  9.19s/question]

Prediction: C, Correct: C, Running Accuracy: 65.73%


Processing ARC-Challenge Questions:  21%|██        | 249/1172 [35:15<2:26:59,  9.56s/question]

Prediction: B, Correct: B, Running Accuracy: 65.86%


Processing ARC-Challenge Questions:  21%|██▏       | 250/1172 [35:22<2:16:12,  8.86s/question]

Prediction: A, Correct: B, Running Accuracy: 65.60%


Processing ARC-Challenge Questions:  21%|██▏       | 251/1172 [35:36<2:40:00, 10.42s/question]

Prediction: D, Correct: D, Running Accuracy: 65.74%


Processing ARC-Challenge Questions:  22%|██▏       | 252/1172 [35:43<2:24:06,  9.40s/question]

Prediction: A, Correct: B, Running Accuracy: 65.48%


Processing ARC-Challenge Questions:  22%|██▏       | 253/1172 [35:52<2:20:51,  9.20s/question]

Prediction: A, Correct: A, Running Accuracy: 65.61%


Processing ARC-Challenge Questions:  22%|██▏       | 254/1172 [35:59<2:09:17,  8.45s/question]

Prediction: D, Correct: D, Running Accuracy: 65.75%


Processing ARC-Challenge Questions:  22%|██▏       | 255/1172 [36:08<2:12:10,  8.65s/question]

Prediction: A, Correct: D, Running Accuracy: 65.49%


Processing ARC-Challenge Questions:  22%|██▏       | 256/1172 [36:16<2:11:44,  8.63s/question]

Prediction: A, Correct: A, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  22%|██▏       | 257/1172 [36:35<2:55:57, 11.54s/question]

Prediction: D, Correct: D, Running Accuracy: 65.76%


Processing ARC-Challenge Questions:  22%|██▏       | 258/1172 [36:42<2:37:35, 10.35s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.50%


Processing ARC-Challenge Questions:  22%|██▏       | 259/1172 [36:48<2:18:56,  9.13s/question]

Prediction: C, Correct: C, Running Accuracy: 65.64%


Processing ARC-Challenge Questions:  22%|██▏       | 260/1172 [36:57<2:14:44,  8.86s/question]

Prediction: D, Correct: A, Running Accuracy: 65.38%


Processing ARC-Challenge Questions:  22%|██▏       | 261/1172 [37:02<1:58:44,  7.82s/question]

Prediction: C, Correct: C, Running Accuracy: 65.52%


Processing ARC-Challenge Questions:  22%|██▏       | 262/1172 [37:11<2:04:44,  8.22s/question]

Prediction: D, Correct: D, Running Accuracy: 65.65%


Processing ARC-Challenge Questions:  22%|██▏       | 263/1172 [37:23<2:22:09,  9.38s/question]

Prediction: A, Correct: A, Running Accuracy: 65.78%


Processing ARC-Challenge Questions:  23%|██▎       | 264/1172 [37:33<2:23:17,  9.47s/question]

Prediction: D, Correct: D, Running Accuracy: 65.91%


Processing ARC-Challenge Questions:  23%|██▎       | 265/1172 [37:40<2:12:34,  8.77s/question]

Prediction: B, Correct: B, Running Accuracy: 66.04%


Processing ARC-Challenge Questions:  23%|██▎       | 266/1172 [37:51<2:22:53,  9.46s/question]

Prediction: C, Correct: C, Running Accuracy: 66.17%


Processing ARC-Challenge Questions:  23%|██▎       | 267/1172 [37:59<2:13:15,  8.83s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.92%


Processing ARC-Challenge Questions:  23%|██▎       | 268/1172 [38:03<1:55:21,  7.66s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.67%


Processing ARC-Challenge Questions:  23%|██▎       | 269/1172 [38:09<1:46:41,  7.09s/question]

Prediction: B, Correct: B, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  23%|██▎       | 270/1172 [38:19<1:58:56,  7.91s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.56%


Processing ARC-Challenge Questions:  23%|██▎       | 271/1172 [38:28<2:05:05,  8.33s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.31%


Processing ARC-Challenge Questions:  23%|██▎       | 272/1172 [38:34<1:52:47,  7.52s/question]

Prediction: A, Correct: A, Running Accuracy: 65.44%


Processing ARC-Challenge Questions:  23%|██▎       | 273/1172 [38:43<1:57:30,  7.84s/question]

Prediction: C, Correct: C, Running Accuracy: 65.57%


Processing ARC-Challenge Questions:  23%|██▎       | 274/1172 [38:51<2:00:53,  8.08s/question]

Prediction: B, Correct: B, Running Accuracy: 65.69%


Processing ARC-Challenge Questions:  23%|██▎       | 275/1172 [38:59<1:58:48,  7.95s/question]

Prediction: B, Correct: B, Running Accuracy: 65.82%


Processing ARC-Challenge Questions:  24%|██▎       | 276/1172 [39:08<2:03:17,  8.26s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 65.58%


Processing ARC-Challenge Questions:  24%|██▎       | 277/1172 [39:21<2:23:56,  9.65s/question]

Prediction: D, Correct: B, Running Accuracy: 65.34%


Processing ARC-Challenge Questions:  24%|██▎       | 278/1172 [39:28<2:13:53,  8.99s/question]

Prediction: B, Correct: B, Running Accuracy: 65.47%


Processing ARC-Challenge Questions:  24%|██▍       | 279/1172 [39:36<2:07:01,  8.54s/question]

Prediction: D, Correct: D, Running Accuracy: 65.59%


Processing ARC-Challenge Questions:  24%|██▍       | 280/1172 [39:43<2:01:07,  8.15s/question]

Prediction: B, Correct: B, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  24%|██▍       | 281/1172 [39:50<1:55:06,  7.75s/question]

Prediction: B, Correct: B, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  24%|██▍       | 282/1172 [39:58<1:57:43,  7.94s/question]

Prediction: B, Correct: D, Running Accuracy: 65.60%


Processing ARC-Challenge Questions:  24%|██▍       | 283/1172 [40:06<1:59:17,  8.05s/question]

Prediction: A, Correct: A, Running Accuracy: 65.72%


Processing ARC-Challenge Questions:  24%|██▍       | 284/1172 [40:13<1:51:48,  7.55s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.49%


Processing ARC-Challenge Questions:  24%|██▍       | 285/1172 [40:22<1:58:41,  8.03s/question]

Prediction: A, Correct: B, Running Accuracy: 65.26%


Processing ARC-Challenge Questions:  24%|██▍       | 286/1172 [40:32<2:07:51,  8.66s/question]

Prediction: C, Correct: C, Running Accuracy: 65.38%


Processing ARC-Challenge Questions:  24%|██▍       | 287/1172 [40:41<2:06:50,  8.60s/question]

Prediction: C, Correct: C, Running Accuracy: 65.51%


Processing ARC-Challenge Questions:  25%|██▍       | 288/1172 [40:57<2:43:18, 11.08s/question]

Prediction: D, Correct: C, Running Accuracy: 65.28%


Processing ARC-Challenge Questions:  25%|██▍       | 289/1172 [41:06<2:30:31, 10.23s/question]

Prediction: B, Correct: B, Running Accuracy: 65.40%


Processing ARC-Challenge Questions:  25%|██▍       | 290/1172 [41:13<2:15:45,  9.24s/question]

Prediction: D, Correct: C, Running Accuracy: 65.17%


Processing ARC-Challenge Questions:  25%|██▍       | 291/1172 [41:20<2:08:07,  8.73s/question]

Prediction: A, Correct: D, Running Accuracy: 64.95%


Processing ARC-Challenge Questions:  25%|██▍       | 292/1172 [41:29<2:07:59,  8.73s/question]

Prediction: A, Correct: B, Running Accuracy: 64.73%


Processing ARC-Challenge Questions:  25%|██▌       | 293/1172 [41:38<2:07:47,  8.72s/question]

Prediction: C, Correct: C, Running Accuracy: 64.85%


Processing ARC-Challenge Questions:  25%|██▌       | 294/1172 [41:46<2:04:31,  8.51s/question]

Prediction: B, Correct: B, Running Accuracy: 64.97%


Processing ARC-Challenge Questions:  25%|██▌       | 295/1172 [41:53<1:58:11,  8.09s/question]

Prediction: C, Correct: C, Running Accuracy: 65.08%


Processing ARC-Challenge Questions:  25%|██▌       | 296/1172 [41:58<1:46:19,  7.28s/question]

Prediction: B, Correct: B, Running Accuracy: 65.20%


Processing ARC-Challenge Questions:  25%|██▌       | 297/1172 [42:05<1:43:41,  7.11s/question]

Prediction: C, Correct: C, Running Accuracy: 65.32%


Processing ARC-Challenge Questions:  25%|██▌       | 298/1172 [42:14<1:51:41,  7.67s/question]

Prediction: C, Correct: C, Running Accuracy: 65.44%


Processing ARC-Challenge Questions:  26%|██▌       | 299/1172 [42:21<1:48:23,  7.45s/question]

Prediction: B, Correct: B, Running Accuracy: 65.55%


Processing ARC-Challenge Questions:  26%|██▌       | 300/1172 [42:32<2:06:13,  8.69s/question]

Prediction: D, Correct: A, Running Accuracy: 65.33%


Processing ARC-Challenge Questions:  26%|██▌       | 301/1172 [42:42<2:09:41,  8.93s/question]

Prediction: D, Correct: D, Running Accuracy: 65.45%


Processing ARC-Challenge Questions:  26%|██▌       | 302/1172 [42:48<1:56:31,  8.04s/question]

Prediction: C, Correct: C, Running Accuracy: 65.56%


Processing ARC-Challenge Questions:  26%|██▌       | 303/1172 [42:56<1:59:19,  8.24s/question]

Prediction: B, Correct: B, Running Accuracy: 65.68%


Processing ARC-Challenge Questions:  26%|██▌       | 304/1172 [43:06<2:04:30,  8.61s/question]

Prediction: C, Correct: C, Running Accuracy: 65.79%


Processing ARC-Challenge Questions:  26%|██▌       | 305/1172 [43:16<2:12:50,  9.19s/question]

Prediction: A, Correct: A, Running Accuracy: 65.90%


Processing ARC-Challenge Questions:  26%|██▌       | 306/1172 [43:21<1:54:22,  7.92s/question]

Prediction: A, Correct: A, Running Accuracy: 66.01%


Processing ARC-Challenge Questions:  26%|██▌       | 307/1172 [43:29<1:54:30,  7.94s/question]

Prediction: D, Correct: D, Running Accuracy: 66.12%


Processing ARC-Challenge Questions:  26%|██▋       | 308/1172 [43:36<1:49:03,  7.57s/question]

Prediction: B, Correct: C, Running Accuracy: 65.91%


Processing ARC-Challenge Questions:  26%|██▋       | 309/1172 [43:44<1:52:29,  7.82s/question]

Prediction: C, Correct: C, Running Accuracy: 66.02%


Processing ARC-Challenge Questions:  26%|██▋       | 310/1172 [43:50<1:44:26,  7.27s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.81%


Processing ARC-Challenge Questions:  27%|██▋       | 311/1172 [43:58<1:47:08,  7.47s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.59%


Processing ARC-Challenge Questions:  27%|██▋       | 312/1172 [44:05<1:44:33,  7.30s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.38%


Processing ARC-Challenge Questions:  27%|██▋       | 313/1172 [44:12<1:40:19,  7.01s/question]

Prediction: D, Correct: D, Running Accuracy: 65.50%


Processing ARC-Challenge Questions:  27%|██▋       | 314/1172 [44:25<2:09:27,  9.05s/question]

Prediction: D, Correct: 3, Running Accuracy: 65.29%


Processing ARC-Challenge Questions:  27%|██▋       | 315/1172 [44:33<2:03:09,  8.62s/question]

Prediction: B, Correct: B, Running Accuracy: 65.40%


Processing ARC-Challenge Questions:  27%|██▋       | 316/1172 [44:38<1:48:57,  7.64s/question]

Prediction: A, Correct: A, Running Accuracy: 65.51%


Processing ARC-Challenge Questions:  27%|██▋       | 317/1172 [44:46<1:47:13,  7.52s/question]

Prediction: C, Correct: C, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  27%|██▋       | 318/1172 [44:52<1:42:23,  7.19s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.41%


Processing ARC-Challenge Questions:  27%|██▋       | 319/1172 [45:00<1:45:46,  7.44s/question]

Prediction: A, Correct: A, Running Accuracy: 65.52%


Processing ARC-Challenge Questions:  27%|██▋       | 320/1172 [45:10<1:54:03,  8.03s/question]

Prediction: D, Correct: B, Running Accuracy: 65.31%


Processing ARC-Challenge Questions:  27%|██▋       | 321/1172 [45:17<1:52:30,  7.93s/question]

Prediction: B, Correct: A, Running Accuracy: 65.11%


Processing ARC-Challenge Questions:  27%|██▋       | 322/1172 [45:25<1:51:51,  7.90s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.91%


Processing ARC-Challenge Questions:  28%|██▊       | 323/1172 [45:32<1:47:03,  7.57s/question]

Prediction: A, Correct: A, Running Accuracy: 65.02%


Processing ARC-Challenge Questions:  28%|██▊       | 324/1172 [45:41<1:53:00,  8.00s/question]

Prediction: B, Correct: B, Running Accuracy: 65.12%


Processing ARC-Challenge Questions:  28%|██▊       | 325/1172 [45:48<1:49:21,  7.75s/question]

Prediction: A, Correct: A, Running Accuracy: 65.23%


Processing ARC-Challenge Questions:  28%|██▊       | 326/1172 [45:56<1:48:48,  7.72s/question]

Prediction: C, Correct: C, Running Accuracy: 65.34%


Processing ARC-Challenge Questions:  28%|██▊       | 327/1172 [46:01<1:37:54,  6.95s/question]

Prediction: B, Correct: B, Running Accuracy: 65.44%


Processing ARC-Challenge Questions:  28%|██▊       | 328/1172 [46:10<1:46:37,  7.58s/question]

Prediction: A, Correct: D, Running Accuracy: 65.24%


Processing ARC-Challenge Questions:  28%|██▊       | 329/1172 [46:18<1:50:29,  7.86s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.05%


Processing ARC-Challenge Questions:  28%|██▊       | 330/1172 [46:24<1:42:57,  7.34s/question]

Prediction: D, Correct: D, Running Accuracy: 65.15%


Processing ARC-Challenge Questions:  28%|██▊       | 331/1172 [46:32<1:42:58,  7.35s/question]

Prediction: B, Correct: D, Running Accuracy: 64.95%


Processing ARC-Challenge Questions:  28%|██▊       | 332/1172 [46:42<1:53:32,  8.11s/question]

Prediction: C, Correct: C, Running Accuracy: 65.06%


Processing ARC-Challenge Questions:  28%|██▊       | 333/1172 [46:48<1:46:49,  7.64s/question]

Prediction: A, Correct: A, Running Accuracy: 65.17%


Processing ARC-Challenge Questions:  28%|██▊       | 334/1172 [47:04<2:21:32, 10.13s/question]

Prediction: B, Correct: B, Running Accuracy: 65.27%


Processing ARC-Challenge Questions:  29%|██▊       | 335/1172 [47:13<2:16:24,  9.78s/question]

Prediction: C, Correct: C, Running Accuracy: 65.37%


Processing ARC-Challenge Questions:  29%|██▊       | 336/1172 [47:25<2:23:20, 10.29s/question]

Prediction: D, Correct: D, Running Accuracy: 65.48%


Processing ARC-Challenge Questions:  29%|██▉       | 337/1172 [47:37<2:30:15, 10.80s/question]

Prediction: A, Correct: A, Running Accuracy: 65.58%


Processing ARC-Challenge Questions:  29%|██▉       | 338/1172 [47:45<2:20:32, 10.11s/question]

Prediction: B, Correct: B, Running Accuracy: 65.68%


Processing ARC-Challenge Questions:  29%|██▉       | 339/1172 [47:53<2:12:43,  9.56s/question]

Prediction: A, Correct: A, Running Accuracy: 65.78%


Processing ARC-Challenge Questions:  29%|██▉       | 340/1172 [48:03<2:12:58,  9.59s/question]

Prediction: D, Correct: D, Running Accuracy: 65.88%


Processing ARC-Challenge Questions:  29%|██▉       | 341/1172 [48:10<2:03:32,  8.92s/question]

Prediction: C, Correct: C, Running Accuracy: 65.98%


Processing ARC-Challenge Questions:  29%|██▉       | 342/1172 [48:22<2:12:13,  9.56s/question]

Prediction: A, Correct: C, Running Accuracy: 65.79%


Processing ARC-Challenge Questions:  29%|██▉       | 343/1172 [48:28<1:58:41,  8.59s/question]

Prediction: B, Correct: B, Running Accuracy: 65.89%


Processing ARC-Challenge Questions:  29%|██▉       | 344/1172 [48:37<1:59:44,  8.68s/question]

Prediction: D, Correct: D, Running Accuracy: 65.99%


Processing ARC-Challenge Questions:  29%|██▉       | 345/1172 [48:44<1:52:21,  8.15s/question]

Prediction: B, Correct: A, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  30%|██▉       | 346/1172 [48:49<1:42:38,  7.46s/question]

Prediction: B, Correct: B, Running Accuracy: 65.90%


Processing ARC-Challenge Questions:  30%|██▉       | 347/1172 [49:01<1:58:41,  8.63s/question]

Prediction: C, Correct: C, Running Accuracy: 65.99%


Processing ARC-Challenge Questions:  30%|██▉       | 348/1172 [49:11<2:04:26,  9.06s/question]

Prediction: C, Correct: C, Running Accuracy: 66.09%


Processing ARC-Challenge Questions:  30%|██▉       | 349/1172 [49:19<2:02:06,  8.90s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.90%


Processing ARC-Challenge Questions:  30%|██▉       | 350/1172 [49:38<2:40:25, 11.71s/question]

Prediction: B, Correct: D, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  30%|██▉       | 351/1172 [49:44<2:17:39, 10.06s/question]

Prediction: A, Correct: D, Running Accuracy: 65.53%


Processing ARC-Challenge Questions:  30%|███       | 352/1172 [49:52<2:10:48,  9.57s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.34%


Processing ARC-Challenge Questions:  30%|███       | 353/1172 [50:01<2:08:05,  9.38s/question]

Prediction: D, Correct: D, Running Accuracy: 65.44%


Processing ARC-Challenge Questions:  30%|███       | 354/1172 [50:08<1:58:34,  8.70s/question]

Prediction: B, Correct: C, Running Accuracy: 65.25%


Processing ARC-Challenge Questions:  30%|███       | 355/1172 [50:15<1:50:47,  8.14s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 65.07%


Processing ARC-Challenge Questions:  30%|███       | 356/1172 [50:25<1:57:30,  8.64s/question]

Prediction: D, Correct: D, Running Accuracy: 65.17%


Processing ARC-Challenge Questions:  30%|███       | 357/1172 [50:32<1:51:01,  8.17s/question]

Prediction: B, Correct: A, Running Accuracy: 64.99%


Processing ARC-Challenge Questions:  31%|███       | 358/1172 [50:47<2:17:54, 10.16s/question]

Prediction: A, Correct: B, Running Accuracy: 64.80%


Processing ARC-Challenge Questions:  31%|███       | 359/1172 [50:53<2:02:39,  9.05s/question]

Prediction: D, Correct: D, Running Accuracy: 64.90%


Processing ARC-Challenge Questions:  31%|███       | 360/1172 [51:06<2:18:45, 10.25s/question]

Prediction: C, Correct: C, Running Accuracy: 65.00%


Processing ARC-Challenge Questions:  31%|███       | 361/1172 [51:16<2:17:34, 10.18s/question]

Prediction: A, Correct: B, Running Accuracy: 64.82%


Processing ARC-Challenge Questions:  31%|███       | 362/1172 [51:26<2:15:42, 10.05s/question]

Prediction: A, Correct: A, Running Accuracy: 64.92%


Processing ARC-Challenge Questions:  31%|███       | 363/1172 [51:35<2:11:49,  9.78s/question]

Prediction: C, Correct: C, Running Accuracy: 65.01%


Processing ARC-Challenge Questions:  31%|███       | 364/1172 [51:44<2:08:04,  9.51s/question]

Prediction: A, Correct: A, Running Accuracy: 65.11%


Processing ARC-Challenge Questions:  31%|███       | 365/1172 [51:52<2:01:18,  9.02s/question]

Prediction: C, Correct: C, Running Accuracy: 65.21%


Processing ARC-Challenge Questions:  31%|███       | 366/1172 [51:58<1:47:42,  8.02s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.03%


Processing ARC-Challenge Questions:  31%|███▏      | 367/1172 [52:05<1:45:05,  7.83s/question]

Prediction: B, Correct: B, Running Accuracy: 65.12%


Processing ARC-Challenge Questions:  31%|███▏      | 368/1172 [52:11<1:38:09,  7.33s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.95%


Processing ARC-Challenge Questions:  31%|███▏      | 369/1172 [52:17<1:32:21,  6.90s/question]

Prediction: C, Correct: C, Running Accuracy: 65.04%


Processing ARC-Challenge Questions:  32%|███▏      | 370/1172 [52:24<1:30:17,  6.75s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 64.86%


Processing ARC-Challenge Questions:  32%|███▏      | 371/1172 [52:30<1:27:40,  6.57s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.69%


Processing ARC-Challenge Questions:  32%|███▏      | 372/1172 [52:41<1:44:30,  7.84s/question]

Prediction: A, Correct: B, Running Accuracy: 64.52%


Processing ARC-Challenge Questions:  32%|███▏      | 373/1172 [52:48<1:41:05,  7.59s/question]

Prediction: A, Correct: B, Running Accuracy: 64.34%


Processing ARC-Challenge Questions:  32%|███▏      | 374/1172 [52:54<1:37:03,  7.30s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.17%


Processing ARC-Challenge Questions:  32%|███▏      | 375/1172 [53:02<1:38:54,  7.45s/question]

Prediction: C, Correct: C, Running Accuracy: 64.27%


Processing ARC-Challenge Questions:  32%|███▏      | 376/1172 [53:09<1:38:52,  7.45s/question]

Prediction: D, Correct: D, Running Accuracy: 64.36%


Processing ARC-Challenge Questions:  32%|███▏      | 377/1172 [53:18<1:41:36,  7.67s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 64.19%


Processing ARC-Challenge Questions:  32%|███▏      | 378/1172 [53:25<1:39:53,  7.55s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.02%


Processing ARC-Challenge Questions:  32%|███▏      | 379/1172 [53:36<1:53:20,  8.58s/question]

Prediction: C, Correct: C, Running Accuracy: 64.12%


Processing ARC-Challenge Questions:  32%|███▏      | 380/1172 [53:47<2:03:31,  9.36s/question]

Prediction: B, Correct: B, Running Accuracy: 64.21%


Processing ARC-Challenge Questions:  33%|███▎      | 381/1172 [53:55<1:56:51,  8.86s/question]

Prediction: A, Correct: A, Running Accuracy: 64.30%


Processing ARC-Challenge Questions:  33%|███▎      | 382/1172 [54:02<1:48:23,  8.23s/question]

Prediction: C, Correct: C, Running Accuracy: 64.40%


Processing ARC-Challenge Questions:  33%|███▎      | 383/1172 [54:13<2:01:36,  9.25s/question]

Prediction: A, Correct: C, Running Accuracy: 64.23%


Processing ARC-Challenge Questions:  33%|███▎      | 384/1172 [54:21<1:54:18,  8.70s/question]

Prediction: A, Correct: A, Running Accuracy: 64.32%


Processing ARC-Challenge Questions:  33%|███▎      | 385/1172 [54:27<1:44:27,  7.96s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.16%


Processing ARC-Challenge Questions:  33%|███▎      | 386/1172 [54:37<1:54:11,  8.72s/question]

Prediction: B, Correct: 2, Running Accuracy: 63.99%


Processing ARC-Challenge Questions:  33%|███▎      | 387/1172 [54:44<1:47:01,  8.18s/question]

Prediction: B, Correct: B, Running Accuracy: 64.08%


Processing ARC-Challenge Questions:  33%|███▎      | 388/1172 [54:53<1:48:59,  8.34s/question]

Prediction: D, Correct: D, Running Accuracy: 64.18%


Processing ARC-Challenge Questions:  33%|███▎      | 389/1172 [55:02<1:51:56,  8.58s/question]

Prediction: D, Correct: D, Running Accuracy: 64.27%


Processing ARC-Challenge Questions:  33%|███▎      | 390/1172 [55:12<1:57:20,  9.00s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.10%


Processing ARC-Challenge Questions:  33%|███▎      | 391/1172 [55:22<2:01:46,  9.36s/question]

Prediction: C, Correct: C, Running Accuracy: 64.19%


Processing ARC-Challenge Questions:  33%|███▎      | 392/1172 [55:31<1:58:03,  9.08s/question]

Prediction: D, Correct: D, Running Accuracy: 64.29%


Processing ARC-Challenge Questions:  34%|███▎      | 393/1172 [55:47<2:25:40, 11.22s/question]

Prediction: C, Correct: C, Running Accuracy: 64.38%


Processing ARC-Challenge Questions:  34%|███▎      | 394/1172 [56:01<2:35:59, 12.03s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 64.21%


Processing ARC-Challenge Questions:  34%|███▎      | 395/1172 [56:10<2:25:15, 11.22s/question]

Prediction: A, Correct: A, Running Accuracy: 64.30%


Processing ARC-Challenge Questions:  34%|███▍      | 396/1172 [56:17<2:09:12,  9.99s/question]

Prediction: C, Correct: C, Running Accuracy: 64.39%


Processing ARC-Challenge Questions:  34%|███▍      | 397/1172 [56:29<2:15:37, 10.50s/question]

Prediction: C, Correct: C, Running Accuracy: 64.48%


Processing ARC-Challenge Questions:  34%|███▍      | 398/1172 [56:35<1:57:32,  9.11s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 64.32%


Processing ARC-Challenge Questions:  34%|███▍      | 399/1172 [56:42<1:50:18,  8.56s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.16%


Processing ARC-Challenge Questions:  34%|███▍      | 400/1172 [56:53<1:59:52,  9.32s/question]

Prediction: C, Correct: C, Running Accuracy: 64.25%


Processing ARC-Challenge Questions:  34%|███▍      | 401/1172 [57:00<1:51:22,  8.67s/question]

Prediction: C, Correct: C, Running Accuracy: 64.34%


Processing ARC-Challenge Questions:  34%|███▍      | 402/1172 [57:08<1:47:50,  8.40s/question]

Prediction: A, Correct: A, Running Accuracy: 64.43%


Processing ARC-Challenge Questions:  34%|███▍      | 403/1172 [57:16<1:45:46,  8.25s/question]

Prediction: C, Correct: C, Running Accuracy: 64.52%


Processing ARC-Challenge Questions:  34%|███▍      | 404/1172 [57:25<1:47:45,  8.42s/question]

Prediction: D, Correct: D, Running Accuracy: 64.60%


Processing ARC-Challenge Questions:  35%|███▍      | 405/1172 [57:33<1:45:10,  8.23s/question]

Prediction: A, Correct: A, Running Accuracy: 64.69%


Processing ARC-Challenge Questions:  35%|███▍      | 406/1172 [57:40<1:41:44,  7.97s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.53%


Processing ARC-Challenge Questions:  35%|███▍      | 407/1172 [57:48<1:41:37,  7.97s/question]

Prediction: A, Correct: C, Running Accuracy: 64.37%


Processing ARC-Challenge Questions:  35%|███▍      | 408/1172 [57:56<1:43:06,  8.10s/question]

Prediction: A, Correct: D, Running Accuracy: 64.22%


Processing ARC-Challenge Questions:  35%|███▍      | 409/1172 [58:05<1:45:58,  8.33s/question]

Prediction: B, Correct: B, Running Accuracy: 64.30%


Processing ARC-Challenge Questions:  35%|███▍      | 410/1172 [58:14<1:46:27,  8.38s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.15%


Processing ARC-Challenge Questions:  35%|███▌      | 411/1172 [58:22<1:47:08,  8.45s/question]

Prediction: B, Correct: B, Running Accuracy: 64.23%


Processing ARC-Challenge Questions:  35%|███▌      | 412/1172 [58:32<1:50:12,  8.70s/question]

Prediction: D, Correct: D, Running Accuracy: 64.32%


Processing ARC-Challenge Questions:  35%|███▌      | 413/1172 [58:37<1:38:57,  7.82s/question]

Prediction: B, Correct: B, Running Accuracy: 64.41%


Processing ARC-Challenge Questions:  35%|███▌      | 414/1172 [58:45<1:38:57,  7.83s/question]

Prediction: D, Correct: A, Running Accuracy: 64.25%


Processing ARC-Challenge Questions:  35%|███▌      | 415/1172 [59:00<2:03:29,  9.79s/question]

Prediction: B, Correct: C, Running Accuracy: 64.10%


Processing ARC-Challenge Questions:  35%|███▌      | 416/1172 [59:11<2:08:09, 10.17s/question]

Prediction: D, Correct: D, Running Accuracy: 64.18%


Processing ARC-Challenge Questions:  36%|███▌      | 417/1172 [59:20<2:04:52,  9.92s/question]

Prediction: A, Correct: A, Running Accuracy: 64.27%


Processing ARC-Challenge Questions:  36%|███▌      | 418/1172 [59:30<2:03:28,  9.83s/question]

Prediction: D, Correct: D, Running Accuracy: 64.35%


Processing ARC-Challenge Questions:  36%|███▌      | 419/1172 [59:37<1:55:20,  9.19s/question]

Prediction: B, Correct: D, Running Accuracy: 64.20%


Processing ARC-Challenge Questions:  36%|███▌      | 420/1172 [59:47<1:55:33,  9.22s/question]

Prediction: B, Correct: B, Running Accuracy: 64.29%


Processing ARC-Challenge Questions:  36%|███▌      | 421/1172 [59:53<1:44:49,  8.37s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.13%


Processing ARC-Challenge Questions:  36%|███▌      | 422/1172 [1:00:01<1:44:13,  8.34s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 63.98%


Processing ARC-Challenge Questions:  36%|███▌      | 423/1172 [1:00:11<1:48:52,  8.72s/question]

Prediction: B, Correct: B, Running Accuracy: 64.07%


Processing ARC-Challenge Questions:  36%|███▌      | 424/1172 [1:00:19<1:47:32,  8.63s/question]

Prediction: B, Correct: B, Running Accuracy: 64.15%


Processing ARC-Challenge Questions:  36%|███▋      | 425/1172 [1:00:27<1:44:07,  8.36s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 64.00%


Processing ARC-Challenge Questions:  36%|███▋      | 426/1172 [1:00:35<1:41:01,  8.13s/question]

Prediction: B, Correct: D, Running Accuracy: 63.85%


Processing ARC-Challenge Questions:  36%|███▋      | 427/1172 [1:00:43<1:42:09,  8.23s/question]

Prediction: C, Correct: C, Running Accuracy: 63.93%


Processing ARC-Challenge Questions:  37%|███▋      | 428/1172 [1:00:48<1:31:26,  7.37s/question]

Prediction: B, Correct: 2, Running Accuracy: 63.79%


Processing ARC-Challenge Questions:  37%|███▋      | 429/1172 [1:00:57<1:36:55,  7.83s/question]

Prediction: B, Correct: B, Running Accuracy: 63.87%


Processing ARC-Challenge Questions:  37%|███▋      | 430/1172 [1:01:05<1:35:39,  7.74s/question]

Prediction: B, Correct: B, Running Accuracy: 63.95%


Processing ARC-Challenge Questions:  37%|███▋      | 431/1172 [1:01:13<1:35:17,  7.72s/question]

Prediction: A, Correct: A, Running Accuracy: 64.04%


Processing ARC-Challenge Questions:  37%|███▋      | 432/1172 [1:01:23<1:46:59,  8.68s/question]

Prediction: D, Correct: D, Running Accuracy: 64.12%


Processing ARC-Challenge Questions:  37%|███▋      | 433/1172 [1:01:30<1:40:41,  8.18s/question]

Prediction: D, Correct: D, Running Accuracy: 64.20%


Processing ARC-Challenge Questions:  37%|███▋      | 434/1172 [1:01:41<1:48:06,  8.79s/question]

Prediction: C, Correct: C, Running Accuracy: 64.29%


Processing ARC-Challenge Questions:  37%|███▋      | 435/1172 [1:01:48<1:43:22,  8.42s/question]

Prediction: A, Correct: A, Running Accuracy: 64.37%


Processing ARC-Challenge Questions:  37%|███▋      | 436/1172 [1:02:01<1:57:31,  9.58s/question]

Prediction: A, Correct: B, Running Accuracy: 64.22%


Processing ARC-Challenge Questions:  37%|███▋      | 437/1172 [1:02:10<1:57:15,  9.57s/question]

Prediction: D, Correct: D, Running Accuracy: 64.30%


Processing ARC-Challenge Questions:  37%|███▋      | 438/1172 [1:02:17<1:48:46,  8.89s/question]

Prediction: C, Correct: 3, Running Accuracy: 64.16%


Processing ARC-Challenge Questions:  37%|███▋      | 439/1172 [1:02:25<1:44:56,  8.59s/question]

Prediction: C, Correct: A, Running Accuracy: 64.01%


Processing ARC-Challenge Questions:  38%|███▊      | 440/1172 [1:02:35<1:48:12,  8.87s/question]

Prediction: B, Correct: C, Running Accuracy: 63.86%


Processing ARC-Challenge Questions:  38%|███▊      | 441/1172 [1:02:43<1:46:11,  8.72s/question]

Prediction: D, Correct: D, Running Accuracy: 63.95%


Processing ARC-Challenge Questions:  38%|███▊      | 442/1172 [1:02:52<1:45:02,  8.63s/question]

Prediction: A, Correct: C, Running Accuracy: 63.80%


Processing ARC-Challenge Questions:  38%|███▊      | 443/1172 [1:02:58<1:35:29,  7.86s/question]

Prediction: A, Correct: A, Running Accuracy: 63.88%


Processing ARC-Challenge Questions:  38%|███▊      | 444/1172 [1:03:05<1:32:58,  7.66s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 63.74%


Processing ARC-Challenge Questions:  38%|███▊      | 445/1172 [1:03:16<1:44:14,  8.60s/question]

Prediction: B, Correct: 2, Running Accuracy: 63.60%


Processing ARC-Challenge Questions:  38%|███▊      | 446/1172 [1:03:26<1:51:37,  9.22s/question]

Prediction: D, Correct: D, Running Accuracy: 63.68%


Processing ARC-Challenge Questions:  38%|███▊      | 447/1172 [1:03:34<1:44:21,  8.64s/question]

Prediction: C, Correct: C, Running Accuracy: 63.76%


Processing ARC-Challenge Questions:  38%|███▊      | 448/1172 [1:03:40<1:37:01,  8.04s/question]

Prediction: D, Correct: D, Running Accuracy: 63.84%


Processing ARC-Challenge Questions:  38%|███▊      | 449/1172 [1:03:47<1:33:39,  7.77s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 63.70%


Processing ARC-Challenge Questions:  38%|███▊      | 450/1172 [1:03:56<1:35:20,  7.92s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 63.56%


Processing ARC-Challenge Questions:  38%|███▊      | 451/1172 [1:04:04<1:38:00,  8.16s/question]

Prediction: B, Correct: B, Running Accuracy: 63.64%


Processing ARC-Challenge Questions:  39%|███▊      | 452/1172 [1:04:11<1:32:05,  7.67s/question]

Prediction: B, Correct: B, Running Accuracy: 63.72%


Processing ARC-Challenge Questions:  39%|███▊      | 453/1172 [1:04:18<1:29:22,  7.46s/question]

Prediction: B, Correct: D, Running Accuracy: 63.58%


Processing ARC-Challenge Questions:  39%|███▊      | 454/1172 [1:04:29<1:43:39,  8.66s/question]

Prediction: C, Correct: 3, Running Accuracy: 63.44%


Processing ARC-Challenge Questions:  39%|███▉      | 455/1172 [1:04:38<1:45:08,  8.80s/question]

Prediction: A, Correct: A, Running Accuracy: 63.52%


Processing ARC-Challenge Questions:  39%|███▉      | 456/1172 [1:04:46<1:42:01,  8.55s/question]

Prediction: B, Correct: B, Running Accuracy: 63.60%


Processing ARC-Challenge Questions:  39%|███▉      | 457/1172 [1:04:53<1:34:00,  7.89s/question]

Prediction: D, Correct: D, Running Accuracy: 63.68%


Processing ARC-Challenge Questions:  39%|███▉      | 458/1172 [1:05:00<1:31:17,  7.67s/question]

Prediction: C, Correct: C, Running Accuracy: 63.76%


Processing ARC-Challenge Questions:  39%|███▉      | 459/1172 [1:05:10<1:38:18,  8.27s/question]

Prediction: C, Correct: C, Running Accuracy: 63.83%


Processing ARC-Challenge Questions:  39%|███▉      | 460/1172 [1:05:17<1:36:19,  8.12s/question]

Prediction: D, Correct: D, Running Accuracy: 63.91%


Processing ARC-Challenge Questions:  39%|███▉      | 461/1172 [1:05:23<1:28:44,  7.49s/question]

Prediction: A, Correct: A, Running Accuracy: 63.99%


Processing ARC-Challenge Questions:  39%|███▉      | 462/1172 [1:05:31<1:29:56,  7.60s/question]

Prediction: D, Correct: B, Running Accuracy: 63.85%


Processing ARC-Challenge Questions:  40%|███▉      | 463/1172 [1:05:38<1:25:44,  7.26s/question]

Prediction: B, Correct: B, Running Accuracy: 63.93%


Processing ARC-Challenge Questions:  40%|███▉      | 464/1172 [1:05:49<1:38:32,  8.35s/question]

Prediction: C, Correct: C, Running Accuracy: 64.01%


Processing ARC-Challenge Questions:  40%|███▉      | 465/1172 [1:05:56<1:33:42,  7.95s/question]

Prediction: D, Correct: D, Running Accuracy: 64.09%


Processing ARC-Challenge Questions:  40%|███▉      | 466/1172 [1:06:03<1:32:21,  7.85s/question]

Prediction: B, Correct: A, Running Accuracy: 63.95%


Processing ARC-Challenge Questions:  40%|███▉      | 467/1172 [1:06:12<1:34:51,  8.07s/question]

Prediction: B, Correct: B, Running Accuracy: 64.03%


Processing ARC-Challenge Questions:  40%|███▉      | 468/1172 [1:06:19<1:31:46,  7.82s/question]

Prediction: B, Correct: B, Running Accuracy: 64.10%


Processing ARC-Challenge Questions:  40%|████      | 469/1172 [1:06:26<1:28:29,  7.55s/question]

Prediction: A, Correct: A, Running Accuracy: 64.18%


Processing ARC-Challenge Questions:  40%|████      | 470/1172 [1:06:34<1:28:45,  7.59s/question]

Prediction: C, Correct: C, Running Accuracy: 64.26%


Processing ARC-Challenge Questions:  40%|████      | 471/1172 [1:06:42<1:30:42,  7.76s/question]

Prediction: D, Correct: D, Running Accuracy: 64.33%


Processing ARC-Challenge Questions:  40%|████      | 472/1172 [1:06:49<1:28:06,  7.55s/question]

Prediction: D, Correct: D, Running Accuracy: 64.41%


Processing ARC-Challenge Questions:  40%|████      | 473/1172 [1:06:57<1:29:20,  7.67s/question]

Prediction: C, Correct: C, Running Accuracy: 64.48%


Processing ARC-Challenge Questions:  40%|████      | 474/1172 [1:07:04<1:27:49,  7.55s/question]

Prediction: B, Correct: B, Running Accuracy: 64.56%


Processing ARC-Challenge Questions:  41%|████      | 475/1172 [1:07:15<1:38:46,  8.50s/question]

Prediction: A, Correct: A, Running Accuracy: 64.63%


Processing ARC-Challenge Questions:  41%|████      | 476/1172 [1:07:23<1:37:01,  8.36s/question]

Prediction: B, Correct: B, Running Accuracy: 64.71%


Processing ARC-Challenge Questions:  41%|████      | 477/1172 [1:07:29<1:28:44,  7.66s/question]

Prediction: B, Correct: B, Running Accuracy: 64.78%


Processing ARC-Challenge Questions:  41%|████      | 478/1172 [1:07:36<1:26:48,  7.50s/question]

Prediction: C, Correct: B, Running Accuracy: 64.64%


Processing ARC-Challenge Questions:  41%|████      | 479/1172 [1:07:44<1:28:56,  7.70s/question]

Prediction: A, Correct: A, Running Accuracy: 64.72%


Processing ARC-Challenge Questions:  41%|████      | 480/1172 [1:07:52<1:29:45,  7.78s/question]

Prediction: C, Correct: C, Running Accuracy: 64.79%


Processing ARC-Challenge Questions:  41%|████      | 481/1172 [1:07:59<1:25:34,  7.43s/question]

Prediction: D, Correct: D, Running Accuracy: 64.86%


Processing ARC-Challenge Questions:  41%|████      | 482/1172 [1:08:08<1:33:04,  8.09s/question]

Prediction: C, Correct: C, Running Accuracy: 64.94%


Processing ARC-Challenge Questions:  41%|████      | 483/1172 [1:08:17<1:34:20,  8.22s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 64.80%


Processing ARC-Challenge Questions:  41%|████▏     | 484/1172 [1:08:28<1:44:54,  9.15s/question]

Prediction: B, Correct: B, Running Accuracy: 64.88%


Processing ARC-Challenge Questions:  41%|████▏     | 485/1172 [1:08:34<1:33:51,  8.20s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 64.74%


Processing ARC-Challenge Questions:  41%|████▏     | 486/1172 [1:08:42<1:32:05,  8.06s/question]

Prediction: D, Correct: D, Running Accuracy: 64.81%


Processing ARC-Challenge Questions:  42%|████▏     | 487/1172 [1:08:51<1:35:19,  8.35s/question]

Prediction: C, Correct: C, Running Accuracy: 64.89%


Processing ARC-Challenge Questions:  42%|████▏     | 488/1172 [1:09:00<1:36:06,  8.43s/question]

Prediction: D, Correct: D, Running Accuracy: 64.96%


Processing ARC-Challenge Questions:  42%|████▏     | 489/1172 [1:09:08<1:35:33,  8.39s/question]

Prediction: B, Correct: B, Running Accuracy: 65.03%


Processing ARC-Challenge Questions:  42%|████▏     | 490/1172 [1:09:15<1:31:05,  8.01s/question]

Prediction: D, Correct: D, Running Accuracy: 65.10%


Processing ARC-Challenge Questions:  42%|████▏     | 491/1172 [1:09:23<1:32:28,  8.15s/question]

Prediction: B, Correct: B, Running Accuracy: 65.17%


Processing ARC-Challenge Questions:  42%|████▏     | 492/1172 [1:09:32<1:32:21,  8.15s/question]

Prediction: D, Correct: D, Running Accuracy: 65.24%


Processing ARC-Challenge Questions:  42%|████▏     | 493/1172 [1:09:44<1:46:53,  9.45s/question]

Prediction: A, Correct: A, Running Accuracy: 65.31%


Processing ARC-Challenge Questions:  42%|████▏     | 494/1172 [1:09:55<1:51:17,  9.85s/question]

Prediction: B, Correct: B, Running Accuracy: 65.38%


Processing ARC-Challenge Questions:  42%|████▏     | 495/1172 [1:10:04<1:50:05,  9.76s/question]

Prediction: C, Correct: C, Running Accuracy: 65.45%


Processing ARC-Challenge Questions:  42%|████▏     | 496/1172 [1:10:12<1:42:42,  9.12s/question]

Prediction: B, Correct: B, Running Accuracy: 65.52%


Processing ARC-Challenge Questions:  42%|████▏     | 497/1172 [1:10:19<1:35:07,  8.45s/question]

Prediction: B, Correct: B, Running Accuracy: 65.59%


Processing ARC-Challenge Questions:  42%|████▏     | 498/1172 [1:10:28<1:35:55,  8.54s/question]

Prediction: B, Correct: B, Running Accuracy: 65.66%


Processing ARC-Challenge Questions:  43%|████▎     | 499/1172 [1:10:37<1:37:19,  8.68s/question]

Prediction: D, Correct: B, Running Accuracy: 65.53%


Processing ARC-Challenge Questions:  43%|████▎     | 500/1172 [1:10:48<1:45:14,  9.40s/question]

Prediction: A, Correct: A, Running Accuracy: 65.60%


Processing ARC-Challenge Questions:  43%|████▎     | 501/1172 [1:10:57<1:45:57,  9.47s/question]

Prediction: C, Correct: C, Running Accuracy: 65.67%


Processing ARC-Challenge Questions:  43%|████▎     | 502/1172 [1:11:07<1:47:43,  9.65s/question]

Prediction: A, Correct: A, Running Accuracy: 65.74%


Processing ARC-Challenge Questions:  43%|████▎     | 503/1172 [1:11:14<1:38:32,  8.84s/question]

Prediction: C, Correct: C, Running Accuracy: 65.81%


Processing ARC-Challenge Questions:  43%|████▎     | 504/1172 [1:11:23<1:36:39,  8.68s/question]

Prediction: D, Correct: A, Running Accuracy: 65.67%


Processing ARC-Challenge Questions:  43%|████▎     | 505/1172 [1:11:32<1:38:00,  8.82s/question]

Prediction: B, Correct: B, Running Accuracy: 65.74%


Processing ARC-Challenge Questions:  43%|████▎     | 506/1172 [1:11:40<1:36:59,  8.74s/question]

Prediction: A, Correct: B, Running Accuracy: 65.61%


Processing ARC-Challenge Questions:  43%|████▎     | 507/1172 [1:12:03<2:24:14, 13.01s/question]

Prediction: A, Correct: B, Running Accuracy: 65.48%


Processing ARC-Challenge Questions:  43%|████▎     | 508/1172 [1:12:15<2:18:45, 12.54s/question]

Prediction: B, Correct: B, Running Accuracy: 65.55%


Processing ARC-Challenge Questions:  43%|████▎     | 509/1172 [1:12:22<2:01:59, 11.04s/question]

Prediction: C, Correct: C, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  44%|████▎     | 510/1172 [1:12:37<2:12:11, 11.98s/question]

Prediction: C, Correct: A, Running Accuracy: 65.49%


Processing ARC-Challenge Questions:  44%|████▎     | 511/1172 [1:12:45<1:59:46, 10.87s/question]

Prediction: B, Correct: B, Running Accuracy: 65.56%


Processing ARC-Challenge Questions:  44%|████▎     | 512/1172 [1:12:54<1:55:28, 10.50s/question]

Prediction: B, Correct: C, Running Accuracy: 65.43%


Processing ARC-Challenge Questions:  44%|████▍     | 513/1172 [1:13:03<1:47:35,  9.80s/question]

Prediction: A, Correct: A, Running Accuracy: 65.50%


Processing ARC-Challenge Questions:  44%|████▍     | 514/1172 [1:13:09<1:35:49,  8.74s/question]

Prediction: D, Correct: D, Running Accuracy: 65.56%


Processing ARC-Challenge Questions:  44%|████▍     | 515/1172 [1:13:15<1:28:32,  8.09s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.44%


Processing ARC-Challenge Questions:  44%|████▍     | 516/1172 [1:13:22<1:23:36,  7.65s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.31%


Processing ARC-Challenge Questions:  44%|████▍     | 517/1172 [1:13:29<1:21:55,  7.50s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.18%


Processing ARC-Challenge Questions:  44%|████▍     | 518/1172 [1:13:36<1:18:02,  7.16s/question]

Prediction: D, Correct: D, Running Accuracy: 65.25%


Processing ARC-Challenge Questions:  44%|████▍     | 519/1172 [1:13:42<1:14:35,  6.85s/question]

Prediction: B, Correct: C, Running Accuracy: 65.13%


Processing ARC-Challenge Questions:  44%|████▍     | 520/1172 [1:13:48<1:13:56,  6.80s/question]

Prediction: B, Correct: B, Running Accuracy: 65.19%


Processing ARC-Challenge Questions:  44%|████▍     | 521/1172 [1:13:54<1:10:23,  6.49s/question]

Prediction: C, Correct: C, Running Accuracy: 65.26%


Processing ARC-Challenge Questions:  45%|████▍     | 522/1172 [1:14:05<1:23:13,  7.68s/question]

Prediction: D, Correct: D, Running Accuracy: 65.33%


Processing ARC-Challenge Questions:  45%|████▍     | 523/1172 [1:14:12<1:22:09,  7.60s/question]

Prediction: C, Correct: C, Running Accuracy: 65.39%


Processing ARC-Challenge Questions:  45%|████▍     | 524/1172 [1:14:21<1:26:42,  8.03s/question]

Prediction: A, Correct: A, Running Accuracy: 65.46%


Processing ARC-Challenge Questions:  45%|████▍     | 525/1172 [1:14:30<1:30:37,  8.40s/question]

Prediction: C, Correct: C, Running Accuracy: 65.52%


Processing ARC-Challenge Questions:  45%|████▍     | 526/1172 [1:14:36<1:21:48,  7.60s/question]

Prediction: B, Correct: B, Running Accuracy: 65.59%


Processing ARC-Challenge Questions:  45%|████▍     | 527/1172 [1:14:45<1:26:38,  8.06s/question]

Prediction: B, Correct: B, Running Accuracy: 65.65%


Processing ARC-Challenge Questions:  45%|████▌     | 528/1172 [1:14:56<1:35:11,  8.87s/question]

Prediction: C, Correct: D, Running Accuracy: 65.53%


Processing ARC-Challenge Questions:  45%|████▌     | 529/1172 [1:15:06<1:37:24,  9.09s/question]

Prediction: D, Correct: D, Running Accuracy: 65.60%


Processing ARC-Challenge Questions:  45%|████▌     | 530/1172 [1:15:13<1:32:38,  8.66s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.47%


Processing ARC-Challenge Questions:  45%|████▌     | 531/1172 [1:15:21<1:28:58,  8.33s/question]

Prediction: D, Correct: 4, Running Accuracy: 65.35%


Processing ARC-Challenge Questions:  45%|████▌     | 532/1172 [1:15:28<1:26:14,  8.09s/question]

Prediction: C, Correct: C, Running Accuracy: 65.41%


Processing ARC-Challenge Questions:  45%|████▌     | 533/1172 [1:15:35<1:20:59,  7.60s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.29%


Processing ARC-Challenge Questions:  46%|████▌     | 534/1172 [1:15:42<1:18:11,  7.35s/question]

Prediction: C, Correct: C, Running Accuracy: 65.36%


Processing ARC-Challenge Questions:  46%|████▌     | 535/1172 [1:15:49<1:18:50,  7.43s/question]

Prediction: D, Correct: D, Running Accuracy: 65.42%


Processing ARC-Challenge Questions:  46%|████▌     | 536/1172 [1:15:55<1:13:43,  6.95s/question]

Prediction: B, Correct: A, Running Accuracy: 65.30%


Processing ARC-Challenge Questions:  46%|████▌     | 537/1172 [1:16:02<1:14:47,  7.07s/question]

Prediction: C, Correct: C, Running Accuracy: 65.36%


Processing ARC-Challenge Questions:  46%|████▌     | 538/1172 [1:16:13<1:25:09,  8.06s/question]

Prediction: A, Correct: A, Running Accuracy: 65.43%


Processing ARC-Challenge Questions:  46%|████▌     | 539/1172 [1:16:23<1:32:27,  8.76s/question]

Prediction: B, Correct: B, Running Accuracy: 65.49%


Processing ARC-Challenge Questions:  46%|████▌     | 540/1172 [1:16:31<1:28:44,  8.42s/question]

Prediction: D, Correct: D, Running Accuracy: 65.56%


Processing ARC-Challenge Questions:  46%|████▌     | 541/1172 [1:16:37<1:22:42,  7.86s/question]

Prediction: C, Correct: C, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  46%|████▌     | 542/1172 [1:16:47<1:27:06,  8.30s/question]

Prediction: C, Correct: C, Running Accuracy: 65.68%


Processing ARC-Challenge Questions:  46%|████▋     | 543/1172 [1:16:53<1:20:44,  7.70s/question]

Prediction: C, Correct: C, Running Accuracy: 65.75%


Processing ARC-Challenge Questions:  46%|████▋     | 544/1172 [1:17:00<1:18:17,  7.48s/question]

Prediction: C, Correct: C, Running Accuracy: 65.81%


Processing ARC-Challenge Questions:  47%|████▋     | 545/1172 [1:17:20<1:58:13, 11.31s/question]

Prediction: A, Correct: C, Running Accuracy: 65.69%


Processing ARC-Challenge Questions:  47%|████▋     | 546/1172 [1:17:30<1:53:57, 10.92s/question]

Prediction: A, Correct: C, Running Accuracy: 65.57%


Processing ARC-Challenge Questions:  47%|████▋     | 547/1172 [1:17:39<1:46:03, 10.18s/question]

Prediction: A, Correct: 1, Running Accuracy: 65.45%


Processing ARC-Challenge Questions:  47%|████▋     | 548/1172 [1:17:45<1:33:52,  9.03s/question]

Prediction: C, Correct: C, Running Accuracy: 65.51%


Processing ARC-Challenge Questions:  47%|████▋     | 549/1172 [1:17:55<1:36:22,  9.28s/question]

Prediction: C, Correct: B, Running Accuracy: 65.39%


Processing ARC-Challenge Questions:  47%|████▋     | 550/1172 [1:18:03<1:33:22,  9.01s/question]

Prediction: C, Correct: C, Running Accuracy: 65.45%


Processing ARC-Challenge Questions:  47%|████▋     | 551/1172 [1:18:11<1:28:03,  8.51s/question]

Prediction: D, Correct: D, Running Accuracy: 65.52%


Processing ARC-Challenge Questions:  47%|████▋     | 552/1172 [1:18:19<1:28:01,  8.52s/question]

Prediction: A, Correct: A, Running Accuracy: 65.58%


Processing ARC-Challenge Questions:  47%|████▋     | 553/1172 [1:18:27<1:27:16,  8.46s/question]

Prediction: C, Correct: C, Running Accuracy: 65.64%


Processing ARC-Challenge Questions:  47%|████▋     | 554/1172 [1:18:34<1:21:39,  7.93s/question]

Prediction: D, Correct: D, Running Accuracy: 65.70%


Processing ARC-Challenge Questions:  47%|████▋     | 555/1172 [1:18:42<1:22:44,  8.05s/question]

Prediction: D, Correct: D, Running Accuracy: 65.77%


Processing ARC-Challenge Questions:  47%|████▋     | 556/1172 [1:18:49<1:17:43,  7.57s/question]

Prediction: D, Correct: D, Running Accuracy: 65.83%


Processing ARC-Challenge Questions:  48%|████▊     | 557/1172 [1:18:57<1:20:30,  7.85s/question]

Prediction: B, Correct: D, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  48%|████▊     | 558/1172 [1:19:10<1:34:04,  9.19s/question]

Prediction: B, Correct: B, Running Accuracy: 65.77%


Processing ARC-Challenge Questions:  48%|████▊     | 559/1172 [1:19:17<1:29:29,  8.76s/question]

Prediction: D, Correct: D, Running Accuracy: 65.83%


Processing ARC-Challenge Questions:  48%|████▊     | 560/1172 [1:19:26<1:28:22,  8.66s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  48%|████▊     | 561/1172 [1:19:30<1:15:08,  7.38s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.60%


Processing ARC-Challenge Questions:  48%|████▊     | 562/1172 [1:19:37<1:14:19,  7.31s/question]

Prediction: A, Correct: A, Running Accuracy: 65.66%


Processing ARC-Challenge Questions:  48%|████▊     | 563/1172 [1:19:57<1:52:39, 11.10s/question]

Prediction: B, Correct: B, Running Accuracy: 65.72%


Processing ARC-Challenge Questions:  48%|████▊     | 564/1172 [1:20:06<1:43:32, 10.22s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.60%


Processing ARC-Challenge Questions:  48%|████▊     | 565/1172 [1:20:14<1:36:53,  9.58s/question]

Prediction: B, Correct: B, Running Accuracy: 65.66%


Processing ARC-Challenge Questions:  48%|████▊     | 566/1172 [1:20:21<1:29:23,  8.85s/question]

Prediction: C, Correct: C, Running Accuracy: 65.72%


Processing ARC-Challenge Questions:  48%|████▊     | 567/1172 [1:20:30<1:31:05,  9.03s/question]

Prediction: A, Correct: A, Running Accuracy: 65.78%


Processing ARC-Challenge Questions:  48%|████▊     | 568/1172 [1:20:36<1:21:44,  8.12s/question]

Prediction: C, Correct: C, Running Accuracy: 65.85%


Processing ARC-Challenge Questions:  49%|████▊     | 569/1172 [1:20:43<1:18:21,  7.80s/question]

Prediction: D, Correct: D, Running Accuracy: 65.91%


Processing ARC-Challenge Questions:  49%|████▊     | 570/1172 [1:20:53<1:24:49,  8.45s/question]

Prediction: D, Correct: D, Running Accuracy: 65.96%


Processing ARC-Challenge Questions:  49%|████▊     | 571/1172 [1:21:02<1:25:08,  8.50s/question]

Prediction: C, Correct: D, Running Accuracy: 65.85%


Processing ARC-Challenge Questions:  49%|████▉     | 572/1172 [1:21:08<1:17:18,  7.73s/question]

Prediction: A, Correct: A, Running Accuracy: 65.91%


Processing ARC-Challenge Questions:  49%|████▉     | 573/1172 [1:21:15<1:15:48,  7.59s/question]

Prediction: B, Correct: B, Running Accuracy: 65.97%


Processing ARC-Challenge Questions:  49%|████▉     | 574/1172 [1:21:25<1:21:55,  8.22s/question]

Prediction: A, Correct: B, Running Accuracy: 65.85%


Processing ARC-Challenge Questions:  49%|████▉     | 575/1172 [1:21:36<1:29:20,  8.98s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.74%


Processing ARC-Challenge Questions:  49%|████▉     | 576/1172 [1:21:44<1:27:06,  8.77s/question]

Prediction: C, Correct: C, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  49%|████▉     | 577/1172 [1:21:52<1:24:13,  8.49s/question]

Prediction: C, Correct: A, Running Accuracy: 65.68%


Processing ARC-Challenge Questions:  49%|████▉     | 578/1172 [1:22:01<1:25:25,  8.63s/question]

Prediction: C, Correct: C, Running Accuracy: 65.74%


Processing ARC-Challenge Questions:  49%|████▉     | 579/1172 [1:22:11<1:31:32,  9.26s/question]

Prediction: D, Correct: D, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  49%|████▉     | 580/1172 [1:22:19<1:27:36,  8.88s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.69%


Processing ARC-Challenge Questions:  50%|████▉     | 581/1172 [1:22:26<1:20:38,  8.19s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.58%


Processing ARC-Challenge Questions:  50%|████▉     | 582/1172 [1:22:34<1:19:12,  8.05s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.46%


Processing ARC-Challenge Questions:  50%|████▉     | 583/1172 [1:22:41<1:16:45,  7.82s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.35%


Processing ARC-Challenge Questions:  50%|████▉     | 584/1172 [1:22:53<1:29:43,  9.16s/question]

Prediction: B, Correct: B, Running Accuracy: 65.41%


Processing ARC-Challenge Questions:  50%|████▉     | 585/1172 [1:23:02<1:27:45,  8.97s/question]

Prediction: C, Correct: A, Running Accuracy: 65.30%


Processing ARC-Challenge Questions:  50%|█████     | 586/1172 [1:23:09<1:21:17,  8.32s/question]

Prediction: D, Correct: D, Running Accuracy: 65.36%


Processing ARC-Challenge Questions:  50%|█████     | 587/1172 [1:23:15<1:15:38,  7.76s/question]

Prediction: B, Correct: B, Running Accuracy: 65.42%


Processing ARC-Challenge Questions:  50%|█████     | 588/1172 [1:23:22<1:12:26,  7.44s/question]

Prediction: A, Correct: B, Running Accuracy: 65.31%


Processing ARC-Challenge Questions:  50%|█████     | 589/1172 [1:23:30<1:13:26,  7.56s/question]

Prediction: C, Correct: C, Running Accuracy: 65.37%


Processing ARC-Challenge Questions:  50%|█████     | 590/1172 [1:23:35<1:07:25,  6.95s/question]

Prediction: D, Correct: D, Running Accuracy: 65.42%


Processing ARC-Challenge Questions:  50%|█████     | 591/1172 [1:23:44<1:13:17,  7.57s/question]

Prediction: A, Correct: A, Running Accuracy: 65.48%


Processing ARC-Challenge Questions:  51%|█████     | 592/1172 [1:23:52<1:13:36,  7.61s/question]

Prediction: D, Correct: D, Running Accuracy: 65.54%


Processing ARC-Challenge Questions:  51%|█████     | 593/1172 [1:23:59<1:11:09,  7.37s/question]

Prediction: C, Correct: C, Running Accuracy: 65.60%


Processing ARC-Challenge Questions:  51%|█████     | 594/1172 [1:24:06<1:11:52,  7.46s/question]

Prediction: A, Correct: A, Running Accuracy: 65.66%


Processing ARC-Challenge Questions:  51%|█████     | 595/1172 [1:24:15<1:15:06,  7.81s/question]

Prediction: B, Correct: D, Running Accuracy: 65.55%


Processing ARC-Challenge Questions:  51%|█████     | 596/1172 [1:24:23<1:16:53,  8.01s/question]

Prediction: A, Correct: A, Running Accuracy: 65.60%


Processing ARC-Challenge Questions:  51%|█████     | 597/1172 [1:24:35<1:27:21,  9.12s/question]

Prediction: D, Correct: D, Running Accuracy: 65.66%


Processing ARC-Challenge Questions:  51%|█████     | 598/1172 [1:24:44<1:26:23,  9.03s/question]

Prediction: C, Correct: A, Running Accuracy: 65.55%


Processing ARC-Challenge Questions:  51%|█████     | 599/1172 [1:24:49<1:15:38,  7.92s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.44%


Processing ARC-Challenge Questions:  51%|█████     | 600/1172 [1:24:59<1:21:43,  8.57s/question]

Prediction: D, Correct: D, Running Accuracy: 65.50%


Processing ARC-Challenge Questions:  51%|█████▏    | 601/1172 [1:25:07<1:17:51,  8.18s/question]

Prediction: B, Correct: B, Running Accuracy: 65.56%


Processing ARC-Challenge Questions:  51%|█████▏    | 602/1172 [1:25:13<1:12:01,  7.58s/question]

Prediction: D, Correct: D, Running Accuracy: 65.61%


Processing ARC-Challenge Questions:  51%|█████▏    | 603/1172 [1:25:24<1:21:51,  8.63s/question]

Prediction: A, Correct: A, Running Accuracy: 65.67%


Processing ARC-Challenge Questions:  52%|█████▏    | 604/1172 [1:25:31<1:17:10,  8.15s/question]

Prediction: D, Correct: D, Running Accuracy: 65.73%


Processing ARC-Challenge Questions:  52%|█████▏    | 605/1172 [1:25:38<1:13:52,  7.82s/question]

Prediction: D, Correct: D, Running Accuracy: 65.79%


Processing ARC-Challenge Questions:  52%|█████▏    | 606/1172 [1:25:52<1:32:15,  9.78s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.68%


Processing ARC-Challenge Questions:  52%|█████▏    | 607/1172 [1:26:05<1:40:36, 10.68s/question]

Prediction: B, Correct: B, Running Accuracy: 65.73%


Processing ARC-Challenge Questions:  52%|█████▏    | 608/1172 [1:26:13<1:31:56,  9.78s/question]

Prediction: C, Correct: B, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  52%|█████▏    | 609/1172 [1:26:21<1:26:43,  9.24s/question]

Prediction: D, Correct: D, Running Accuracy: 65.68%


Processing ARC-Challenge Questions:  52%|█████▏    | 610/1172 [1:26:29<1:25:14,  9.10s/question]

Prediction: D, Correct: C, Running Accuracy: 65.57%


Processing ARC-Challenge Questions:  52%|█████▏    | 611/1172 [1:26:37<1:19:37,  8.52s/question]

Prediction: D, Correct: D, Running Accuracy: 65.63%


Processing ARC-Challenge Questions:  52%|█████▏    | 612/1172 [1:26:46<1:22:27,  8.83s/question]

Prediction: B, Correct: B, Running Accuracy: 65.69%


Processing ARC-Challenge Questions:  52%|█████▏    | 613/1172 [1:27:00<1:36:26, 10.35s/question]

Prediction: C, Correct: C, Running Accuracy: 65.74%


Processing ARC-Challenge Questions:  52%|█████▏    | 614/1172 [1:27:12<1:40:46, 10.84s/question]

Prediction: A, Correct: A, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  52%|█████▏    | 615/1172 [1:27:20<1:31:11,  9.82s/question]

Prediction: A, Correct: A, Running Accuracy: 65.85%


Processing ARC-Challenge Questions:  53%|█████▎    | 616/1172 [1:27:32<1:38:26, 10.62s/question]

Prediction: A, Correct: A, Running Accuracy: 65.91%


Processing ARC-Challenge Questions:  53%|█████▎    | 617/1172 [1:27:38<1:25:59,  9.30s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  53%|█████▎    | 618/1172 [1:27:55<1:45:14, 11.40s/question]

Prediction: C, Correct: C, Running Accuracy: 65.86%


Processing ARC-Challenge Questions:  53%|█████▎    | 619/1172 [1:28:00<1:29:31,  9.71s/question]

Prediction: B, Correct: B, Running Accuracy: 65.91%


Processing ARC-Challenge Questions:  53%|█████▎    | 620/1172 [1:28:09<1:26:21,  9.39s/question]

Prediction: C, Correct: C, Running Accuracy: 65.97%


Processing ARC-Challenge Questions:  53%|█████▎    | 621/1172 [1:28:19<1:27:10,  9.49s/question]

Prediction: D, Correct: D, Running Accuracy: 66.02%


Processing ARC-Challenge Questions:  53%|█████▎    | 622/1172 [1:28:25<1:19:06,  8.63s/question]

Prediction: B, Correct: B, Running Accuracy: 66.08%


Processing ARC-Challenge Questions:  53%|█████▎    | 623/1172 [1:28:32<1:12:43,  7.95s/question]

Prediction: B, Correct: B, Running Accuracy: 66.13%


Processing ARC-Challenge Questions:  53%|█████▎    | 624/1172 [1:28:43<1:23:07,  9.10s/question]

Prediction: C, Correct: C, Running Accuracy: 66.19%


Processing ARC-Challenge Questions:  53%|█████▎    | 625/1172 [1:28:53<1:23:25,  9.15s/question]

Prediction: B, Correct: B, Running Accuracy: 66.24%


Processing ARC-Challenge Questions:  53%|█████▎    | 626/1172 [1:29:01<1:21:03,  8.91s/question]

Prediction: D, Correct: B, Running Accuracy: 66.13%


Processing ARC-Challenge Questions:  53%|█████▎    | 627/1172 [1:29:10<1:20:52,  8.90s/question]

Prediction: C, Correct: C, Running Accuracy: 66.19%


Processing ARC-Challenge Questions:  54%|█████▎    | 628/1172 [1:29:17<1:14:37,  8.23s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 66.08%


Processing ARC-Challenge Questions:  54%|█████▎    | 629/1172 [1:29:27<1:19:17,  8.76s/question]

Prediction: C, Correct: C, Running Accuracy: 66.14%


Processing ARC-Challenge Questions:  54%|█████▍    | 630/1172 [1:29:32<1:10:43,  7.83s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 66.03%


Processing ARC-Challenge Questions:  54%|█████▍    | 631/1172 [1:29:42<1:15:08,  8.33s/question]

Prediction: D, Correct: B, Running Accuracy: 65.93%


Processing ARC-Challenge Questions:  54%|█████▍    | 632/1172 [1:29:50<1:13:26,  8.16s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.82%


Processing ARC-Challenge Questions:  54%|█████▍    | 633/1172 [1:29:58<1:13:57,  8.23s/question]

Prediction: D, Correct: D, Running Accuracy: 65.88%


Processing ARC-Challenge Questions:  54%|█████▍    | 634/1172 [1:30:05<1:10:53,  7.91s/question]

Prediction: B, Correct: B, Running Accuracy: 65.93%


Processing ARC-Challenge Questions:  54%|█████▍    | 635/1172 [1:30:15<1:16:10,  8.51s/question]

Prediction: D, Correct: D, Running Accuracy: 65.98%


Processing ARC-Challenge Questions:  54%|█████▍    | 636/1172 [1:30:33<1:42:48, 11.51s/question]

Prediction: B, Correct: B, Running Accuracy: 66.04%


Processing ARC-Challenge Questions:  54%|█████▍    | 637/1172 [1:30:44<1:41:09, 11.35s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.93%


Processing ARC-Challenge Questions:  54%|█████▍    | 638/1172 [1:30:58<1:45:41, 11.88s/question]

Prediction: D, Correct: D, Running Accuracy: 65.99%


Processing ARC-Challenge Questions:  55%|█████▍    | 639/1172 [1:31:05<1:33:09, 10.49s/question]

Prediction: A, Correct: C, Running Accuracy: 65.88%


Processing ARC-Challenge Questions:  55%|█████▍    | 640/1172 [1:31:18<1:40:45, 11.36s/question]

Prediction: A, Correct: D, Running Accuracy: 65.78%


Processing ARC-Challenge Questions:  55%|█████▍    | 641/1172 [1:31:27<1:33:04, 10.52s/question]

Prediction: B, Correct: B, Running Accuracy: 65.83%


Processing ARC-Challenge Questions:  55%|█████▍    | 642/1172 [1:31:31<1:16:56,  8.71s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.73%


Processing ARC-Challenge Questions:  55%|█████▍    | 643/1172 [1:31:42<1:21:07,  9.20s/question]

Prediction: C, Correct: C, Running Accuracy: 65.79%


Processing ARC-Challenge Questions:  55%|█████▍    | 644/1172 [1:31:50<1:19:51,  9.07s/question]

Prediction: B, Correct: B, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  55%|█████▌    | 645/1172 [1:32:00<1:21:59,  9.33s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.74%


Processing ARC-Challenge Questions:  55%|█████▌    | 646/1172 [1:32:08<1:17:58,  8.90s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.63%


Processing ARC-Challenge Questions:  55%|█████▌    | 647/1172 [1:32:16<1:15:33,  8.64s/question]

Prediction: C, Correct: C, Running Accuracy: 65.69%


Processing ARC-Challenge Questions:  55%|█████▌    | 648/1172 [1:32:23<1:10:23,  8.06s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 65.59%


Processing ARC-Challenge Questions:  55%|█████▌    | 649/1172 [1:32:31<1:09:33,  7.98s/question]

Prediction: B, Correct: B, Running Accuracy: 65.64%


Processing ARC-Challenge Questions:  55%|█████▌    | 650/1172 [1:32:39<1:09:14,  7.96s/question]

Prediction: A, Correct: A, Running Accuracy: 65.69%


Processing ARC-Challenge Questions:  56%|█████▌    | 651/1172 [1:32:45<1:04:37,  7.44s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.59%


Processing ARC-Challenge Questions:  56%|█████▌    | 652/1172 [1:32:53<1:06:09,  7.63s/question]

Prediction: C, Correct: C, Running Accuracy: 65.64%


Processing ARC-Challenge Questions:  56%|█████▌    | 653/1172 [1:32:58<1:00:00,  6.94s/question]

Prediction: C, Correct: C, Running Accuracy: 65.70%


Processing ARC-Challenge Questions:  56%|█████▌    | 654/1172 [1:33:07<1:05:38,  7.60s/question]

Prediction: A, Correct: A, Running Accuracy: 65.75%


Processing ARC-Challenge Questions:  56%|█████▌    | 655/1172 [1:33:16<1:08:13,  7.92s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.65%


Processing ARC-Challenge Questions:  56%|█████▌    | 656/1172 [1:33:30<1:23:41,  9.73s/question]

Prediction: C, Correct: C, Running Accuracy: 65.70%


Processing ARC-Challenge Questions:  56%|█████▌    | 657/1172 [1:33:38<1:18:40,  9.17s/question]

Prediction: B, Correct: B, Running Accuracy: 65.75%


Processing ARC-Challenge Questions:  56%|█████▌    | 658/1172 [1:33:46<1:15:44,  8.84s/question]

Prediction: D, Correct: D, Running Accuracy: 65.81%


Processing ARC-Challenge Questions:  56%|█████▌    | 659/1172 [1:34:09<1:51:11, 13.01s/question]

Prediction: C, Correct: C, Running Accuracy: 65.86%


Processing ARC-Challenge Questions:  56%|█████▋    | 660/1172 [1:34:21<1:48:22, 12.70s/question]

Prediction: B, Correct: B, Running Accuracy: 65.91%


Processing ARC-Challenge Questions:  56%|█████▋    | 661/1172 [1:34:31<1:41:22, 11.90s/question]

Prediction: A, Correct: B, Running Accuracy: 65.81%


Processing ARC-Challenge Questions:  56%|█████▋    | 662/1172 [1:34:44<1:43:36, 12.19s/question]

Prediction: A, Correct: C, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  57%|█████▋    | 663/1172 [1:34:51<1:32:07, 10.86s/question]

Prediction: C, Correct: C, Running Accuracy: 65.76%


Processing ARC-Challenge Questions:  57%|█████▋    | 664/1172 [1:34:58<1:22:19,  9.72s/question]

Prediction: D, Correct: A, Running Accuracy: 65.66%


Processing ARC-Challenge Questions:  57%|█████▋    | 665/1172 [1:35:07<1:19:16,  9.38s/question]

Prediction: C, Correct: C, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  57%|█████▋    | 666/1172 [1:35:14<1:12:18,  8.57s/question]

Prediction: D, Correct: D, Running Accuracy: 65.77%


Processing ARC-Challenge Questions:  57%|█████▋    | 667/1172 [1:35:20<1:06:33,  7.91s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.67%


Processing ARC-Challenge Questions:  57%|█████▋    | 668/1172 [1:35:30<1:12:29,  8.63s/question]

Prediction: A, Correct: A, Running Accuracy: 65.72%


Processing ARC-Challenge Questions:  57%|█████▋    | 669/1172 [1:35:38<1:09:17,  8.26s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  57%|█████▋    | 670/1172 [1:35:48<1:13:54,  8.83s/question]

Prediction: C, Correct: C, Running Accuracy: 65.67%


Processing ARC-Challenge Questions:  57%|█████▋    | 671/1172 [1:35:55<1:08:11,  8.17s/question]

Prediction: C, Correct: 3, Running Accuracy: 65.57%


Processing ARC-Challenge Questions:  57%|█████▋    | 672/1172 [1:36:04<1:11:30,  8.58s/question]

Prediction: C, Correct: C, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  57%|█████▋    | 673/1172 [1:36:11<1:06:18,  7.97s/question]

Prediction: D, Correct: D, Running Accuracy: 65.68%


Processing ARC-Challenge Questions:  58%|█████▊    | 674/1172 [1:36:19<1:07:36,  8.15s/question]

Prediction: C, Correct: C, Running Accuracy: 65.73%


Processing ARC-Challenge Questions:  58%|█████▊    | 675/1172 [1:36:28<1:09:31,  8.39s/question]

Prediction: A, Correct: A, Running Accuracy: 65.78%


Processing ARC-Challenge Questions:  58%|█████▊    | 676/1172 [1:36:35<1:04:42,  7.83s/question]

Prediction: C, Correct: C, Running Accuracy: 65.83%


Processing ARC-Challenge Questions:  58%|█████▊    | 677/1172 [1:36:44<1:07:19,  8.16s/question]

Prediction: B, Correct: B, Running Accuracy: 65.88%


Processing ARC-Challenge Questions:  58%|█████▊    | 678/1172 [1:36:52<1:07:36,  8.21s/question]

Prediction: D, Correct: A, Running Accuracy: 65.78%


Processing ARC-Challenge Questions:  58%|█████▊    | 679/1172 [1:37:00<1:06:09,  8.05s/question]

Prediction: D, Correct: D, Running Accuracy: 65.83%


Processing ARC-Challenge Questions:  58%|█████▊    | 680/1172 [1:37:06<1:02:05,  7.57s/question]

Prediction: D, Correct: C, Running Accuracy: 65.74%


Processing ARC-Challenge Questions:  58%|█████▊    | 681/1172 [1:37:14<1:03:55,  7.81s/question]

Prediction: D, Correct: D, Running Accuracy: 65.79%


Processing ARC-Challenge Questions:  58%|█████▊    | 682/1172 [1:37:23<1:05:18,  8.00s/question]

Prediction: B, Correct: B, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  58%|█████▊    | 683/1172 [1:37:32<1:09:07,  8.48s/question]

Prediction: A, Correct: A, Running Accuracy: 65.89%


Processing ARC-Challenge Questions:  58%|█████▊    | 684/1172 [1:37:43<1:14:19,  9.14s/question]

Prediction: C, Correct: C, Running Accuracy: 65.94%


Processing ARC-Challenge Questions:  58%|█████▊    | 685/1172 [1:37:53<1:16:02,  9.37s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  59%|█████▊    | 686/1172 [1:38:01<1:13:29,  9.07s/question]

Prediction: B, Correct: B, Running Accuracy: 65.89%


Processing ARC-Challenge Questions:  59%|█████▊    | 687/1172 [1:38:11<1:15:19,  9.32s/question]

Prediction: C, Correct: B, Running Accuracy: 65.79%


Processing ARC-Challenge Questions:  59%|█████▊    | 688/1172 [1:38:20<1:13:13,  9.08s/question]

Prediction: B, Correct: B, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  59%|█████▉    | 689/1172 [1:38:26<1:07:01,  8.33s/question]

Prediction: B, Correct: B, Running Accuracy: 65.89%


Processing ARC-Challenge Questions:  59%|█████▉    | 690/1172 [1:38:36<1:10:17,  8.75s/question]

Prediction: D, Correct: D, Running Accuracy: 65.94%


Processing ARC-Challenge Questions:  59%|█████▉    | 691/1172 [1:38:44<1:08:37,  8.56s/question]

Prediction: D, Correct: D, Running Accuracy: 65.99%


Processing ARC-Challenge Questions:  59%|█████▉    | 692/1172 [1:38:53<1:08:19,  8.54s/question]

Prediction: A, Correct: A, Running Accuracy: 66.04%


Processing ARC-Challenge Questions:  59%|█████▉    | 693/1172 [1:39:02<1:09:32,  8.71s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.95%


Processing ARC-Challenge Questions:  59%|█████▉    | 694/1172 [1:39:16<1:21:21, 10.21s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.85%


Processing ARC-Challenge Questions:  59%|█████▉    | 695/1172 [1:39:23<1:14:16,  9.34s/question]

Prediction: D, Correct: A, Running Accuracy: 65.76%


Processing ARC-Challenge Questions:  59%|█████▉    | 696/1172 [1:39:29<1:06:50,  8.42s/question]

Prediction: C, Correct: C, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  59%|█████▉    | 697/1172 [1:39:36<1:02:06,  7.85s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  60%|█████▉    | 698/1172 [1:39:42<58:58,  7.46s/question]  

Prediction: D, Correct: A, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  60%|█████▉    | 699/1172 [1:39:51<1:02:44,  7.96s/question]

Prediction: B, Correct: B, Running Accuracy: 65.67%


Processing ARC-Challenge Questions:  60%|█████▉    | 700/1172 [1:39:58<59:32,  7.57s/question]  

Prediction: C, Correct: C, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  60%|█████▉    | 701/1172 [1:40:04<56:30,  7.20s/question]

Prediction: A, Correct: D, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  60%|█████▉    | 702/1172 [1:40:13<1:00:19,  7.70s/question]

Prediction: D, Correct: D, Running Accuracy: 65.67%


Processing ARC-Challenge Questions:  60%|█████▉    | 703/1172 [1:40:20<57:03,  7.30s/question]  

Prediction: C, Correct: C, Running Accuracy: 65.72%


Processing ARC-Challenge Questions:  60%|██████    | 704/1172 [1:40:30<1:04:48,  8.31s/question]

Prediction: D, Correct: D, Running Accuracy: 65.77%


Processing ARC-Challenge Questions:  60%|██████    | 705/1172 [1:40:37<1:01:09,  7.86s/question]

Prediction: B, Correct: B, Running Accuracy: 65.82%


Processing ARC-Challenge Questions:  60%|██████    | 706/1172 [1:40:47<1:05:57,  8.49s/question]

Prediction: C, Correct: C, Running Accuracy: 65.86%


Processing ARC-Challenge Questions:  60%|██████    | 707/1172 [1:40:55<1:04:56,  8.38s/question]

Prediction: D, Correct: A, Running Accuracy: 65.77%


Processing ARC-Challenge Questions:  60%|██████    | 708/1172 [1:41:07<1:13:20,  9.48s/question]

Prediction: C, Correct: B, Running Accuracy: 65.68%


Processing ARC-Challenge Questions:  60%|██████    | 709/1172 [1:41:15<1:09:33,  9.01s/question]

Prediction: B, Correct: B, Running Accuracy: 65.73%


Processing ARC-Challenge Questions:  61%|██████    | 710/1172 [1:41:24<1:08:32,  8.90s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.63%


Processing ARC-Challenge Questions:  61%|██████    | 711/1172 [1:41:30<1:01:19,  7.98s/question]

Prediction: D, Correct: D, Running Accuracy: 65.68%


Processing ARC-Challenge Questions:  61%|██████    | 712/1172 [1:41:36<57:45,  7.53s/question]  

Prediction: C, Correct: C, Running Accuracy: 65.73%


Processing ARC-Challenge Questions:  61%|██████    | 713/1172 [1:41:50<1:11:20,  9.33s/question]

Prediction: A, Correct: A, Running Accuracy: 65.78%


Processing ARC-Challenge Questions:  61%|██████    | 714/1172 [1:41:57<1:05:57,  8.64s/question]

Prediction: B, Correct: B, Running Accuracy: 65.83%


Processing ARC-Challenge Questions:  61%|██████    | 715/1172 [1:42:08<1:11:38,  9.41s/question]

Prediction: A, Correct: C, Running Accuracy: 65.73%


Processing ARC-Challenge Questions:  61%|██████    | 716/1172 [1:42:19<1:15:57, 10.00s/question]

Prediction: D, Correct: D, Running Accuracy: 65.78%


Processing ARC-Challenge Questions:  61%|██████    | 717/1172 [1:42:25<1:05:40,  8.66s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.69%


Processing ARC-Challenge Questions:  61%|██████▏   | 718/1172 [1:42:36<1:10:38,  9.34s/question]

Prediction: A, Correct: C, Running Accuracy: 65.60%


Processing ARC-Challenge Questions:  61%|██████▏   | 719/1172 [1:42:44<1:07:32,  8.95s/question]

Prediction: A, Correct: A, Running Accuracy: 65.65%


Processing ARC-Challenge Questions:  61%|██████▏   | 720/1172 [1:42:54<1:10:36,  9.37s/question]

Prediction: D, Correct: D, Running Accuracy: 65.69%


Processing ARC-Challenge Questions:  62%|██████▏   | 721/1172 [1:43:02<1:06:49,  8.89s/question]

Prediction: B, Correct: B, Running Accuracy: 65.74%


Processing ARC-Challenge Questions:  62%|██████▏   | 722/1172 [1:43:09<1:01:44,  8.23s/question]

Prediction: B, Correct: B, Running Accuracy: 65.79%


Processing ARC-Challenge Questions:  62%|██████▏   | 723/1172 [1:43:17<1:01:22,  8.20s/question]

Prediction: D, Correct: D, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  62%|██████▏   | 724/1172 [1:43:28<1:09:16,  9.28s/question]

Prediction: B, Correct: B, Running Accuracy: 65.88%


Processing ARC-Challenge Questions:  62%|██████▏   | 725/1172 [1:43:37<1:06:24,  8.91s/question]

Prediction: D, Correct: D, Running Accuracy: 65.93%


Processing ARC-Challenge Questions:  62%|██████▏   | 726/1172 [1:43:45<1:05:52,  8.86s/question]

Prediction: B, Correct: B, Running Accuracy: 65.98%


Processing ARC-Challenge Questions:  62%|██████▏   | 727/1172 [1:43:56<1:08:50,  9.28s/question]

Prediction: D, Correct: D, Running Accuracy: 66.02%


Processing ARC-Challenge Questions:  62%|██████▏   | 728/1172 [1:44:05<1:08:37,  9.27s/question]

Prediction: C, Correct: C, Running Accuracy: 66.07%


Processing ARC-Challenge Questions:  62%|██████▏   | 729/1172 [1:44:13<1:05:57,  8.93s/question]

Prediction: A, Correct: D, Running Accuracy: 65.98%


Processing ARC-Challenge Questions:  62%|██████▏   | 730/1172 [1:44:21<1:03:22,  8.60s/question]

Prediction: C, Correct: C, Running Accuracy: 66.03%


Processing ARC-Challenge Questions:  62%|██████▏   | 731/1172 [1:44:31<1:07:25,  9.17s/question]

Prediction: B, Correct: B, Running Accuracy: 66.07%


Processing ARC-Challenge Questions:  62%|██████▏   | 732/1172 [1:44:40<1:06:08,  9.02s/question]

Prediction: D, Correct: B, Running Accuracy: 65.98%


Processing ARC-Challenge Questions:  63%|██████▎   | 733/1172 [1:44:53<1:15:10, 10.27s/question]

Prediction: D, Correct: D, Running Accuracy: 66.03%


Processing ARC-Challenge Questions:  63%|██████▎   | 734/1172 [1:45:00<1:07:29,  9.25s/question]

Prediction: C, Correct: C, Running Accuracy: 66.08%


Processing ARC-Challenge Questions:  63%|██████▎   | 735/1172 [1:45:09<1:07:12,  9.23s/question]

Prediction: D, Correct: D, Running Accuracy: 66.12%


Processing ARC-Challenge Questions:  63%|██████▎   | 736/1172 [1:45:19<1:08:35,  9.44s/question]

Prediction: B, Correct: D, Running Accuracy: 66.03%


Processing ARC-Challenge Questions:  63%|██████▎   | 737/1172 [1:45:29<1:08:47,  9.49s/question]

Prediction: D, Correct: D, Running Accuracy: 66.08%


Processing ARC-Challenge Questions:  63%|██████▎   | 738/1172 [1:45:36<1:04:05,  8.86s/question]

Prediction: A, Correct: D, Running Accuracy: 65.99%


Processing ARC-Challenge Questions:  63%|██████▎   | 739/1172 [1:45:44<1:02:43,  8.69s/question]

Prediction: D, Correct: D, Running Accuracy: 66.04%


Processing ARC-Challenge Questions:  63%|██████▎   | 740/1172 [1:45:57<1:10:48,  9.83s/question]

Prediction: C, Correct: C, Running Accuracy: 66.08%


Processing ARC-Challenge Questions:  63%|██████▎   | 741/1172 [1:46:06<1:08:21,  9.52s/question]

Prediction: B, Correct: B, Running Accuracy: 66.13%


Processing ARC-Challenge Questions:  63%|██████▎   | 742/1172 [1:46:14<1:05:44,  9.17s/question]

Prediction: D, Correct: D, Running Accuracy: 66.17%


Processing ARC-Challenge Questions:  63%|██████▎   | 743/1172 [1:46:23<1:05:48,  9.20s/question]

Prediction: B, Correct: B, Running Accuracy: 66.22%


Processing ARC-Challenge Questions:  63%|██████▎   | 744/1172 [1:46:33<1:06:37,  9.34s/question]

Prediction: D, Correct: D, Running Accuracy: 66.26%


Processing ARC-Challenge Questions:  64%|██████▎   | 745/1172 [1:46:43<1:08:32,  9.63s/question]

Prediction: A, Correct: C, Running Accuracy: 66.17%


Processing ARC-Challenge Questions:  64%|██████▎   | 746/1172 [1:46:51<1:05:06,  9.17s/question]

Prediction: D, Correct: D, Running Accuracy: 66.22%


Processing ARC-Challenge Questions:  64%|██████▎   | 747/1172 [1:47:00<1:04:31,  9.11s/question]

Prediction: D, Correct: B, Running Accuracy: 66.13%


Processing ARC-Challenge Questions:  64%|██████▍   | 748/1172 [1:47:07<58:37,  8.30s/question]  

Prediction: Unknown, Correct: C, Running Accuracy: 66.04%


Processing ARC-Challenge Questions:  64%|██████▍   | 749/1172 [1:47:16<1:00:06,  8.53s/question]

Prediction: A, Correct: A, Running Accuracy: 66.09%


Processing ARC-Challenge Questions:  64%|██████▍   | 750/1172 [1:47:32<1:16:20, 10.85s/question]

Prediction: C, Correct: C, Running Accuracy: 66.13%


Processing ARC-Challenge Questions:  64%|██████▍   | 751/1172 [1:47:42<1:13:55, 10.54s/question]

Prediction: D, Correct: D, Running Accuracy: 66.18%


Processing ARC-Challenge Questions:  64%|██████▍   | 752/1172 [1:47:50<1:09:09,  9.88s/question]

Prediction: D, Correct: D, Running Accuracy: 66.22%


Processing ARC-Challenge Questions:  64%|██████▍   | 753/1172 [1:48:03<1:14:59, 10.74s/question]

Prediction: C, Correct: D, Running Accuracy: 66.14%


Processing ARC-Challenge Questions:  64%|██████▍   | 754/1172 [1:48:13<1:12:32, 10.41s/question]

Prediction: D, Correct: D, Running Accuracy: 66.18%


Processing ARC-Challenge Questions:  64%|██████▍   | 755/1172 [1:48:18<1:01:42,  8.88s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 66.09%


Processing ARC-Challenge Questions:  65%|██████▍   | 756/1172 [1:48:34<1:16:46, 11.07s/question]

Prediction: D, Correct: D, Running Accuracy: 66.14%


Processing ARC-Challenge Questions:  65%|██████▍   | 757/1172 [1:48:43<1:12:58, 10.55s/question]

Prediction: D, Correct: D, Running Accuracy: 66.18%


Processing ARC-Challenge Questions:  65%|██████▍   | 758/1172 [1:48:51<1:05:47,  9.53s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 66.09%


Processing ARC-Challenge Questions:  65%|██████▍   | 759/1172 [1:48:59<1:02:53,  9.14s/question]

Prediction: A, Correct: A, Running Accuracy: 66.14%


Processing ARC-Challenge Questions:  65%|██████▍   | 760/1172 [1:49:06<59:22,  8.65s/question]  

Prediction: Unknown, Correct: B, Running Accuracy: 66.05%


Processing ARC-Challenge Questions:  65%|██████▍   | 761/1172 [1:49:14<56:14,  8.21s/question]

Prediction: C, Correct: C, Running Accuracy: 66.10%


Processing ARC-Challenge Questions:  65%|██████▌   | 762/1172 [1:49:25<1:01:56,  9.07s/question]

Prediction: B, Correct: C, Running Accuracy: 66.01%


Processing ARC-Challenge Questions:  65%|██████▌   | 763/1172 [1:49:35<1:05:12,  9.57s/question]

Prediction: A, Correct: A, Running Accuracy: 66.06%


Processing ARC-Challenge Questions:  65%|██████▌   | 764/1172 [1:49:42<58:43,  8.64s/question]  

Prediction: Unknown, Correct: A, Running Accuracy: 65.97%


Processing ARC-Challenge Questions:  65%|██████▌   | 765/1172 [1:49:50<58:00,  8.55s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.88%


Processing ARC-Challenge Questions:  65%|██████▌   | 766/1172 [1:49:59<59:01,  8.72s/question]

Prediction: B, Correct: A, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  65%|██████▌   | 767/1172 [1:50:11<1:04:17,  9.52s/question]

Prediction: D, Correct: D, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  66%|██████▌   | 768/1172 [1:50:19<1:01:59,  9.21s/question]

Prediction: C, Correct: C, Running Accuracy: 65.89%


Processing ARC-Challenge Questions:  66%|██████▌   | 769/1172 [1:50:30<1:05:30,  9.75s/question]

Prediction: C, Correct: A, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  66%|██████▌   | 770/1172 [1:50:37<59:52,  8.94s/question]  

Prediction: A, Correct: A, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  66%|██████▌   | 771/1172 [1:50:46<59:27,  8.90s/question]

Prediction: D, Correct: D, Running Accuracy: 65.89%


Processing ARC-Challenge Questions:  66%|██████▌   | 772/1172 [1:50:53<54:43,  8.21s/question]

Prediction: B, Correct: B, Running Accuracy: 65.93%


Processing ARC-Challenge Questions:  66%|██████▌   | 773/1172 [1:51:03<58:27,  8.79s/question]

Prediction: C, Correct: C, Running Accuracy: 65.98%


Processing ARC-Challenge Questions:  66%|██████▌   | 774/1172 [1:51:11<57:06,  8.61s/question]

Prediction: B, Correct: B, Running Accuracy: 66.02%


Processing ARC-Challenge Questions:  66%|██████▌   | 775/1172 [1:51:21<1:00:26,  9.13s/question]

Prediction: C, Correct: C, Running Accuracy: 66.06%


Processing ARC-Challenge Questions:  66%|██████▌   | 776/1172 [1:51:28<55:30,  8.41s/question]  

Prediction: A, Correct: A, Running Accuracy: 66.11%


Processing ARC-Challenge Questions:  66%|██████▋   | 777/1172 [1:51:40<1:03:27,  9.64s/question]

Prediction: A, Correct: A, Running Accuracy: 66.15%


Processing ARC-Challenge Questions:  66%|██████▋   | 778/1172 [1:51:48<58:32,  8.92s/question]  

Prediction: A, Correct: D, Running Accuracy: 66.07%


Processing ARC-Challenge Questions:  66%|██████▋   | 779/1172 [1:51:55<55:23,  8.46s/question]

Prediction: B, Correct: B, Running Accuracy: 66.11%


Processing ARC-Challenge Questions:  67%|██████▋   | 780/1172 [1:52:06<59:23,  9.09s/question]

Prediction: A, Correct: A, Running Accuracy: 66.15%


Processing ARC-Challenge Questions:  67%|██████▋   | 781/1172 [1:52:14<58:04,  8.91s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 66.07%


Processing ARC-Challenge Questions:  67%|██████▋   | 782/1172 [1:52:22<56:14,  8.65s/question]

Prediction: B, Correct: B, Running Accuracy: 66.11%


Processing ARC-Challenge Questions:  67%|██████▋   | 783/1172 [1:52:28<51:12,  7.90s/question]

Prediction: B, Correct: B, Running Accuracy: 66.16%


Processing ARC-Challenge Questions:  67%|██████▋   | 784/1172 [1:52:37<53:27,  8.27s/question]

Prediction: B, Correct: B, Running Accuracy: 66.20%


Processing ARC-Challenge Questions:  67%|██████▋   | 785/1172 [1:52:45<52:05,  8.08s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 66.11%


Processing ARC-Challenge Questions:  67%|██████▋   | 786/1172 [1:52:53<50:37,  7.87s/question]

Prediction: D, Correct: D, Running Accuracy: 66.16%


Processing ARC-Challenge Questions:  67%|██████▋   | 787/1172 [1:53:00<50:35,  7.88s/question]

Prediction: B, Correct: C, Running Accuracy: 66.07%


Processing ARC-Challenge Questions:  67%|██████▋   | 788/1172 [1:53:20<1:12:56, 11.40s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.99%


Processing ARC-Challenge Questions:  67%|██████▋   | 789/1172 [1:53:28<1:06:42, 10.45s/question]

Prediction: C, Correct: C, Running Accuracy: 66.03%


Processing ARC-Challenge Questions:  67%|██████▋   | 790/1172 [1:53:38<1:05:21, 10.27s/question]

Prediction: A, Correct: A, Running Accuracy: 66.08%


Processing ARC-Challenge Questions:  67%|██████▋   | 791/1172 [1:53:47<1:02:10,  9.79s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.99%


Processing ARC-Challenge Questions:  68%|██████▊   | 792/1172 [1:53:53<54:20,  8.58s/question]  

Prediction: D, Correct: D, Running Accuracy: 66.04%


Processing ARC-Challenge Questions:  68%|██████▊   | 793/1172 [1:54:00<51:50,  8.21s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.95%


Processing ARC-Challenge Questions:  68%|██████▊   | 794/1172 [1:54:07<50:29,  8.01s/question]

Prediction: B, Correct: B, Running Accuracy: 65.99%


Processing ARC-Challenge Questions:  68%|██████▊   | 795/1172 [1:54:15<48:47,  7.77s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.91%


Processing ARC-Challenge Questions:  68%|██████▊   | 796/1172 [1:54:22<47:20,  7.55s/question]

Prediction: A, Correct: D, Running Accuracy: 65.83%


Processing ARC-Challenge Questions:  68%|██████▊   | 797/1172 [1:54:30<48:35,  7.77s/question]

Prediction: B, Correct: B, Running Accuracy: 65.87%


Processing ARC-Challenge Questions:  68%|██████▊   | 798/1172 [1:54:38<48:04,  7.71s/question]

Prediction: B, Correct: D, Running Accuracy: 65.79%


Processing ARC-Challenge Questions:  68%|██████▊   | 799/1172 [1:54:47<52:00,  8.37s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  68%|██████▊   | 800/1172 [1:54:57<54:17,  8.76s/question]

Prediction: C, Correct: C, Running Accuracy: 65.75%


Processing ARC-Challenge Questions:  68%|██████▊   | 801/1172 [1:55:07<56:31,  9.14s/question]

Prediction: C, Correct: C, Running Accuracy: 65.79%


Processing ARC-Challenge Questions:  68%|██████▊   | 802/1172 [1:55:15<53:28,  8.67s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  69%|██████▊   | 803/1172 [1:55:24<53:44,  8.74s/question]

Prediction: C, Correct: C, Running Accuracy: 65.75%


Processing ARC-Challenge Questions:  69%|██████▊   | 804/1172 [1:55:34<55:44,  9.09s/question]

Prediction: C, Correct: D, Running Accuracy: 65.67%


Processing ARC-Challenge Questions:  69%|██████▊   | 805/1172 [1:55:42<54:38,  8.93s/question]

Prediction: C, Correct: C, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  69%|██████▉   | 806/1172 [1:55:48<48:41,  7.98s/question]

Prediction: B, Correct: B, Running Accuracy: 65.76%


Processing ARC-Challenge Questions:  69%|██████▉   | 807/1172 [1:55:55<46:54,  7.71s/question]

Prediction: C, Correct: C, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  69%|██████▉   | 808/1172 [1:56:03<46:34,  7.68s/question]

Prediction: B, Correct: D, Running Accuracy: 65.72%


Processing ARC-Challenge Questions:  69%|██████▉   | 809/1172 [1:56:12<49:30,  8.18s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 65.64%


Processing ARC-Challenge Questions:  69%|██████▉   | 810/1172 [1:56:21<51:00,  8.45s/question]

Prediction: D, Correct: D, Running Accuracy: 65.68%


Processing ARC-Challenge Questions:  69%|██████▉   | 811/1172 [1:56:31<52:55,  8.80s/question]

Prediction: C, Correct: C, Running Accuracy: 65.72%


Processing ARC-Challenge Questions:  69%|██████▉   | 812/1172 [1:56:38<50:30,  8.42s/question]

Prediction: A, Correct: A, Running Accuracy: 65.76%


Processing ARC-Challenge Questions:  69%|██████▉   | 813/1172 [1:56:46<49:21,  8.25s/question]

Prediction: C, Correct: C, Running Accuracy: 65.81%


Processing ARC-Challenge Questions:  69%|██████▉   | 814/1172 [1:56:57<55:05,  9.23s/question]

Prediction: D, Correct: D, Running Accuracy: 65.85%


Processing ARC-Challenge Questions:  70%|██████▉   | 815/1172 [1:57:08<56:42,  9.53s/question]

Prediction: A, Correct: B, Running Accuracy: 65.77%


Processing ARC-Challenge Questions:  70%|██████▉   | 816/1172 [1:57:17<55:29,  9.35s/question]

Prediction: D, Correct: A, Running Accuracy: 65.69%


Processing ARC-Challenge Questions:  70%|██████▉   | 817/1172 [1:57:27<56:17,  9.52s/question]

Prediction: A, Correct: A, Running Accuracy: 65.73%


Processing ARC-Challenge Questions:  70%|██████▉   | 818/1172 [1:57:37<57:18,  9.71s/question]

Prediction: B, Correct: D, Running Accuracy: 65.65%


Processing ARC-Challenge Questions:  70%|██████▉   | 819/1172 [1:57:48<1:00:41, 10.32s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 65.57%


Processing ARC-Challenge Questions:  70%|██████▉   | 820/1172 [1:57:56<55:54,  9.53s/question]  

Prediction: C, Correct: C, Running Accuracy: 65.61%


Processing ARC-Challenge Questions:  70%|███████   | 821/1172 [1:58:03<51:20,  8.78s/question]

Prediction: D, Correct: D, Running Accuracy: 65.65%


Processing ARC-Challenge Questions:  70%|███████   | 822/1172 [1:58:11<49:04,  8.41s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.57%


Processing ARC-Challenge Questions:  70%|███████   | 823/1172 [1:58:18<46:26,  7.99s/question]

Prediction: D, Correct: D, Running Accuracy: 65.61%


Processing ARC-Challenge Questions:  70%|███████   | 824/1172 [1:58:28<50:59,  8.79s/question]

Prediction: B, Correct: B, Running Accuracy: 65.66%


Processing ARC-Challenge Questions:  70%|███████   | 825/1172 [1:58:40<56:29,  9.77s/question]

Prediction: D, Correct: D, Running Accuracy: 65.70%


Processing ARC-Challenge Questions:  70%|███████   | 826/1172 [1:58:50<55:49,  9.68s/question]

Prediction: B, Correct: 2, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  71%|███████   | 827/1172 [1:59:00<56:11,  9.77s/question]

Prediction: A, Correct: A, Running Accuracy: 65.66%


Processing ARC-Challenge Questions:  71%|███████   | 828/1172 [1:59:08<52:36,  9.18s/question]

Prediction: D, Correct: D, Running Accuracy: 65.70%


Processing ARC-Challenge Questions:  71%|███████   | 829/1172 [1:59:15<50:02,  8.75s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  71%|███████   | 830/1172 [1:59:25<51:33,  9.04s/question]

Prediction: A, Correct: A, Running Accuracy: 65.66%


Processing ARC-Challenge Questions:  71%|███████   | 831/1172 [1:59:32<47:17,  8.32s/question]

Prediction: B, Correct: D, Running Accuracy: 65.58%


Processing ARC-Challenge Questions:  71%|███████   | 832/1172 [1:59:38<44:12,  7.80s/question]

Prediction: B, Correct: B, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  71%|███████   | 833/1172 [1:59:44<40:37,  7.19s/question]

Prediction: B, Correct: B, Running Accuracy: 65.67%


Processing ARC-Challenge Questions:  71%|███████   | 834/1172 [1:59:52<41:39,  7.39s/question]

Prediction: D, Correct: D, Running Accuracy: 65.71%


Processing ARC-Challenge Questions:  71%|███████   | 835/1172 [2:00:05<51:07,  9.10s/question]

Prediction: A, Correct: A, Running Accuracy: 65.75%


Processing ARC-Challenge Questions:  71%|███████▏  | 836/1172 [2:00:18<57:43, 10.31s/question]

Prediction: A, Correct: A, Running Accuracy: 65.79%


Processing ARC-Challenge Questions:  71%|███████▏  | 837/1172 [2:00:27<54:37,  9.78s/question]

Prediction: D, Correct: D, Running Accuracy: 65.83%


Processing ARC-Challenge Questions:  72%|███████▏  | 838/1172 [2:00:32<46:57,  8.44s/question]

Prediction: A, Correct: A, Running Accuracy: 65.87%


Processing ARC-Challenge Questions:  72%|███████▏  | 839/1172 [2:00:41<47:13,  8.51s/question]

Prediction: D, Correct: D, Running Accuracy: 65.91%


Processing ARC-Challenge Questions:  72%|███████▏  | 840/1172 [2:00:49<45:58,  8.31s/question]

Prediction: B, Correct: D, Running Accuracy: 65.83%


Processing ARC-Challenge Questions:  72%|███████▏  | 841/1172 [2:00:56<44:00,  7.98s/question]

Prediction: C, Correct: C, Running Accuracy: 65.87%


Processing ARC-Challenge Questions:  72%|███████▏  | 842/1172 [2:01:08<50:35,  9.20s/question]

Prediction: B, Correct: B, Running Accuracy: 65.91%


Processing ARC-Challenge Questions:  72%|███████▏  | 843/1172 [2:01:21<57:16, 10.44s/question]

Prediction: D, Correct: B, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  72%|███████▏  | 844/1172 [2:01:36<1:03:37, 11.64s/question]

Prediction: D, Correct: D, Running Accuracy: 65.88%


Processing ARC-Challenge Questions:  72%|███████▏  | 845/1172 [2:01:45<59:31, 10.92s/question]  

Prediction: A, Correct: A, Running Accuracy: 65.92%


Processing ARC-Challenge Questions:  72%|███████▏  | 846/1172 [2:01:52<52:42,  9.70s/question]

Prediction: B, Correct: 2, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  72%|███████▏  | 847/1172 [2:01:58<46:43,  8.63s/question]

Prediction: D, Correct: D, Running Accuracy: 65.88%


Processing ARC-Challenge Questions:  72%|███████▏  | 848/1172 [2:02:07<47:07,  8.73s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  72%|███████▏  | 849/1172 [2:02:16<47:58,  8.91s/question]

Prediction: B, Correct: B, Running Accuracy: 65.84%


Processing ARC-Challenge Questions:  73%|███████▎  | 850/1172 [2:02:23<44:29,  8.29s/question]

Prediction: B, Correct: D, Running Accuracy: 65.76%


Processing ARC-Challenge Questions:  73%|███████▎  | 851/1172 [2:02:29<41:08,  7.69s/question]

Prediction: D, Correct: D, Running Accuracy: 65.80%


Processing ARC-Challenge Questions:  73%|███████▎  | 852/1172 [2:02:38<42:07,  7.90s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.73%


Processing ARC-Challenge Questions:  73%|███████▎  | 853/1172 [2:02:49<47:56,  9.02s/question]

Prediction: B, Correct: B, Running Accuracy: 65.77%


Processing ARC-Challenge Questions:  73%|███████▎  | 854/1172 [2:02:59<48:34,  9.17s/question]

Prediction: A, Correct: B, Running Accuracy: 65.69%


Processing ARC-Challenge Questions:  73%|███████▎  | 855/1172 [2:03:07<47:36,  9.01s/question]

Prediction: B, Correct: B, Running Accuracy: 65.73%


Processing ARC-Challenge Questions:  73%|███████▎  | 856/1172 [2:03:20<52:33,  9.98s/question]

Prediction: C, Correct: C, Running Accuracy: 65.77%


Processing ARC-Challenge Questions:  73%|███████▎  | 857/1172 [2:03:30<53:32, 10.20s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.69%


Processing ARC-Challenge Questions:  73%|███████▎  | 858/1172 [2:03:42<56:20, 10.77s/question]

Prediction: C, Correct: 3, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  73%|███████▎  | 859/1172 [2:03:49<49:45,  9.54s/question]

Prediction: A, Correct: A, Running Accuracy: 65.66%


Processing ARC-Challenge Questions:  73%|███████▎  | 860/1172 [2:04:03<56:05, 10.79s/question]

Prediction: A, Correct: B, Running Accuracy: 65.58%


Processing ARC-Challenge Questions:  73%|███████▎  | 861/1172 [2:04:10<50:06,  9.67s/question]

Prediction: C, Correct: C, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  74%|███████▎  | 862/1172 [2:04:20<51:12,  9.91s/question]

Prediction: B, Correct: B, Running Accuracy: 65.66%


Processing ARC-Challenge Questions:  74%|███████▎  | 863/1172 [2:04:31<51:20,  9.97s/question]

Prediction: A, Correct: D, Running Accuracy: 65.59%


Processing ARC-Challenge Questions:  74%|███████▎  | 864/1172 [2:04:39<49:16,  9.60s/question]

Prediction: D, Correct: D, Running Accuracy: 65.62%


Processing ARC-Challenge Questions:  74%|███████▍  | 865/1172 [2:04:52<53:17, 10.41s/question]

Prediction: A, Correct: D, Running Accuracy: 65.55%


Processing ARC-Challenge Questions:  74%|███████▍  | 866/1172 [2:04:58<47:02,  9.22s/question]

Prediction: B, Correct: C, Running Accuracy: 65.47%


Processing ARC-Challenge Questions:  74%|███████▍  | 867/1172 [2:05:05<44:01,  8.66s/question]

Prediction: B, Correct: B, Running Accuracy: 65.51%


Processing ARC-Challenge Questions:  74%|███████▍  | 868/1172 [2:05:12<41:09,  8.12s/question]

Prediction: C, Correct: C, Running Accuracy: 65.55%


Processing ARC-Challenge Questions:  74%|███████▍  | 869/1172 [2:05:20<40:10,  7.96s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.48%


Processing ARC-Challenge Questions:  74%|███████▍  | 870/1172 [2:05:33<47:36,  9.46s/question]

Prediction: B, Correct: D, Running Accuracy: 65.40%


Processing ARC-Challenge Questions:  74%|███████▍  | 871/1172 [2:05:41<45:16,  9.02s/question]

Prediction: D, Correct: D, Running Accuracy: 65.44%


Processing ARC-Challenge Questions:  74%|███████▍  | 872/1172 [2:05:49<44:05,  8.82s/question]

Prediction: C, Correct: C, Running Accuracy: 65.48%


Processing ARC-Challenge Questions:  74%|███████▍  | 873/1172 [2:05:55<39:55,  8.01s/question]

Prediction: C, Correct: C, Running Accuracy: 65.52%


Processing ARC-Challenge Questions:  75%|███████▍  | 874/1172 [2:06:03<40:02,  8.06s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.45%


Processing ARC-Challenge Questions:  75%|███████▍  | 875/1172 [2:06:10<38:17,  7.73s/question]

Prediction: D, Correct: D, Running Accuracy: 65.49%


Processing ARC-Challenge Questions:  75%|███████▍  | 876/1172 [2:06:23<45:36,  9.25s/question]

Prediction: B, Correct: A, Running Accuracy: 65.41%


Processing ARC-Challenge Questions:  75%|███████▍  | 877/1172 [2:06:32<44:16,  9.00s/question]

Prediction: A, Correct: A, Running Accuracy: 65.45%


Processing ARC-Challenge Questions:  75%|███████▍  | 878/1172 [2:06:39<41:45,  8.52s/question]

Prediction: A, Correct: A, Running Accuracy: 65.49%


Processing ARC-Challenge Questions:  75%|███████▌  | 879/1172 [2:06:48<42:30,  8.70s/question]

Prediction: B, Correct: B, Running Accuracy: 65.53%


Processing ARC-Challenge Questions:  75%|███████▌  | 880/1172 [2:06:54<38:44,  7.96s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.45%


Processing ARC-Challenge Questions:  75%|███████▌  | 881/1172 [2:07:01<36:29,  7.52s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.38%


Processing ARC-Challenge Questions:  75%|███████▌  | 882/1172 [2:07:12<41:46,  8.64s/question]

Prediction: D, Correct: D, Running Accuracy: 65.42%


Processing ARC-Challenge Questions:  75%|███████▌  | 883/1172 [2:07:19<39:35,  8.22s/question]

Prediction: D, Correct: D, Running Accuracy: 65.46%


Processing ARC-Challenge Questions:  75%|███████▌  | 884/1172 [2:07:28<39:42,  8.27s/question]

Prediction: D, Correct: C, Running Accuracy: 65.38%


Processing ARC-Challenge Questions:  76%|███████▌  | 885/1172 [2:07:36<38:54,  8.14s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.31%


Processing ARC-Challenge Questions:  76%|███████▌  | 886/1172 [2:07:44<38:34,  8.09s/question]

Prediction: B, Correct: B, Running Accuracy: 65.35%


Processing ARC-Challenge Questions:  76%|███████▌  | 887/1172 [2:07:55<43:15,  9.11s/question]

Prediction: A, Correct: 1, Running Accuracy: 65.28%


Processing ARC-Challenge Questions:  76%|███████▌  | 888/1172 [2:08:07<47:02,  9.94s/question]

Prediction: C, Correct: C, Running Accuracy: 65.32%


Processing ARC-Challenge Questions:  76%|███████▌  | 889/1172 [2:08:16<46:20,  9.82s/question]

Prediction: D, Correct: C, Running Accuracy: 65.24%


Processing ARC-Challenge Questions:  76%|███████▌  | 890/1172 [2:08:23<41:38,  8.86s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.17%


Processing ARC-Challenge Questions:  76%|███████▌  | 891/1172 [2:08:32<40:58,  8.75s/question]

Prediction: C, Correct: B, Running Accuracy: 65.10%


Processing ARC-Challenge Questions:  76%|███████▌  | 892/1172 [2:08:41<42:03,  9.01s/question]

Prediction: A, Correct: A, Running Accuracy: 65.13%


Processing ARC-Challenge Questions:  76%|███████▌  | 893/1172 [2:08:51<43:03,  9.26s/question]

Prediction: B, Correct: B, Running Accuracy: 65.17%


Processing ARC-Challenge Questions:  76%|███████▋  | 894/1172 [2:09:01<43:23,  9.36s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.10%


Processing ARC-Challenge Questions:  76%|███████▋  | 895/1172 [2:09:13<46:52, 10.15s/question]

Prediction: B, Correct: B, Running Accuracy: 65.14%


Processing ARC-Challenge Questions:  76%|███████▋  | 896/1172 [2:09:24<48:58, 10.65s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.07%


Processing ARC-Challenge Questions:  77%|███████▋  | 897/1172 [2:09:31<43:14,  9.43s/question]

Prediction: A, Correct: A, Running Accuracy: 65.11%


Processing ARC-Challenge Questions:  77%|███████▋  | 898/1172 [2:09:43<46:45, 10.24s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 65.03%


Processing ARC-Challenge Questions:  77%|███████▋  | 899/1172 [2:09:50<42:29,  9.34s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.96%


Processing ARC-Challenge Questions:  77%|███████▋  | 900/1172 [2:09:59<41:57,  9.26s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.89%


Processing ARC-Challenge Questions:  77%|███████▋  | 901/1172 [2:10:14<49:11, 10.89s/question]

Prediction: D, Correct: D, Running Accuracy: 64.93%


Processing ARC-Challenge Questions:  77%|███████▋  | 902/1172 [2:10:22<44:54,  9.98s/question]

Prediction: B, Correct: B, Running Accuracy: 64.97%


Processing ARC-Challenge Questions:  77%|███████▋  | 903/1172 [2:10:32<45:00, 10.04s/question]

Prediction: B, Correct: B, Running Accuracy: 65.01%


Processing ARC-Challenge Questions:  77%|███████▋  | 904/1172 [2:10:41<42:54,  9.60s/question]

Prediction: A, Correct: A, Running Accuracy: 65.04%


Processing ARC-Challenge Questions:  77%|███████▋  | 905/1172 [2:10:48<39:28,  8.87s/question]

Prediction: C, Correct: C, Running Accuracy: 65.08%


Processing ARC-Challenge Questions:  77%|███████▋  | 906/1172 [2:10:56<38:37,  8.71s/question]

Prediction: D, Correct: D, Running Accuracy: 65.12%


Processing ARC-Challenge Questions:  77%|███████▋  | 907/1172 [2:11:05<37:53,  8.58s/question]

Prediction: C, Correct: C, Running Accuracy: 65.16%


Processing ARC-Challenge Questions:  77%|███████▋  | 908/1172 [2:11:15<40:25,  9.19s/question]

Prediction: A, Correct: A, Running Accuracy: 65.20%


Processing ARC-Challenge Questions:  78%|███████▊  | 909/1172 [2:11:22<36:59,  8.44s/question]

Prediction: A, Correct: A, Running Accuracy: 65.24%


Processing ARC-Challenge Questions:  78%|███████▊  | 910/1172 [2:11:32<38:32,  8.83s/question]

Prediction: D, Correct: D, Running Accuracy: 65.27%


Processing ARC-Challenge Questions:  78%|███████▊  | 911/1172 [2:11:38<34:42,  7.98s/question]

Prediction: D, Correct: D, Running Accuracy: 65.31%


Processing ARC-Challenge Questions:  78%|███████▊  | 912/1172 [2:11:50<40:02,  9.24s/question]

Prediction: A, Correct: D, Running Accuracy: 65.24%


Processing ARC-Challenge Questions:  78%|███████▊  | 913/1172 [2:11:56<36:07,  8.37s/question]

Prediction: A, Correct: A, Running Accuracy: 65.28%


Processing ARC-Challenge Questions:  78%|███████▊  | 914/1172 [2:12:04<35:17,  8.21s/question]

Prediction: C, Correct: B, Running Accuracy: 65.21%


Processing ARC-Challenge Questions:  78%|███████▊  | 915/1172 [2:12:15<38:27,  8.98s/question]

Prediction: A, Correct: B, Running Accuracy: 65.14%


Processing ARC-Challenge Questions:  78%|███████▊  | 916/1172 [2:12:24<38:51,  9.11s/question]

Prediction: D, Correct: D, Running Accuracy: 65.17%


Processing ARC-Challenge Questions:  78%|███████▊  | 917/1172 [2:12:33<38:37,  9.09s/question]

Prediction: B, Correct: B, Running Accuracy: 65.21%


Processing ARC-Challenge Questions:  78%|███████▊  | 918/1172 [2:12:40<35:18,  8.34s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 65.14%


Processing ARC-Challenge Questions:  78%|███████▊  | 919/1172 [2:12:47<33:48,  8.02s/question]

Prediction: D, Correct: D, Running Accuracy: 65.18%


Processing ARC-Challenge Questions:  78%|███████▊  | 920/1172 [2:12:54<32:39,  7.78s/question]

Prediction: D, Correct: B, Running Accuracy: 65.11%


Processing ARC-Challenge Questions:  79%|███████▊  | 921/1172 [2:13:03<33:40,  8.05s/question]

Prediction: A, Correct: A, Running Accuracy: 65.15%


Processing ARC-Challenge Questions:  79%|███████▊  | 922/1172 [2:13:12<34:49,  8.36s/question]

Prediction: D, Correct: C, Running Accuracy: 65.08%


Processing ARC-Challenge Questions:  79%|███████▉  | 923/1172 [2:13:24<39:29,  9.52s/question]

Prediction: A, Correct: B, Running Accuracy: 65.01%


Processing ARC-Challenge Questions:  79%|███████▉  | 924/1172 [2:13:30<34:41,  8.40s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 64.94%


Processing ARC-Challenge Questions:  79%|███████▉  | 925/1172 [2:13:38<34:24,  8.36s/question]

Prediction: B, Correct: B, Running Accuracy: 64.97%


Processing ARC-Challenge Questions:  79%|███████▉  | 926/1172 [2:13:49<36:48,  8.98s/question]

Prediction: C, Correct: C, Running Accuracy: 65.01%


Processing ARC-Challenge Questions:  79%|███████▉  | 927/1172 [2:13:58<37:12,  9.11s/question]

Prediction: A, Correct: A, Running Accuracy: 65.05%


Processing ARC-Challenge Questions:  79%|███████▉  | 928/1172 [2:14:06<35:16,  8.67s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.98%


Processing ARC-Challenge Questions:  79%|███████▉  | 929/1172 [2:14:13<33:29,  8.27s/question]

Prediction: D, Correct: D, Running Accuracy: 65.02%


Processing ARC-Challenge Questions:  79%|███████▉  | 930/1172 [2:14:24<36:10,  8.97s/question]

Prediction: A, Correct: A, Running Accuracy: 65.05%


Processing ARC-Challenge Questions:  79%|███████▉  | 931/1172 [2:14:34<37:34,  9.35s/question]

Prediction: B, Correct: B, Running Accuracy: 65.09%


Processing ARC-Challenge Questions:  80%|███████▉  | 932/1172 [2:14:42<36:11,  9.05s/question]

Prediction: C, Correct: B, Running Accuracy: 65.02%


Processing ARC-Challenge Questions:  80%|███████▉  | 933/1172 [2:14:54<38:49,  9.75s/question]

Prediction: D, Correct: D, Running Accuracy: 65.06%


Processing ARC-Challenge Questions:  80%|███████▉  | 934/1172 [2:15:06<42:10, 10.63s/question]

Prediction: C, Correct: B, Running Accuracy: 64.99%


Processing ARC-Challenge Questions:  80%|███████▉  | 935/1172 [2:15:14<38:31,  9.75s/question]

Prediction: B, Correct: B, Running Accuracy: 65.03%


Processing ARC-Challenge Questions:  80%|███████▉  | 936/1172 [2:15:22<35:42,  9.08s/question]

Prediction: D, Correct: D, Running Accuracy: 65.06%


Processing ARC-Challenge Questions:  80%|███████▉  | 937/1172 [2:15:32<37:07,  9.48s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.99%


Processing ARC-Challenge Questions:  80%|████████  | 938/1172 [2:15:47<43:37, 11.19s/question]

Prediction: D, Correct: C, Running Accuracy: 64.93%


Processing ARC-Challenge Questions:  80%|████████  | 939/1172 [2:15:54<38:37,  9.95s/question]

Prediction: B, Correct: B, Running Accuracy: 64.96%


Processing ARC-Challenge Questions:  80%|████████  | 940/1172 [2:16:07<41:53, 10.83s/question]

Prediction: A, Correct: D, Running Accuracy: 64.89%


Processing ARC-Challenge Questions:  80%|████████  | 941/1172 [2:16:15<38:50, 10.09s/question]

Prediction: B, Correct: B, Running Accuracy: 64.93%


Processing ARC-Challenge Questions:  80%|████████  | 942/1172 [2:16:24<37:21,  9.75s/question]

Prediction: B, Correct: B, Running Accuracy: 64.97%


Processing ARC-Challenge Questions:  80%|████████  | 943/1172 [2:16:34<36:31,  9.57s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.90%


Processing ARC-Challenge Questions:  81%|████████  | 944/1172 [2:16:40<33:06,  8.71s/question]

Prediction: B, Correct: B, Running Accuracy: 64.94%


Processing ARC-Challenge Questions:  81%|████████  | 945/1172 [2:16:48<32:19,  8.54s/question]

Prediction: A, Correct: A, Running Accuracy: 64.97%


Processing ARC-Challenge Questions:  81%|████████  | 946/1172 [2:16:57<31:41,  8.41s/question]

Prediction: A, Correct: A, Running Accuracy: 65.01%


Processing ARC-Challenge Questions:  81%|████████  | 947/1172 [2:17:06<32:22,  8.63s/question]

Prediction: C, Correct: C, Running Accuracy: 65.05%


Processing ARC-Challenge Questions:  81%|████████  | 948/1172 [2:17:13<31:19,  8.39s/question]

Prediction: C, Correct: C, Running Accuracy: 65.08%


Processing ARC-Challenge Questions:  81%|████████  | 949/1172 [2:17:23<32:02,  8.62s/question]

Prediction: D, Correct: D, Running Accuracy: 65.12%


Processing ARC-Challenge Questions:  81%|████████  | 950/1172 [2:17:30<30:51,  8.34s/question]

Prediction: A, Correct: A, Running Accuracy: 65.16%


Processing ARC-Challenge Questions:  81%|████████  | 951/1172 [2:17:39<30:36,  8.31s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 65.09%


Processing ARC-Challenge Questions:  81%|████████  | 952/1172 [2:17:56<40:26, 11.03s/question]

Prediction: B, Correct: B, Running Accuracy: 65.13%


Processing ARC-Challenge Questions:  81%|████████▏ | 953/1172 [2:18:03<36:21,  9.96s/question]

Prediction: C, Correct: C, Running Accuracy: 65.16%


Processing ARC-Challenge Questions:  81%|████████▏ | 954/1172 [2:18:11<33:06,  9.11s/question]

Prediction: Unknown, Correct: 1, Running Accuracy: 65.09%


Processing ARC-Challenge Questions:  81%|████████▏ | 955/1172 [2:18:16<29:30,  8.16s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.03%


Processing ARC-Challenge Questions:  82%|████████▏ | 956/1172 [2:18:26<30:25,  8.45s/question]

Prediction: C, Correct: C, Running Accuracy: 65.06%


Processing ARC-Challenge Questions:  82%|████████▏ | 957/1172 [2:18:34<30:30,  8.52s/question]

Prediction: C, Correct: C, Running Accuracy: 65.10%


Processing ARC-Challenge Questions:  82%|████████▏ | 958/1172 [2:18:42<29:39,  8.32s/question]

Prediction: D, Correct: D, Running Accuracy: 65.14%


Processing ARC-Challenge Questions:  82%|████████▏ | 959/1172 [2:18:48<27:12,  7.66s/question]

Prediction: D, Correct: D, Running Accuracy: 65.17%


Processing ARC-Challenge Questions:  82%|████████▏ | 960/1172 [2:18:55<26:06,  7.39s/question]

Prediction: C, Correct: C, Running Accuracy: 65.21%


Processing ARC-Challenge Questions:  82%|████████▏ | 961/1172 [2:19:04<28:02,  7.97s/question]

Prediction: C, Correct: C, Running Accuracy: 65.24%


Processing ARC-Challenge Questions:  82%|████████▏ | 962/1172 [2:19:09<24:43,  7.07s/question]

Prediction: B, Correct: B, Running Accuracy: 65.28%


Processing ARC-Challenge Questions:  82%|████████▏ | 963/1172 [2:19:18<26:09,  7.51s/question]

Prediction: B, Correct: B, Running Accuracy: 65.32%


Processing ARC-Challenge Questions:  82%|████████▏ | 964/1172 [2:19:26<26:58,  7.78s/question]

Prediction: B, Correct: 2, Running Accuracy: 65.25%


Processing ARC-Challenge Questions:  82%|████████▏ | 965/1172 [2:19:37<29:29,  8.55s/question]

Prediction: D, Correct: D, Running Accuracy: 65.28%


Processing ARC-Challenge Questions:  82%|████████▏ | 966/1172 [2:19:46<30:28,  8.88s/question]

Prediction: C, Correct: C, Running Accuracy: 65.32%


Processing ARC-Challenge Questions:  83%|████████▎ | 967/1172 [2:19:55<30:09,  8.83s/question]

Prediction: D, Correct: C, Running Accuracy: 65.25%


Processing ARC-Challenge Questions:  83%|████████▎ | 968/1172 [2:20:03<28:46,  8.46s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 65.19%


Processing ARC-Challenge Questions:  83%|████████▎ | 969/1172 [2:20:09<26:16,  7.77s/question]

Prediction: C, Correct: C, Running Accuracy: 65.22%


Processing ARC-Challenge Questions:  83%|████████▎ | 970/1172 [2:20:17<26:54,  7.99s/question]

Prediction: B, Correct: B, Running Accuracy: 65.26%


Processing ARC-Challenge Questions:  83%|████████▎ | 971/1172 [2:20:25<26:33,  7.93s/question]

Prediction: A, Correct: A, Running Accuracy: 65.29%


Processing ARC-Challenge Questions:  83%|████████▎ | 972/1172 [2:20:34<27:06,  8.13s/question]

Prediction: B, Correct: B, Running Accuracy: 65.33%


Processing ARC-Challenge Questions:  83%|████████▎ | 973/1172 [2:20:42<27:39,  8.34s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.26%


Processing ARC-Challenge Questions:  83%|████████▎ | 974/1172 [2:20:51<27:27,  8.32s/question]

Prediction: D, Correct: D, Running Accuracy: 65.30%


Processing ARC-Challenge Questions:  83%|████████▎ | 975/1172 [2:20:58<26:05,  7.94s/question]

Prediction: A, Correct: A, Running Accuracy: 65.33%


Processing ARC-Challenge Questions:  83%|████████▎ | 976/1172 [2:21:11<30:54,  9.46s/question]

Prediction: Unknown, Correct: 3, Running Accuracy: 65.27%


Processing ARC-Challenge Questions:  83%|████████▎ | 977/1172 [2:21:23<33:28, 10.30s/question]

Prediction: D, Correct: B, Running Accuracy: 65.20%


Processing ARC-Challenge Questions:  83%|████████▎ | 978/1172 [2:21:33<32:58, 10.20s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 65.13%


Processing ARC-Challenge Questions:  84%|████████▎ | 979/1172 [2:21:40<29:58,  9.32s/question]

Prediction: B, Correct: B, Running Accuracy: 65.17%


Processing ARC-Challenge Questions:  84%|████████▎ | 980/1172 [2:21:46<26:13,  8.19s/question]

Prediction: B, Correct: B, Running Accuracy: 65.20%


Processing ARC-Challenge Questions:  84%|████████▎ | 981/1172 [2:21:55<27:24,  8.61s/question]

Prediction: A, Correct: D, Running Accuracy: 65.14%


Processing ARC-Challenge Questions:  84%|████████▍ | 982/1172 [2:22:06<29:29,  9.31s/question]

Prediction: D, Correct: D, Running Accuracy: 65.17%


Processing ARC-Challenge Questions:  84%|████████▍ | 983/1172 [2:22:15<29:05,  9.24s/question]

Prediction: D, Correct: D, Running Accuracy: 65.21%


Processing ARC-Challenge Questions:  84%|████████▍ | 984/1172 [2:22:27<31:07,  9.93s/question]

Prediction: C, Correct: C, Running Accuracy: 65.24%


Processing ARC-Challenge Questions:  84%|████████▍ | 985/1172 [2:22:35<29:12,  9.37s/question]

Prediction: D, Correct: B, Running Accuracy: 65.18%


Processing ARC-Challenge Questions:  84%|████████▍ | 986/1172 [2:22:41<25:46,  8.31s/question]

Prediction: D, Correct: D, Running Accuracy: 65.21%


Processing ARC-Challenge Questions:  84%|████████▍ | 987/1172 [2:22:48<24:06,  7.82s/question]

Prediction: A, Correct: D, Running Accuracy: 65.15%


Processing ARC-Challenge Questions:  84%|████████▍ | 988/1172 [2:22:56<24:46,  8.08s/question]

Prediction: C, Correct: D, Running Accuracy: 65.08%


Processing ARC-Challenge Questions:  84%|████████▍ | 989/1172 [2:23:04<24:29,  8.03s/question]

Prediction: B, Correct: B, Running Accuracy: 65.12%


Processing ARC-Challenge Questions:  84%|████████▍ | 990/1172 [2:23:11<23:23,  7.71s/question]

Prediction: B, Correct: D, Running Accuracy: 65.05%


Processing ARC-Challenge Questions:  85%|████████▍ | 991/1172 [2:23:20<24:30,  8.13s/question]

Prediction: D, Correct: D, Running Accuracy: 65.09%


Processing ARC-Challenge Questions:  85%|████████▍ | 992/1172 [2:23:28<24:14,  8.08s/question]

Prediction: C, Correct: C, Running Accuracy: 65.12%


Processing ARC-Challenge Questions:  85%|████████▍ | 993/1172 [2:23:34<21:57,  7.36s/question]

Prediction: Unknown, Correct: B, Running Accuracy: 65.06%


Processing ARC-Challenge Questions:  85%|████████▍ | 994/1172 [2:23:41<21:58,  7.41s/question]

Prediction: A, Correct: D, Running Accuracy: 64.99%


Processing ARC-Challenge Questions:  85%|████████▍ | 995/1172 [2:23:51<23:24,  7.93s/question]

Prediction: C, Correct: C, Running Accuracy: 65.03%


Processing ARC-Challenge Questions:  85%|████████▍ | 996/1172 [2:24:02<26:14,  8.95s/question]

Prediction: A, Correct: B, Running Accuracy: 64.96%


Processing ARC-Challenge Questions:  85%|████████▌ | 997/1172 [2:24:09<24:08,  8.28s/question]

Prediction: B, Correct: B, Running Accuracy: 64.99%


Processing ARC-Challenge Questions:  85%|████████▌ | 998/1172 [2:24:15<22:27,  7.74s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.93%


Processing ARC-Challenge Questions:  85%|████████▌ | 999/1172 [2:24:22<21:26,  7.44s/question]

Prediction: C, Correct: C, Running Accuracy: 64.96%


Processing ARC-Challenge Questions:  85%|████████▌ | 1000/1172 [2:24:33<24:31,  8.56s/question]

Prediction: B, Correct: A, Running Accuracy: 64.90%


Processing ARC-Challenge Questions:  85%|████████▌ | 1001/1172 [2:24:42<24:47,  8.70s/question]

Prediction: A, Correct: A, Running Accuracy: 64.94%


Processing ARC-Challenge Questions:  85%|████████▌ | 1002/1172 [2:24:50<23:57,  8.45s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.87%


Processing ARC-Challenge Questions:  86%|████████▌ | 1003/1172 [2:24:58<23:15,  8.26s/question]

Prediction: B, Correct: A, Running Accuracy: 64.81%


Processing ARC-Challenge Questions:  86%|████████▌ | 1004/1172 [2:25:07<23:45,  8.48s/question]

Prediction: A, Correct: B, Running Accuracy: 64.74%


Processing ARC-Challenge Questions:  86%|████████▌ | 1005/1172 [2:25:16<24:32,  8.82s/question]

Prediction: C, Correct: C, Running Accuracy: 64.78%


Processing ARC-Challenge Questions:  86%|████████▌ | 1006/1172 [2:25:25<24:04,  8.70s/question]

Prediction: D, Correct: D, Running Accuracy: 64.81%


Processing ARC-Challenge Questions:  86%|████████▌ | 1007/1172 [2:25:32<22:27,  8.16s/question]

Prediction: D, Correct: D, Running Accuracy: 64.85%


Processing ARC-Challenge Questions:  86%|████████▌ | 1008/1172 [2:25:39<21:32,  7.88s/question]

Prediction: D, Correct: D, Running Accuracy: 64.88%


Processing ARC-Challenge Questions:  86%|████████▌ | 1009/1172 [2:25:46<21:12,  7.80s/question]

Prediction: A, Correct: A, Running Accuracy: 64.92%


Processing ARC-Challenge Questions:  86%|████████▌ | 1010/1172 [2:25:56<22:47,  8.44s/question]

Prediction: C, Correct: A, Running Accuracy: 64.85%


Processing ARC-Challenge Questions:  86%|████████▋ | 1011/1172 [2:26:04<21:41,  8.08s/question]

Prediction: D, Correct: D, Running Accuracy: 64.89%


Processing ARC-Challenge Questions:  86%|████████▋ | 1012/1172 [2:26:13<22:54,  8.59s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.82%


Processing ARC-Challenge Questions:  86%|████████▋ | 1013/1172 [2:26:22<23:01,  8.69s/question]

Prediction: A, Correct: A, Running Accuracy: 64.86%


Processing ARC-Challenge Questions:  87%|████████▋ | 1014/1172 [2:26:28<20:26,  7.76s/question]

Prediction: A, Correct: A, Running Accuracy: 64.89%


Processing ARC-Challenge Questions:  87%|████████▋ | 1015/1172 [2:26:43<26:06,  9.98s/question]

Prediction: B, Correct: B, Running Accuracy: 64.93%


Processing ARC-Challenge Questions:  87%|████████▋ | 1016/1172 [2:26:51<24:12,  9.31s/question]

Prediction: D, Correct: D, Running Accuracy: 64.96%


Processing ARC-Challenge Questions:  87%|████████▋ | 1017/1172 [2:26:58<22:44,  8.80s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 64.90%


Processing ARC-Challenge Questions:  87%|████████▋ | 1018/1172 [2:27:06<21:38,  8.43s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.83%


Processing ARC-Challenge Questions:  87%|████████▋ | 1019/1172 [2:27:16<22:23,  8.78s/question]

Prediction: D, Correct: D, Running Accuracy: 64.87%


Processing ARC-Challenge Questions:  87%|████████▋ | 1020/1172 [2:27:23<20:56,  8.27s/question]

Prediction: A, Correct: A, Running Accuracy: 64.90%


Processing ARC-Challenge Questions:  87%|████████▋ | 1021/1172 [2:27:40<27:43, 11.02s/question]

Prediction: D, Correct: C, Running Accuracy: 64.84%


Processing ARC-Challenge Questions:  87%|████████▋ | 1022/1172 [2:27:48<25:10, 10.07s/question]

Prediction: D, Correct: D, Running Accuracy: 64.87%


Processing ARC-Challenge Questions:  87%|████████▋ | 1023/1172 [2:27:56<23:18,  9.39s/question]

Prediction: D, Correct: D, Running Accuracy: 64.91%


Processing ARC-Challenge Questions:  87%|████████▋ | 1024/1172 [2:28:07<24:08,  9.79s/question]

Prediction: A, Correct: D, Running Accuracy: 64.84%


Processing ARC-Challenge Questions:  87%|████████▋ | 1025/1172 [2:28:16<24:03,  9.82s/question]

Prediction: C, Correct: C, Running Accuracy: 64.88%


Processing ARC-Challenge Questions:  88%|████████▊ | 1026/1172 [2:28:25<23:19,  9.59s/question]

Prediction: C, Correct: C, Running Accuracy: 64.91%


Processing ARC-Challenge Questions:  88%|████████▊ | 1027/1172 [2:28:32<21:13,  8.79s/question]

Prediction: C, Correct: C, Running Accuracy: 64.95%


Processing ARC-Challenge Questions:  88%|████████▊ | 1028/1172 [2:28:40<20:25,  8.51s/question]

Prediction: C, Correct: C, Running Accuracy: 64.98%


Processing ARC-Challenge Questions:  88%|████████▊ | 1029/1172 [2:28:57<26:04, 10.94s/question]

Prediction: D, Correct: B, Running Accuracy: 64.92%


Processing ARC-Challenge Questions:  88%|████████▊ | 1030/1172 [2:29:04<23:26,  9.91s/question]

Prediction: D, Correct: D, Running Accuracy: 64.95%


Processing ARC-Challenge Questions:  88%|████████▊ | 1031/1172 [2:29:13<22:40,  9.65s/question]

Prediction: D, Correct: D, Running Accuracy: 64.99%


Processing ARC-Challenge Questions:  88%|████████▊ | 1032/1172 [2:29:21<20:55,  8.97s/question]

Prediction: B, Correct: A, Running Accuracy: 64.92%


Processing ARC-Challenge Questions:  88%|████████▊ | 1033/1172 [2:29:31<21:59,  9.49s/question]

Prediction: D, Correct: D, Running Accuracy: 64.96%


Processing ARC-Challenge Questions:  88%|████████▊ | 1034/1172 [2:29:40<20:50,  9.06s/question]

Prediction: D, Correct: D, Running Accuracy: 64.99%


Processing ARC-Challenge Questions:  88%|████████▊ | 1035/1172 [2:29:48<20:16,  8.88s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.93%


Processing ARC-Challenge Questions:  88%|████████▊ | 1036/1172 [2:29:56<19:40,  8.68s/question]

Prediction: B, Correct: B, Running Accuracy: 64.96%


Processing ARC-Challenge Questions:  88%|████████▊ | 1037/1172 [2:30:08<21:48,  9.69s/question]

Prediction: D, Correct: D, Running Accuracy: 65.00%


Processing ARC-Challenge Questions:  89%|████████▊ | 1038/1172 [2:30:17<20:46,  9.30s/question]

Prediction: B, Correct: B, Running Accuracy: 65.03%


Processing ARC-Challenge Questions:  89%|████████▊ | 1039/1172 [2:30:22<18:10,  8.20s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.97%


Processing ARC-Challenge Questions:  89%|████████▊ | 1040/1172 [2:30:29<17:18,  7.87s/question]

Prediction: A, Correct: B, Running Accuracy: 64.90%


Processing ARC-Challenge Questions:  89%|████████▉ | 1041/1172 [2:30:40<19:03,  8.73s/question]

Prediction: B, Correct: B, Running Accuracy: 64.94%


Processing ARC-Challenge Questions:  89%|████████▉ | 1042/1172 [2:30:48<18:23,  8.49s/question]

Prediction: B, Correct: B, Running Accuracy: 64.97%


Processing ARC-Challenge Questions:  89%|████████▉ | 1043/1172 [2:30:56<17:56,  8.35s/question]

Prediction: A, Correct: A, Running Accuracy: 65.00%


Processing ARC-Challenge Questions:  89%|████████▉ | 1044/1172 [2:31:07<19:30,  9.14s/question]

Prediction: C, Correct: C, Running Accuracy: 65.04%


Processing ARC-Challenge Questions:  89%|████████▉ | 1045/1172 [2:31:15<18:30,  8.74s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.98%


Processing ARC-Challenge Questions:  89%|████████▉ | 1046/1172 [2:31:24<18:30,  8.82s/question]

Prediction: B, Correct: B, Running Accuracy: 65.01%


Processing ARC-Challenge Questions:  89%|████████▉ | 1047/1172 [2:31:34<19:08,  9.19s/question]

Prediction: C, Correct: D, Running Accuracy: 64.95%


Processing ARC-Challenge Questions:  89%|████████▉ | 1048/1172 [2:31:44<19:20,  9.36s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.89%


Processing ARC-Challenge Questions:  90%|████████▉ | 1049/1172 [2:31:52<18:24,  8.98s/question]

Prediction: B, Correct: B, Running Accuracy: 64.92%


Processing ARC-Challenge Questions:  90%|████████▉ | 1050/1172 [2:32:00<18:02,  8.88s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.86%


Processing ARC-Challenge Questions:  90%|████████▉ | 1051/1172 [2:32:09<17:40,  8.76s/question]

Prediction: D, Correct: D, Running Accuracy: 64.89%


Processing ARC-Challenge Questions:  90%|████████▉ | 1052/1172 [2:32:19<18:12,  9.10s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.83%


Processing ARC-Challenge Questions:  90%|████████▉ | 1053/1172 [2:32:26<17:07,  8.63s/question]

Prediction: A, Correct: D, Running Accuracy: 64.77%


Processing ARC-Challenge Questions:  90%|████████▉ | 1054/1172 [2:32:34<16:07,  8.20s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.71%


Processing ARC-Challenge Questions:  90%|█████████ | 1055/1172 [2:32:45<17:39,  9.05s/question]

Prediction: A, Correct: A, Running Accuracy: 64.74%


Processing ARC-Challenge Questions:  90%|█████████ | 1056/1172 [2:32:53<17:08,  8.87s/question]

Prediction: A, Correct: A, Running Accuracy: 64.77%


Processing ARC-Challenge Questions:  90%|█████████ | 1057/1172 [2:32:59<15:25,  8.05s/question]

Prediction: A, Correct: A, Running Accuracy: 64.81%


Processing ARC-Challenge Questions:  90%|█████████ | 1058/1172 [2:33:08<15:42,  8.27s/question]

Prediction: B, Correct: B, Running Accuracy: 64.84%


Processing ARC-Challenge Questions:  90%|█████████ | 1059/1172 [2:33:17<16:03,  8.53s/question]

Prediction: Unknown, Correct: A, Running Accuracy: 64.78%


Processing ARC-Challenge Questions:  90%|█████████ | 1060/1172 [2:33:27<16:53,  9.05s/question]

Prediction: B, Correct: B, Running Accuracy: 64.81%


Processing ARC-Challenge Questions:  91%|█████████ | 1061/1172 [2:33:34<15:32,  8.40s/question]

Prediction: B, Correct: B, Running Accuracy: 64.84%


Processing ARC-Challenge Questions:  91%|█████████ | 1062/1172 [2:33:45<16:47,  9.16s/question]

Prediction: A, Correct: A, Running Accuracy: 64.88%


Processing ARC-Challenge Questions:  91%|█████████ | 1063/1172 [2:33:55<17:10,  9.46s/question]

Prediction: A, Correct: B, Running Accuracy: 64.82%


Processing ARC-Challenge Questions:  91%|█████████ | 1064/1172 [2:34:04<16:50,  9.36s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 64.76%


Processing ARC-Challenge Questions:  91%|█████████ | 1065/1172 [2:34:12<15:48,  8.87s/question]

Prediction: C, Correct: D, Running Accuracy: 64.69%


Processing ARC-Challenge Questions:  91%|█████████ | 1066/1172 [2:34:26<18:07, 10.26s/question]

Prediction: C, Correct: C, Running Accuracy: 64.73%


Processing ARC-Challenge Questions:  91%|█████████ | 1067/1172 [2:34:34<17:07,  9.79s/question]

Prediction: Unknown, Correct: C, Running Accuracy: 64.67%


Processing ARC-Challenge Questions:  91%|█████████ | 1068/1172 [2:34:45<17:21, 10.02s/question]

Prediction: D, Correct: D, Running Accuracy: 64.70%


Processing ARC-Challenge Questions:  91%|█████████ | 1069/1172 [2:34:52<15:43,  9.16s/question]

Prediction: C, Correct: C, Running Accuracy: 64.73%


Processing ARC-Challenge Questions:  91%|█████████▏| 1070/1172 [2:34:59<14:34,  8.57s/question]

Prediction: C, Correct: D, Running Accuracy: 64.67%


Processing ARC-Challenge Questions:  91%|█████████▏| 1071/1172 [2:35:06<13:32,  8.04s/question]

Prediction: Unknown, Correct: D, Running Accuracy: 64.61%


Processing ARC-Challenge Questions:  91%|█████████▏| 1072/1172 [2:35:14<13:10,  7.91s/question]

Prediction: A, Correct: A, Running Accuracy: 64.65%


In [None]:
from sklearn.metrics import accuracy_score

y_true = [p["answerKey"] for p in predictions]
y_pred = [p["extracted_answer"] for p in predictions]

accuracy = accuracy_score(y_true, y_pred)
print(f"ARC Benchmark Accuracy: {accuracy:.2%}")

In [None]:
import pandas as pd

df = pd.DataFrame(predictions)

df.to_csv("model_predictions.csv", index=False)