# Setup

## Make both T4 GPUs visible to CUDA

In [1]:
import os, math, numpy as np
# Expose GPU indices 0 and 1 to CUDA. This is set here, before torch / vllm are imported further down.
os.environ["CUDA_VISIBLE_DEVICES"]="0,1"

## Install vLLM

In [6]:
%%time
# Remove the preinstalled torch before installing vLLM.
!pip uninstall -y torch
# Offline install: --no-index plus --find-links makes pip resolve vllm and its dependencies from the local wheel directory only.
!pip install -U --no-index --find-links=/kaggle/input/vllm-whl -U vllm
# Install the grpcio and ray wheels shipped in the vllm-t4-fix dataset.
!pip install -U --upgrade /kaggle/input/vllm-t4-fix/grpcio-1.62.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
!pip install -U --upgrade /kaggle/input/vllm-t4-fix/ray-2.11.0-cp310-cp310-manylinux2014_x86_64.whl

Found existing installation: torch 2.1.2
Uninstalling torch-2.1.2:
  Successfully uninstalled torch-2.1.2
Looking in links: /kaggle/input/vllm-whl
Processing /kaggle/input/vllm-whl/vllm-0.4.0.post1-cp310-cp310-manylinux1_x86_64.whl
Processing /kaggle/input/vllm-whl/torch-2.1.2-cp310-cp310-manylinux1_x86_64.whl (from vllm)
Processing /kaggle/input/vllm-whl/xformers-0.0.23.post1-cp310-cp310-manylinux2014_x86_64.whl (from vllm)
Processing /kaggle/input/vllm-whl/outlines-0.0.34-py3-none-any.whl (from vllm)
Installing collected packages: torch, xformers, outlines, vllm
Successfully installed outlines-0.0.34 torch-2.1.2 vllm-0.4.0.post1 xformers-0.0.23.post1
Processing /kaggle/input/vllm-t4-fix/grpcio-1.62.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
grpcio is already installed with the same version as the provided wheel. Use --force-reinstall to force an installation of the wheel.
Processing /kaggle/input/vllm-t4-fix/ray-2.11.0-cp310-cp310-manylinux2014_x86_64.whl
ray is alr

## Some Configuration

In [35]:
# In DEBUG mode, infer only on 5 problems
DEBUG = False
# Number of candidate solutions to generate
K = 7
# Maximum number of turns per candidate (passed to SCTIRAgent as max_depth)
DEPTH = 4
# Sampling temperature and top-p passed to vllm.SamplingParams
TEMPERATURE = 0.75
TOP_P = 0.85
# Number of prompts pulled from the queue for each llm.generate() call
BATCH_SIZE = 64

## Imports

In [8]:
import vllm
import re
import csv
import torch
import gc
from tqdm import tqdm
import pandas as pd
from queue import Queue, Empty
import os
import re
import signal
import subprocess
import tempfile
from collections import Counter
from contextlib import contextmanager

import threading
from concurrent.futures import ThreadPoolExecutor, as_completed

2024-10-20 16:10:20,550	INFO util.py:124 -- Outdated packages:
  ipywidgets==7.7.1 found, needs ipywidgets>=8
Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


# Python Code Execution Environment

In [9]:
1

1

## Find Python code blocks within text

In [14]:
def find_python_blocks(text):
    """Return the source of every ```python fenced block found in ``text``.

    Blocks whose entire body is a bare number (anything ``int()`` or
    ``float()`` accepts, e.g. ``3`` or ``3.5``) are dropped, since executing
    them yields nothing beyond the literal itself.

    Args:
        text: Text that may contain ```python ... ``` fenced blocks.

    Returns:
        list[str]: Stripped code of each remaining block, in order of appearance.
    """
    def is_bare_number(code):
        # A block is "just a number" if either numeric parser accepts it.
        for parse in (int, float):
            try:
                parse(code)
            except ValueError:
                continue
            return True
        return False

    fenced = re.findall(r"(```python.*?```)", text, re.DOTALL)
    kept = []
    for block in fenced:
        # strip the opening and closing fences
        code = block[len("```python"):-len("```")].strip()
        if not is_bare_number(code):
            kept.append(code)
    return kept

## Class to Execute Python code (adapted from Numina)

In [15]:
class PythonREPL:
    """Run a Python snippet in a separate ``python3`` process with a time limit.

    Calling the instance with source code returns ``(success, text)``:
    ``(True, stdout)`` when the process exits with code 0, otherwise
    ``(False, trimmed_traceback)``.

    Raises:
        TimeoutError: when the snippet exceeds ``timeout`` seconds, whether the
            SIGALRM alarm or the subprocess timeout fires first.
    """

    def __init__(self, timeout=5):
        # Time limit in seconds; used for both signal.alarm and subprocess.run.
        self.timeout = timeout

    # handles timeout
    @contextmanager
    def time_limit(self, seconds):
        """Raise TimeoutError inside the ``with`` body after ``seconds`` seconds.

        Installs a SIGALRM handler and arms an alarm; the alarm is cancelled
        when the body exits.
        """
        def signal_handler(*_):
            raise TimeoutError(f"Timed out after {seconds} seconds.")

        signal.signal(signal.SIGALRM, signal_handler)
        signal.alarm(seconds)
        try:
            yield
        finally:
            signal.alarm(0)

    @staticmethod
    def _clean_traceback(stderr, temp_file_path):
        """Reduce a traceback to its header, script frames and final error line."""
        msgs = stderr.strip().split("\n")
        new_msgs = []
        want_next = False
        for m in msgs:
            if "Traceback" in m:
                new_msgs.append(m)
            elif m == msgs[-1]:
                new_msgs.append(m)
            elif temp_file_path in m:
                # Cut the line just after the opening quote of the file path, so
                # the temporary path (and everything after it) is not shown.
                st = m.index('"/') + 1 if '"/' in m else 0
                new_msgs.append(m[:st])
                want_next = True
            elif want_next:
                # the source line that follows a script frame
                new_msgs.append(m)
                want_next = False
        return "\n".join(new_msgs).strip()

    def __call__(self, query):
        """Execute ``query`` and return ``(success, output_or_error)``."""
        query = "import math\nimport numpy as np\nimport sympy as sp\n" + query
        lines = query.strip().split("\n")
        # If the last line does not print, wrap it in print() so its value is
        # captured; a trailing "#" comment is dropped first.
        if "print(" not in lines[-1]:
            if "#" in lines[-1]:
                lines[-1] = lines[-1].split("#")[0]
            lines[-1] = "print(" + lines[-1] + ")"
        source = "\n".join(lines)
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_file_path = os.path.join(temp_dir, "tmp.py")
            with open(temp_file_path, "w", encoding="utf-8") as f:
                f.write(source)
            try:
                with self.time_limit(self.timeout):
                    result = subprocess.run(
                        ["python3", temp_file_path],
                        capture_output=True,
                        check=False,
                        text=True,
                        timeout=self.timeout,
                    )
            except subprocess.TimeoutExpired as exc:
                # subprocess.TimeoutExpired is not a TimeoutError; convert it so
                # callers that catch TimeoutError handle both timeout paths.
                raise TimeoutError(f"Timed out after {self.timeout} seconds.") from exc
        if result.returncode == 0:
            return True, result.stdout.strip()
        return False, self._clean_traceback(result.stderr, temp_file_path)

## Execute a Python code block

In [16]:
def execute(executor, code):
    """Run ``code`` through ``executor`` and return ``(output, success)``.

    Code that mentions a blocked library name is rejected without being run.
    A ``TimeoutError`` raised by the executor is reported as a failed run whose
    output is the exception message.

    Args:
        executor: Callable taking the code string and returning
            ``(success, output)``.
        code: Python source to execute.

    Returns:
        tuple[str, bool]: Stripped output (or error text) and the success flag.
    """
    # Refuse code that references blocked libraries; do not execute it.
    for lib in ("subprocess", "venv"):
        if lib in code:
            return f"{lib} is not allowed", False

    success = False
    try:
        success, output = executor(code)
    except TimeoutError as e:
        output = str(e)

    return output.strip(), success

## Test by running some python code

In [17]:
# Sample text with three fenced python blocks: a short script, a bare expression, and a bare number.
text = """Block 1
```python
s = 0
for i in range(100):
    s += i
print(s)
```
Block 2
```python
2**12
```
Block 3
```python
3
```
"""

In [18]:
# Extract the python code blocks from the sample text
blocks = find_python_blocks(text)
blocks

['s = 0\nfor i in range(100):\n    s += i\nprint(s)', '2**12']

In [19]:
# Run every extracted block; each entry of `outputs` is an (output, success) pair.
executor = PythonREPL()
outputs = [execute(executor, block) for block in blocks]
outputs

[('4950', True), ('4096', True)]

In [20]:
# Print each code block followed by its captured output (output[0] is the text, output[1] the success flag).
for block, output in zip(blocks, outputs):
    print(f"""
```python
{block}
```
```output
{output[0]}
```"""
    )


```python
s = 0
for i in range(100):
    s += i
print(s)
```
```output
4950
```

```python
2**12
```
```output
4096
```


# Load Model on vLLM

## We use the AI-MO/NuminaMath-7B-TIR model here. You should explore other models.

In [21]:
# Load the NuminaMath-7B-TIR checkpoint with vLLM, split across two GPUs (tensor_parallel_size=2)
# in half precision, with the context window capped at 4096 tokens.
llm = vllm.LLM(
    "AI-MO/NuminaMath-7B-TIR",
    tensor_parallel_size=2, 
    gpu_memory_utilization=0.95, 
    trust_remote_code=True,
    dtype="half", 
    enforce_eager=True,
    max_model_len=4096,    
)
# The tokenizer is used by the agents to render chat messages into prompt text.
tokenizer = llm.get_tokenizer()

config.json:   0%|          | 0.00/716 [00:00<?, ?B/s]



2024-10-20 16:11:31,973	INFO worker.py:1749 -- Started a local Ray instance.


INFO 10-20 16:11:33 llm_engine.py:74] Initializing an LLM engine (v0.4.0.post1) with config: model='AI-MO/NuminaMath-7B-TIR', tokenizer='AI-MO/NuminaMath-7B-TIR', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.float16, max_seq_len=4096, download_dir=None, load_format=auto, tensor_parallel_size=2, disable_custom_all_reduce=True, quantization=None, enforce_eager=True, kv_cache_dtype=auto, device_config=cuda, seed=0)


tokenizer_config.json:   0%|          | 0.00/1.23k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/4.61M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

INFO 10-20 16:11:42 selector.py:40] Cannot use FlashAttention backend for Volta and Turing GPUs.
INFO 10-20 16:11:42 selector.py:25] Using XFormers backend.
[36m(RayWorkerVllm pid=421)[0m INFO 10-20 16:11:43 selector.py:40] Cannot use FlashAttention backend for Volta and Turing GPUs.
[36m(RayWorkerVllm pid=421)[0m INFO 10-20 16:11:43 selector.py:25] Using XFormers backend.
[36m(RayWorkerVllm pid=421)[0m INFO 10-20 16:11:44 pynccl_utils.py:45] vLLM is using nccl==2.18.1
INFO 10-20 16:11:44 pynccl_utils.py:45] vLLM is using nccl==2.18.1
INFO 10-20 16:11:45 weight_utils.py:177] Using model weights format ['*.safetensors']
[36m(RayWorkerVllm pid=421)[0m INFO 10-20 16:11:45 weight_utils.py:177] Using model weights format ['*.safetensors']


model-00003-of-00003.safetensors:   0%|          | 0.00/3.85G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

INFO 10-20 16:12:49 model_runner.py:104] Loading model weights took 6.4663 GB
[36m(RayWorkerVllm pid=421)[0m INFO 10-20 16:12:50 model_runner.py:104] Loading model weights took 6.4663 GB
INFO 10-20 16:12:53 ray_gpu_executor.py:240] # GPU blocks: 1755, # CPU blocks: 1092


# Utilities

## Extract boxed answer

In [22]:
def extract_answer(text):
    """Return the content of the right-most ``\\boxed{...}`` in ``text``.

    If ``text`` contains no ``\\boxed``, the right-most ``\\fbox{...}`` is used
    instead. Nested braces inside the box are kept intact.

    Args:
        text: Generated text to search.

    Returns:
        str | None: The text between the outer braces, or ``None`` when there
        is no box, the braces are unbalanced, or the command is not followed
        directly by ``{``.
    """
    # find right most boxed answer
    def last_boxed_only_string(text):
        idx = text.rfind("\\boxed")
        if idx < 0:
            idx = text.rfind("\\fbox")
            if idx < 0:
                return None
        # Scan forward, tracking brace depth, until the brace that closes the box.
        depth = 0
        for i in range(idx, len(text)):
            ch = text[i]
            if ch == "{":
                depth += 1
            elif ch == "}":
                depth -= 1
                if depth == 0:
                    return text[idx : i + 1]
        return None

    # get content of boxed
    def remove_boxed(boxed):
        # Accept both commands that last_boxed_only_string can return.
        for left in ("\\boxed{", "\\fbox{"):
            if boxed.startswith(left) and boxed.endswith("}"):
                return boxed[len(left):-1]
        return None

    boxed = last_boxed_only_string(text)
    if boxed is None:
        return None
    return remove_boxed(boxed)

## Majority vote (select the most frequently occurring answer)

In [23]:
# Define the majority voting function to get the most common answer
def majority_vote(answers):
    """Pick the answer that appears most often, ignoring ``None`` entries.

    Ties go to the answer that was seen first. Returns ``None`` when there is
    no non-``None`` answer.
    """
    votes = [vote for vote in answers if vote is not None]
    if len(votes) == 0:
        return None

    # tally how many times each answer was given
    tally = {}
    for vote in votes:
        tally[vote] = tally.get(vote, 0) + 1

    # max() keeps the first key among equal counts (dicts iterate in insertion order)
    return max(tally, key=tally.get)

# TIR Agent

In [25]:
class TIRAgent:
    """One tool-integrated-reasoning conversation for a single problem.

    The agent keeps the chat history, extracts python blocks and boxed answers
    from each LLM response, and feeds the execution result of the last code
    block back to the LLM as the next user message. It is complete once a
    response contains no python block or the turn budget is used up.
    """

    def __init__(self, problem_id, id, problem, tokenizer, max_depth, log):
        """Create the agent and its initial user message.

        Args:
            problem_id: Identifier of the problem being solved.
            id: Identifier of this agent (its index among the samples).
            problem: Problem statement inserted into the first prompt.
            tokenizer: Tokenizer whose ``apply_chat_template`` renders prompts.
            max_depth: Turn budget; see ``_should_continue``.
            log: Object with ``writerow`` used to log answers, or a falsy value
                to disable logging.
        """
        # problem id
        self.problem_id = problem_id
        # id of the agent
        self.id = id
        # turn counter; starts at 1 and is incremented for every response
        self.depth = 1
        # maximum number of turns allowed
        self.max_depth = max_depth
        # LLM's tokenizer
        self.tokenizer = tokenizer
        # Problem statement
        self.problem = problem
        # Chat Messages
        self.messages = [
            {
                "role": "user", 
                "content": f"""Here is a math problem in Bengali:
{self.problem}
The answer is a non-negative integer. Please reason step by step to solve the problem above. Provide python code to verify your reasoning.
Put your final integer answer within \\boxed{{}}."""
            }
        ]
        # Last response from the LLM
        self.last_response = None
        # Code blocks from the last response
        self.blocks = []
        # Answers that the LLM generated in \boxed{}
        self.answers = []
        # True once no python code was generated in the last response or max_depth was reached
        self.is_complete = False
        # File to log answers
        self.log = log
        # Next prompt to the LLM
        self.next_prompt = None

    def complete(self):
        """Return True when the agent needs no further LLM turns."""
        return self.is_complete

    def add_response(self, response, executor):
        """Record an LLM response and, if the agent continues, queue the next prompt.

        Args:
            response: Text generated by the LLM for the current prompt.
            executor: Python executor passed to ``execute`` to run the last
                code block of the response.
        """
        self.depth += 1
        # remember this response
        self.last_response = response
        # add this to the messages history
        self.messages.append({"role": "assistant", "content": response})
        # extract python blocks
        self.blocks = find_python_blocks(response)
        # extract answer from the generated text, if present
        answer = extract_answer(response)
        if answer is not None:
            self.answers.append(answer)
        # is it done?
        self.is_complete = not self._should_continue()
        # if not, use the python executor to create next prompt
        if not self.is_complete:
            self.next_prompt = self._next_prompt(executor)
            self.messages.append({"role": "user", "content": self.next_prompt})

    def _should_continue(self):
        """Return True if another LLM turn is needed.

        ``depth`` starts at 1 and is incremented before this check, so with
        ``max_depth = N`` the agent stops after its (N - 1)-th response.
        """
        # quit if max_depth number of turns reached
        if self.depth >= self.max_depth:
            return False
        # continue only if the last response contained python code
        return len(self.blocks) > 0

    def _next_prompt(self, executor):
        """Execute the last code block and build the follow-up user message."""
        assert not self.is_complete
        assert len(self.blocks) > 0
        # get code result from python execution
        output, status = execute(executor, self.blocks[-1])

        # the message is identical for success and failure except for this word
        kind = "output" if status else "error"
        return f"""The python code you provided gives the following {kind}:
```python
{self.blocks[-1]}
```
```output
{output}
```"""

    def next_message(self):
        """Render the chat history into the prompt text for the next LLM call."""
        assert not self.is_complete 
        # apply chat template to get the text
        text = self.tokenizer.apply_chat_template(
            self.messages,
            tokenize=False,
            add_generation_prompt=True
        )

        return text

    def final_answer(self):
        """Return the latest boxed answer as an int, or None.

        The raw (unconverted) answer is written to the log on every call, so
        calling this more than once logs the same row again.
        """
        # if there are no answers yet, the answer is None; otherwise use the latest one
        ans = self.answers[-1] if len(self.answers) > 0 else None
        # log to file
        if self.log:
            self.log.writerow([self.problem_id, self.id, ans])
        # try to convert to integer; None or a non-integer string gives None
        try:
            return int(ans)
        except (TypeError, ValueError):
            return None

# SC-TIR Agent

In [26]:
class SCTIRAgent:
    """Self-consistency wrapper: several TIRAgents on one problem, combined by majority vote."""

    def __init__(self, problem_id, problem, tokenizer, samples, max_depth, log):
        """Store the settings and create ``samples`` TIRAgents with ids 0..samples-1."""
        self.problem_id = problem_id    # problem id
        self.problem = problem          # problem statement
        self.tokenizer = tokenizer      # LLM's tokenizer
        self.samples = samples          # number of TIRAgents to create
        self.max_depth = max_depth      # maximum number of turns
        # one TIR agent per sample
        self.agents = [
            TIRAgent(problem_id, agent_id, problem, tokenizer, max_depth, log)
            for agent_id in range(samples)
        ]
        self.log = log                  # log file

    def complete(self):
        """Return True only when every TIR agent is complete."""
        return all(member.complete() for member in self.agents)

    def get_ready_agents(self):
        """Return the TIR agents that are not complete yet, in creation order."""
        return [member for member in self.agents if not member.complete()]

    def final_answer(self):
        """Majority-vote the agents' final answers; 0 when no agent produced one."""
        assert self.complete()
        votes = [member.final_answer() for member in self.agents]
        winner = majority_vote(votes)
        return 0 if winner is None else winner

# Load Test Set

In [27]:
# Read the test-set CSV into a DataFrame and display it
test_df = pd.read_csv('/kaggle/input/dlsprint3/test.csv')
test_df

Unnamed: 0,ID,Problem
0,0,একটি কেক-কে সরলরৈখিকভাবে 2 বার কেটে সর্বোচ্চ ক...
1,1,একটি পুকুরের উপর 100 টি পাথর রাখা আছে। প্রথমে ...
2,2,"ধরো $f(x) = x^{67-x^{67-x^{67-\dots}}}$, যেখান..."
3,3,সামিন ও স্বর্গ গণনার জন্য শুধু 0 আর 1 ব্যবহার ...
4,4,"A, B, C তিনটি স্ট্যান্ড। A স্ট্যান্ড থেকে 2 টি..."
...,...,...
95,95,একটি বক্সে কিছু লাল এবং নীল বল রয়েছে। যদি আরও ...
96,96,একটি চার অঙ্কের পূর্ণসংখ্যা 11 দ্বারা নিঃশেষে ...
97,97,এমন কতগুলো ধনাত্মক পূর্ণসংখ্যা আছে যারা $12^{1...
98,98,"3 টি ক্রমিক পূর্ণসংখ্যার যোগফল 216 হলে, সবচেয়..."


In [21]:
# import pandas as pd

# translated_df = pd.read_csv('/kaggle/input/translated/translation.csv')

In [22]:
# for index, row in translated_df.iterrows():
#     cleaned_problem = row['Problem'].split("English Statement:", 1)[0].rstrip()
#     translated_df.at[index, 'Problem'] = cleaned_problem
    
#     print(f"i{index}", row['Problem'], '\n')


In [23]:
# test_df = translated_df

## Load 5 problems since we are short on time

In [28]:
if DEBUG:
    # Keep only the first 5 problems for a quick run (the original
    # `test_df = test_df` was a no-op and kept the whole test set).
    test_df = test_df.head(5)
    torch.cuda.empty_cache()
    gc.collect()

# Configure LLM and Python REPL

In [36]:
# Generation settings: up to 3582 new tokens per response, with the temperature and top-p from the configuration cell.
sampling_params = vllm.SamplingParams(max_tokens=3582, temperature=TEMPERATURE, top_p=TOP_P)
# Python runner used to execute code blocks from model responses (default timeout).
executor = PythonREPL()

# Run the Agents

## TIR Agent

In [None]:
# for row in test_df.values[:2]:
#     problem_id = row[0]
#     problem = row[1]
    
#     agent = TIRAgent(problem_id, 0, problem, tokenizer, max_depth=4, log=None)
    
#     while not agent.complete():
#         text = agent.next_message()
#         # get response from LLM
#         response = llm.generate([text], sampling_params)
#         # pass in python executor, since response might contain python code
#         agent.add_response(response[0].outputs[0].text, executor)
    
#     for message in agent.messages:
#         print(f"Role: {message['role']}\n")
#         print(f"Content:\n {message['content']}\n")
    
#     answer = agent.final_answer()
#     print(f"Final answer: {answer}")
    

## SC-TIR Agent

In [None]:
# for row in test_df.values:
#     problem_id = row[0]
#     problem = row[1]
    
#     agent = SCTIRAgent(problem_id, problem, tokenizer, samples=2, max_depth=4, log=None)
    
#     while not agent.complete():
#         ready_agents = agent.get_ready_agents()
#         texts = [a.next_message() for a in ready_agents]
#         # get response from LLM
#         responses = llm.generate(texts, sampling_params)
#         # pass response to the agents
#         for i, ready_agent in enumerate(ready_agents):
#             ready_agent.add_response(responses[i].outputs[0].text, executor)
    
#     answer = agent.final_answer()
#     print(f"Problem: {problem}")
#     print(f"Final answer: {answer}")

# Run Inference

# Create submission

## Also log agent answers

In [37]:
# Open the output files with newline='' so the csv module controls line
# endings itself (recommended by the csv documentation for writer targets).
# NOTE(review): neither file is closed in this cell; make sure both are closed
# (or flushed) after inference so all rows reach disk.
file = open('submission.csv', 'w', encoding='utf-8', newline='')
log_file = open('log.csv', 'w', encoding='utf-8', newline='')

# `submission` holds the final answers; `log` holds each agent's raw answer
submission = csv.writer(file)
log = csv.writer(log_file)

# header rows
submission.writerow(['ID', 'Answer'])
log.writerow(['ID', "Agent ID", 'Answer'])

20

## Configure LLM sampling parameters and Python REPL

## Use a queue to batch inference

In [38]:
%%time

# Final answer per problem id, filled in as each SCTIRAgent finishes
boxed_answers = {}
# SCTIRAgents that still have unfinished TIR agents
agents = []

# Queue of TIR agents waiting for their next LLM turn
q = Queue()

iterator = iter(tqdm(test_df.values))

while True:
    # Collect answers from finished problems and keep only the unfinished ones.
    # final_answer() writes to the log on every call, so call it exactly once.
    unfinished = []
    for agent in agents:
        if agent.complete():
            answer = agent.final_answer()
            boxed_answers[agent.problem_id] = answer
            print(answer)
        else:
            unfinished.append(agent)
    agents[:] = unfinished

    # Top up the queue with new problems until it holds at least one batch
    # or the test set is exhausted.
    while q.qsize() < BATCH_SIZE:
        try:
            row = next(iterator)
        except StopIteration:
            break

        problem_id = row[0]
        problem = row[1]

        agent = SCTIRAgent(problem_id, problem, tokenizer, K, DEPTH, log)

        agents.append(agent)

        for tir_agent in agent.get_ready_agents():
            q.put_nowait(tir_agent)

    # Nothing left to generate: every agent is finished and no problems remain.
    if q.empty():
        break

    # Pull up to BATCH_SIZE agents and render their prompts.
    ready_agents = []
    texts = []
    for _ in range(BATCH_SIZE):
        try:
            agent = q.get_nowait()
        except Empty:
            break
        # Kept outside the try so a prompt-rendering error is not silently
        # swallowed, which would leave ready_agents and texts misaligned.
        ready_agents.append(agent)
        texts.append(agent.next_message())

    responses = llm.generate(texts, sampling_params)
    responses = [response.outputs[0].text for response in responses]

    # Feed each response back to its agent; re-queue agents that need another turn.
    for agent, response in zip(ready_agents, responses):
        agent.add_response(response, executor)
        if not agent.complete():
            q.put_nowait(agent)
   

  0%|          | 0/100 [00:00<?, ?it/s]
Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:30<32:16, 30.73s/it][A
Processed prompts:   3%|▎         | 2/64 [00:37<17:25, 16.86s/it][A
Processed prompts:   5%|▍         | 3/64 [00:43<12:02, 11.85s/it][A
Processed prompts:   6%|▋         | 4/64 [00:44<07:17,  7.29s/it][A
Processed prompts:   8%|▊         | 5/64 [00:49<06:29,  6.60s/it][A
Processed prompts:   9%|▉         | 6/64 [00:49<04:20,  4.49s/it][A
Processed prompts:  11%|█         | 7/64 [00:51<03:20,  3.52s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:51<02:24,  2.58s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:52<01:53,  2.07s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:53<01:08,  1.29s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:54<00:57,  1.11s/it][A
Processed prompts:  20%|██        | 13/64 [00:54<00:49,  1.02it/s][A
Processed prompts:  22%|██▏       | 14/64 [00:55<00:37,  1.32it/s][




Processed prompts:   2%|▏         | 1/64 [00:07<07:25,  7.08s/it][A
Processed prompts:   3%|▎         | 2/64 [00:13<06:50,  6.62s/it][A
Processed prompts:   5%|▍         | 3/64 [00:15<04:29,  4.41s/it][A
Processed prompts:   6%|▋         | 4/64 [00:16<03:01,  3.03s/it][A
Processed prompts:   8%|▊         | 5/64 [00:17<02:29,  2.53s/it][A
Processed prompts:   9%|▉         | 6/64 [00:18<01:56,  2.00s/it][A
Processed prompts:  11%|█         | 7/64 [00:21<02:09,  2.28s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:24<02:27,  2.64s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:33<04:02,  4.41s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:49<07:15,  8.07s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:54<06:16,  7.10s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:57<05:01,  5.79s/it][A
Processed prompts:  20%|██        | 13/64 [00:58<03:49,  4.51s/it][A
Processed prompts:  22%|██▏       | 14/64 [01:01<03:20,  4.01s/it][A
Processed prompts:  23%|██▎ 




Processed prompts:  47%|████▋     | 30/64 [03:02<04:28,  7.90s/it][A
Processed prompts:  48%|████▊     | 31/64 [03:19<05:46, 10.51s/it][A
Processed prompts:  50%|█████     | 32/64 [03:33<06:16, 11.77s/it][A
Processed prompts:  52%|█████▏    | 33/64 [03:36<04:41,  9.10s/it][A
Processed prompts:  53%|█████▎    | 34/64 [03:39<03:38,  7.28s/it][A
Processed prompts:  55%|█████▍    | 35/64 [03:48<03:45,  7.79s/it][A
Processed prompts:  56%|█████▋    | 36/64 [03:49<02:42,  5.79s/it][A
Processed prompts:  58%|█████▊    | 37/64 [03:53<02:14,  4.99s/it][A
Processed prompts:  59%|█████▉    | 38/64 [03:59<02:23,  5.52s/it][A
Processed prompts:  61%|██████    | 39/64 [04:01<01:47,  4.29s/it][A
Processed prompts:  62%|██████▎   | 40/64 [04:02<01:18,  3.29s/it][A
Processed prompts:  64%|██████▍   | 41/64 [04:03<01:00,  2.63s/it][A
Processed prompts:  66%|██████▌   | 42/64 [04:05<00:53,  2.45s/it][A
Processed prompts:  67%|██████▋   | 43/64 [04:06<00:43,  2.07s/it][A
Processed prompts: 

3
10
3
225
40
0
120
334



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:12<13:31, 12.88s/it][A
Processed prompts:   3%|▎         | 2/64 [00:18<09:02,  8.75s/it][A
Processed prompts:   5%|▍         | 3/64 [00:20<05:35,  5.50s/it][A
Processed prompts:   6%|▋         | 4/64 [00:32<08:00,  8.01s/it][A
Processed prompts:   8%|▊         | 5/64 [00:33<05:21,  5.45s/it][A
Processed prompts:   9%|▉         | 6/64 [00:37<05:02,  5.22s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:40<03:10,  3.40s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:41<02:24,  2.63s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:45<02:48,  3.12s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:47<02:24,  2.73s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:51<02:44,  3.17s/it][A
Processed prompts:  20%|██        | 13/64 [00:58<03:25,  4.03s/it][A
Processed prompts:  22%|██▏       | 14/64 [01:01<03:12,  3.85s/it][A
Processed prompts:  23%|██▎       | 

14
63



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:16<17:36, 16.78s/it][A
Processed prompts:   3%|▎         | 2/64 [00:19<08:43,  8.45s/it][A
Processed prompts:   5%|▍         | 3/64 [00:25<07:17,  7.17s/it][A
Processed prompts:   6%|▋         | 4/64 [00:29<06:06,  6.12s/it][A
Processed prompts:   8%|▊         | 5/64 [00:32<04:53,  4.97s/it][A
Processed prompts:   9%|▉         | 6/64 [00:34<03:52,  4.01s/it][A
Processed prompts:  11%|█         | 7/64 [00:38<03:49,  4.03s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:39<02:44,  2.93s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:43<03:04,  3.35s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:49<03:48,  4.24s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:59<05:07,  5.80s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:59<03:40,  4.23s/it][A
Processed prompts:  20%|██        | 13/64 [01:01<03:02,  3.58s/it][A
Processed prompts:  22%|██▏       | 1

2
8
1901



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:04<04:27,  4.25s/it][A
Processed prompts:   3%|▎         | 2/64 [00:14<07:59,  7.74s/it][A
Processed prompts:   5%|▍         | 3/64 [00:15<04:58,  4.90s/it][A
Processed prompts:   6%|▋         | 4/64 [00:34<10:12, 10.20s/it][A
Processed prompts:   8%|▊         | 5/64 [00:36<07:18,  7.43s/it][A
Processed prompts:   9%|▉         | 6/64 [00:40<05:56,  6.15s/it][A
Processed prompts:  11%|█         | 7/64 [00:49<06:51,  7.22s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:51<05:07,  5.50s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:55<04:24,  4.82s/it][A
Processed prompts:  16%|█▌        | 10/64 [01:06<06:05,  6.77s/it][A
Processed prompts:  17%|█▋        | 11/64 [01:14<06:29,  7.34s/it][A




Processed prompts:  19%|█▉        | 12/64 [01:16<04:49,  5.56s/it][A
Processed prompts:  20%|██        | 13/64 [01:18<03:44,  4.41s/it][A
Processed prompts:  22%|██▏       | 14/64 [01:19<02:58,  3.58s/it][A
Processed prompts:  23%|██▎       | 15/64 [01:25<03:25,  4.20s/it][A
Processed prompts:  25%|██▌       | 16/64 [01:26<02:34,  3.23s/it][A
Processed prompts:  27%|██▋       | 17/64 [01:26<01:53,  2.42s/it][A
Processed prompts:  28%|██▊       | 18/64 [01:28<01:39,  2.15s/it][A
Processed prompts:  30%|██▉       | 19/64 [01:31<01:53,  2.52s/it][A
Processed prompts:  33%|███▎      | 21/64 [01:42<02:42,  3.78s/it][A




Processed prompts:  34%|███▍      | 22/64 [01:43<02:09,  3.08s/it][A
Processed prompts:  36%|███▌      | 23/64 [01:45<01:53,  2.77s/it][A
Processed prompts:  38%|███▊      | 24/64 [01:52<02:38,  3.97s/it][A
Processed prompts:  39%|███▉      | 25/64 [01:56<02:37,  4.05s/it][A
Processed prompts:  41%|████      | 26/64 [02:00<02:31,  3.99s/it][A
Processed prompts:  42%|████▏     | 27/64 [02:02<02:06,  3.43s/it][A
Processed prompts:  44%|████▍     | 28/64 [02:03<01:37,  2.70s/it][A
Processed prompts:  45%|████▌     | 29/64 [02:12<02:45,  4.73s/it][A
Processed prompts:  47%|████▋     | 30/64 [02:17<02:40,  4.71s/it][A
Processed prompts:  48%|████▊     | 31/64 [02:24<02:55,  5.30s/it][A
Processed prompts:  50%|█████     | 32/64 [02:25<02:12,  4.13s/it][A
Processed prompts:  52%|█████▏    | 33/64 [02:31<02:25,  4.70s/it][A
Processed prompts:  53%|█████▎    | 34/64 [02:32<01:47,  3.59s/it][A
Processed prompts:  55%|█████▍    | 35/64 [02:39<02:10,  4.49s/it][A
Processed prompts: 

1000
16000
114000
11
8



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:01<01:09,  1.10s/it][A
Processed prompts:   3%|▎         | 2/64 [00:12<07:25,  7.19s/it][A
Processed prompts:   5%|▍         | 3/64 [00:14<04:48,  4.73s/it][A
Processed prompts:   6%|▋         | 4/64 [00:16<03:32,  3.55s/it][A
Processed prompts:   8%|▊         | 5/64 [00:17<02:50,  2.90s/it][A
Processed prompts:   9%|▉         | 6/64 [00:19<02:24,  2.50s/it][A
Processed prompts:  11%|█         | 7/64 [00:20<01:48,  1.91s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:24<02:35,  2.78s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:28<02:46,  3.03s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:31<02:45,  3.06s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:35<02:12,  2.55s/it][A
Processed prompts:  20%|██        | 13/64 [00:41<02:56,  3.45s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:47<03:26,  4.13s/it][A
Processed prompts:  23%|██▎       | 15/64 [00:48<02:44,  3.35s/it][A
Processed prompts:  25%|██▌ 

17
24
97



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:04<04:45,  4.53s/it][A
Processed prompts:   5%|▍         | 3/64 [00:17<06:11,  6.09s/it][A
Processed prompts:   6%|▋         | 4/64 [00:21<05:13,  5.23s/it][A
Processed prompts:   8%|▊         | 5/64 [00:25<04:44,  4.82s/it][A
Processed prompts:   9%|▉         | 6/64 [00:32<05:30,  5.70s/it][A
Processed prompts:  11%|█         | 7/64 [00:34<04:05,  4.31s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:36<03:21,  3.61s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:42<03:57,  4.32s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:43<03:01,  3.36s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:44<02:18,  2.61s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:44<01:45,  2.03s/it][A
Processed prompts:  20%|██        | 13/64 [00:46<01:30,  1.78s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:48<01:39,  1.99s/it][A
Processed prompts:  23%|██▎       | 15/64 [01:01<04:14,  5.20s/it][A
Processed prompts:  25%|██▌

96
15
1
145



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:05<05:52,  5.60s/it][A
Processed prompts:   3%|▎         | 2/64 [00:15<08:12,  7.94s/it][A
Processed prompts:   5%|▍         | 3/64 [00:23<08:08,  8.01s/it][A
Processed prompts:   6%|▋         | 4/64 [00:39<11:16, 11.28s/it][A
Processed prompts:   8%|▊         | 5/64 [00:53<12:07, 12.34s/it][A
Processed prompts:   9%|▉         | 6/64 [00:56<08:48,  9.10s/it][A
Processed prompts:  11%|█         | 7/64 [01:03<07:56,  8.37s/it][A
Processed prompts:  12%|█▎        | 8/64 [01:08<06:48,  7.29s/it][A
Processed prompts:  14%|█▍        | 9/64 [01:15<06:30,  7.11s/it][A
Processed prompts:  16%|█▌        | 10/64 [01:18<05:14,  5.83s/it][A
Processed prompts:  17%|█▋        | 11/64 [01:21<04:33,  5.16s/it][A
Processed prompts:  19%|█▉        | 12/64 [01:24<03:48,  4.40s/it][A
Processed prompts:  20%|██        | 13/64 [01:26<03:08,  3.69s/it][A
Processed prompts:  22%|██▏       | 14/64 [01:30<03:10,  3.82s/it][A
Processed prompts:  23%|██▎ 

4105
0
3



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:05<05:39,  5.38s/it][A




Processed prompts:   5%|▍         | 3/64 [00:09<02:49,  2.79s/it][A
Processed prompts:   6%|▋         | 4/64 [00:15<03:52,  3.87s/it][A
Processed prompts:   8%|▊         | 5/64 [00:22<05:05,  5.17s/it][A
Processed prompts:   9%|▉         | 6/64 [00:26<04:22,  4.53s/it][A
Processed prompts:  11%|█         | 7/64 [00:30<04:24,  4.64s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:46<07:29,  8.03s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:47<05:16,  5.75s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:50<04:24,  4.90s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:52<03:38,  4.13s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:53<02:52,  3.32s/it][A
Processed prompts:  20%|██        | 13/64 [00:56<02:44,  3.23s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:57<02:07,  2.55s/it][A
Processed prompts:  23%|██▎       | 15/64 [01:05<03:20,  4.08s/it][A
Processed prompts:  25%|██▌       | 16/64 [01:07<02:40,  3.34s/it][A
Processed prompts:  27%|██

29
1
201
280



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A
Processed prompts:   2%|▏         | 1/64 [00:20<21:00, 20.01s/it][A
Processed prompts:   3%|▎         | 2/64 [00:21<09:24,  9.10s/it][A
Processed prompts:   5%|▍         | 3/64 [00:29<08:31,  8.39s/it][A
Processed prompts:   6%|▋         | 4/64 [00:31<06:10,  6.17s/it][A
Processed prompts:   8%|▊         | 5/64 [00:41<07:23,  7.52s/it][A
Processed prompts:   9%|▉         | 6/64 [00:49<07:16,  7.53s/it][A
Processed prompts:  11%|█         | 7/64 [00:53<06:01,  6.35s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:56<05:08,  5.51s/it][A
Processed prompts:  14%|█▍        | 9/64 [01:01<04:39,  5.08s/it][A
Processed prompts:  16%|█▌        | 10/64 [01:06<04:42,  5.23s/it][A
Processed prompts:  17%|█▋        | 11/64 [01:07<03:23,  3.84s/it][A
Processed prompts:  19%|█▉        | 12/64 [01:12<03:45,  4.33s/it][A
Processed prompts:  20%|██        | 13/64 [01:18<03:58,  4.68s/it][A
Processed prompts:  22%|██▏       | 1




Processed prompts:  28%|██▊       | 18/64 [01:27<01:32,  2.01s/it][A
Processed prompts:  30%|██▉       | 19/64 [01:28<01:18,  1.76s/it][A
Processed prompts:  31%|███▏      | 20/64 [01:34<02:01,  2.77s/it][A
Processed prompts:  33%|███▎      | 21/64 [01:40<02:45,  3.85s/it][A
Processed prompts:  34%|███▍      | 22/64 [01:41<02:08,  3.05s/it][A
Processed prompts:  36%|███▌      | 23/64 [01:43<01:51,  2.73s/it][A
Processed prompts:  38%|███▊      | 24/64 [01:45<01:33,  2.34s/it][A
Processed prompts:  39%|███▉      | 25/64 [01:49<02:00,  3.10s/it][A
Processed prompts:  41%|████      | 26/64 [01:51<01:45,  2.77s/it][A
Processed prompts:  42%|████▏     | 27/64 [01:52<01:19,  2.15s/it][A
Processed prompts:  44%|████▍     | 28/64 [01:54<01:12,  2.00s/it][A
Processed prompts:  45%|████▌     | 29/64 [02:00<01:56,  3.34s/it][A
Processed prompts:  47%|████▋     | 30/64 [02:14<03:39,  6.45s/it][A
Processed prompts:  48%|████▊     | 31/64 [02:18<03:05,  5.64s/it][A
Processed prompts: 

17
1224
2



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:00<00:52,  1.20it/s][A




Processed prompts:   5%|▍         | 3/64 [00:02<00:41,  1.48it/s][A
Processed prompts:   6%|▋         | 4/64 [00:13<04:24,  4.41s/it][A
Processed prompts:   8%|▊         | 5/64 [00:14<03:14,  3.30s/it][A
Processed prompts:   9%|▉         | 6/64 [00:15<02:28,  2.56s/it][A
Processed prompts:  11%|█         | 7/64 [00:16<02:01,  2.12s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:21<02:48,  3.00s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:23<02:23,  2.61s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:26<02:22,  2.65s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:26<01:50,  2.08s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:27<01:26,  1.67s/it][A
Processed prompts:  20%|██        | 13/64 [00:33<02:23,  2.81s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:35<02:16,  2.74s/it][A
Processed prompts:  23%|██▎       | 15/64 [00:44<03:48,  4.66s/it][A
Processed prompts:  25%|██▌       | 16/64 [00:57<05:39,  7.06s/it][A
Processed prompts:  27%|██

12
0
50
252
9



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:02<02:36,  2.49s/it][A
Processed prompts:   3%|▎         | 2/64 [00:29<17:40, 17.11s/it][A
Processed prompts:   5%|▍         | 3/64 [00:33<11:17, 11.11s/it][A
Processed prompts:   6%|▋         | 4/64 [00:36<07:48,  7.80s/it][A
Processed prompts:   8%|▊         | 5/64 [01:01<13:41, 13.92s/it][A
Processed prompts:   9%|▉         | 6/64 [01:03<09:36,  9.93s/it][A
Processed prompts:  11%|█         | 7/64 [01:12<09:05,  9.57s/it][A
Processed prompts:  12%|█▎        | 8/64 [01:15<06:54,  7.41s/it][A
Processed prompts:  14%|█▍        | 9/64 [01:21<06:38,  7.25s/it][A
Processed prompts:  16%|█▌        | 10/64 [01:28<06:20,  7.05s/it][A
Processed prompts:  17%|█▋        | 11/64 [01:30<04:54,  5.56s/it][A
Processed prompts:  19%|█▉        | 12/64 [01:34<04:21,  5.04s/it][A
Processed prompts:  20%|██        | 13/64 [01:48<06:33,  7.72s/it][A
Processed prompts:  22%|██▏       | 14/64 [01:52<05:22,  6.45s/it][A
Processed prompts:  23%|██▎ 

102
8
20



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:01<01:22,  1.30s/it][A
Processed prompts:   3%|▎         | 2/64 [00:14<08:37,  8.35s/it][A
Processed prompts:   5%|▍         | 3/64 [00:17<06:07,  6.03s/it][A
Processed prompts:   6%|▋         | 4/64 [00:36<11:04, 11.07s/it][A
Processed prompts:   8%|▊         | 5/64 [00:39<07:49,  7.95s/it][A
Processed prompts:   9%|▉         | 6/64 [00:47<07:54,  8.18s/it][A
Processed prompts:  11%|█         | 7/64 [00:48<05:21,  5.64s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:49<04:01,  4.32s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:55<04:28,  4.88s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:57<03:27,  3.84s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:59<02:59,  3.39s/it][A
Processed prompts:  19%|█▉        | 12/64 [01:02<02:46,  3.20s/it][A
Processed prompts:  20%|██        | 13/64 [01:02<02:00,  2.35s/it][A
Processed prompts:  22%|██▏       | 14/64 [01:09<03:02,  3.64s/it][A
Processed prompts:  23%|██▎ 

4
857142
65



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:09<10:11,  9.70s/it][A
Processed prompts:   3%|▎         | 2/64 [00:12<05:47,  5.61s/it][A
Processed prompts:   5%|▍         | 3/64 [00:31<11:43, 11.54s/it][A
Processed prompts:   6%|▋         | 4/64 [00:32<07:34,  7.57s/it][A
Processed prompts:   8%|▊         | 5/64 [00:37<06:24,  6.51s/it][A
Processed prompts:   9%|▉         | 6/64 [00:39<04:57,  5.13s/it][A
Processed prompts:  11%|█         | 7/64 [00:40<03:29,  3.68s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:41<02:39,  2.85s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:48<03:47,  4.14s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:50<03:13,  3.57s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:52<02:40,  3.03s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:52<01:58,  2.28s/it][A
Processed prompts:  20%|██        | 13/64 [00:54<01:41,  1.98s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:55<01:28,  1.78s/it][A
Processed prompts:  23%|██▎ 




Processed prompts:  36%|███▌      | 23/64 [01:24<02:04,  3.03s/it][A
Processed prompts:  38%|███▊      | 24/64 [01:30<02:34,  3.85s/it][A
Processed prompts:  39%|███▉      | 25/64 [01:33<02:19,  3.58s/it][A
Processed prompts:  41%|████      | 26/64 [01:34<01:42,  2.69s/it][A
Processed prompts:  42%|████▏     | 27/64 [01:38<01:56,  3.14s/it][A
Processed prompts:  44%|████▍     | 28/64 [01:42<02:06,  3.53s/it][A
Processed prompts:  45%|████▌     | 29/64 [01:47<02:16,  3.91s/it][A
Processed prompts:  47%|████▋     | 30/64 [01:49<01:50,  3.25s/it][A
Processed prompts:  48%|████▊     | 31/64 [01:54<02:03,  3.75s/it][A
Processed prompts:  50%|█████     | 32/64 [01:56<01:44,  3.25s/it][A
Processed prompts:  52%|█████▏    | 33/64 [02:00<01:49,  3.52s/it][A
Processed prompts:  53%|█████▎    | 34/64 [02:02<01:34,  3.15s/it][A
Processed prompts:  55%|█████▍    | 35/64 [02:10<02:10,  4.51s/it][A
Processed prompts:  56%|█████▋    | 36/64 [02:12<01:41,  3.63s/it][A
Processed prompts: 

2
75000
7
525



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:07<07:57,  7.58s/it][A
Processed prompts:   3%|▎         | 2/64 [00:14<07:43,  7.47s/it][A
Processed prompts:   5%|▍         | 3/64 [00:16<04:46,  4.70s/it][A
Processed prompts:   6%|▋         | 4/64 [00:18<03:37,  3.63s/it][A
Processed prompts:   8%|▊         | 5/64 [00:29<06:21,  6.47s/it][A
Processed prompts:   9%|▉         | 6/64 [00:34<05:34,  5.78s/it][A
Processed prompts:  11%|█         | 7/64 [00:35<03:56,  4.15s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:52<07:54,  8.47s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:54<05:50,  6.38s/it][A
Processed prompts:  16%|█▌        | 10/64 [01:04<06:43,  7.47s/it][A
Processed prompts:  17%|█▋        | 11/64 [01:08<05:36,  6.35s/it][A
Processed prompts:  19%|█▉        | 12/64 [01:14<05:31,  6.38s/it][A
Processed prompts:  20%|██        | 13/64 [01:18<04:37,  5.44s/it][A
Processed prompts:  22%|██▏       | 14/64 [01:31<06:32,  7.85s/it][A
Processed prompts:  23%|██▎ 

44
53
5
36



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:04<04:36,  4.39s/it][A




Processed prompts:   3%|▎         | 2/64 [00:05<02:43,  2.64s/it][A
Processed prompts:   6%|▋         | 4/64 [00:14<03:50,  3.84s/it][A
Processed prompts:   8%|▊         | 5/64 [00:33<08:13,  8.36s/it][A
Processed prompts:   9%|▉         | 6/64 [00:36<06:34,  6.80s/it][A
Processed prompts:  11%|█         | 7/64 [00:37<04:49,  5.07s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:39<03:41,  3.95s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:39<02:35,  2.83s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:40<02:02,  2.27s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:40<01:34,  1.78s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:44<02:04,  2.39s/it][A
Processed prompts:  20%|██        | 13/64 [00:47<02:00,  2.37s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:50<02:09,  2.58s/it][A
Processed prompts:  23%|██▎       | 15/64 [00:56<02:58,  3.64s/it][A
Processed prompts:  25%|██▌       | 16/64 [00:57<02:14,  2.81s/it][A
Processed prompts:  27%|██

11
7
21



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:08<08:37,  8.22s/it][A
Processed prompts:   3%|▎         | 2/64 [00:15<07:44,  7.49s/it][A
Processed prompts:   5%|▍         | 3/64 [00:15<04:21,  4.29s/it][A
Processed prompts:   6%|▋         | 4/64 [00:16<03:00,  3.01s/it][A
Processed prompts:   8%|▊         | 5/64 [00:20<03:10,  3.23s/it][A
Processed prompts:   9%|▉         | 6/64 [00:21<02:22,  2.46s/it][A
Processed prompts:  11%|█         | 7/64 [00:23<02:08,  2.26s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:26<02:16,  2.45s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:27<01:58,  2.15s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:29<01:55,  2.14s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:30<01:33,  1.77s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:42<04:16,  4.93s/it][A
Processed prompts:  20%|██        | 13/64 [00:47<04:09,  4.89s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:49<03:27,  4.15s/it][A
Processed prompts:  23%|██▎ 




Processed prompts:  42%|████▏     | 27/64 [01:39<02:16,  3.69s/it][A
Processed prompts:  44%|████▍     | 28/64 [01:40<01:42,  2.84s/it][A
Processed prompts:  45%|████▌     | 29/64 [01:45<02:02,  3.49s/it][A
Processed prompts:  47%|████▋     | 30/64 [01:50<02:15,  3.98s/it][A
Processed prompts:  48%|████▊     | 31/64 [01:56<02:27,  4.46s/it][A
Processed prompts:  50%|█████     | 32/64 [02:00<02:16,  4.28s/it][A
Processed prompts:  52%|█████▏    | 33/64 [02:02<01:54,  3.69s/it][A
Processed prompts:  53%|█████▎    | 34/64 [02:07<02:05,  4.19s/it][A
Processed prompts:  55%|█████▍    | 35/64 [02:11<01:58,  4.08s/it][A
Processed prompts:  56%|█████▋    | 36/64 [02:18<02:16,  4.86s/it][A
Processed prompts:  58%|█████▊    | 37/64 [02:26<02:37,  5.84s/it][A
Processed prompts:  59%|█████▉    | 38/64 [02:35<02:59,  6.92s/it][A
Processed prompts:  61%|██████    | 39/64 [02:37<02:12,  5.28s/it][A
Processed prompts:  62%|██████▎   | 40/64 [02:42<02:05,  5.23s/it][A
Processed prompts: 

0
4
12
1083



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:04<05:12,  4.96s/it][A
Processed prompts:   3%|▎         | 2/64 [00:12<06:57,  6.73s/it][A




Processed prompts:   5%|▍         | 3/64 [00:13<04:10,  4.10s/it][A
Processed prompts:   6%|▋         | 4/64 [00:16<03:21,  3.36s/it][A
Processed prompts:   8%|▊         | 5/64 [00:24<04:56,  5.03s/it][A
Processed prompts:   9%|▉         | 6/64 [00:25<03:35,  3.72s/it][A




Processed prompts:  11%|█         | 7/64 [00:26<02:37,  2.76s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:33<03:55,  4.20s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:36<03:27,  3.77s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:37<02:41,  2.98s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:38<02:09,  2.44s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:48<04:11,  4.84s/it][A
Processed prompts:  20%|██        | 13/64 [00:53<03:57,  4.66s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:59<04:19,  5.19s/it][A
Processed prompts:  23%|██▎       | 15/64 [01:09<05:21,  6.55s/it][A
Processed prompts:  25%|██▌       | 16/64 [01:15<05:12,  6.50s/it][A
Processed prompts:  27%|██▋       | 17/64 [01:16<03:38,  4.65s/it][A
Processed prompts:  28%|██▊       | 18/64 [01:22<03:52,  5.05s/it][A
Processed prompts:  30%|██▉       | 19/64 [01:32<04:57,  6.60s/it][A
Processed prompts:  31%|███▏      | 20/64 [01:34<03:50,  5.24s/it][A
Processed prompts:  33

10
22335577
15
1349



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:05<06:07,  5.83s/it][A
Processed prompts:   5%|▍         | 3/64 [00:14<04:40,  4.59s/it][A
Processed prompts:   6%|▋         | 4/64 [00:15<03:29,  3.48s/it][A
Processed prompts:   8%|▊         | 5/64 [00:15<02:24,  2.45s/it][A
Processed prompts:   9%|▉         | 6/64 [00:18<02:19,  2.41s/it][A
Processed prompts:  11%|█         | 7/64 [00:19<01:56,  2.04s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:20<01:34,  1.69s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:22<01:32,  1.67s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:26<02:08,  2.39s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:30<02:32,  2.89s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:35<03:05,  3.58s/it][A
Processed prompts:  20%|██        | 13/64 [00:36<02:30,  2.95s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:38<02:09,  2.58s/it][A
Processed prompts:  23%|██▎       | 15/64 [00:41<02:12,  2.70s/it][A
Processed prompts:  25%|██▌

0
999
465



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:01<01:39,  1.58s/it][A
Processed prompts:   5%|▍         | 3/64 [00:15<05:36,  5.52s/it][A
Processed prompts:   6%|▋         | 4/64 [00:40<12:09, 12.17s/it][A
Processed prompts:   8%|▊         | 5/64 [00:49<11:00, 11.19s/it][A
Processed prompts:   9%|▉         | 6/64 [00:52<08:19,  8.61s/it][A
Processed prompts:  11%|█         | 7/64 [00:55<06:33,  6.90s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:57<04:55,  5.28s/it][A
Processed prompts:  14%|█▍        | 9/64 [01:06<05:50,  6.38s/it][A
Processed prompts:  16%|█▌        | 10/64 [01:08<04:41,  5.21s/it][A
Processed prompts:  17%|█▋        | 11/64 [01:10<03:45,  4.26s/it][A
Processed prompts:  19%|█▉        | 12/64 [01:15<03:47,  4.37s/it][A
Processed prompts:  20%|██        | 13/64 [01:17<03:02,  3.58s/it][A
Processed prompts:  22%|██▏       | 14/64 [01:19<02:39,  3.18s/it][A
Processed prompts:  23%|██▎       | 15/64 [01:20<02:04,  2.55s/it][A
Processed prompts:  25%|██▌

11
9
16
3130



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:03<04:00,  3.81s/it][A




Processed prompts:   3%|▎         | 2/64 [00:06<03:08,  3.04s/it][A
Processed prompts:   5%|▍         | 3/64 [00:14<05:35,  5.50s/it][A
Processed prompts:   6%|▋         | 4/64 [00:15<03:31,  3.52s/it][A
Processed prompts:   8%|▊         | 5/64 [00:20<03:54,  3.98s/it][A
Processed prompts:   9%|▉         | 6/64 [00:21<03:09,  3.26s/it][A
Processed prompts:  11%|█         | 7/64 [00:24<02:59,  3.16s/it][A




Processed prompts:  12%|█▎        | 8/64 [00:25<02:20,  2.50s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:34<04:03,  4.42s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:36<03:25,  3.80s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:42<03:48,  4.31s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:45<03:25,  3.96s/it][A
Processed prompts:  20%|██        | 13/64 [00:48<02:59,  3.53s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:58<04:33,  5.47s/it][A
Processed prompts:  23%|██▎       | 15/64 [01:00<03:45,  4.61s/it][A
Processed prompts:  25%|██▌       | 16/64 [01:05<03:45,  4.69s/it][A
Processed prompts:  27%|██▋       | 17/64 [01:06<02:49,  3.61s/it][A
Processed prompts:  28%|██▊       | 18/64 [01:07<02:06,  2.75s/it][A
Processed prompts:  30%|██▉       | 19/64 [01:08<01:35,  2.13s/it][A
Processed prompts:  31%|███▏      | 20/64 [01:09<01:28,  2.00s/it][A
Processed prompts:  33%|███▎      | 21/64 [01:12<01:31,  2.12s/it][A
Processed prompts:  3

16
529
16
-11



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:06<06:46,  6.46s/it][A
Processed prompts:   5%|▍         | 3/64 [00:15<05:04,  4.99s/it][A
Processed prompts:   6%|▋         | 4/64 [00:16<03:28,  3.47s/it][A
Processed prompts:   8%|▊         | 5/64 [00:19<03:26,  3.50s/it][A
Processed prompts:   9%|▉         | 6/64 [00:25<04:04,  4.22s/it][A
Processed prompts:  11%|█         | 7/64 [00:27<03:15,  3.43s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:34<04:25,  4.73s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:37<03:49,  4.17s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:44<04:28,  4.97s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:45<03:26,  3.89s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:53<04:22,  5.04s/it][A
Processed prompts:  20%|██        | 13/64 [01:03<05:37,  6.63s/it][A




Processed prompts:  22%|██▏       | 14/64 [01:05<04:10,  5.01s/it][A
Processed prompts:  23%|██▎       | 15/64 [01:06<03:10,  3.89s/it][A
Processed prompts:  25%|██▌       | 16/64 [01:13<03:48,  4.77s/it][A
Processed prompts:  27%|██▋       | 17/64 [01:16<03:24,  4.35s/it][A
Processed prompts:  28%|██▊       | 18/64 [01:17<02:36,  3.39s/it][A
Processed prompts:  30%|██▉       | 19/64 [01:19<02:13,  2.97s/it][A
Processed prompts:  31%|███▏      | 20/64 [01:20<01:44,  2.38s/it][A
Processed prompts:  33%|███▎      | 21/64 [01:21<01:24,  1.96s/it][A
Processed prompts:  34%|███▍      | 22/64 [01:23<01:24,  2.01s/it][A
Processed prompts:  36%|███▌      | 23/64 [01:33<02:53,  4.23s/it][A
Processed prompts:  38%|███▊      | 24/64 [01:34<02:13,  3.33s/it][A
Processed prompts:  39%|███▉      | 25/64 [01:40<02:42,  4.17s/it][A
Processed prompts:  41%|████      | 26/64 [01:41<02:01,  3.21s/it][A
Processed prompts:  42%|████▏     | 27/64 [01:42<01:39,  2.69s/it][A
Processed prompts: 

120
6
57
62281



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:01<01:29,  1.42s/it][A
Processed prompts:   6%|▋         | 4/64 [00:14<03:54,  3.92s/it][A
Processed prompts:   8%|▊         | 5/64 [00:17<03:21,  3.42s/it][A
Processed prompts:   9%|▉         | 6/64 [00:19<03:09,  3.27s/it][A
Processed prompts:  11%|█         | 7/64 [00:28<04:27,  4.69s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:28<03:09,  3.38s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:32<03:23,  3.70s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:33<02:31,  2.81s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:44<04:37,  5.23s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:45<03:20,  3.86s/it][A
Processed prompts:  20%|██        | 13/64 [00:45<02:29,  2.93s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:49<02:35,  3.10s/it][A
Processed prompts:  23%|██▎       | 15/64 [00:50<02:05,  2.57s/it][A
Processed prompts:  25%|██▌       | 16/64 [00:51<01:35,  1.99s/it][A
Processed prompts:  27%|██

110
45620
0



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:02<02:47,  2.66s/it][A




Processed prompts:   3%|▎         | 2/64 [00:04<02:26,  2.36s/it][A
Processed prompts:   6%|▋         | 4/64 [00:26<07:45,  7.76s/it][A
Processed prompts:   8%|▊         | 5/64 [00:31<06:50,  6.96s/it][A
Processed prompts:   9%|▉         | 6/64 [00:52<10:47, 11.17s/it][A
Processed prompts:  11%|█         | 7/64 [00:54<07:52,  8.29s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:56<06:01,  6.46s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:58<04:32,  4.96s/it][A
Processed prompts:  16%|█▌        | 10/64 [01:01<04:07,  4.59s/it][A
Processed prompts:  17%|█▋        | 11/64 [01:04<03:35,  4.06s/it][A
Processed prompts:  19%|█▉        | 12/64 [01:07<03:11,  3.67s/it][A
Processed prompts:  20%|██        | 13/64 [01:29<07:54,  9.31s/it][A
Processed prompts:  22%|██▏       | 14/64 [01:43<08:45, 10.51s/it][A
Processed prompts:  23%|██▎       | 15/64 [01:45<06:38,  8.12s/it][A
Processed prompts:  25%|██▌       | 16/64 [01:52<06:10,  7.72s/it][A
Processed prompts:  27%|██

128
24
5
20



Processed prompts:   0%|          | 0/64 [00:00<?, ?it/s][A




Processed prompts:   2%|▏         | 1/64 [00:05<05:27,  5.20s/it][A




Processed prompts:   3%|▎         | 2/64 [00:07<03:33,  3.44s/it][A
Processed prompts:   5%|▍         | 3/64 [00:14<05:04,  4.99s/it][A
Processed prompts:   6%|▋         | 4/64 [00:15<03:28,  3.48s/it][A
Processed prompts:   8%|▊         | 5/64 [00:17<02:48,  2.86s/it][A




Processed prompts:   9%|▉         | 6/64 [00:18<02:12,  2.29s/it][A
Processed prompts:  11%|█         | 7/64 [00:19<01:56,  2.04s/it][A
Processed prompts:  12%|█▎        | 8/64 [00:20<01:36,  1.72s/it][A
Processed prompts:  14%|█▍        | 9/64 [00:25<02:25,  2.65s/it][A
Processed prompts:  16%|█▌        | 10/64 [00:29<02:44,  3.05s/it][A
Processed prompts:  17%|█▋        | 11/64 [00:39<04:35,  5.20s/it][A
Processed prompts:  19%|█▉        | 12/64 [00:43<04:14,  4.90s/it][A
Processed prompts:  20%|██        | 13/64 [00:46<03:37,  4.26s/it][A
Processed prompts:  22%|██▏       | 14/64 [00:52<04:04,  4.89s/it][A
Processed prompts:  23%|██▎       | 15/64 [00:54<03:07,  3.82s/it][A
Processed prompts:  25%|██▌       | 16/64 [00:54<02:16,  2.84s/it][A
Processed prompts:  27%|██▋       | 17/64 [01:01<03:02,  3.89s/it][A
Processed prompts:  28%|██▊       | 18/64 [01:02<02:23,  3.12s/it][A
Processed prompts:  30%|██▉       | 19/64 [01:03<01:56,  2.58s/it][A
Processed prompts:  31%

25
132
105
28



Processed prompts:   0%|          | 0/51 [00:00<?, ?it/s]



Processed prompts:   2%|▏         | 1/51 [00:02<02:14,  2.70s/it]



Processed prompts:   6%|▌         | 3/51 [00:06<01:36,  2.00s/it]



Processed prompts: 100%|██████████| 51/51 [02:55<00:00,  3.44s/it]


169
3
24


Processed prompts:   0%|          | 0/28 [00:00<?, ?it/s]



Processed prompts: 100%|██████████| 28/28 [02:05<00:00,  4.47s/it]


1012
9
93192


Processed prompts: 100%|██████████| 6/6 [02:35<00:00, 25.91s/it]

5757
71
4
CPU times: user 2h 12min 49s, sys: 34.3 s, total: 2h 13min 24s
Wall time: 2h 25min 44s





## Write to submission file

In [39]:
# Build the final {id: answer} mapping from `boxed_answers` (defined in an
# earlier cell): entries are ordered by id and every answer is replaced by its
# absolute value, so a negative prediction is submitted as a positive number.
sorted_boxed_answers = {
    problem_id: abs(answer)
    for problem_id, answer in sorted(boxed_answers.items())
}

# Write one [id, answer] row per entry to the submission CSV writer.
# The loop variables are deliberately NOT called `id`: a top-level loop leaks
# its variables into the notebook's global namespace, and `id` would shadow the
# builtin `id()` for every cell executed afterwards.
for problem_id, final_answer in sorted_boxed_answers.items():
    submission.writerow([problem_id, final_answer])

In [30]:
# Legacy writer: dumps `boxed_answers` as-is (the mapping's own iteration
# order, no abs()). The preceding cell already writes every answer, sorted and
# non-negative, to the same `submission` writer, so executing this cell as well
# (e.g. under "Restart & Run All") would append a second, conflicting row for
# every id. It is therefore disabled by default; set the flag to True only when
# the preceding cell is intentionally skipped.
WRITE_RAW_ANSWERS = False

if WRITE_RAW_ANSWERS:
    # Loop variables avoid the name `id` so the builtin is not shadowed.
    for problem_id, raw_answer in boxed_answers.items():
        submission.writerow([problem_id, raw_answer])

In [40]:
# Display the id -> absolute-answer mapping built above as this cell's output.
sorted_boxed_answers

{0: 3,
 1: 10,
 2: 3,
 3: 225,
 4: 40,
 5: 0,
 6: 120,
 7: 334,
 8: 14,
 9: 63,
 10: 2,
 11: 8,
 12: 1901,
 13: 1000,
 14: 16000,
 15: 114000,
 16: 11,
 17: 8,
 18: 17,
 19: 24,
 20: 97,
 21: 96,
 22: 15,
 23: 1,
 24: 145,
 25: 4105,
 26: 0,
 27: 3,
 28: 29,
 29: 1,
 30: 201,
 31: 280,
 32: 17,
 33: 1224,
 34: 2,
 35: 12,
 36: 0,
 37: 50,
 38: 252,
 39: 9,
 40: 102,
 41: 8,
 42: 20,
 43: 4,
 44: 857142,
 45: 65,
 46: 2,
 47: 75000,
 48: 7,
 49: 525,
 50: 44,
 51: 53,
 52: 5,
 53: 36,
 54: 11,
 55: 7,
 56: 21,
 57: 0,
 58: 4,
 59: 12,
 60: 1083,
 61: 10,
 62: 22335577,
 63: 15,
 64: 1349,
 65: 0,
 66: 999,
 67: 465,
 68: 11,
 69: 9,
 70: 16,
 71: 3130,
 72: 16,
 73: 529,
 74: 16,
 75: 11,
 76: 120,
 77: 6,
 78: 57,
 79: 62281,
 80: 110,
 81: 45620,
 82: 0,
 83: 128,
 84: 24,
 85: 5,
 86: 20,
 87: 25,
 88: 132,
 89: 105,
 90: 28,
 91: 169,
 92: 3,
 93: 24,
 94: 1012,
 95: 9,
 96: 93192,
 97: 5757,
 98: 71,
 99: 4}

## Close files

In [41]:
   
# Close both handles. close() flushes any buffered data and can therefore
# raise (e.g. on a full disk); the try/finally guarantees `log_file` is still
# closed when closing `file` fails, instead of leaking the second handle.
# NOTE(review): `file` is presumably the handle behind the `submission` CSV
# writer -- confirm against the cell that opens it.
try:
    file.close()
finally:
    log_file.close()