In [4]:
import random
from jinja2 import Environment, Template, meta
import sys
from itertools import product
from datasets import load_dataset
import polars as pl
from vllm import LLM, SamplingParams
from typing import Dict
import numpy as np
import ray
from transformers import AutoTokenizer
from typing import Any, Dict

In [5]:
# model_name = 'mistralai/Mistral-7B-Instruct-v0.2'
model_name = '/home/lawrence/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.2/snapshots/41b61a33a2483885c981aa79e0df6b32407ed873'
# model_name = 'google/gemma-7b-it'

tokenizer = AutoTokenizer.from_pretrained(
    model_name
) 

## import the dataset

## load the dataset into ray

In [6]:
def get_chat_input(row: Dict[str, Any]) -> Dict[str, Any]:
    
    #original text prefix
    orig_prefix = "Original Text:"
    #modified text prefix
    rewrite_prefix = "Rewritten Text:"
    # response start
    response_start = "The prompt was:"

    sys_prompt = """You are an expert in "Reverse Prompt Engineering". You are able to reverse-engineer prompts used to rewrite text.\n\nI will be providing you with an "original text" and "rewritten text". Please try to be as specific as possible and come up with a prompt that is based on the tone, style, and any other properties you consider relevant."""

    #first example
    ex_text_1 = "The aroma of freshly baked bread wafted through the bustling marketplace, mingling with the shouts of vendors and the excited chatter of customers. A lone scout, her cloak dusted with desert sand, weaved through the crowd, her hand instinctively resting on the hilt of her dagger."
    ex_rewrite_1 = "A scent of opportunity fills the air. Trust your instincts and a hidden path will reveal itself."
    ex_prompt_1 = "Craft this as a fortune cookie message"

    #second example
    ex_text_2 = "The old lighthouse keeper shuffled down the rickety stairs, his pockets jangling with forgotten keys. He peered out at the churning sea, the salty spray stinging his weathered face. A lone seagull circled overhead, its mournful cry echoing in the wind."
    ex_rewrite_2 = "The lighthouse walls creaked with each of the keeper's slow steps, the rhythm a familiar lullaby to the young apprentice perched by the window. He watched the old man descend, a wizened silhouette framed by the churning sea. The salty spray, invisible from his vantage point, seemed to dance on the keeper's face, etching new lines into the weathered map. The cry of the lone seagull was a melody only the old man understood, a language that spoke of years spent guarding this solitary beacon."
    ex_prompt_2 = "Reimagine this scene from a different perspective, offering a fresh insight into the events"

    #second example
    ex_text_3 = "The population of the earth is estimated to be around 8 billion people, but there are far more insects! Scientists believe that there are over 10 quintillion (that's 10, followed by 18 zeros) individual insects alive at any given time, outweighing all humans and animals combined by a staggering amount."
    ex_rewrite_3 = "Wow, have you ever stopped to think about just how many insects there are? Our planet may be teeming with 8 billion people, but that's nothing compared to the incredible number of insects buzzing, crawling, and flitting around us. Scientists estimate there are over 10 quintillion, that's a 10 with 18 zeros, individual insects alive at any given moment! That's mind-boggling!  And get this, all those insects together weigh more than all the humans and animals on Earth combined.  Pretty incredible, right?"
    ex_prompt_3 = "Rewrite the following as a speech"

    messages = [
        #first example
        {"role": "user", "content": f"{sys_prompt}\n{orig_prefix} {ex_text_1}\n{rewrite_prefix} {ex_rewrite_1}"},
        {"role": "assistant", "content": f"{response_start} {ex_prompt_1}"},

        #second example
        {"role": "user", "content": f"{orig_prefix} {ex_text_2}\n{rewrite_prefix} {ex_rewrite_2}"},
        {"role": "assistant", "content": f"{response_start} {ex_prompt_2}"},

        #actual prompt
        {"role": "user", "content": f"{orig_prefix}{ex_text_3}\n{rewrite_prefix} {ex_rewrite_3}"},
        {"role": "assistant", "content": f"{response_start} {ex_prompt_3}"},
        
        #actual prompt
        {"role": "user", "content": f"{orig_prefix} {row['original_text']}\n{rewrite_prefix} {row['rewritten_text']}"},
        {"role": "assistant", "content": response_start},
    ]
        
    #give it to Mistral
    row['input'] = tokenizer.apply_chat_template(messages, tokenize=False)
    
    return row


    
    
# ds = (
#     ray.data.read_csv('data/3rd_party_ds/llm_dataset_1.csv')
#     .map(get_chat_input)
# )

In [7]:
import pandas as pd

df = pd.read_csv('/home/lawrence/Projects/contests/llm_prompt_recovery/data/3rd_party_ds/gemma-rewrite-nbroad/nbroad-v2.csv')
df.rename(columns={'rewrite_prompt': 'gt_rewrite_prompt'}, inplace=True)
ds = (
    ray.data.from_pandas(df)
    .map(get_chat_input)
).repartition(2) # need to force into multiple blocks <https://discuss.ray.io/t/single-node-4x-gpu-map-batches-only-using-1/12313/2>


Usage stats collection is enabled by default for nightly wheels. To disable this, run the following command: `ray disable-usage-stats` before starting Ray. See https://docs.ray.io/en/master/cluster/usage-stats.html for more details.


2024-04-06 22:03:01,258	INFO worker.py:1752 -- Started a local Ray instance.


In [8]:
df

Unnamed: 0,id,original_text,gt_rewrite_prompt,rewritten_text
0,lZGdiueMer,"`` Well, there are healthier ways to tell me y...",Rewrite the story where the writer asks the re...,"Well, there are healthier ways to tell me you ..."
1,DfTJVFKrUk,Rory ran his shaky fingers through his wife's ...,Rewrite the essay as a dramatic play,## The Final Curtain\n\n[FADE IN]\n\n**Setting...
2,LmJvKranXK,As I made my way on foot across town to the Po...,Rewrite the story with all the themes and sett...,As I made my way through the Tatooine desert o...
3,PpnqXQAdGH,`` Hello. We come in peace.'' \n \n The first ...,Rewrite the essay if the advanced aliens didn'...,`` Hello. We come in peace.''\n\nThe first enc...
4,qOeXTfqgAM,"`` Karen, what the helllllll izzz...'' says my...",Rewrite the story as a court room drama starri...,The courtroom erupted in an uproar as District...
...,...,...,...,...
2395,ljSaWTXjFc,"`` Dave, have you seen this?'' There is no ans...",Rewrite this as a Shakespearean tragedy,"Alas, poor Dave, a victim of fate's cruel hand..."
2396,nwZeXhsofO,"I posted something like this a while ago, I gu...",Rewrite the story as if you are a famous celeb...,"I'm the epitome of stardom, yet I can't shake ..."
2397,ozaryEVFvK,"`` Oh Darling, it was absolutely dreadful!'' M...",Rewrite the story as an upbeat mystery story (...,"Hey folks, buckle up for a mystery that will h..."
2398,WTvBtRNlQS,Ubwuwuaua! I exclaim shaking my head back and ...,Rewrite the story from the perspective of the ...,Ubwuwuaua. The whispers of the wind dance thro...


In [9]:
# Create a sampling params object.
sampling_params = SamplingParams(max_tokens=100)


# Create a class to do batch inference.
class LLMPredictor:

    def __init__(self):
        self.llm = LLM(
            model=model_name,
            gpu_memory_utilization=0.92,
            max_model_len=1500,
            trust_remote_code=True, 
        )

    def __call__(self, batch: Dict[str, np.ndarray]) -> Dict[str, list]:
        outputs = self.llm.generate(batch["input"], sampling_params)
        
        generated_text = []
        for output in outputs:
            generated_text.append(' '.join([o.text for o in output.outputs]))
        batch["rewrite_prompt"] = generated_text

        return batch

ds = ds.map_batches(
    LLMPredictor,
    # Set the concurrency to the number of LLM instances.
    concurrency=2,
    # Specify the number of GPUs required per LLM instance.
    # NOTE: Do NOT set `num_gpus` when using vLLM with tensor-parallelism
    # (i.e., `tensor_parallel_size`).
    num_gpus=1,
    # Specify the batch size for inference.
    batch_size=2,
)

In [10]:
ds.write_parquet("./data/exp_test/nboard_v2/")

2024-04-06 22:03:04,031	INFO streaming_executor.py:115 -- Starting execution of Dataset. Full log is in /tmp/ray/session_2024-04-06_22-02-59_228988_439769/logs/ray-data.log
2024-04-06 22:03:04,032	INFO streaming_executor.py:116 -- Execution plan of Dataset: InputDataBuffer[Input] -> TaskPoolMapOperator[Map(get_chat_input)] -> AllToAllOperator[Repartition] -> ActorPoolMapOperator[MapBatches(LLMPredictor)] -> TaskPoolMapOperator[Write]



[36m(_MapWorker pid=441463)[0m INFO 04-06 22:03:07 llm_engine.py:87] Initializing an LLM engine with config: model='/home/lawrence/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.2/snapshots/41b61a33a2483885c981aa79e0df6b32407ed873', tokenizer='/home/lawrence/.cache/huggingface/hub/models--mistralai--Mistral-7B-Instruct-v0.2/snapshots/41b61a33a2483885c981aa79e0df6b32407ed873', tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=1500, download_dir=None, load_format=auto, tensor_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, seed=0)
[36m(_MapWorker pid=441463)[0m INFO 04-06 22:03:15 llm_engine.py:357] # GPU blocks: 3727, # CPU blocks: 2048
[36m(_MapWorker pid=441462)[0m INFO 04-06 22:03:07 llm_engine.py:87] Initializing an LLM engine with config: model='/home/lawrence/.cache/huggingface/hub/models--mistralai--Mistral

- Map(get_chat_input) 1:   0%|          | 0/1 [00:00<?, ?it/s]

- Repartition 2:   0%|          | 0/2 [00:00<?, ?it/s]

Split Repartition 3:   0%|          | 0/2 [00:00<?, ?it/s]

- MapBatches(LLMPredictor) 4:   0%|          | 0/2 [00:00<?, ?it/s]

- Write 5:   0%|          | 0/2 [00:00<?, ?it/s]

Running 0:   0%|          | 0/2 [00:00<?, ?it/s]

Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s]m 




Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.84it/s]
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.23s/it]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8648.05it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8081.51it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9489.38it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8952.62it/s]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.84it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9467.95it/s][32m [repeated 18x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.87it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  2.73it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8603.70it/s][32m [repeated 20x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.67it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.04s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.37s/it]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.67it/s][32m [repeated 3x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8004.40it/s]
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.34s/it][32m [repeated 2x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8551.08it/s]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.86it/s][32m [repeated 7x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9049.20it/s][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.06it/s][32m [repeated 7x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.27it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8971.77it/s][32m [repeated 7x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.53it/s][32m [repeated 5x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.89it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9127.97it/s][32m [repeated 6x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.19s/it][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 8x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:02<00:02,  2.21s/it][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 7564.12it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.44s/it][32m [repeated 6x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.85it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 10046.24it/s][32m [repeated 8x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it][32m [repeated 5x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.29it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9788.34it/s][32m [repeated 19x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.20s/it][32m [repeated 5x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:01<00:01,  1.36s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8783.88it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.19s/it][32m [repeated 5x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.44it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8297.34it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.83it/s][32m [repeated 7x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.84it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9157.87it/s][32m [repeated 13x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.23s/it][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:01<00:01,  1.26s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8525.01it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.08s/it][32m [repeated 5x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.68it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 10155.70it/s][32m [repeated 9x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.54it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9720.29it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.25s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.41s/it]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.86it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8104.94it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.57it/s][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.64it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 6232.25it/s][32m [repeated 2x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.25s/it][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.26it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8224.13it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.28it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8322.03it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  2.55it/s][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.13it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8322.03it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.70it/s][32m [repeated 7x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.66it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 10565.00it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.45it/s][32m [repeated 5x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 3x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:02<00:02,  2.13s/it][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8397.01it/s][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.22s/it][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9404.27it/s]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  1.15it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9686.61it/s][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.52it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 11096.04it/s][32m [repeated 10x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.26s/it][32m [repeated 5x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:01<00:01,  1.32s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9414.82it/s][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.56s/it][32m [repeated 5x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.57it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9788.34it/s]
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.43it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8710.91it/s]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.56it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  2.09it/s][32m [repeated 7x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9269.18it/s][32m [repeated 6x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:01<00:01,  1.64s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.21s/it][32m [repeated 4x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8081.51it/s][32m [repeated 5x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.80it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.26s/it][32m [repeated 7x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9310.33it/s][32m [repeated 5x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.36it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.22s/it][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8943.08it/s][32m [repeated 14x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.42s/it]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.42it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.21s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 7463.17it/s][32m [repeated 4x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.46s/it]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.55it/s][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.27s/it][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9675.44it/s][32m [repeated 3x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.30it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.22s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 7084.97it/s][32m [repeated 3x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.81it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.32s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 6497.76it/s][32m [repeated 7x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:01<00:01,  1.20s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  2.05it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9446.63it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.53s/it]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  3.93it/s]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.32it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.27s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8289.14it/s][32m [repeated 5x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.98it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.77it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9208.13it/s][32m [repeated 2x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.49it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  2.50it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9157.87it/s][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.46it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8683.86it/s][32m [repeated 5x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.32it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  2.59it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8586.09it/s][32m [repeated 11x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.84it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.87it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9927.35it/s][32m [repeated 8x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.19it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.22s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8516.35it/s][32m [repeated 3x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.38s/it]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 3x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.06it/s][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.27s/it][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8525.01it/s][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.29it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.13s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8962.19it/s][32m [repeated 5x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.42s/it]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.30it/s][32m [repeated 2x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.26s/it][32m [repeated 2x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8674.88it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.52s/it]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.80it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.32s/it][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8811.56it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  4.52it/s]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:02<00:02,  2.74s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  3.24it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8355.19it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.41s/it]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 8x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.89it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.01it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8363.52it/s][32m [repeated 10x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.83it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  3.06it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8830.11it/s][32m [repeated 2x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:01<00:01,  1.37s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.02it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8876.83it/s][32m [repeated 13x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9068.77it/s]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.63it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.21s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8338.58it/s][32m [repeated 14x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.44it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.21it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8322.03it/s][32m [repeated 9x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.28it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 5265.92it/s][32m [repeated 8x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.66it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.81it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8971.77it/s][32m [repeated 10x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.21it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.27s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8490.49it/s][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:01<00:01,  1.95s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.31s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9446.63it/s][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.58s/it]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.30it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.26s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8289.14it/s][32m [repeated 11x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.43it/s]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  1.90it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  3.33it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8035.07it/s][32m [repeated 4x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.02it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8630.26it/s][32m [repeated 13x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  1.94it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.58s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8208.03it/s][32m [repeated 7x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  1.08it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.81it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 10551.71it/s][32m [repeated 12x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.29it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.31s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8568.55it/s][32m [repeated 3x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.38it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.18s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8136.38it/s][32m [repeated 2x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.43s/it]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.38it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.86it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8848.74it/s][32m [repeated 7x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.50it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.40s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8112.77it/s][32m [repeated 8x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.41s/it]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:01<00:01,  1.74s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.61s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 10058.28it/s][32m [repeated 5x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.35it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.22s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9608.94it/s][32m [repeated 10x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.48it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.49s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 7796.10it/s][32m [repeated 8x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:02<00:02,  2.35s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.57s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9269.18it/s][32m [repeated 4x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.35it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.32s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9039.45it/s][32m [repeated 6x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.61s/it]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.31it/s][32m [repeated 3x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.21s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9049.20it/s][32m [repeated 7x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.84it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.48s/it][32m [repeated 5x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8747.25it/s]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:01<00:01,  1.65s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.32s/it][32m [repeated 6x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8447.74it/s][32m [repeated 8x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 3x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.84it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.33s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8876.83it/s][32m [repeated 3x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.84it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.29s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.54s/it]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9383.23it/s][32m [repeated 8x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.57it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  2.11it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8473.34it/s][32m [repeated 2x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:01<00:01,  1.21s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  2.78it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.40s/it]
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it]




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 6302.49it/s][32m [repeated 3x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.85it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it][32m [repeated 5x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8710.91it/s][32m [repeated 7x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.90it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it][32m [repeated 6x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9039.45it/s][32m [repeated 6x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.18it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.01it/s][32m [repeated 6x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9218.25it/s][32m [repeated 8x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.30it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  2.08it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8363.52it/s][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.54it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.22s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8322.03it/s][32m [repeated 4x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  4.70it/s]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:01<00:01,  1.48s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.10s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.40s/it]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8577.31it/s][32m [repeated 8x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8120.63it/s]




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:02<00:02,  2.12s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.21s/it][32m [repeated 4x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8568.55it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9478.65it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9187.96it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9320.68it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8603.70it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 7810.62it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9597.95it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8413.85it/s]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.23it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.21s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 7854.50it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8783.88it/s][32m [repeated 2x across clus



Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.50it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.20s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8152.19it/s][32m [repeated 9x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:02<00:02,  2.40s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.44s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 4578.93it/s][32m [repeated 3x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.73it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.00it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 7876.63it/s][32m [repeated 2x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.89it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.24s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 7884.03it/s]




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8656.97it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9020.01it/s]
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.30it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.15it/s][32m [repeated 5x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9187.96it/s][32m [repeated 2x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.33it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.35s/it][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9279.43it/s][32m [repeated 6x across cluster][0m




Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 5x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.35it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.33s/it][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.45s/it]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8397.01it/s][32m [repeated 6x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:02<00:02,  2.55s/it][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.24it/s][32m [repeated 3x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:03<00:00,  1.55s/it]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 7307.15it/s][32m [repeated 13x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.37it/s][32m [repeated 6x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.25s/it][32m [repeated 6x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8568.55it/s][32m [repeated 5x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.29it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00,  2.01it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 10106.76it/s]
Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9010.32it/s][32m [repeated 9x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 8x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.68it/s][32m [repeated 8x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.29s/it][32m [repeated 7x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 9078.58it/s][32m [repeated 9x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 6x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.15it/s][32m [repeated 5x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.20s/it][32m [repeated 6x across cluster][0m




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8430.76it/s][32m [repeated 3x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 7x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  2.56it/s][32m [repeated 7x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:01<00:00,  1.50it/s][32m [repeated 7x across cluster][0m
[36m(MapWorker(MapBatches(LLMPredictor)) pid=441463)[0m 




Processed prompts: 100%|██████████| 2/2 [00:00<00:00, 8371.86it/s][32m [repeated 7x across cluster][0m
Processed prompts:   0%|          | 0/2 [00:00<?, ?it/s][32m [repeated 4x across cluster][0m
Processed prompts:  50%|█████     | 1/2 [00:00<00:00,  3.40it/s][32m [repeated 4x across cluster][0m
Processed prompts: 100%|██████████| 2/2 [00:02<00:00,  1.21s/it][32m [repeated 5x across cluster][0m


In [11]:
train_df = pl.read_parquet("./data/exp_test/nboard_v2/*.parquet").to_pandas()

In [12]:
train_df

Unnamed: 0,id,original_text,gt_rewrite_prompt,rewritten_text,input,rewrite_prompt
0,KisiIFfldx,"Listen, people seem to think that monsters are...",Rewrite the story as a lighthearted and funny ...,Have you ever been scared by a monster? Oh boy...,"<s>[INST] You are an expert in ""Reverse Prompt...",
1,UyqBvEQOkQ,Like stealing candy from a baby. Mika was the ...,Rewrite the story to have a plot twist halfway...,Like stealing candy from a baby. Mika was the ...,"<s>[INST] You are an expert in ""Reverse Prompt...",Rewrite the text as if it's a confession or a...
2,CwZGipQWvD,`` BILLY MAYS HERE!'' The fireplace burst into...,Rewrite the essay as a philosophical argument ...,The sales process inherently raises ethical qu...,"<s>[INST] You are an expert in ""Reverse Prompt...",
3,pKXubhhwgC,"** [ This is my first writing here, and I am n...",Rewrite the story as a superhero comic from Ma...,**The Crimson Guardian**\n\nIn the heart of a ...,"<s>[INST] You are an expert in ""Reverse Prompt...",
4,IwboJVgxTC,"`` I do n't like it, this things to hot to kee...",Rewrite the scene so that the hijackers are pr...,"The bank was abuzz with life. People lined up,...","<s>[INST] You are an expert in ""Reverse Prompt...",
...,...,...,...,...,...,...
2395,hQtnmdqLsZ,"“ I ’ m telling you, the house has been empty ...",Rewrite the story from the perspective of the ...,The scent of bleach and damp earth lingered in...,"<s>[INST] You are an expert in ""Reverse Prompt...",
2396,xZSLwkEnEc,`` OW! WHAT THE FUCK IS WRONG WITH YOU?'' Jeni...,Rewrite it as a story about a monster who is f...,"In the realm of terror and devastation, a mons...","<s>[INST] You are an expert in ""Reverse Prompt...",Transform the following text into a mythologi...
2397,qlVmrrkVcL,The woman spoke first. `` Help me! He's with t...,Rewrite the essay as a murder mystery where yo...,A murder mystery unfolds as you read this narr...,"<s>[INST] You are an expert in ""Reverse Prompt...",
2398,EbBxajyzHq,Chijin hastened his team again. The barricade ...,Rewrite the essay in the style of an episode o...,"""The Bullpen Bar was buzzing with the scent of...","<s>[INST] You are an expert in ""Reverse Prompt...",


## score the dataset

In [13]:
import numpy as np
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity

st_model = SentenceTransformer('sentence-transformers/sentence-t5-base')

def get_sharpened_cosine_similarity(text1, text2):
    embeddings1 = st_model.encode(text1)
    embeddings2 = st_model.encode(text2)
    cosine_score = util.cos_sim(embeddings1, embeddings2)
    # print(cosine_score) 
    return (cosine_score[0] ** 3).numpy()[0]

def calc_prompt_similarity(row):
    return get_sharpened_cosine_similarity(row['gt_rewrite_prompt'], row['rewrite_prompt'])

train_df['score'] = train_df.apply(lambda row: calc_prompt_similarity(row), axis=1)

train_df['score'].mean()

0.48421794

In [14]:
train_df.to_csv('./data/predictions/nboard_v2.csv')