## **Arena Learning**

In [1]:
pip install "distilabel[hf-transformers, openai]>=1.0.0"

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


Importing the libraries

In [2]:
import os
from typing import List

from distilabel.llms import TransformersLLM, OpenAILLM
from distilabel.pipeline import Pipeline
from distilabel.steps import (
    CombineColumns,
    KeepColumns,
    LoadDataFromHub,
    LoadDataFromDicts
)
from distilabel.steps import Step, StepInput
from distilabel.steps.tasks import TextGeneration, UltraFeedback
from distilabel.steps.typing import StepOutput

In [3]:
# Authenticate against the Hugging Face Hub.
# SECURITY: the token is read from the HF_TOKEN environment variable instead of
# being hardcoded in the notebook. Any token that was previously committed in
# this cell should be revoked.
from huggingface_hub import login

# When HF_TOKEN is unset this is None, and `login` asks for the token interactively.
HF_AUTH_TOKEN = os.environ.get("HF_TOKEN")
login(token=HF_AUTH_TOKEN)

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


Defining the clean-generations step

In [4]:
def clean_text(text):
    """Return `text` with every run of whitespace squeezed to one space.

    Newlines, tabs and repeated spaces all count as whitespace; leading and
    trailing whitespace is removed entirely.
    """
    # str.split() with no separator already splits on newlines, so no
    # separate newline replacement is needed before re-joining.
    words = text.split()
    return ' '.join(words)

def clean_generations(generations):
    """Apply `clean_text` to each item of `generations` and return the results as a new list."""
    return list(map(clean_text, generations))

In [5]:
class CleanGenerations(Step):
    """Pipeline step that runs `clean_generations` over the `generations` column of each row."""

    @property
    def inputs(self) -> List[str]:
        """Columns this step reads."""
        return ['generations']

    @property
    def outputs(self) -> List[str]:
        """Columns this step writes."""
        return ['generations']

    def process(self, inputs: StepInput) -> StepOutput:
        """Replace each row's `generations` with its cleaned version, then yield the batch.

        Rows are updated in place, so the yielded object is the same batch
        that was passed in.
        """
        for row in inputs:
            row.update(generations=clean_generations(row['generations']))
        yield inputs

Defining the select-best-generation step

In [6]:
def select_best_generation(row):
    """Move the highest-rated answer of a row to the front and expose it as `generation`.

    The first entry of ``row['generations']`` / ``row['ratings']`` is treated
    as the base answer and every later entry as a competitor. A competitor
    replaces the base only when its rating is strictly higher: ties keep the
    base, and among equally rated competitors the earliest one wins.

    Args:
        row: Mapping with a ``generations`` list and a parallel ``ratings``
            list. Individual ratings may be ``None`` (unrated); an unrated
            answer never beats a rated one. ``ratings`` itself may be ``None``
            or shorter than ``generations``.

    Returns:
        The same ``row`` object, mutated in place:
        - ``generations`` (and ``ratings``, so both stay aligned) have the
          winner swapped into position 0;
        - ``generation`` holds the winning answer, or ``None`` when the row
          has no generations at all.
    """
    generations = row['generations'] or []
    ratings = row['ratings'] or []

    best_index = 0
    best_rating = ratings[0] if ratings else None

    # Only ratings that have a matching generation can be selected.
    comparable = min(len(ratings), len(generations))
    for index in range(1, comparable):
        rating = ratings[index]
        if rating is None:
            continue
        # Strict comparison keeps the base on ties and the earliest competitor
        # among equals.
        if best_rating is None or rating > best_rating:
            best_index, best_rating = index, rating

    if best_index:
        # Swap both lists so ratings[i] keeps describing generations[i].
        generations[0], generations[best_index] = generations[best_index], generations[0]
        ratings[0], ratings[best_index] = ratings[best_index], ratings[0]

    row['generation'] = generations[0] if generations else None
    return row

In [7]:
class SelectBestGeneration(Step):
    """Pipeline step that applies `select_best_generation` to every row of a batch."""

    @property
    def inputs(self) -> List[str]:
        """Columns this step reads."""
        return ['generations', 'ratings']

    @property
    def outputs(self) -> List[str]:
        """Columns this step adds."""
        return ['generation']

    def process(self, inputs: StepInput) -> StepOutput:
        """Run `select_best_generation` on each row, then yield the batch.

        `select_best_generation` mutates the row it receives, so its return
        value does not need to be stored back into the batch.
        """
        for row in inputs:
            select_best_generation(row)
        yield inputs

## Creating the Pipelines
### 1. Battle of LLMs (Answers generation)

In [8]:
# "Battle of LLMs": three local models answer every instruction, the answers are
# gathered into one row, cleaned, rated by an OpenAI judge, and the best-rated
# answer is kept.
with Pipeline(name="Battle of LLMs") as pipeline:
    # Source of instructions; the repo id and split are passed as runtime parameters.
    load_dataset = LoadDataFromHub(
        name="dataset_for_arena_learning",
    )

    text_generation_1 = TextGeneration(
        name="text_generation_01",
        llm=TransformersLLM(model="microsoft/Phi-3-mini-4k-instruct", device="cuda:0"),
        input_batch_size=1,
        add_raw_output=False,
    )

    text_generation_2 = TextGeneration(
        name="text_generation_02",
        llm=TransformersLLM(model="Doctor-Shotgun/TinyLlama-1.1B-32k-Instruct", device="cuda:0"),
        input_batch_size=1,
        add_raw_output=False,
    )

    # NOTE(review): the step name skips "03"; kept unchanged because it is a runtime identifier.
    text_generation_3 = TextGeneration(
        name="text_generation_04",
        llm=TransformersLLM(model="Qwen/Qwen2-7B-Instruct", device="cuda:0"),
        input_batch_size=1,
        add_raw_output=False,
    )

    # Merge the per-model `generation` / `model_name` columns into list columns.
    combine_columns = CombineColumns(
        name="combine_columns",
        columns=["generation", "model_name"],
        output_columns=["generations", "model_name"],
        input_batch_size=1,
    )

    keep_columns_1 = KeepColumns(
        columns=["instruction", "generations"],
    )

    clean = CleanGenerations(
        name="clean_generations",
    )

    # SECURITY: the OpenAI key is read from the environment instead of being
    # hardcoded in the notebook; a missing OPENAI_API_KEY fails here with a
    # KeyError. Any key previously committed in this cell should be revoked.
    ultrafeedback = UltraFeedback(
        llm=OpenAILLM(model="gpt-4o-mini", api_key=os.environ["OPENAI_API_KEY"]),
        input_batch_size=1,
        add_raw_output=False,
        aspect="overall-rating",
        output_mappings={"model_name": "ultrafeedback_model"},
    )

    best_gen = SelectBestGeneration(
        name="select_best_gen",
    )

    # Final dataset contains only the prompt and the winning answer.
    keep_columns = KeepColumns(
        columns=["instruction", "generation"],
    )

    (
        load_dataset
        >> [text_generation_1, text_generation_2, text_generation_3]
        >> combine_columns
        >> keep_columns_1
        >> clean
        >> ultrafeedback
        >> best_gen
        >> keep_columns
    )

  combine_columns = CombineColumns(


In [9]:
# Execute the pipeline with its runtime parameters and collect the resulting dataset.
distiset = pipeline.run(
    parameters={
        load_dataset.name: {
            "repo_id": "ahsanirfan961/arena-dataset",
            "split": "train",
        },
        # The three answer generators use identical sampling settings; the
        # comprehension builds a separate settings dict for each of them.
        **{
            generator.name: {
                "llm": {
                    "generation_kwargs": {
                        "temperature": 0.7,
                        "max_new_tokens": 256,
                    }
                }
            }
            for generator in (text_generation_1, text_generation_2, text_generation_3)
        },
        ultrafeedback.name: {
            "llm": {
                "generation_kwargs": {
                    "max_new_tokens": 256,
                    "temperature": 0.7,
                }
            }
        },
    },
    use_cache=False,
)

  return [self.format_input(input) for input in inputs]


  return [self.format_input(input) for input in inputs]


  return [self.format_input(input) for input in inputs]


You are not running the flash-attention implementation, expect numerical differences.


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Generating train split: 0 examples [00:00, ? examples/s]

In [10]:
# Print the object returned by `pipeline.run` to inspect its structure.
print(distiset)

Distiset({
    default: DatasetDict({
        train: Dataset({
            features: ['instruction', 'generation'],
            num_rows: 48
        })
    })
})


In [11]:
# Convert the `train` split of the `default` configuration to a pandas DataFrame and print it.
print(distiset['default']['train'].to_pandas())

                                          instruction  \
0   1. How do recent enhancements in photovoltaic ...   
1   2. What is the comprehensive life cycle analys...   
2   3. In what manner has integration of support m...   
3   1. How will evolving international diplomatic ...   
4   2. What could be the comparative effects of la...   
5   3. In what ways do anticipated progressions in...   
6   1. How do recent developments in composite mat...   
7   2. What implications could emerge regarding gl...   
8   3. In what ways might cutting-edge machine lea...   
9   1. How do fluctuating climatic conditions infl...   
10  2. What fiscal outcomes might materialize for ...   
11  3. In what ways could novel nanoengineering ac...   
12  1. How might differentiated tariff structures ...   
13  2. In what ways do partnerships among city adm...   
14  3. Can comprehensive pedagogical outreach prog...   
15  1. What is the correlation between individuals...   
16  2. Analyze the interplay be

In [12]:
# Publish the generated dataset to the Hugging Face Hub.
# SECURITY: the token is read from the HF_TOKEN environment variable instead of
# being hardcoded; it must have write access to the target repository. Any token
# previously committed in this cell should be revoked.
distiset.push_to_hub(
    "ahsanirfan961/arena_output",
    token=os.environ.get("HF_TOKEN"),
)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

README.md:   0%|          | 0.00/3.80k [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.
--- Logging error ---
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/handlers.py", line 1475, in emit
    self.enqueue(self.prepare(record))
  File "/usr/lib/python3.10/logging/handlers.py", line 1436, in enqueue
    self.queue.put_nowait(record)
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 138, in put_nowait
    return self.put(obj, False)
  File "/usr/lib/python3.10/multiprocessing/queues.py", line 88, in put
    raise ValueError(f"Queue {self!r} is closed")
ValueError: Queue <multiprocessing.queues.Queue object at 0x7f679907a860> is closed
Call stack:
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.10/dist-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_ne