In [1]:
import os

import argilla as rg

# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook. The fallbacks are the values
# this notebook originally hard-coded.
# NOTE(review): the fallbacks look like demo/quickstart credentials -- confirm
# they are not valid on any shared or production server.
localuser = os.environ.get("ARGILLA_USER", "argilla")
password = os.environ.get("ARGILLA_PASSWORD", "1234")
apikey = os.environ.get("ARGILLA_API_KEY", "argilla.apikey")
url = os.environ.get("ARGILLA_API_URL", "http://aicontroller:6900/")

# Open the client session against the Argilla server.
rg.init(api_key=apikey,
        api_url=url)

This may lead to potential compatibility issues during your experience.
To ensure a seamless and optimized connection, we highly recommend aligning your client version with the server version.


In [3]:
# Name of the Argilla workspace used for the prompt-collection dataset;
# reused below when pushing and loading "rlhf_demo".
localgilla = "localgilla"
# Make it the active workspace for the client session.
rg.set_workspace(localgilla)

In [4]:
# Local definition of the first feedback dataset: annotators see one text
# field ("prompt") and answer one required free-text question.
reply_question = rg.TextQuestion(
    name="prompt",
    title="Please write a harmless reply",
    required=True,
)
prompt_field = rg.TextField(name="prompt", required=True)

dataset_fw = rg.FeedbackDataset(
    guidelines="Please read the prompt carefully",
    questions=[reply_question],
    fields=[prompt_field],
)

In [None]:
# There are several ways to collect the prompts for the dataset.

# The steps can include:
# (1) finding an open dataset that might contain prompts related to your use
# case
# (2) performing exploratory data analysis and topic extraction to understand
#  the data
# (3) filtering and selecting prompts based on topic, quality,
# text descriptiveness, etc.
# (4) asking humans to write prompts for your use case

In [5]:
# Input shown to the labeler: a single required text field that is meant to be
# filled from the list of writing topics.
fields = [rg.TextField(name="writing-topic", required=True)]

# Required free-text question asking the labeler to invent a prompt or
# instruction for the displayed topic.
question = rg.TextQuestion(
    name="prompt",
    title="Imagine and write a possible instruction for the given topic:",
    required=True,
)

In [6]:
from datasets import load_dataset

# Load the "train" split of the HuggingFaceH4/mt_bench_prompts dataset; the
# later cells iterate over it and read each entry's "prompt" value.
prompts = load_dataset("HuggingFaceH4/mt_bench_prompts", split="train")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Display the first entry to inspect the structure of a prompt row.
prompts[0]

In [7]:
# Build one feedback record per dataset entry. Only element [0] of each
# entry's "prompt" value is used.
# NOTE(review): presumably "prompt" is a list of conversation turns and [0] is
# the opening turn -- confirm against the dataset card.
records = [
    rg.FeedbackRecord(fields={"prompt": row["prompt"][0]}) for row in prompts
]

# Attach the records to the local dataset definition.
# Re-running this cell adds the same records again; re-create `dataset_fw`
# first if you need to run it a second time.
dataset_fw.add_records(records)

In [8]:
# Publish the local dataset (definition plus records) to the Argilla server as
# "rlhf_demo" in the `localgilla` workspace; the return value is kept as the
# handle to the server-side dataset.
remote_dataset = dataset_fw.push_to_argilla(name="rlhf_demo", workspace=localgilla)

In [9]:
# Scenario: a single dataset whose annotation workload is shared by several
# labelers. Load it back from the server by name and workspace.
feedback_five = rg.FeedbackDataset.from_argilla(name="rlhf_demo", workspace=localgilla)

In [None]:
# Display the dataset object loaded from Argilla.
feedback_five

In [None]:
# Filter on response status "submitted". The result is only displayed as the
# cell output; it is not assigned to a variable.
feedback_five.filter_by(response_status="submitted")

In [10]:
### Create the question used to rank the responses

# Required ranking question over two options, given as a mapping from option
# value ("res1", "res2") to the label shown for it ("Nice", "Okay").
# NOTE(review): within this notebook `questions` is never passed to a dataset;
# the dataset created below is built from `question` (a rating question)
# instead. Confirm which of the two is intended.
questions = [
    rg.RankingQuestion(
        name="response_ranking",
        title="order the responses based on their accuracy & helpfulness",
        required=True,
        values={"res1":"Nice", "res2": "Okay"}
    )
]

In [11]:
# Required rating question with the three choices 1, 2 and 3; the title tells
# the annotator how to use them.
rate_resp_question = rg.RatingQuestion(
    name="rate_resp",
    title="Select accurate response between (2) and (3). If same then select (1).",
    required=True,
    values=[1, 2, 3],
)

# Wrapped in a list because it is handed to FeedbackDataset(questions=...).
# Note that this rebinds `question`, which earlier held a single TextQuestion.
question = [rate_resp_question]

In [12]:
# Dataset for comparing two model responses to the same prompt: three required
# text fields and the rating question(s) held in `question`.
response_field_names = ("prompt1", "response1", "response2")

response_collect_ds = rg.FeedbackDataset(
    guidelines="Please read prompt, its response below and provide feedback",
    questions=question,
    fields=[
        rg.TextField(name=field_name, required=True)
        for field_name in response_field_names
    ],
)

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Checkpoint shared by the model and its tokenizer.
model_id = "google/gemma-2b"

# Load the causal language model first, then the matching tokenizer.
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:

# Create a pipeline for text generation, wrapping the model and tokenizer
# loaded above.
# NOTE(review): `model` is an already-instantiated object here; confirm that
# `torch_dtype` and `device_map` still take effect in that case rather than
# only when the pipeline loads the model itself -- TODO verify against the
# transformers pipeline documentation.
gen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Generate two sampled responses for every prompt and collect them as feedback
# records for `response_collect_ds`.
records = []
for row in prompts:
    # Use element [0] of the "prompt" value, exactly as the records for the
    # first dataset were built, so the pipeline receives a single string.
    prompt = row["prompt"][0]

    # Generate two responses in one call.
    # - max_new_tokens bounds only the generated continuation, so a long
    #   prompt cannot use up the whole length budget.
    # - return_full_text=False keeps the prompt out of the stored responses.
    outputs = gen_pipeline(
        prompt,
        max_new_tokens=100,
        do_sample=True,
        top_k=10,
        num_return_sequences=2,
        eos_token_id=tokenizer.eos_token_id,
        return_full_text=False,
    )
    responses = [output["generated_text"] for output in outputs]

    # Field names must match the fields declared on `response_collect_ds`
    # ("prompt1", "response1", "response2").
    records.append(
        rg.FeedbackRecord(
            fields={
                "prompt1": prompt,
                "response1": responses[0],
                "response2": responses[1],
            }
        )
    )

# Add records to the dataset
response_collect_ds.add_records(records)

In [None]:
# Publish the response-comparison dataset to the Argilla server as
# "response_collect" and keep the returned handle.
# NOTE(review): the workspace is the literal "argilla" here, whereas the
# "rlhf_demo" dataset was pushed to `localgilla` -- confirm this is intended.
to_rem_ds = response_collect_ds.push_to_argilla(name="response_collect", workspace="argilla")

In [None]:
# Load the "response_collect" dataset back from the "argilla" workspace so the
# collected feedback can be read.
feedback_ds = rg.FeedbackDataset.from_argilla(name="response_collect", workspace="argilla")

In [None]:
# Build (prompt, preferred response, least preferred response) triplets from
# the submitted feedback.
#
# The "response_collect" dataset is defined in this notebook with the fields
# "prompt1", "response1", "response2" and a single rating question "rate_resp"
# whose values are 1, 2 and 3 ("Select accurate response between (2) and (3).
# If same then select (1).").
# NOTE(review): the mapping below assumes rating 2 selects "response1" and
# rating 3 selects "response2" (the second and third fields) -- confirm with
# whoever wrote the annotation guidelines.
rating_to_fields = {
    2: ("response1", "response2"),
    3: ("response2", "response1"),
}

# Define an empty list to store the triplets
triplets = []

# Loop over all records in the dataset
for record in feedback_ds.records:
    # Ensure that the record has responses
    if not record.responses:
        continue

    # Only the first response of each record is considered, and only if it
    # has been submitted (not discarded).
    response = record.responses[0]
    if response.status != "submitted":
        continue

    # Skip responses that carry no answer for the rating question.
    answer = response.values.get("rate_resp")
    if answer is None:
        continue

    # A rating of 1 means "same": there is no preference, so no triplet.
    chosen = rating_to_fields.get(answer.value)
    if chosen is None:
        continue
    preferred_field, least_preferred_field = chosen

    # Construct the triplet and append to the list
    triplets.append({
        "prompt": record.fields["prompt1"],
        "preferred_response": record.fields[preferred_field],
        "least_preferred_response": record.fields[least_preferred_field],
    })

# Now, "triplets" is a list of dictionaries, each containing a prompt and the associated
# preferred and less preferred responses