In [1]:
import argilla as rg 
localuser = "argilla"
password = "1234"
apikey = "argilla.apikey"
url = "http://aicontroller:6900/"

rg.init(api_key=apikey,
        api_url=url)



In [2]:
localgilla = "localgilla"
rg.set_workspace(localgilla)

In [3]:
# creating the dataset as a framework

dataset_fw = rg.FeedbackDataset(
    guidelines="Please read the prompt carefully",
    questions=[
        rg.TextQuestion(
            name="prompt",
            title="Please write a harmless reply",
            required=True,
        )
    ],
    fields=[
        rg.TextField(name="prompt", required=True)
    ]
)

In [None]:
# there are following ways to collect the datasets 

# The steps here can include: 
# (1) finding an open dataset that might contain prompts related to your use 
# case
# (2) performing** exploratory data** analysis and topic extraction** to understand
#  the data
# (3) filtering and selecting prompts based on topic, quality,
# text descriptiveness, etc.
# (4) Asking humans to write prompts for your usecase

In [17]:
# this will be populated from the list of writing topics you create
fields = [
    rg.TextField(name="writing-topic", required=True)
]

# we will ask the labeler to write a possible prompt or instruction
question = rg.TextQuestion(
	name="prompt",
	title="Imagine and write a possible instruction for the given topic:",
	required=True
)

In [4]:
from datasets import load_dataset

prompts = load_dataset("HuggingFaceH4/mt_bench_prompts", split="train")

In [None]:
prompts[0]

In [7]:
records = [
    rg.FeedbackRecord(fields={"prompt": rek['prompt'][0]}) for rek in prompts
]
records
dataset_fw.add_records(records)

In [8]:
# This publishes the dataset with its records to Argilla and returns the dataset in Argilla
remote_dataset = dataset_fw.push_to_argilla(name="rlhf_demo", workspace=localgilla)

In [6]:
# Assume we distribute the workload in one dataset with several labelers
feedback_five = rg.FeedbackDataset.from_argilla(
	name="rlhf_demo",
	workspace=localgilla
)

In [7]:
feedback_five

RemoteFeedbackDataset(
   id=480681ab-0a8b-48dc-ac20-65672ddb0973
   name=rlhf_demo
   workspace=Workspace(id=d56c5067-b7d6-46ac-a5e1-759124e3542e, name=localgilla, inserted_at=2024-03-30 11:11:30.153391, updated_at=2024-03-30 11:11:30.153391)
   url=http://aicontroller:6900/dataset/480681ab-0a8b-48dc-ac20-65672ddb0973/annotation-mode
   fields=[RemoteTextField(id=UUID('5a2e15e7-32c5-42a8-bcfe-016d9fcd61a5'), client=None, name='prompt', title='Prompt', required=True, type='text', use_markdown=False)]
   questions=[RemoteTextQuestion(id=UUID('dd696635-556f-4314-9c91-3742736cc6d4'), client=None, name='prompt', title='Please write a harmless reply', description=None, required=True, type='text', use_markdown=False)]
   guidelines=Please read the prompt carefully
   metadata_properties=[]
   vectors_settings=[]
)

In [8]:
feedback_five.filter_by(response_status="submitted")

RemoteFeedbackDataset(
   id=480681ab-0a8b-48dc-ac20-65672ddb0973
   name=rlhf_demo
   workspace=Workspace(id=d56c5067-b7d6-46ac-a5e1-759124e3542e, name=localgilla, inserted_at=2024-03-30 11:11:30.153391, updated_at=2024-03-30 11:11:30.153391)
   url=http://aicontroller:6900/dataset/480681ab-0a8b-48dc-ac20-65672ddb0973/annotation-mode
   fields=[RemoteTextField(id=UUID('5a2e15e7-32c5-42a8-bcfe-016d9fcd61a5'), client=None, name='prompt', title='Prompt', required=True, type='text', use_markdown=False)]
   questions=[RemoteTextQuestion(id=UUID('dd696635-556f-4314-9c91-3742736cc6d4'), client=None, name='prompt', title='Please write a harmless reply', description=None, required=True, type='text', use_markdown=False)]
   guidelines=Please read the prompt carefully
   metadata_properties=[]
   vectors_settings=[]
)

In [9]:
### Create the datasets to rank the responses

questions = [
    rg.RankingQuestion(
        name="response_ranking",
        title="order the responses based on their accuracy & helpfulness",
        required=True,
        values={"res1":"Nice", "res2": "Okay"}
    )
]

In [5]:
question = [
    rg.RatingQuestion(
        name="rate_resp",
        title="Select accurate response between (2) and (3). If same then select (1).",
        required=True,
        values=[1, 2, 3]
    )
]

In [6]:
response_collect_ds = rg.FeedbackDataset(
    guidelines="Please read prompt, its response below and provide feedback",
    questions=question,
    fields=[
        rg.TextField(name="prompt1", required=True),
        rg.TextField(name="response1", required=True),
        rg.TextField(name="response2", required=True),
    ]
)

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Load the model and tokenizer
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it",
                                             resume_download=True,
                                            device_map="cuda")
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")

Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
# Create a pipeline for text generation
gen_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    # device='cuda',
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

In [25]:
for rec in prompts:
    print(rec['prompt'])

['Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions.', 'Rewrite your previous response. Start every sentence with the letter A.']
["Draft a professional email seeking your supervisor's feedback on the 'Quarterly Financial Report' you prepared. Ask specifically about the data analysis, presentation style, and the clarity of conclusions drawn. Keep the email short and to the point.", 'Take a moment to evaluate and critique your own response.']
['Imagine you are writing a blog post comparing two popular smartphone models. Develop an outline for the blog post, including key points and subheadings to effectively compare and contrast the features, performance, and user experience of the two models. Please answer in fewer than 200 words.', 'Take your previous response and rephrase it as a limerick.']
['Write a persuasive email to convince your introverted friend, who dislikes public speaking, to volunteer as a guest 

In [12]:
records = []
for record in prompts:
    prompt = record["prompt"]
    # print(f'This is prompt: {prompt}')
    # Generate two responses in one call
    outputs = gen_pipeline(
        prompt,
        max_length=512,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    # print(outputs)
    responses = [output[0]["generated_text"] for output in outputs]
    # print(f"This is response: {responses}")
    try:
        record = rg.FeedbackRecord(fields={"prompt1": prompt[0],
                                           "response1": responses[0],
                                           "response2": responses[1]})
        response_collect_ds.add_records([record])
    except Exception as e:
        print(f"The prompt {prompt} created error due to : {e}")

# Add records to the dataset

In [13]:
to_rem_ds = response_collect_ds.push_to_argilla(name="response_collect",
                                                workspace=localgilla)

Output()

In [15]:
feedback_ds = rg.FeedbackDataset.from_argilla(
        name="response_collect",
        workspace=localgilla
    )

In [27]:
# Define an empty list to store the triplets
triplets = []

# Loop over all records in the dataset
for record in feedback_ds.records:
    print(record.fields)
    # Ensure that the record has responses
    if record.responses is None or len(record.responses) == 0:
        continue

    # Ensure the response has been submitted (not discarded)
    # print(len(record.responses))
    response = record.responses[0]
    print(response)
    if response.status == 'submitted':
        print(response.values['rate_resp'])
        # Get the ranking value from the response for the preferred and least preferred
        # responses, assuming there are no ties
        preferred_rank = response.values["rate_resp"].value
        # least_preferred_rank = response.values["response_ranking"].value[1]["value"]

        # Construct the triplet and append to the list
        triplets.append({
            "prompt": record.fields["prompt1"],
            "preferred_response": preferred_rank #  record.fields[preferred_rank],
            # "least_preferred_response": record.fields[least_preferred_rank],
        })

# Now, "triplets" is a list of dictionaries, each containing a prompt and the associated
# preferred and less preferred responses

{'prompt1': 'Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions.', 'response1': 'Compose an engaging travel blog post about a recent trip to Hawaii, highlighting cultural experiences and must-see attractions.', 'response2': 'Rewrite your previous response. Start every sentence with the letter A.'}
{'prompt1': "Draft a professional email seeking your supervisor's feedback on the 'Quarterly Financial Report' you prepared. Ask specifically about the data analysis, presentation style, and the clarity of conclusions drawn. Keep the email short and to the point.", 'response1': "Draft a professional email seeking your supervisor's feedback on the 'Quarterly Financial Report' you prepared. Ask specifically about the data analysis, presentation style, and the clarity of conclusions drawn. Keep the email short and to the point.", 'response2': 'Take a moment to evaluate and critique your own response.'}
{'prompt1': 'Imagi

In [28]:
triplets

[{'prompt': 'Describe a vivid and unique character, using strong imagery and creative language. Please answer in fewer than two paragraphs.',
  'preferred_response': 3},
 {'prompt': 'Given that f(x) = 4x^3 - 9x - 14, find the value of f(2).',
  'preferred_response': 3},
 {'prompt': 'How do the stages of life shape our understanding of time and mortality?',
  'preferred_response': 3}]