In [1]:
import ollama

def llm_generate(prompt, model="qwen2.5:14b", options=None):
    """Run a raw completion through Ollama and return the generated text.

    Args:
        prompt: Text sent to the model. ``raw=True`` is passed to
            ``ollama.generate``, so any chat-template tokens the model needs
            must already be part of ``prompt``.
        model: Name of the Ollama model to query.
        options: Optional dict of generation options forwarded to
            ``ollama.generate`` (for example ``stop`` or ``temperature``).
            ``None`` means "no extra options".

    Returns:
        The value stored under the ``"response"`` key of Ollama's reply.
    """
    # Use a fresh dict per call instead of a mutable default shared by all calls.
    if options is None:
        options = {}
    response = ollama.generate(model=model, prompt=prompt, raw=True, options=options)
    return response["response"]

def llm(messages, model="qwen2.5:14b"):
    """Send a chat conversation to an Ollama model and return the reply text.

    ``messages`` is forwarded unchanged to ``ollama.chat``; the returned value
    is the ``"content"`` field of the ``"message"`` entry in the reply.
    """
    reply = ollama.chat(model=model, messages=messages)
    message = reply["message"]
    return message["content"]

In [2]:
# Raw prompt prefix: a complete system turn followed by an opened, still-empty
# user turn. Completing this text makes the model write the user's message
# itself, which is how synthetic questions are sampled.
PROMPT_QUESTION_GENERATION = (
    "<|im_start|>system\n"
    'You are a helpful assistant. The user ends its message with "<|im_end|>" suffix.<|im_end|>\n'
    "<|im_start|>user\n"
)

def generate_prompt(model="qwen2.5:14b"):
    """Sample one synthetic user prompt by completing the question-generation prefix.

    Generation stops at the first ``<|im_end|>`` or ``<|im_start|>`` token and
    uses a temperature of 1.
    """
    sampling_options = {
        "stop": ["<|im_end|>", "<|im_start|>"],
        "temperature": 1,
    }
    return llm_generate(PROMPT_QUESTION_GENERATION, model=model, options=sampling_options)

In [3]:
from tqdm.auto import tqdm

# How many synthetic prompts to sample, and how many candidate answers to
# collect for each one. Named here so the dataset size is tunable in one place.
NUM_PROMPTS = 100
RESPONSES_PER_PROMPT = 3

ds = []

for _ in tqdm(range(NUM_PROMPTS)):
    prompt = generate_prompt()
    # Ask the same question several times to get independent candidates that
    # can be ranked against each other later.
    responses = [
        llm([{"role": "user", "content": prompt}])
        for _ in range(RESPONSES_PER_PROMPT)
    ]
    ds.append({"prompt": prompt, "responses": responses})

  0%|          | 0/100 [00:00<?, ?it/s]

In [8]:
from reward import reward
# Sanity check: score the candidate responses of the first example only and
# look at the raw reward values before scoring the whole dataset.
first_example = ds[0]
rewards = reward(first_example["prompt"], first_example["responses"])
print(rewards)

[-0.244140625, -1.40625, -7.8125]


In [12]:
# Score every example's candidate responses and store the scores next to them
# under the "rewards" key (one reward per response, in the same order).
ds_with_rewards = [
    {
        "prompt": example["prompt"],
        "responses": example["responses"],
        "rewards": reward(example["prompt"], example["responses"]),
    }
    for example in tqdm(ds)
]

# From here on `ds` refers to the reward-annotated examples.
ds = ds_with_rewards

  0%|          | 0/100 [00:00<?, ?it/s]

In [14]:
# Show the first prompt, then every candidate response followed by its reward.
# Looping over the (response, reward) pairs replaces the copy-pasted print
# triplets and keeps working if the number of responses per prompt changes.
first_example = ds[0]

print(first_example["prompt"])
print("##########################################################################")
for response, score in zip(first_example["responses"], first_example["rewards"]):
    print(response)
    print("--- reward: ", score)
    print("--------------------------------------------------------------------------")

What is the significance of the 13 stripes on the American flag?
##########################################################################
The 13 stripes on the American flag represent the original 13 colonies that declared their independence from Great Britain and became the first states in the United States. These colonies were:

1. Delaware
2. Pennsylvania
3. New Jersey
4. Georgia
5. Connecticut
6. Massachusetts
7. Maryland
8. South Carolina
9. New Hampshire
10. Virginia
11. New York
12. North Carolina
13. Rhode Island

The red and white stripes symbolize the struggle and unity of these colonies against British rule, with each stripe representing one of the original states. The alternating pattern of red and white is a traditional design for flags, but in this context, it also serves as a visual reminder of the historical union and the principles upon which the United States was founded.

The flag's design has evolved over time to reflect changes within the country, such as the add

In [33]:
# Build one (chosen, rejected) preference pair per prompt: the highest-reward
# response is "chosen" and the lowest-reward response is "rejected".
preference_ds = []

for example in ds:
    candidates = example["responses"]
    scores = example["rewards"]

    # Compute the extremes once; the best/worst score is simply max/min.
    chosen_score = max(scores)
    rejected_score = min(scores)

    if chosen_score == rejected_score:
        # Every response got the same reward (e.g. identical generations), so
        # "chosen" and "rejected" would be the very same response. Such a pair
        # carries no preference signal, so it is left out.
        continue

    preference_ds.append({
        "prompt": example["prompt"],
        # list.index returns the first occurrence, matching the original tie-breaking.
        "chosen": candidates[scores.index(chosen_score)],
        "rejected": candidates[scores.index(rejected_score)],
        "chosen_score": chosen_score,
        "rejected_score": rejected_score,
    })

In [37]:
from datasets import Dataset

# Convert the list of preference pairs into a Hugging Face Dataset.
# Note: `ds` is rebound here and no longer refers to the reward-annotated examples.
preference_dataset = Dataset.from_list(preference_ds)
ds = preference_dataset
ds

Dataset({
    features: ['prompt', 'chosen', 'rejected', 'chosen_score', 'rejected_score'],
    num_rows: 100
})

In [38]:
from huggingface_hub import notebook_login
# Interactive Hugging Face authentication (renders a login widget in the notebook).
# Run this before the upload cell; presumably the account needs write access
# to the target dataset repository — confirm before pushing.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [39]:
# Target dataset repository on the Hugging Face Hub.
HUB_REPO_ID = "OpenEndedLM/OpenEndReward-v0.1"

# Upload the preference dataset; the returned commit info is shown as the cell output.
ds.push_to_hub(HUB_REPO_ID)

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/OpenEndedLM/OpenEndReward-v0.1/commit/6b79537761c36b4650ede1a4b14be3466a56ce58', commit_message='Upload dataset', commit_description='', oid='6b79537761c36b4650ede1a4b14be3466a56ce58', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/OpenEndedLM/OpenEndReward-v0.1', endpoint='https://huggingface.co', repo_type='dataset', repo_id='OpenEndedLM/OpenEndReward-v0.1'), pr_revision=None, pr_num=None)