In [1]:
import ollama

def llm_generate(prompt, model="qwen2.5:14b", options=None):
    """Run a raw text completion through Ollama and return the generated text.

    The request is sent with ``raw=True``, so ``prompt`` is passed to the model
    as-is; the caller is responsible for any chat-template markup it needs.

    Args:
        prompt: Full prompt string to complete.
        model: Name of the Ollama model to query.
        options: Optional dict of generation options forwarded to
            ``ollama.generate`` (e.g. ``stop``, ``temperature``). ``None``
            means "no extra options".

    Returns:
        The ``"response"`` field of the Ollama reply.
    """
    # Use a fresh dict per call. The previous ``options={}`` default was a
    # single object shared by every call, so any mutation of it would have
    # leaked into all later calls.
    if options is None:
        options = {}
    response = ollama.generate(model=model, prompt=prompt, raw=True, options=options)
    return response["response"]

def llm(messages, model="qwen2.5:14b"):
    """Send a chat transcript to Ollama and return the reply's text content.

    Args:
        messages: List of chat messages forwarded unchanged to ``ollama.chat``.
        model: Name of the Ollama model to query.

    Returns:
        The ``"content"`` of the ``"message"`` in the Ollama reply.
    """
    reply = ollama.chat(messages=messages, model=model)
    message = reply["message"]
    return message["content"]

In [2]:
# ChatML-style prefix: a complete system turn followed by an opened (but not
# closed) user turn. Completing this text makes the model write the user's
# message itself.
PROMPT_QUESTION_GENERATION = (
    "<|im_start|>system\n"
    'You are a helpful assistant. The user ends its message with "<|im_end|>" suffix.<|im_end|>\n'
    "<|im_start|>user\n"
)

def generate_prompt(model="qwen2.5:14b"):
    """Sample one synthetic user message from the model.

    Completes ``PROMPT_QUESTION_GENERATION`` (which ends inside an open user
    turn) via ``llm_generate`` and stops at the first chat delimiter, so the
    returned text is whatever the model wrote as the user's message.

    Args:
        model: Name of the Ollama model to query.

    Returns:
        The text returned by ``llm_generate``.
    """
    sampling_options = {
        # Stop as soon as the model closes the user turn or opens a new one.
        "stop": ["<|im_end|>", "<|im_start|>"],
        "temperature": 1,
    }
    return llm_generate(
        PROMPT_QUESTION_GENERATION,
        model=model,
        options=sampling_options,
    )

In [None]:
from tqdm.auto import tqdm

# Build the raw dataset: 2000 model-written prompts, each answered three
# times by the chat model. Records are appended one at a time so whatever has
# been generated so far is still in `ds` if the loop is interrupted.
ds = []

for prompt_idx in tqdm(range(2000)):
    prompt = generate_prompt()
    responses = [
        llm([{"role": "user", "content": prompt}])
        for sample_idx in range(3)
    ]
    ds.append({"prompt": prompt, "responses": responses})

In [None]:
import json
# Persist the generated prompts/responses so later cells can reload them
# without re-running the generation loop.
with open("ds.json", "w", encoding="utf-8") as f:
    # Serialize straight into the file rather than building the whole JSON
    # document as one string first.
    json.dump(ds, f)

# Keep this as the cell's last expression so the notebook displays the record
# count; as a bare statement ahead of the `with` block it was evaluated and
# silently discarded.
len(ds)

In [4]:
# Reload the dataset written to ds.json. The context manager closes the file
# handle deterministically; the previous `open(...).read()` left it to the
# garbage collector.
with open("ds.json", encoding="utf-8") as f:
    ds = json.load(f)

In [None]:
from reward import reward
# Score only the first example as a quick check of the reward function
# before running it over the whole dataset.
first_example = ds[0]
rewards = reward(first_example["prompt"], first_example["responses"])
print(rewards)

In [None]:
from tqdm.auto import tqdm

# Attach rewards to every example. `reward` is called once per example with
# the prompt and the full list of its responses; its result is stored under
# "rewards" alongside the original fields.
ds_with_rewards = []

for example in tqdm(ds):
    example_prompt = example["prompt"]
    example_responses = example["responses"]
    ds_with_rewards.append(
        {
            "prompt": example_prompt,
            "responses": example_responses,
            "rewards": reward(example_prompt, example_responses),
        }
    )

# From here on `ds` refers to the reward-annotated records.
ds = ds_with_rewards

In [None]:
# Show the first prompt followed by each of its responses and the matching
# reward. Iterating over the (response, reward) pairs instead of hard-coding
# indices 0..2 keeps this cell working if the number of responses per prompt
# changes; for three responses the printed output is unchanged.
inspected = ds[0]

print(inspected["prompt"])
print("##########################################################################")
for response_text, response_reward in zip(inspected["responses"], inspected["rewards"]):
    print(response_text)
    print("--- reward: ", response_reward)
    print("--------------------------------------------------------------------------")

In [10]:
# Keep, for every prompt, only the response with the highest reward.
# `list.index(max(...))` returns the first position of the maximum, so ties
# resolve to the earliest response. Each output record holds just the prompt
# and that single best response.
preference_ds = []

for example in ds:
    scores = example["rewards"]
    best_position = scores.index(max(scores))
    best = example["responses"][best_position]
    preference_ds.append({"prompt": example["prompt"], "response": best})

In [None]:
from datasets import Dataset

# Convert the list of {"prompt", "response"} records into a `Dataset`.
# NOTE(review): this rebinds `ds`, which held a plain list in the cells above;
# re-running those earlier cells after this one will operate on the wrong type.
ds = Dataset.from_list(preference_ds)
# Bare last expression so the notebook displays the dataset's repr.
ds

In [None]:
from huggingface_hub import notebook_login
# Interactive Hugging Face Hub login; presumably required before the upload
# in the next cell — confirm whether a cached token is already available.
notebook_login()

In [None]:
# Upload `ds` to the Hub under the repo id "OpenEndedLM/OpenEnded-v0.2".
# This is a network side effect: re-running the cell pushes again.
ds.push_to_hub("OpenEndedLM/OpenEnded-v0.2")