## Donwloading the Data

In [45]:
import os 
from openai import OpenAI
from dotenv import load_dotenv
from datasets import load_dataset
import verifiers as vf


load_dotenv()

client = OpenAI(base_url = os.getenv("DEEPINFRA_API_LINK"), api_key = os.getenv("DEEPINFRA_API_KEY"));

dataset = load_dataset('willcb/V3-wordle', split = "train",  cache_dir=None).map(lambda x: {'question': x['answer'], 'answer': "".join(sorted(x['answer']))})

dataset = dataset.remove_columns([c for c in dataset.column_names if c not in ['question', 'answer']]) #type: ignore

## REMOVE THIS TO GENERATE DATA FROM THE ENTIRE DATASET
dataset = dataset.select(range(10)) #type:ignore

print(dataset)

Dataset({
    features: ['answer', 'question'],
    num_rows: 1000
})


In [46]:
## View the Datasets
dataset[0]

{'answer': 'aelnp', 'question': 'plane'}

In [47]:
## Setting the Parsers, Rubrics and Environment

parser = vf.XMLParser(['think', 'answer'], answer_field="answer")

system_prompt = f"""Respond in the following format:
{parser.get_format_str()}

Sort the string lexographically without using code and give your final answer (the sorted letters) inside <answer></answer> tags"""

def sort_reward_func(completion, answer, **kwargs) -> float:
    """
    Check if the completion is sorted    
    """
    
    return 1.0 if parser.parse_answer(completion) == answer else 0.0


rubric = vf.Rubric(funcs=[
    sort_reward_func,
    parser.get_format_reward_func(),
], weights=[1.0, 0.2])


vf_env = vf.SingleTurnEnv(
    dataset=dataset, 
    eval_dataset=dataset,
    system_prompt=system_prompt,
    parser=parser,
    rubric=rubric,
    max_concurrent=100
)

Map (num_proc=32): 100%|██████████| 1000/1000 [00:00<00:00, 3780.99 examples/s]


In [None]:
## Sanity Check with API Client
response = client.chat.completions.create(
    model = "deepseek-ai/DeepSeek-V3-0324", 
    messages = [{"role": "system", "content": system_prompt}, 
    {"role": "user", "content": "place"}]
)

completion = response.choices[0].message.content

print(completion)

<think>
To sort the string "place" lexicographically (which is similar to alphabetical order), we'll need to arrange its letters in order from smallest to largest based on their Unicode/ASCII values. 

First, let's list the letters in "place" and their corresponding ASCII values:
- 'p' → 112
- 'l' → 108
- 'a' → 97
- 'c' → 99
- 'e' → 101

Now, let's sort these ASCII values in ascending order:
1. 'a' (97)
2. 'c' (99)
3. 'e' (101)
4. 'l' (108)
5. 'p' (112)

So, the sorted order of the letters is 'a', 'c', 'e', 'l', 'p'.
</think>
<answer>acehp</answer>


In [35]:
# Sanity Check with Parser and Rubrics
answer = parser.parse_answer([{"role": "assistant", "content": completion}]) #type: ignore

print(answer)

reward = await rubric.score_rollout(system_prompt, [{"role": "assistant", "content": completion}], "acehp", []) #type: ignore

print(reward)

acehp
{'sort_reward_func': 1.0, 'format_reward_func': 1.0, 'reward': 1.2}


In [48]:
results = vf_env.evaluate(client, model="deepseek-ai/DeepSeek-V3-0324", num_samples = 1000, max_concurrent = 128)

Running 1000 rollouts: 100%|██████████| 1000/1000 [01:21<00:00, 12.33it/s]
Evaluating 1000 rollouts: 100%|██████████| 1000/1000 [00:00<00:00, 1808.67it/s]


In [39]:
for k,v in results.items():
    print(k)
    print(v)

answer
['aelnp', 'aegnr', 'ceipr', 'elops', 'ainpt', 'ceimr', 'aelnp', 'cdlou', 'inopt', 'ainpt']
question
['plane', 'range', 'price', 'slope', 'paint', 'crime', 'plane', 'cloud', 'point', 'paint']
prompt
[[{'content': 'Respond in the following format:\n<think>\n...\n</think>\n<answer>\n...\n</answer>\n\nSort the string lexographically without using code and give your final answer (the sorted letters) inside <answer></answer> tags', 'role': 'system'}, {'content': 'plane', 'role': 'user'}], [{'content': 'Respond in the following format:\n<think>\n...\n</think>\n<answer>\n...\n</answer>\n\nSort the string lexographically without using code and give your final answer (the sorted letters) inside <answer></answer> tags', 'role': 'system'}, {'content': 'range', 'role': 'user'}], [{'content': 'Respond in the following format:\n<think>\n...\n</think>\n<answer>\n...\n</answer>\n\nSort the string lexographically without using code and give your final answer (the sorted letters) inside <answer></

In [49]:
dataset_dsv3 = vf_env.make_dataset(results)

In [50]:
dataset_dsv3

Dataset({
    features: ['prompt', 'completion', 'answer', 'reward', 'task'],
    num_rows: 1000
})

In [51]:
dataset_dsv3 = dataset_dsv3.sort("reward", reverse=True)

In [52]:
dataset_dsv3.push_to_hub("V3-lexo-sort")

Creating parquet from Arrow format: 100%|██████████| 1/1 [00:00<00:00, 19.55ba/s]
Uploading the dataset shards: 100%|██████████| 1/1 [00:01<00:00,  1.16s/it]


CommitInfo(commit_url='https://huggingface.co/datasets/vijay-ravichander/V3-lexo-sort/commit/2237e418fef0c9dcd6eeb64f09481fcdaeed2ad0', commit_message='Upload dataset', commit_description='', oid='2237e418fef0c9dcd6eeb64f09481fcdaeed2ad0', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/vijay-ravichander/V3-lexo-sort', endpoint='https://huggingface.co', repo_type='dataset', repo_id='vijay-ravichander/V3-lexo-sort'), pr_revision=None, pr_num=None)