In [4]:
from datasets import load_dataset, Dataset
import anthropic
import dotenv
import os

In [8]:
# Download the MATH-500 reasoning-step traces from the Hugging Face Hub and
# show the available splits and columns.
problems = load_dataset(path="immindich/math500-reasoning-steps")

print(problems)

def steps_labeled(problem):
    """Render a problem's reasoning steps as numbered text.

    Every entry of ``problem["steps"]`` becomes ``[index] step`` followed by a
    newline; the rendered entries are then joined with another newline, so
    consecutive steps end up separated by a blank line.
    """
    return "\n".join(
        f"[{index}] {text}\n" for index, text in enumerate(problem["steps"])
    )

def is_completed(problem):
    """Tell whether a problem has a recorded model answer.

    A problem counts as completed when ``problem["model_answer"]`` is anything
    other than the empty string.
    """
    answer = problem["model_answer"]
    return not (answer == "")

# Keep only the test-split problems for which the model produced an answer.
problems_filtered = list(filter(is_completed, problems["test"]))

DatasetDict({
    test: Dataset({
        features: ['problem', 'solution', 'answer', 'subject', 'level', 'unique_id', 'prompt', 'steps', 'model_answer', 'raw_response'],
        num_rows: 500
    })
})


In [9]:
# Load variables from a local .env file into the process environment so the
# API key does not have to be hard-coded in the notebook.
# (`os` is already imported in the first cell; it is not re-imported here.)
dotenv.load_dotenv()

client = anthropic.Anthropic(
    # defaults to os.environ.get("ANTHROPIC_API_KEY"); passed explicitly to
    # make the source of the credential obvious to the reader.
    api_key=os.environ.get("ANTHROPIC_API_KEY"),
)

In [10]:
from anthropic.types.message_create_params import MessageCreateParamsNonStreaming
from anthropic.types.messages.batch_create_params import Request

# Build one batch request per entry of problems_filtered.
# Each request's custom_id is the entry's index in problems_filtered (as a
# string); the result-handling cell sorts on it to restore this order.
batch_requests = []

for i, problem in enumerate(problems_filtered):
    # Numbered "[index] step" rendering of the trace, so the model can refer to
    # steps by their bracketed number.
    steps = steps_labeled(problem)
    
    # Labeling instructions followed by the numbered steps. Doubled braces
    # ({{ }}) produce literal braces in the JSON example; every line of the
    # literal keeps the source indentation as part of the prompt text.
    # NOTE(review): "step by step by step" looks like a typo, but it is runtime
    # prompt text and is deliberately left untouched here.
    user_prompt = f"""
    Attached to this prompt is the output of a language model which reasons step by step by step. The individual steps are labeled with numbers in brackets. Please identify the following two types of steps.
    
    1. Verification steps. In these steps, the model has just computed the answer, but now decides to check its work by computing the answer again, possibly using a different approach. You should only identify steps that clearly start in a way that indicates they are doing this, for example steps that start with "Wait, but let me make sure I'm not missing anything," or "Let me double-check my steps," or "Wait, to make sure".
    
    2. Conclusion steps. In these steps, the model concludes that it has figured out the final answer. These steps should start with something like "I think I'm confident".  The reasoning steps will typically end with a statement of the final answer, following the text "**Final Answer**". The conclusion steps should be before this text.
    
    Please output your analysis in the following JSON format.
    
    {{  "verification_steps": [2, 5], "conclusion_steps": [10] }}
    
    Do not respond with anything other than the JSON.
    
    Here are the steps of the reasoning
    
    {steps}
    """
    
    batch_requests.append(Request(
        # String form of the loop index; used later to match results to problems.
        custom_id = str(i),
        params=MessageCreateParamsNonStreaming(
            model="claude-3-7-sonnet-20250219",
            max_tokens=1000,
            # NOTE(review): temperature 1 presumably makes the labels vary
            # between runs — confirm this is intended for a labeling task.
            temperature=1,
            system="You are a helpful assistant labeling data generated by a language model",
            messages=[
                {
                    "role": "user",
                    "content": [
                        # First content part: a single worked example (a
                        # numbered trace plus the expected JSON labels).
                        {
                            "type": "text",
                            "text": "<examples>\n<example>\n<STEPS>\n[0] Alright, so I need to convert the rectangular coordinates (0, 3) to polar coordinates. Hmm, okay. Let me think about how to approach this. I remember that polar coordinates are represented as (r, θ), where r is the distance from the origin to the point, and θ is the angle made with the positive x-axis. \n\n[1] First, I think I should find r. I recall the formula for r is the square root of (x squared plus y squared). Since the given point is (0, 3), that means x is 0 and y is 3. So plugging those into the formula, r should be sqrt(0^2 + 3^2). Let me compute that: 0 squared is 0, 3 squared is 9, so 0 + 9 is 9. The square root of 9 is 3. So r is 3. That seems straightforward.\n\n[2] Now, onto θ. I remember that θ is the angle from the positive x-axis to the point. Since the point is (0, 3), it's located on the positive y-axis, right? So in the rectangular coordinate system, that would correspond to an angle of π/2 radians. Let me confirm that. In the unit circle, 0 radians is along the positive x-axis, π/2 is straight up along the positive y-axis, π is to the left along the negative x-axis, and 3π/2 is straight down along the negative y-axis. So yes, (0, 3) is at π/2.\n\n[3] Wait, but let me make sure I'm not missing anything. Sometimes, when dealing with negative r or angles, we have to consider the direction. But in this case, since r is positive, we don't have to worry about negative r. Also, since x is 0 and y is positive, it's in the first quadrant, which aligns with θ being π/2. \n\n[4] Is there another way to think about this? Maybe using trigonometry. The coordinates (0, 3) can be thought of as the point on the y-axis, 3 units above the origin. So in terms of polar coordinates, the radius is the length from the origin to that point, which we've already calculated as 3. The angle θ is the angle made with the positive x-axis, which is 90 degrees or π/2 radians. 
\n\n[5] Wait, let me visualize this. If I plot the point (0, 3) on a coordinate plane, it's directly above the origin. So, if I draw a line from the origin to that point, it forms a right angle with the x-axis. That angle is π/2, which is 90 degrees. \n\n[6] Is there any other angle that would also correspond to this point? Well, if I consider angles beyond π/2, like 3π/2, that would be pointing directly downward, which is (0, -3). But since our point is (0, 3), which is upwards, θ can't be 3π/2. So, π/2 is the correct angle.\n\n[7] Let me double-check my steps. First, I found r by using the Pythagorean theorem: sqrt(x^2 + y^2) = sqrt(0 + 9) = 3. That seems right. Then, for θ, since x is 0, that means the point is on the y-axis. So, the angle is either π/2 or 3π/2. But since the y-coordinate is positive, it's π/2. \n\n[8] I wonder if there's a formula for θ when x is 0. Maybe θ = arctan(y/x), but if x is 0, that would be arctan(infinity), which is undefined. So, in such cases, we have to know the position relative to the axes. Since x is 0 and y is positive, it's on the positive y-axis, so θ is π/2.\n\n[9] Alternatively, using the arctangent function might lead to an undefined result, but in this case, we can determine θ based on the quadrant. Since the point is on the positive y-axis, it's in the first quadrant (if we consider the positive x-axis as 0 radians). Wait, actually, in standard position, 0 radians is along the positive x-axis, and angles increase counterclockwise. So, the positive y-axis is π/2 radians.\n\n[10] Wait, to make sure, let me recall the unit circle. At 0 radians, it's (1, 0); at π/2, it's (0, 1); at π, it's (-1, 0); and at 3π/2, it's (0, -1). So, yes, (0, 3) is at π/2. \n\n[11] Another thought: if we consider the point (0, 3) in polar coordinates, it's at a distance of 3 from the origin, and the angle is π/2. So, r = 3, θ = π/2. \n\n[12] Is there a chance that I might have messed up the angle? 
For example, if x were negative or something, we'd have to adjust θ accordingly, but in this case, x is 0 and y is positive, so no issue.\n\n[13] Wait, just to think about another point: if the point was (0, -3), then θ would be 3π/2. But since it's (0, 3), θ is π/2. So, that makes sense.\n\n[14] Is there a way to represent this angle in a different range? Well, θ is given as 0 ≤ θ < 2π, so π/2 is within that range, so that's fine.\n\n[15] Let me also think about how to graph this in my mind. If I draw a circle with radius 3, the point (0, 3) would be at the top of the circle. So, that's 90 degrees from the x-axis. So, the angle is π/2. So, yeah, that seems correct.\n\n[16] I think I'm confident that r is 3 and θ is π/2. So, putting it all together, the polar coordinates are (3, π/2).\n\n[17] **Final Answer**\nThe polar coordinates are \\boxed{(3, \\dfrac{\\pi}{2})}.\n</STEPS>\n<ideal_output>\n{\n\"verification_steps\": [3, 7, 10],\n\"conclusion_steps\": [16]\n}\n</ideal_output>\n</example>\n</examples>\n\n"
                        },
                        # Second content part: the instructions and the steps
                        # of the trace being labeled.
                        {
                            "type": "text",
                            "text": user_prompt
                        }
                    ]
                }
            ]
        )
    ))

# Report how many requests were prepared (one per entry of problems_filtered).
print(f"Created batch of {len(batch_requests)} requests")

Created batch of 347 requests


Pressing the run button on the next cell costs about $2.

In [11]:
# Submit all prepared requests as one message batch and keep the returned
# batch object; its id is used by the next cell to fetch the results.
# NOTE: this call is billable (see the cost note above) — avoid re-running it
# unnecessarily.
batch = client.messages.batches.create(requests=batch_requests)

In [12]:
import json
import time

# Batches are processed asynchronously: results can only be downloaded once
# the batch's processing status is "ended". Poll for that instead of assuming
# the batch already finished, so the notebook survives Restart & Run All.
POLL_INTERVAL_SECONDS = 30
while True:
    batch_status = client.messages.batches.retrieve(batch.id)
    if batch_status.processing_status == "ended":
        break
    print(f"Batch {batch.id} is {batch_status.processing_status}; waiting...")
    time.sleep(POLL_INTERVAL_SECONDS)

results = list(client.messages.batches.results(
    batch.id,
))

# Report every request that did not succeed, then stop: non-succeeded results
# carry no message to parse, and dropping them would break the positional
# pairing with problems_filtered that the next cell relies on.
failed_ids = []
for result in results:
    if result.result.type != "succeeded":
        print('failure:', result.custom_id)
        failed_ids.append(result.custom_id)

if failed_ids:
    raise RuntimeError(
        f"{len(failed_ids)} batch request(s) did not succeed: {failed_ids}"
    )

# convert results into a list sorted by custom_id
# (custom_id is the stringified index assigned when the requests were built,
# so it is compared numerically rather than lexicographically).
results_sorted = []
for result in sorted(results, key=lambda entry: int(entry.custom_id)):
    raw_text = result.result.message.content[0].text
    try:
        results_sorted.append(json.loads(raw_text))
    except json.JSONDecodeError as exc:
        # Surface which request produced unparsable output; JSONDecodeError is
        # a ValueError subclass, so the exception family is unchanged.
        raise ValueError(
            f"Request {result.custom_id} did not return valid JSON: {raw_text!r}"
        ) from exc
print(len(results_sorted))

347


In [13]:
# Results are paired with problems purely by position, and zip() silently
# stops at the shorter input. Fail loudly if the two lists have drifted apart
# (e.g. an earlier cell was re-run) rather than publishing truncated or
# misaligned labels.
if len(problems_filtered) != len(results_sorted):
    raise ValueError(
        f"Expected one result per problem, got {len(problems_filtered)} "
        f"problems and {len(results_sorted)} results"
    )

# One record per problem: the prompt text, its reasoning steps, and the step
# indices the model labeled as verification / conclusion steps.
labeled_traces = [
    {
        "problem": problem["prompt"],
        "steps": problem["steps"],
        "verification_steps": result["verification_steps"],
        "conclusion_steps": result["conclusion_steps"],
    }
    for problem, result in zip(problems_filtered, results_sorted)
]

labeled_dataset = Dataset.from_list(labeled_traces)

In [14]:
from huggingface_hub import notebook_login

# Authenticate with the Hugging Face Hub before uploading.
# NOTE(review): notebook_login() renders a login widget (see the VBox output
# below); it presumably does not block until a token is entered, so the push
# in this same cell may be relying on a previously saved token — confirm, or
# run the login in its own cell first.
notebook_login()

# Upload the labeled traces to the Hub dataset repository named below.
labeled_dataset.push_to_hub("immindich/reasoning-steps-labeled")

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

Uploading the dataset shards:   0%|          | 0/1 [00:00<?, ?it/s]

Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

CommitInfo(commit_url='https://huggingface.co/datasets/immindich/reasoning-steps-labeled/commit/6dbd11f524427d53439ab5882fbc8e0b369d8d56', commit_message='Upload dataset', commit_description='', oid='6dbd11f524427d53439ab5882fbc8e0b369d8d56', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/immindich/reasoning-steps-labeled', endpoint='https://huggingface.co', repo_type='dataset', repo_id='immindich/reasoning-steps-labeled'), pr_revision=None, pr_num=None)