# Example Usage of Evolving Instructions on Math/Coding/Alignment Data

In [35]:
import os
import logging

from camel.logger import enable_logging, set_log_level, get_logger
# Presumably tells CAMEL not to disable its logging -- TODO confirm how
# `camel.logger` reads this variable (it is set after the import above).
os.environ["CAMEL_LOGGING_DISABLED"] = "false" 

# Switch logging on and lower the threshold to INFO.
enable_logging()  
set_log_level(logging.INFO)  

# Notebook-specific logger; the recorded output below shows its messages are
# emitted under the `camel.evol-instruct` name.
logger = get_logger("evol-instruct")
logger.info("let's evolve some ideas.")

2025-03-08 15:54:43,029 - camel.evol-instruct - INFO - let's evolve some ideas.


In [2]:
import os
from getpass import getpass

# Reuse a key that is already exported in the environment so that
# "Restart Kernel -> Run All" does not block on an interactive prompt;
# only ask for the key when none is available (unset or empty).
openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass('Enter your API key: ')
os.environ["OPENAI_API_KEY"] = openai_api_key

## HuggingFace Dataset Preparation

In [None]:
from datasets import load_dataset  # !pip install datasets

# NOTE: ideally each ability (math / code / alignment) would get its own
# instruction template. For now we rely on the default instruction templates
# shipped under `camel/datagen/` (TODO confirm the exact path of `templates.py`).
dataset = load_dataset("PRIME-RL/Eurus-2-RL-Data")
# Split the Eurus training split by the value of its `ability` column.
coding_dataset = dataset["train"].filter(lambda example: example["ability"] == "code")
math_dataset = dataset["train"].filter(lambda example: example["ability"] == "math")
# The training split of UltraFeedback is the source of the alignment prompts.
alignment_dataset = load_dataset("openbmb/UltraFeedback")["train"]

def extract_prompts(
    dataset,
    prompt_field="prompt",
    role_field="role",
    content_field="content",
    user_role="user"
):
    """Collect the text of every user-authored message in a chat-style dataset.

    Args:
        dataset: Mapping-like object where ``dataset[prompt_field]`` is an
            iterable of conversations, each conversation being an iterable
            of message mappings.
        prompt_field: Key under which the conversations are stored.
        role_field: Key of a message that holds the speaker role.
        content_field: Key of a message that holds the message text.
        user_role: Role value identifying the messages to keep.

    Returns:
        A flat list with the content of every matching message, in dataset
        order (conversation by conversation, message by message).
    """
    collected = []
    for conversation in dataset[prompt_field]:
        for message in conversation:
            # Keep only the turns written by the requested role.
            if message[role_field] == user_role:
                collected.append(message[content_field])
    return collected

# Eurus stores chat-style prompts, so the user turns are pulled out with the helper.
coding_prompts = extract_prompts(coding_dataset)
math_prompts = extract_prompts(math_dataset)
# UltraFeedback is read directly from its `instruction` column.
alignment_prompts = alignment_dataset["instruction"]

In [26]:
# Draw a small, reproducible sample of 5 prompts per category for this example.
import random

random.seed(8964)  # for reproducibility

# The pools are consumed in the order coding -> math -> alignment so the seeded
# draws stay the same; each sample is ordered from shortest to longest prompt.
coding_prompts_list, math_prompts_list, alignment_prompts_list = (
    sorted(random.sample(pool, 5), key=len)
    for pool in (coding_prompts, math_prompts, alignment_prompts)
)

# Preview the shortest prompt of each category.
print(f'> ultrafeedback:\n{alignment_prompts_list[0]}\n')
print(f'> eurus-math:\n{math_prompts_list[0]}\n')
print(f'> eurus-coding:\n{coding_prompts_list[0]!r}\n')

> ultrafeedback:
List five of the essential components for making a good cup of coffee.

> eurus-math:
How far apart are the midpoints of two skew edges of a regular octahedron with edge length $a$?

Present the answer in LaTex format: \boxed{Your answer}

> eurus-coding:
'Read problem statements in [Mandarin], [Vietnamese], and [Russian] as well.\n\nChef loves Chess and has thus invented a new piece named "Disabled King".\n\nLet\'s denote the cell at the intersection of the i-th column from the left and j-th row from the top by (i, j).\n\nIf he is currently in cell (x,y), the disabled king can move to the following positions in one move (provided that he remains in the chessboard): \n\n(x,y+1)\n(x,y-1)\n(x+1,y+1)\n(x+1,y-1)\n(x-1,y+1)\n(x-1,y-1)\n\nIn short, the Disabled King cannot move horizontally.\n\nIn an N \\times N chessboard, the Disabled King is currently situated at the top-left corner (cell (1, 1)) and wants to reach the top-right corner (cell (N, 1)). Determine the minimum

## CAMEL Agent Preparation

In [None]:
from camel.models import ModelFactory
from camel.types import ModelPlatformType, ModelType
from camel.agents import ChatAgent

# Build the backing model: OpenAI platform, GPT-4o-mini model type, with the
# sampling settings given in `model_config_dict`.
model = ModelFactory.create(
    model_platform=ModelPlatformType.OPENAI,
    model_type=ModelType.GPT_4O_MINI,
    model_config_dict={
        "temperature": 0.7,
        "max_tokens": 2048,
    },
)

# System message handed to the agent as its first positional argument.
system_msg = "You are a creative agent for creating new prompts."

# The chat agent that the evolution pipeline below is constructed with.
agent = ChatAgent(system_msg, model=model)

## Pipeline for Evolving Instructions

In [None]:
from camel.datagen.evol_instruct import EvolInstructPipeline

# Single pipeline instance, driven by the agent above and reused by every example below.
pipeline = EvolInstructPipeline(agent=agent)

### Example 0.0: Single Prompt Evolving

In [39]:
# Raise the log threshold to CRITICAL so the example outputs below are not
# interleaved with INFO-level log lines.
set_log_level(logging.CRITICAL)

In [41]:
# Evolve the shortest sampled math prompt once.
prompt_single = math_prompts_list[0]

# NOTE(review): `_generate_single` is a private (underscore-prefixed) method
# of the pipeline; it is called directly here for demonstration.
result_single = pipeline._generate_single(
    prompt=prompt_single,
    method="in-depth",
    return_method=True,  # also return the method label, so the result unpacks into (prompt, method) below
)

# In the recorded output the returned label is a concrete strategy name
# (`expansion`), not the requested "in-depth" family.
prompt, method = result_single
print(f"--- Original ---\n{prompt_single}\n")
print(f"--- Method: `{method}` ---\n{prompt}")

--- Original ---
How far apart are the midpoints of two skew edges of a regular octahedron with edge length $a$?

Present the answer in LaTex format: \boxed{Your answer}

--- Method: `expansion` ---
In the context of geometric properties and spatial reasoning, determine the Euclidean distance between the midpoints of two non-parallel edges of a regular octahedron, considering an edge length denoted as $a$. Additionally, provide a comparative analysis by exploring how varying the edge length influences the distance between these midpoints, and present your findings in a structured format using LaTeX, specifically: \boxed{Your answer}.


### Example 0.1: Single Prompt Evolving -- Batch Generation

In [42]:
# Request two evolved variants of the same prompt in one call.
# NOTE(review): `_generate_multiple` is a private (underscore-prefixed) method.
results_multiple = pipeline._generate_multiple(
    prompt=prompt_single,
    method="in-depth",
    num_generations=2,
    keep_original=False,
)

# Each result unpacks into an (evolved prompt, method label) pair.
print(f"--- Original ---\n{prompt_single}\n")
for result in results_multiple:
    prompt, method = result
    print(f"--- Evolved by `{method}` ---\n{prompt}\n")

--- Original ---
How far apart are the midpoints of two skew edges of a regular octahedron with edge length $a$?

Present the answer in LaTex format: \boxed{Your answer}

--- Evolved by `constraints` ---
Determine the distance between the midpoints of two skew edges of a regular octahedron, given that the octahedron has an edge length denoted as $a$. However, assume that the edge length is subject to a scaling factor $k$, which alters the length to $ka$. Furthermore, provide a detailed explanation of the geometric relationships involved, and present the final answer in LaTeX format: \boxed{Your answer}.

--- Evolved by `concretizing` ---
What is the precise distance between the centroids of two non-intersecting edges of a regular octahedron characterized by an edge length of \( a \)? 

Please provide the solution in LaTeX format as follows: \boxed{Your answer}



### Example 0.2: Single Prompt Evolving -- Iterative Batch Generation

In [44]:
# Number of evolution iterations, and number of generations requested per iteration.
num_evolutions = 4
num_generations = 2

# Method to use at each iteration, keyed by iteration index.
# (Instead of a dict, a single string such as 'in-depth', 'in-breadth' or
# 'uniform' can be passed.)
method_dict = {0: 'in-breadth', 1: 'in-depth', 2: 'in-depth', 3: 'in-breadth'}
# Sanity check: exactly one method entry per evolution iteration.
assert len(method_dict) == num_evolutions

In [45]:
# Run the iterative evolution on the single prompt with the per-iteration
# methods configured above. The result is consumed below via `.items()`,
# i.e. it is a mapping from iteration to a list of results.
# NOTE(review): `_generate_iter` is a private (underscore-prefixed) method.
results_iter = pipeline._generate_iter(
    prompt=prompt_single,
    method=method_dict,
    num_evolutions=num_evolutions,
    num_generations=num_generations,
    keep_original=False,
)

In [46]:
# Print every evolved prompt, grouped by iteration.
# The loop variables deliberately avoid the names `iter` (which would rebind
# the builtin `iter()` for the rest of the notebook session) and
# `results` / `prompt` / `method` (which other cells use at notebook scope).
for iteration, iteration_results in results_iter.items():
    print(f"=========== Iteration {iteration} ===========")
    # Each entry is an (evolved prompt, method label) pair.
    for evolved_prompt, evolved_by in iteration_results:
        print(f"--- `{evolved_by}` ---\n{evolved_prompt}\n")

--- `shift-out` ---
What is the distance between the centers of two non-parallel faces of a regular dodecahedron with edge length \( b \)?

Present the answer in LaTeX format: \boxed{Your answer}

--- `mix` ---
What is the distance between the centers of two non-parallel sides of a triangular prism with a base edge length of $b$?

Present the answer in LaTex format: \boxed{Your answer}

--- `abstract` ---
Calculate the length separating the midpoints of two non-intersecting surfaces of a regular polyhedron defined by edge length \( x \). 

Present the answer in LaTeX format: \boxed{Your answer}

--- `shift-out` ---
What is the height of a cylindrical water tank with a radius \( r \) that is filled to a depth of \( d \)?

Present the answer in LaTeX format: \boxed{Your answer}

--- `abstract` ---
Determine the distance between the centroids of two non-overlapping geometric shapes, characterized by a uniform measurement \( y \). 

Express your result in LaTeX format: \boxed{Your answer}


### Example 1: Dataset Evolving -- Iterative Batch Generation

In [None]:
# use the math dataset as an example
prompts = math_prompts_list  # or prompts = json.load(open("input.json", "r", encoding="utf-8"))

# set the parameters
# NOTE(review): the `pipeline.generate(...)` call in the next cell passes
# `method_dict` (defined in Example 0.2, same value) instead of this `method`,
# and does not pass `scorer` at all -- confirm which is intended.
method = {0: 'in-breadth', 1: 'in-depth', 2: 'in-depth', 3: 'in-breadth'}  # method per iteration index
num_evolutions = 4  # number of evolution iterations
num_generations = 2  # generations requested per iteration
keep_original = True  # results may then include entries labelled "original" (filtered out when displayed)
scorer = "uniform"
num_chunks = 1
retry_limit = 3
retry_delay = 30  # presumably seconds -- TODO confirm against the pipeline

In [51]:
# Evolve every prompt in `prompts`; the result is iterated below as one
# mapping (iteration -> list of (prompt, method) pairs) per input prompt.
# NOTE(review): `method_dict` comes from the Example 0.2 cell, so this cell
# depends on that cell having been run; the `method` defined in the parameter
# cell above holds the same value -- confirm which one is intended here.
results = pipeline.generate(
    prompts=prompts,
    method=method_dict,
    num_generations=num_generations,
    num_evolutions=num_evolutions,
    keep_original=keep_original,
    num_chunks=num_chunks, 
    retry_limit=retry_limit,
    retry_delay=retry_delay,
)

In [53]:
# Show results
# The inner loop variables are named so that they do not overwrite the
# notebook-level `method` configuration dict defined in the parameter cell.
for idx, iter_results in enumerate(results):
    print(f"============ Results for Prompt {idx + 1} ============")
    # `results` is indexed in the same order as the input `prompts`.
    original_prompt = prompts[idx]
    print(f"Original Prompt:\n{original_prompt}\n")

    for i, generations in iter_results.items():
        print(f"------------ Iteration {i} ------------")
        for evolved_prompt, evolved_by in generations:
            # Entries labelled "original" are the kept originals, not evolutions.
            if evolved_by != "original":
                print(f"--- Method: {method_dict[i]} ({evolved_by}) ---\nEvolved Prompt: {evolved_prompt}\n")

    print("\n\n")

Original Prompt:
How far apart are the midpoints of two skew edges of a regular octahedron with edge length $a$?

Present the answer in LaTex format: \boxed{Your answer}

------------ Iteration 0 ------------
--- Method: in-breadth (abstract) ---
Evolved Prompt: Determine the distance between the centroids of two non-parallel edges in a three-dimensional polyhedron with equal edge lengths. 

Present the answer in LaTeX format: \boxed{Your answer}

--- Method: in-breadth (persona) ---
Evolved Prompt: In a geometric puzzle involving a regular octahedron with each edge measuring $a$, can you calculate the distance between the midpoints of two non-intersecting edges? Please provide your solution in LaTeX format: \boxed{Your answer}

------------ Iteration 1 ------------
--- Method: in-depth (shift-in) ---
Evolved Prompt: In a geometric challenge featuring a regular tetrahedron with each edge measuring $b$, can you determine the length of the line segment that connects the centroids of two 

In [None]:
# save the results to a json file
import json

def save_results_to_json(results, filename="results.json"):
    """Write `results` to `filename` as pretty-printed UTF-8 JSON.

    Nesting is indented by four spaces and non-ASCII characters are written
    verbatim rather than escaped. The file is opened in write mode, so any
    existing content is replaced.
    """
    serializer_options = {"indent": 4, "ensure_ascii": False}
    with open(filename, "w", encoding="utf-8") as out_fh:
        json.dump(results, out_fh, **serializer_options)

# Persist the evolved prompts using the default file name (`results.json`).
save_results_to_json(results)