Merged
79 changes: 42 additions & 37 deletions examples/function_minimization/evaluator.py
@@ -5,8 +5,9 @@
import importlib.util
import numpy as np
import time
import multiprocessing
import concurrent.futures
import traceback
import signal


def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
@@ -22,31 +23,13 @@ def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
Returns:
Result of the function or raises TimeoutError
"""

def wrapper(queue, func, args, kwargs):
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(func, *args, **kwargs)
try:
result = func(*args, **kwargs)
queue.put(("success", result))
except Exception as e:
queue.put(("error", e))

queue = multiprocessing.Queue()
process = multiprocessing.Process(target=wrapper, args=(queue, func, args, kwargs))
process.start()
process.join(timeout=timeout_seconds)

if process.is_alive():
process.terminate()
process.join()
raise TimeoutError(f"Function timed out after {timeout_seconds} seconds")

if queue.empty():
raise TimeoutError("Function ended without returning a result")

status, result = queue.get()
if status == "error":
raise result
return result
result = future.result(timeout=timeout_seconds)
return result
except concurrent.futures.TimeoutError:
raise TimeoutError(f"Function timed out after {timeout_seconds} seconds")


def safe_float(value):
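Read as a single function, the replacement drops the multiprocessing.Queue/Process machinery in favor of one worker thread. A minimal reconstruction of the new run_with_timeout from the added lines above (docstring paraphrased; signature kept as in the diff):

import concurrent.futures  # already imported at the top of evaluator.py


def run_with_timeout(func, args=(), kwargs={}, timeout_seconds=5):
    """Run func with the given arguments, raising TimeoutError if it exceeds the deadline."""
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(func, *args, **kwargs)
        try:
            # future.result() raises concurrent.futures.TimeoutError once the deadline passes
            result = future.result(timeout=timeout_seconds)
            return result
        except concurrent.futures.TimeoutError:
            raise TimeoutError(f"Function timed out after {timeout_seconds} seconds")

One trade-off worth noting: unlike the old process-based version, a thread cannot be terminated, and the with block's implicit shutdown(wait=True) waits for the worker to finish, so a genuinely hung run_search can still block past the timeout.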
@@ -107,15 +90,27 @@ def evaluate(program_path):
# Run with timeout
result = run_with_timeout(program.run_search, timeout_seconds=5)

# Check if we got a tuple of 3 values
if not isinstance(result, tuple) or len(result) != 3:
# Handle different result formats
if isinstance(result, tuple):
if len(result) == 3:
x, y, value = result
elif len(result) == 2:
# Assume it's (x, y) and calculate value
x, y = result
# Calculate the function value since it wasn't returned
value = np.sin(x) * np.cos(y) + np.sin(x * y) + (x**2 + y**2) / 20
print(f"Trial {trial}: Got 2 values, calculated function value: {value}")
else:
print(
f"Trial {trial}: Invalid result format, expected tuple of 2 or 3 values but got {len(result)}"
)
continue
else:
print(
f"Trial {trial}: Invalid result format, expected tuple of 3 values but got {type(result)}"
f"Trial {trial}: Invalid result format, expected tuple but got {type(result)}"
)
continue

x, y, value = result

end_time = time.time()

# Ensure all values are float
@@ -264,15 +259,25 @@ def evaluate_stage1(program_path):
# Run a single trial with timeout
result = run_with_timeout(program.run_search, timeout_seconds=5)

# Check if we got a tuple of 3 values
if not isinstance(result, tuple) or len(result) != 3:
print(
f"Stage 1: Invalid result format, expected tuple of 3 values but got {type(result)}"
)
# Handle different result formats
if isinstance(result, tuple):
if len(result) == 3:
x, y, value = result
elif len(result) == 2:
# Assume it's (x, y) and calculate value
x, y = result
# Calculate the function value since it wasn't returned
value = np.sin(x) * np.cos(y) + np.sin(x * y) + (x**2 + y**2) / 20
print(f"Stage 1: Got 2 values, calculated function value: {value}")
else:
print(
f"Stage 1: Invalid result format, expected tuple of 2 or 3 values but got {len(result)}"
)
return {"runs_successfully": 0.0, "error": "Invalid result format"}
else:
print(f"Stage 1: Invalid result format, expected tuple but got {type(result)}")
return {"runs_successfully": 0.0, "error": "Invalid result format"}

x, y, value = result

# Ensure all values are float
x = safe_float(x)
y = safe_float(y)
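The 2-or-3-tuple normalization is duplicated between evaluate and evaluate_stage1 above. A small shared helper along these lines (hypothetical, not part of this PR) would capture the logic once:

import numpy as np


def normalize_search_result(result):
    """Return (x, y, value), computing value when run_search returns only (x, y)."""
    if not isinstance(result, tuple) or len(result) not in (2, 3):
        raise ValueError(f"Expected tuple of 2 or 3 values, got {result!r}")
    if len(result) == 3:
        return result
    x, y = result
    # Objective used by the example problem, matching the expression in the diff
    value = np.sin(x) * np.cos(y) + np.sin(x * y) + (x**2 + y**2) / 20
    return x, y, value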
1 change: 1 addition & 0 deletions examples/function_minimization/requirements.txt
@@ -0,0 +1 @@
scipy
24 changes: 10 additions & 14 deletions openevolve/controller.py
@@ -24,6 +24,10 @@
parse_evolve_blocks,
parse_full_rewrite,
)
from openevolve.utils.format_utils import (
format_metrics_safe,
format_improvement_safe,
)

logger = logging.getLogger(__name__)

@@ -304,9 +308,7 @@ async def run(
logger.info(
f"🌟 New best solution found at iteration {i+1}: {child_program.id}"
)
logger.info(
f"Metrics: {', '.join(f'{name}={value:.4f}' for name, value in child_program.metrics.items())}"
)
logger.info(f"Metrics: {format_metrics_safe(child_program.metrics)}")

# Save checkpoint
if (i + 1) % self.config.checkpoint_interval == 0:
@@ -361,7 +363,7 @@ async def run(
if best_program:
logger.info(
f"Evolution complete. Best program has metrics: "
f"{', '.join(f'{name}={value:.4f}' for name, value in best_program.metrics.items())}"
f"{format_metrics_safe(best_program.metrics)}"
)

# Save the best program (using our tracked best program)
@@ -389,19 +391,13 @@ def _log_iteration(
child: Child program
elapsed_time: Elapsed time in seconds
"""
# Calculate improvement
improvement = {}
for metric, value in child.metrics.items():
if metric in parent.metrics:
diff = value - parent.metrics[metric]
improvement[metric] = diff

improvement_str = ", ".join(f"{name}={diff:+.4f}" for name, diff in improvement.items())
# Calculate improvement using safe formatting
improvement_str = format_improvement_safe(parent.metrics, child.metrics)

logger.info(
f"Iteration {iteration+1}: Child {child.id} from parent {parent.id} "
f"in {elapsed_time:.2f}s. Metrics: "
f"{', '.join(f'{name}={value:.4f}' for name, value in child.metrics.items())} "
f"{format_metrics_safe(child.metrics)} "
f"(Δ: {improvement_str})"
)

@@ -457,7 +453,7 @@ def _save_checkpoint(self, iteration: int) -> None:

logger.info(
f"Saved best program at checkpoint {iteration} with metrics: "
f"{', '.join(f'{name}={value:.4f}' for name, value in best_program.metrics.items())}"
f"{format_metrics_safe(best_program.metrics)}"
)

logger.info(f"Saved checkpoint at iteration {iteration} to {checkpoint_path}")
31 changes: 13 additions & 18 deletions openevolve/database.py
@@ -15,6 +15,7 @@

from openevolve.config import DatabaseConfig
from openevolve.utils.code_utils import calculate_edit_distance
from openevolve.utils.metrics_utils import safe_numeric_average

logger = logging.getLogger(__name__)

@@ -227,10 +228,10 @@ def get_best_program(self, metric: Optional[str] = None) -> Optional[Program]:
if sorted_programs:
logger.debug(f"Found best program by combined_score: {sorted_programs[0].id}")
else:
# Sort by average of all metrics as fallback
# Sort by average of all numeric metrics as fallback
sorted_programs = sorted(
self.programs.values(),
key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)),
key=lambda p: safe_numeric_average(p.metrics),
reverse=True,
)
if sorted_programs:
@@ -281,10 +282,10 @@ def get_top_programs(self, n: int = 10, metric: Optional[str] = None) -> List[Program]:
reverse=True,
)
else:
# Sort by average of all metrics
# Sort by average of all numeric metrics
sorted_programs = sorted(
self.programs.values(),
key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)),
key=lambda p: safe_numeric_average(p.metrics),
reverse=True,
)

@@ -436,7 +437,7 @@ def _calculate_feature_coords(self, program: Program) -> List[int]:
if not program.metrics:
bin_idx = 0
else:
avg_score = sum(program.metrics.values()) / len(program.metrics)
avg_score = safe_numeric_average(program.metrics)
bin_idx = min(int(avg_score * self.feature_bins), self.feature_bins - 1)
coords.append(bin_idx)
elif dim in program.metrics:
@@ -487,9 +488,9 @@ def _is_better(self, program1: Program, program2: Program) -> bool:
if "combined_score" in program1.metrics and "combined_score" in program2.metrics:
return program1.metrics["combined_score"] > program2.metrics["combined_score"]

# Fallback to average of all metrics
avg1 = sum(program1.metrics.values()) / len(program1.metrics)
avg2 = sum(program2.metrics.values()) / len(program2.metrics)
# Fallback to average of all numeric metrics
avg1 = safe_numeric_average(program1.metrics)
avg2 = safe_numeric_average(program2.metrics)

return avg1 > avg2

@@ -507,9 +508,7 @@ def _update_archive(self, program: Program) -> None:

# Otherwise, find worst program in archive
archive_programs = [self.programs[pid] for pid in self.archive]
worst_program = min(
archive_programs, key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics))
)
worst_program = min(archive_programs, key=lambda p: safe_numeric_average(p.metrics))

# Replace if new program is better
if self._is_better(program, worst_program):
@@ -716,7 +715,7 @@ def _enforce_population_limit(self) -> None:
# Sort by average metric (worst first)
sorted_programs = sorted(
all_programs,
key=lambda p: sum(p.metrics.values()) / max(1, len(p.metrics)) if p.metrics else 0.0,
key=lambda p: safe_numeric_average(p.metrics),
)

# Remove worst programs, but never remove the best program
@@ -811,9 +810,7 @@ def migrate_programs(self) -> None:

# Sort by fitness (using combined_score or average metrics)
island_programs.sort(
key=lambda p: p.metrics.get(
"combined_score", sum(p.metrics.values()) / max(1, len(p.metrics))
),
key=lambda p: p.metrics.get("combined_score", safe_numeric_average(p.metrics)),
reverse=True,
)

@@ -858,9 +855,7 @@ def get_island_stats(self) -> List[dict]:

if island_programs:
scores = [
p.metrics.get(
"combined_score", sum(p.metrics.values()) / max(1, len(p.metrics))
)
p.metrics.get("combined_score", safe_numeric_average(p.metrics))
for p in island_programs
]
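safe_numeric_average (from openevolve/utils/metrics_utils.py) is likewise imported but not shown in this diff. A minimal sketch, assuming it averages only the numeric metric values and falls back to 0.0 when there are none:

def safe_numeric_average(metrics: dict) -> float:
    """Average the numeric metric values, ignoring non-numeric entries such as error strings."""
    numeric = [
        float(v)
        for v in metrics.values()
        if isinstance(v, (int, float)) and not isinstance(v, bool)
    ]
    return sum(numeric) / len(numeric) if numeric else 0.0

This presumably keeps the fallback ranking in get_best_program, _is_better, and _enforce_population_limit from raising a TypeError when an evaluator reports a string-valued metric.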

3 changes: 2 additions & 1 deletion openevolve/evaluator.py
@@ -18,6 +18,7 @@
from openevolve.config import EvaluatorConfig
from openevolve.llm.ensemble import LLMEnsemble
from openevolve.utils.async_utils import TaskPool, run_in_executor
from openevolve.utils.format_utils import format_metrics_safe

logger = logging.getLogger(__name__)

@@ -119,7 +120,7 @@ async def evaluate_program(
elapsed = time.time() - start_time
logger.info(
f"Evaluated program{program_id_str} in {elapsed:.2f}s: "
f"{', '.join(f'{name}={value:.4f}' for name, value in metrics.items())}"
f"{format_metrics_safe(metrics)}"
)

return metrics