From 549b39c299ef8b7e5b2114ccdc3ade761c8c0981 Mon Sep 17 00:00:00 2001 From: Julian Date: Mon, 9 Jun 2025 22:08:39 +0200 Subject: [PATCH 01/13] _llm_evaluate returns artifacts if LLM returned string responses --- openevolve/evaluator.py | 43 ++++++++++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 11 deletions(-) diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py index e57b01224..aea2f4069 100644 --- a/openevolve/evaluator.py +++ b/openevolve/evaluator.py @@ -125,16 +125,32 @@ async def evaluate_program( eval_result = self._process_evaluation_result(result) # Add LLM feedback if configured + llm_eval_result = None if self.config.use_llm_feedback and self.llm_ensemble: - feedback_metrics = await self._llm_evaluate(program_code) + llm_result = await self._llm_evaluate(program_code) + llm_eval_result = self._process_evaluation_result(llm_result) # Combine metrics - for name, value in feedback_metrics.items(): + for name, value in llm_result.metrics.items(): eval_result.metrics[f"llm_{name}"] = value * self.config.llm_feedback_weight # Store artifacts if enabled and present - if artifacts_enabled and eval_result.has_artifacts() and program_id: - self._pending_artifacts[program_id] = eval_result.artifacts + if ( + artifacts_enabled + and ( + eval_result.has_artifacts() + or (llm_eval_result and llm_eval_result.has_artifacts()) + ) + and program_id + ): + self._pending_artifacts[program_id] = {} + + # Merge eval_result artifacts with llm artifacts if they exist + if eval_result.has_artifacts(): + self._pending_artifacts[program_id].update(eval_result.artifacts) + + if llm_eval_result and llm_eval_result.has_artifacts(): + self._pending_artifacts[program_id].update(llm_eval_result.artifacts) elapsed = time.time() - start_time logger.info( @@ -396,6 +412,7 @@ async def _llm_evaluate(self, program_code: str) -> Dict[str, float]: json_pattern = r"```json\n(.*?)\n```" import re + artifacts = {} avg_metrics = {} for i, response in enumerate(responses): json_match = re.search(json_pattern, response, re.DOTALL) @@ -414,12 +431,13 @@ async def _llm_evaluate(self, program_code: str) -> Dict[str, float]: # Parse JSON result = json.loads(json_str) - # Filter all non-numeric values - metrics = { - name: float(value) - for name, value in result.items() - if isinstance(value, (int, float)) - } + # All non-numeric values are artifacts, all numeric values are metrics + metrics = {} + for key, value in result.items(): + if not isinstance(value, (int, float)): + artifacts[key] = value + else: + metrics[key] = float(value) # Weight of the model in the ensemble weight = self.llm_ensemble.weights[i] if self.llm_ensemble.weights else 1.0 @@ -431,7 +449,10 @@ async def _llm_evaluate(self, program_code: str) -> Dict[str, float]: else: avg_metrics[name] = value * weight - return avg_metrics + return EvaluationResult( + metrics=avg_metrics, + artifacts=artifacts, + ) except Exception as e: logger.warning(f"Error parsing LLM response: {str(e)}") From ef07099a687926b5e5bd490f33dc4ceb638ef4a2 Mon Sep 17 00:00:00 2001 From: Julian Date: Sat, 14 Jun 2025 20:19:52 +0200 Subject: [PATCH 02/13] README formatting fix, added LLM feedback description --- README.md | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index beb8b58c5..ab7f46ff5 100644 --- a/README.md +++ b/README.md @@ -185,7 +185,7 @@ See the [Configuration Guide](configs/default_config.yaml) for a full list of op ## Artifacts Channel -OpenEvolve includes a **artifacts 
side-channel** that allows evaluators to capture build errors, profiling results, etc. to provide better feedback to the LLM in subsequent generations. This feature enhances the evolution process by giving the LLM context about what went wrong and how to fix it. +OpenEvolve includes an **artifacts side-channel** that allows evaluators to capture build errors, profiling results, etc. to provide better feedback to the LLM in subsequent generations. This feature enhances the evolution process by giving the LLM context about what went wrong and how to fix it. The artifacts channel operates alongside the traditional fitness metrics. @@ -205,17 +205,28 @@ return EvaluationResult( ``` The next generation prompt will include: -``` +```markdown ## Last Execution Output ### Stderr -``` SyntaxError: invalid syntax (line 15) -``` + ### Traceback -``` ... ``` + +## Example: LLM Feedback + +An example for an LLM artifact side channel is part of the default evaluation template, which ends with +```markdown +Return your evaluation as a JSON object with the following format: +{{ + "readability": [score], + "maintainability": [score], + "efficiency": [score], + "reasoning": "[brief explanation of scores]" +}} ``` +The non-float values, in this case the "reasoning" key of the json response that the evaluator LLM generates, will be available within the next generation prompt. ### Configuration @@ -240,7 +251,7 @@ export ENABLE_ARTIFACTS=false ### Benefits - **Faster convergence** - LLMs can see what went wrong and fix it directly -- **Better error handling** - Compilation and runtime failures become learning opportunities +- **Better error handling** - Compilation and runtime failures become learning opportunities - **Rich debugging context** - Full stack traces and error messages guide improvements - **Zero overhead** - When disabled, no performance impact on evaluation From 0490d6e7da4e59050d6f482723b669a181e42743 Mon Sep 17 00:00:00 2001 From: Julian Date: Sun, 8 Jun 2025 20:29:58 +0200 Subject: [PATCH 03/13] Prompt logging to database added --- configs/default_config.yaml | 1 + openevolve/config.py | 4 +++ openevolve/controller.py | 25 +++++++++++++++- openevolve/database.py | 58 +++++++++++++++++++++++++++++++++++-- openevolve/evaluator.py | 27 +++++++++++++++-- 5 files changed, 109 insertions(+), 6 deletions(-) diff --git a/configs/default_config.yaml b/configs/default_config.yaml index 22f086b81..26168f84c 100644 --- a/configs/default_config.yaml +++ b/configs/default_config.yaml @@ -71,6 +71,7 @@ database: # General settings db_path: null # Path to persist database (null = in-memory only) in_memory: true # Keep database in memory for faster access + log_prompts: true # If true, log all prompts and responses into the database # Evolutionary parameters population_size: 1000 # Maximum number of programs to keep in memory diff --git a/openevolve/config.py b/openevolve/config.py index f8f1d8d77..56aff0415 100644 --- a/openevolve/config.py +++ b/openevolve/config.py @@ -142,6 +142,9 @@ class DatabaseConfig: db_path: Optional[str] = None # Path to store database on disk in_memory: bool = True + # Prompt and response logging to programs/.json + log_prompts: bool = True + # Evolutionary parameters population_size: int = 1000 archive_size: int = 100 @@ -308,6 +311,7 @@ def to_dict(self) -> Dict[str, Any]: "migration_interval": self.database.migration_interval, "migration_rate": self.database.migration_rate, "random_seed": self.database.random_seed, + "log_prompts": self.database.log_prompts, }, "evaluator": { 
"timeout": self.evaluator.timeout, diff --git a/openevolve/controller.py b/openevolve/controller.py index 84e1683d0..135863f84 100644 --- a/openevolve/controller.py +++ b/openevolve/controller.py @@ -9,7 +9,8 @@ import time import uuid from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import List, Optional, Tuple, Union +import traceback from openevolve.config import Config, load_config from openevolve.database import Program, ProgramDatabase @@ -114,6 +115,7 @@ def __init__( evaluation_file, self.llm_evaluator_ensemble, self.evaluator_prompt_sampler, + database=self.database, ) logger.info(f"Initialized OpenEvolve with {initial_program_path} " f"and {evaluation_file}") @@ -307,10 +309,30 @@ async def run( # Add to database (will be added to current island) self.database.add(child_program, iteration=i + 1) + # Log prompts + self.database.log_prompt( + template_key=( + "full_rewrite_user" if self.config.allow_full_rewrites else "diff_user" + ), + program_id=child_id, + prompt=prompt, + responses=[llm_response], + ) + # Store artifacts if they exist if artifacts: self.database.store_artifacts(child_id, artifacts) + # Log prompts + self.database.log_prompt( + template_key=( + "full_rewrite_user" if self.config.allow_full_rewrites else "diff_user" + ), + program_id=child_id, + prompt=prompt, + responses=[llm_response], + ) + # Increment generation for current island self.database.increment_island_generation() @@ -347,6 +369,7 @@ async def run( except Exception as e: logger.error(f"Error in iteration {i+1}: {str(e)}") + traceback.print_exc() continue # Get the best program using our tracking mechanism diff --git a/openevolve/database.py b/openevolve/database.py index 48527c384..c791edf1a 100644 --- a/openevolve/database.py +++ b/openevolve/database.py @@ -104,6 +104,9 @@ def __init__(self, config: DatabaseConfig): if config.db_path and os.path.exists(config.db_path): self.load(config.db_path) + # Prompt log + self.prompts_by_program: Dict[str, Dict[str, Dict[str, str]]] = None + # Set random seed for reproducible sampling if specified if config.random_seed is not None: import random @@ -314,7 +317,14 @@ def save(self, path: Optional[str] = None, iteration: int = 0) -> None: # Save each program for program in self.programs.values(): - self._save_program(program, save_path) + prompts = None + if ( + self.config.log_prompts + and self.prompts_by_program + and program.id in self.prompts_by_program + ): + prompts = self.prompts_by_program[program.id] + self._save_program(program, save_path, prompts=prompts) # Save metadata metadata = { @@ -382,13 +392,19 @@ def load(self, path: str) -> None: logger.info(f"Loaded database with {len(self.programs)} programs from {path}") - def _save_program(self, program: Program, base_path: Optional[str] = None) -> None: + def _save_program( + self, + program: Program, + base_path: Optional[str] = None, + prompts: Optional[Dict[str, Dict[str, str]]] = None, + ) -> None: """ Save a program to disk Args: program: Program to save base_path: Base path to save to (uses config.db_path if None) + prompts: Optional prompts to save with the program, in the format {template_key: { 'system': str, 'user': str }} """ save_path = base_path or self.config.db_path if not save_path: @@ -399,9 +415,13 @@ def _save_program(self, program: Program, base_path: Optional[str] = None) -> No os.makedirs(programs_dir, exist_ok=True) # Save program + program_dict = program.to_dict() + if prompts: + program_dict["prompts"] = prompts program_path = 
os.path.join(programs_dir, f"{program.id}.json") + with open(program_path, "w") as f: - json.dump(program.to_dict(), f) + json.dump(program_dict, f) def _calculate_feature_coords(self, program: Program) -> List[int]: """ @@ -1079,3 +1099,35 @@ def _load_artifact_dir(self, artifact_dir: str) -> Dict[str, Union[str, bytes]]: logger.warning(f"Failed to list artifact directory {artifact_dir}: {e}") return artifacts + + def log_prompt( + self, + program_id: str, + template_key: str, + prompt: Dict[str, str], + responses: Optional[List[str]] = None, + ) -> None: + """ + Log a prompt for a program. + Only logs if self.config.log_prompts is True. + + Args: + program_id: ID of the program to log the prompt for + template_key: Key for the prompt template + prompt: Prompts in the format {template_key: { 'system': str, 'user': str }}. + responses: Optional list of responses to the prompt, if available. + """ + + if not self.config.log_prompts: + return + + if responses is None: + responses = [] + prompt["responses"] = responses + + if self.prompts_by_program is None: + self.prompts_by_program = {} + + if program_id not in self.prompts_by_program: + self.prompts_by_program[program_id] = {} + self.prompts_by_program[program_id][template_key] = prompt diff --git a/openevolve/evaluator.py b/openevolve/evaluator.py index aea2f4069..db65b3111 100644 --- a/openevolve/evaluator.py +++ b/openevolve/evaluator.py @@ -18,7 +18,9 @@ import traceback from openevolve.config import EvaluatorConfig +from openevolve.database import ProgramDatabase from openevolve.evaluation_result import EvaluationResult +from openevolve.database import ProgramDatabase from openevolve.llm.ensemble import LLMEnsemble from openevolve.utils.async_utils import TaskPool, run_in_executor from openevolve.prompt.sampler import PromptSampler @@ -41,11 +43,13 @@ def __init__( evaluation_file: str, llm_ensemble: Optional[LLMEnsemble] = None, prompt_sampler: Optional[PromptSampler] = None, + database: Optional[ProgramDatabase] = None, ): self.config = config self.evaluation_file = evaluation_file self.llm_ensemble = llm_ensemble self.prompt_sampler = prompt_sampler + self.database = database # Create a task pool for parallel evaluation self.task_pool = TaskPool(max_concurrency=config.parallel_evaluations) @@ -127,7 +131,7 @@ async def evaluate_program( # Add LLM feedback if configured llm_eval_result = None if self.config.use_llm_feedback and self.llm_ensemble: - llm_result = await self._llm_evaluate(program_code) + llm_result = await self._llm_evaluate(program_code, program_id=program_id) llm_eval_result = self._process_evaluation_result(llm_result) # Combine metrics @@ -148,9 +152,17 @@ async def evaluate_program( # Merge eval_result artifacts with llm artifacts if they exist if eval_result.has_artifacts(): self._pending_artifacts[program_id].update(eval_result.artifacts) + logger.debug( + f"Program{program_id_str} returned artifacts: " + f"{eval_result.artifacts}" + ) if llm_eval_result and llm_eval_result.has_artifacts(): self._pending_artifacts[program_id].update(llm_eval_result.artifacts) + logger.debug( + f"Program{program_id_str} returned LLM artifacts: " + f"{llm_eval_result.artifacts}" + ) elapsed = time.time() - start_time logger.info( @@ -166,6 +178,7 @@ async def evaluate_program( logger.warning( f"Evaluation attempt {attempt + 1}/{self.config.max_retries + 1} failed for program{program_id_str}: {str(e)}" ) + traceback.print_exc() # Capture failure artifacts if enabled if artifacts_enabled and program_id: @@ -382,12 +395,13 @@ async 
def _cascade_evaluate( }, ) - async def _llm_evaluate(self, program_code: str) -> Dict[str, float]: + async def _llm_evaluate(self, program_code: str, program_id: str = "") -> Dict[str, float]: """ Use LLM to evaluate code quality Args: program_code: Code to evaluate + program_id: Optional ID for logging Returns: Dictionary of metric name to score @@ -406,6 +420,15 @@ async def _llm_evaluate(self, program_code: str) -> Dict[str, float]: prompt["system"], [{"role": "user", "content": prompt["user"]}] ) + # Log prompt and response to database + if self.database and program_id: + self.database.log_prompt( + program_id=program_id, + template_key="evaluation", + prompt=prompt, + responses=responses, + ) + # Extract JSON from response try: # Try to find JSON block From f25c3cef6e965b0c3871cc2b7a1fc163795c62f1 Mon Sep 17 00:00:00 2001 From: Julian Date: Mon, 9 Jun 2025 18:03:03 +0200 Subject: [PATCH 04/13] visualizer UI: Fixed handling of prompt dicts, added select box for picking the prompt of interest --- scripts/static/js/main.js | 9 ++++-- scripts/static/js/sidebar.js | 57 ++++++++++++++++++++++++++++++++++-- 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/scripts/static/js/main.js b/scripts/static/js/main.js index b06961e2b..7ffd50559 100644 --- a/scripts/static/js/main.js +++ b/scripts/static/js/main.js @@ -83,8 +83,8 @@ function loadAndRenderData(data) { renderNodeList(data.nodes); document.getElementById('checkpoint-label').textContent = "Checkpoint: " + (data.checkpoint_dir || 'static export'); - // Populate metric-select options const metricSelect = document.getElementById('metric-select'); + const prevMetric = metricSelect.value || localStorage.getItem('selectedMetric') || null; metricSelect.innerHTML = ''; const metrics = new Set(); data.nodes.forEach(node => { @@ -98,9 +98,14 @@ function loadAndRenderData(data) { option.textContent = metric; metricSelect.appendChild(option); }); - if (metricSelect.options.length > 0) { + if (prevMetric && metrics.has(prevMetric)) { + metricSelect.value = prevMetric; + } else if (metricSelect.options.length > 0) { metricSelect.selectedIndex = 0; } + metricSelect.addEventListener('change', function() { + localStorage.setItem('selectedMetric', metricSelect.value); + }); } if (window.STATIC_DATA) { diff --git a/scripts/static/js/sidebar.js b/scripts/static/js/sidebar.js index da9f6ca2b..a9cc536c4 100644 --- a/scripts/static/js/sidebar.js +++ b/scripts/static/js/sidebar.js @@ -43,11 +43,36 @@ export function showSidebarContent(d, fromHover = false) { return ``; } if (tabName === 'Prompts') { - let html = ''; + // --- Prompt select logic --- + let promptOptions = []; + let promptMap = {}; for (const [k, v] of Object.entries(d.prompts)) { - html += `
${k}:
`; + if (v && typeof v === 'object' && !Array.isArray(v)) { + for (const [subKey, subVal] of Object.entries(v)) { + const optLabel = `${k} - ${subKey}`; + promptOptions.push(optLabel); + promptMap[optLabel] = subVal; + } + } else { + const optLabel = `${k}`; + promptOptions.push(optLabel); + promptMap[optLabel] = v; + } + } + // Get last selected prompt from localStorage, or default to first + let lastPromptKey = localStorage.getItem('sidebarPromptSelect') || promptOptions[0] || ''; + if (!promptMap[lastPromptKey]) lastPromptKey = promptOptions[0] || ''; + // Build select box + let selectHtml = ''; + if (promptOptions.length > 1) { + selectHtml = ``; } - return html; + // Show only the selected prompt + let promptVal = promptMap[lastPromptKey]; + let promptHtml = ``; + return selectHtml + promptHtml; } if (tabName === 'Children') { const metric = (document.getElementById('metric-select') && document.getElementById('metric-select').value) || 'combined_score'; @@ -103,6 +128,20 @@ export function showSidebarContent(d, fromHover = false) { lastSidebarTab = tabName; const tabContent = document.getElementById('sidebar-tab-content'); tabContent.innerHTML = renderSidebarTabContent(tabName, d, children); + // Add prompt select event if Prompts tab + if (tabName === 'Prompts') { + const promptSelect = document.getElementById('sidebar-prompt-select'); + if (promptSelect) { + promptSelect.onchange = function() { + localStorage.setItem('sidebarPromptSelect', promptSelect.value); + // Re-render Prompts tab with new selection + tabContent.innerHTML = renderSidebarTabContent('Prompts', d, children); + // Re-attach event + const newPromptSelect = document.getElementById('sidebar-prompt-select'); + if (newPromptSelect) newPromptSelect.onchange = promptSelect.onchange; + }; + } + } setTimeout(() => { document.querySelectorAll('.child-link').forEach(link => { link.onclick = function(e) { @@ -128,6 +167,18 @@ export function showSidebarContent(d, fromHover = false) { }); } setTimeout(() => { + const promptSelect = document.getElementById('sidebar-prompt-select'); + if (promptSelect) { + promptSelect.onchange = function() { + localStorage.setItem('sidebarPromptSelect', promptSelect.value); + // Re-render Prompts tab with new selection + const tabContent = document.getElementById('sidebar-tab-content'); + tabContent.innerHTML = renderSidebarTabContent('Prompts', d, children); + // Re-attach event + const newPromptSelect = document.getElementById('sidebar-prompt-select'); + if (newPromptSelect) newPromptSelect.onchange = promptSelect.onchange; + }; + } document.querySelectorAll('.child-link').forEach(link => { link.onclick = function(e) { e.preventDefault(); From 07e3e9f9c89fad97e6cb5c603b7f70917728ba16 Mon Sep 17 00:00:00 2001 From: Julian Date: Fri, 13 Jun 2025 16:46:18 +0200 Subject: [PATCH 05/13] Show artifacts_json in sidebar and in /program/ pages --- scripts/static/js/sidebar.js | 73 +++++++++++++++-------------- scripts/templates/program_page.html | 4 ++ scripts/visualizer.py | 6 ++- 3 files changed, 48 insertions(+), 35 deletions(-) diff --git a/scripts/static/js/sidebar.js b/scripts/static/js/sidebar.js index a9cc536c4..f4ee8fdcb 100644 --- a/scripts/static/js/sidebar.js +++ b/scripts/static/js/sidebar.js @@ -32,7 +32,7 @@ export function showSidebarContent(d, fromHover = false) { let tabContentHtml = ''; let tabNames = []; if (d.code && typeof d.code === 'string' && d.code.trim() !== '') tabNames.push('Code'); - if (d.prompts && typeof d.prompts === 'object' && Object.keys(d.prompts).length > 0) 
tabNames.push('Prompts'); + if ((d.prompts && typeof d.prompts === 'object' && Object.keys(d.prompts).length > 0) || (d.artifacts_json && typeof d.artifacts_json === 'object' && Object.keys(d.artifacts_json).length > 0)) tabNames.push('Prompts'); const children = allNodeData.filter(n => n.parent_id === d.id); if (children.length > 0) tabNames.push('Children'); let activeTab = lastSidebarTab && tabNames.includes(lastSidebarTab) ? lastSidebarTab : tabNames[0]; @@ -46,19 +46,28 @@ export function showSidebarContent(d, fromHover = false) { // --- Prompt select logic --- let promptOptions = []; let promptMap = {}; - for (const [k, v] of Object.entries(d.prompts)) { - if (v && typeof v === 'object' && !Array.isArray(v)) { - for (const [subKey, subVal] of Object.entries(v)) { - const optLabel = `${k} - ${subKey}`; + // Prompts + if (d.prompts && typeof d.prompts === 'object') { + for (const [k, v] of Object.entries(d.prompts)) { + if (v && typeof v === 'object' && !Array.isArray(v)) { + for (const [subKey, subVal] of Object.entries(v)) { + const optLabel = `${k} - ${subKey}`; + promptOptions.push(optLabel); + promptMap[optLabel] = subVal; + } + } else { + const optLabel = `${k}`; promptOptions.push(optLabel); - promptMap[optLabel] = subVal; + promptMap[optLabel] = v; } - } else { - const optLabel = `${k}`; - promptOptions.push(optLabel); - promptMap[optLabel] = v; } } + // Artifacts + if (d.artifacts_json) { + const optLabel = `artifacts`; + promptOptions.push(optLabel); + promptMap[optLabel] = d.artifacts_json; + } // Get last selected prompt from localStorage, or default to first let lastPromptKey = localStorage.getItem('sidebarPromptSelect') || promptOptions[0] || ''; if (!promptMap[lastPromptKey]) lastPromptKey = promptOptions[0] || ''; @@ -118,6 +127,24 @@ export function showSidebarContent(d, fromHover = false) { Metrics:
${formatMetrics(d.metrics)}

${tabHtml}${tabContentHtml} `; + + // Helper to attach prompt select handler + function attachPromptSelectHandler() { + const promptSelect = document.getElementById('sidebar-prompt-select'); + if (promptSelect) { + promptSelect.onchange = function() { + localStorage.setItem('sidebarPromptSelect', promptSelect.value); + // Only re-render the Prompts tab, not the whole sidebar + const tabContent = document.getElementById('sidebar-tab-content'); + if (tabContent) { + tabContent.innerHTML = renderSidebarTabContent('Prompts', d, children); + attachPromptSelectHandler(); + } + }; + } + } + attachPromptSelectHandler(); + if (tabNames.length > 1) { const tabBar = document.getElementById('sidebar-tab-bar'); Array.from(tabBar.children).forEach(tabEl => { @@ -128,19 +155,8 @@ export function showSidebarContent(d, fromHover = false) { lastSidebarTab = tabName; const tabContent = document.getElementById('sidebar-tab-content'); tabContent.innerHTML = renderSidebarTabContent(tabName, d, children); - // Add prompt select event if Prompts tab if (tabName === 'Prompts') { - const promptSelect = document.getElementById('sidebar-prompt-select'); - if (promptSelect) { - promptSelect.onchange = function() { - localStorage.setItem('sidebarPromptSelect', promptSelect.value); - // Re-render Prompts tab with new selection - tabContent.innerHTML = renderSidebarTabContent('Prompts', d, children); - // Re-attach event - const newPromptSelect = document.getElementById('sidebar-prompt-select'); - if (newPromptSelect) newPromptSelect.onchange = promptSelect.onchange; - }; - } + attachPromptSelectHandler(); } setTimeout(() => { document.querySelectorAll('.child-link').forEach(link => { @@ -167,18 +183,7 @@ export function showSidebarContent(d, fromHover = false) { }); } setTimeout(() => { - const promptSelect = document.getElementById('sidebar-prompt-select'); - if (promptSelect) { - promptSelect.onchange = function() { - localStorage.setItem('sidebarPromptSelect', promptSelect.value); - // Re-render Prompts tab with new selection - const tabContent = document.getElementById('sidebar-tab-content'); - tabContent.innerHTML = renderSidebarTabContent('Prompts', d, children); - // Re-attach event - const newPromptSelect = document.getElementById('sidebar-prompt-select'); - if (newPromptSelect) newPromptSelect.onchange = promptSelect.onchange; - }; - } + attachPromptSelectHandler(); document.querySelectorAll('.child-link').forEach(link => { link.onclick = function(e) { e.preventDefault(); diff --git a/scripts/templates/program_page.html b/scripts/templates/program_page.html index 2c5d70ffa..815883894 100644 --- a/scripts/templates/program_page.html +++ b/scripts/templates/program_page.html @@ -31,5 +31,9 @@

Prompts:

  • {{ key }}:
    {{ value }}
  • {% endfor %}
+    {% if artifacts_json %}
+    Artifacts:
+    {{ artifacts_json }}
    + {% endif %} \ No newline at end of file diff --git a/scripts/visualizer.py b/scripts/visualizer.py index 081f8efc9..7314e0c41 100644 --- a/scripts/visualizer.py +++ b/scripts/visualizer.py @@ -96,9 +96,13 @@ def program_page(program_id): data = load_evolution_data(checkpoint_dir) program_data = next((p for p in data["nodes"] if p["id"] == program_id), None) program_data = {"code": "", "prompts": {}, **program_data} + artifacts_json = program_data.get("artifacts_json", None) return render_template( - "program_page.html", program_data=program_data, checkpoint_dir=checkpoint_dir + "program_page.html", + program_data=program_data, + checkpoint_dir=checkpoint_dir, + artifacts_json=artifacts_json, ) From c219d4bb785bd0d6a6c6326478a6ea2da579c280 Mon Sep 17 00:00:00 2001 From: Julian Date: Fri, 13 Jun 2025 15:15:04 +0200 Subject: [PATCH 06/13] Page scroll lock on graph pages --- scripts/static/js/mainUI.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/static/js/mainUI.js b/scripts/static/js/mainUI.js index 8f652f892..79f05944c 100644 --- a/scripts/static/js/mainUI.js +++ b/scripts/static/js/mainUI.js @@ -46,6 +46,12 @@ tabs.forEach(tab => { showSidebarContent(window._lastSelectedNodeData || null); } } + // Disable page scroll for graph tabs + if (tab === 'branching' || tab === 'performance') { + document.body.style.overflow = 'hidden'; + } else { + document.body.style.overflow = ''; + } }); }); From db847a896a9264eae34844dc985dedc84ba18b03 Mon Sep 17 00:00:00 2001 From: Julian Date: Sat, 14 Jun 2025 05:06:25 +0200 Subject: [PATCH 07/13] Makefile help updated --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index 80d73d0cc..cb52e8e19 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,7 @@ help: @echo " test - Run tests" @echo " docker-build - Build the Docker image" @echo " docker-run - Run the Docker container with the example" + @echo " visualizer - Run the visualization script" .PHONY: all all: install test From 4dba30dbfd7e9a9ffec7b80b248649d7e52a1afc Mon Sep 17 00:00:00 2001 From: Julian Date: Mon, 16 Jun 2025 03:32:06 +0200 Subject: [PATCH 08/13] UI bug fix: Performance graph did not automatically refresh during live data update --- scripts/static/js/main.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/static/js/main.js b/scripts/static/js/main.js index 7ffd50559..46f09abbb 100644 --- a/scripts/static/js/main.js +++ b/scripts/static/js/main.js @@ -106,6 +106,13 @@ function loadAndRenderData(data) { metricSelect.addEventListener('change', function() { localStorage.setItem('selectedMetric', metricSelect.value); }); + const perfTab = document.getElementById('tab-performance'); + const perfView = document.getElementById('view-performance'); + if (perfTab && perfView && (perfTab.classList.contains('active') || perfView.style.display !== 'none')) { + if (window.updatePerformanceGraph) { + window.updatePerformanceGraph(data.nodes); + } + } } if (window.STATIC_DATA) { From 244708c18f899769a47d86943883c0d40fee109f Mon Sep 17 00:00:00 2001 From: Julian Date: Mon, 16 Jun 2025 03:33:22 +0200 Subject: [PATCH 09/13] UI bug fix: Graceful handling of migrated programs -- different artificial program IDs are assigned, new Clones tab shows relations --- scripts/static/js/sidebar.js | 54 ++++++++++++++++++++++++++++++++++++ scripts/visualizer.py | 18 ++++++++++++ 2 files changed, 72 insertions(+) diff --git a/scripts/static/js/sidebar.js b/scripts/static/js/sidebar.js index f4ee8fdcb..b362f769c 100644 --- a/scripts/static/js/sidebar.js 
+++ b/scripts/static/js/sidebar.js @@ -35,6 +35,15 @@ export function showSidebarContent(d, fromHover = false) { if ((d.prompts && typeof d.prompts === 'object' && Object.keys(d.prompts).length > 0) || (d.artifacts_json && typeof d.artifacts_json === 'object' && Object.keys(d.artifacts_json).length > 0)) tabNames.push('Prompts'); const children = allNodeData.filter(n => n.parent_id === d.id); if (children.length > 0) tabNames.push('Children'); + + // --- CLONES TAB LOGIC --- + function getBaseId(id) { + return id.includes('-copy') ? id.split('-copy')[0] : id; + } + const baseId = getBaseId(d.id); + const clones = allNodeData.filter(n => getBaseId(n.id) === baseId && n.id !== d.id); + if (clones.length > 0) tabNames.push('Clones'); + let activeTab = lastSidebarTab && tabNames.includes(lastSidebarTab) ? lastSidebarTab : tabNames[0]; // Helper to render tab content @@ -99,6 +108,13 @@ export function showSidebarContent(d, fromHover = false) { }).join('') + ``; } + if (tabName === 'Clones') { + return `
` +
+                clones.map(clone =>
+                    `• ${clone.id}• `
+                ).join('') +
+                `
    `; + } return ''; } @@ -178,6 +194,25 @@ export function showSidebarContent(d, fromHover = false) { } }; }); + document.querySelectorAll('.clone-link').forEach(link => { + link.onclick = function(e) { + e.preventDefault(); + const cloneNode = allNodeData.find(n => n.id == link.dataset.clone); + if (cloneNode) { + window._lastSelectedNodeData = cloneNode; + const perfTabBtn = document.getElementById('tab-performance'); + const perfTabView = document.getElementById('view-performance'); + if ((perfTabBtn && perfTabBtn.classList.contains('active')) || (perfTabView && perfTabView.classList.contains('active'))) { + import('./performance.js').then(mod => { + mod.selectPerformanceNodeById(cloneNode.id); + showSidebar(); + }); + } else { + scrollAndSelectNodeById(cloneNode.id); + } + } + }; + }); }, 0); }; }); @@ -204,6 +239,25 @@ export function showSidebarContent(d, fromHover = false) { } }; }); + document.querySelectorAll('.clone-link').forEach(link => { + link.onclick = function(e) { + e.preventDefault(); + const cloneNode = allNodeData.find(n => n.id == link.dataset.clone); + if (cloneNode) { + window._lastSelectedNodeData = cloneNode; + const perfTabBtn = document.getElementById('tab-performance'); + const perfTabView = document.getElementById('view-performance'); + if ((perfTabBtn && perfTabBtn.classList.contains('active')) || (perfTabView && perfTabView.classList.contains('active'))) { + import('./performance.js').then(mod => { + mod.selectPerformanceNodeById(cloneNode.id); + showSidebar(); + }); + } else { + scrollAndSelectNodeById(cloneNode.id); + } + } + }; + }); }, 0); const closeBtnEl = document.getElementById('sidebar-close-btn'); if (closeBtnEl) closeBtnEl.onclick = function() { diff --git a/scripts/visualizer.py b/scripts/visualizer.py index 7314e0c41..f8b90215b 100644 --- a/scripts/visualizer.py +++ b/scripts/visualizer.py @@ -37,12 +37,30 @@ def load_evolution_data(checkpoint_folder): nodes = [] id_to_program = {} + pids = set() for island_idx, id_list in enumerate(meta.get("islands", [])): for pid in id_list: prog_path = os.path.join(programs_dir, f"{pid}.json") + + # Keep track of PIDs and if one is double, append "-copyN" to the PID + if pid in pids: + base_pid = pid + + # If base_pid already has a "-copyN" suffix, strip it + if "-copy" in base_pid: + base_pid = base_pid.rsplit("-copy", 1)[0] + + # Find the next available copy number + copy_num = 1 + while f"{base_pid}-copy{copy_num}" in pids: + copy_num += 1 + pid = f"{base_pid}-copy{copy_num}" + pids.add(pid) + if os.path.exists(prog_path): with open(prog_path) as pf: prog = json.load(pf) + prog["id"] = pid prog["island"] = island_idx nodes.append(prog) id_to_program[pid] = prog From 6957cd89bd08867d810e6de7b0a038ea1259b527 Mon Sep 17 00:00:00 2001 From: Julian Date: Mon, 16 Jun 2025 03:41:51 +0200 Subject: [PATCH 10/13] UI: Show number of the list item in big font --- scripts/static/js/list.js | 13 ++++++++++++- scripts/static/js/sidebar.js | 5 ++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/scripts/static/js/list.js b/scripts/static/js/list.js index b491e5113..c3c88bbef 100644 --- a/scripts/static/js/list.js +++ b/scripts/static/js/list.js @@ -61,11 +61,22 @@ export function renderNodeList(nodes) { `; container.innerHTML = ''; - filtered.forEach(node => { + filtered.forEach((node, idx) => { const row = document.createElement('div'); row.className = 'node-list-item' + (selectedProgramId === node.id ? ' selected' : '') + (highlightIds.has(node.id) ? 
' highlighted' : ''); row.setAttribute('data-node-id', node.id); row.tabIndex = 0; + + const numDiv = document.createElement('div'); + numDiv.textContent = `#${idx + 1}`; + numDiv.style.fontSize = '2.2em'; + numDiv.style.fontWeight = 'bold'; + numDiv.style.color = '#444'; + numDiv.style.flex = '0 0 70px'; + numDiv.style.display = 'flex'; + numDiv.style.alignItems = 'center'; + numDiv.style.justifyContent = 'center'; + row.appendChild(numDiv); let selectedMetricRow = ''; if (node.metrics && metric in node.metrics) { let val = (typeof node.metrics[metric] === 'number' && isFinite(node.metrics[metric])) ? node.metrics[metric].toFixed(4) : node.metrics[metric]; diff --git a/scripts/static/js/sidebar.js b/scripts/static/js/sidebar.js index b362f769c..366700d71 100644 --- a/scripts/static/js/sidebar.js +++ b/scripts/static/js/sidebar.js @@ -36,7 +36,7 @@ export function showSidebarContent(d, fromHover = false) { const children = allNodeData.filter(n => n.parent_id === d.id); if (children.length > 0) tabNames.push('Children'); - // --- CLONES TAB LOGIC --- + // Handle nodes with "-copyN" IDs function getBaseId(id) { return id.includes('-copy') ? id.split('-copy')[0] : id; } @@ -52,10 +52,9 @@ export function showSidebarContent(d, fromHover = false) { return ``; } if (tabName === 'Prompts') { - // --- Prompt select logic --- + // Prompt select logic let promptOptions = []; let promptMap = {}; - // Prompts if (d.prompts && typeof d.prompts === 'object') { for (const [k, v] of Object.entries(d.prompts)) { if (v && typeof v === 'object' && !Array.isArray(v)) { From 675ae97088a363139b685c613095a2a6bc3122b4 Mon Sep 17 00:00:00 2001 From: Julian Date: Mon, 16 Jun 2025 03:55:35 +0200 Subject: [PATCH 11/13] UI: When user panned away and the graph is seemingly empty, show a Recenter button --- scripts/static/js/graph.js | 63 +++++++++++++++++++++++++ scripts/static/js/performance.js | 81 +++++++++++++++++++++++++++++--- 2 files changed, 137 insertions(+), 7 deletions(-) diff --git a/scripts/static/js/graph.js b/scripts/static/js/graph.js index 116ab09ba..de82e1103 100644 --- a/scripts/static/js/graph.js +++ b/scripts/static/js/graph.js @@ -121,6 +121,40 @@ Object.defineProperty(window, 'g', { set: function(val) { g = val; } }); +// Recenter Button Overlay +function showRecenterButton(onClick) { + let btn = document.getElementById('graph-recenter-btn'); + if (!btn) { + btn = document.createElement('button'); + btn.id = 'graph-recenter-btn'; + btn.textContent = 'Recenter'; + btn.style.position = 'absolute'; + btn.style.left = '50%'; + btn.style.top = '50%'; + btn.style.transform = 'translate(-50%, -50%)'; + btn.style.zIndex = 1000; + btn.style.fontSize = '2em'; + btn.style.padding = '0.5em 1.5em'; + btn.style.background = '#fff'; + btn.style.border = '2px solid #2196f3'; + btn.style.borderRadius = '12px'; + btn.style.boxShadow = '0 2px 16px #0002'; + btn.style.cursor = 'pointer'; + btn.style.display = 'block'; + document.getElementById('graph').appendChild(btn); + } + btn.style.display = 'block'; + btn.onclick = function() { + btn.style.display = 'none'; + if (typeof onClick === 'function') onClick(); + }; +} + +function hideRecenterButton() { + const btn = document.getElementById('graph-recenter-btn'); + if (btn) btn.style.display = 'none'; +} + function ensureGraphSvg() { // Get latest width/height from state.js let svgEl = d3.select('#graph').select('svg'); @@ -264,6 +298,35 @@ function renderGraph(data, options = {}) { .scaleExtent([0.2, 10]) .on('zoom', function(event) { g.attr('transform', 
event.transform); + // Check if all content is out of view + setTimeout(() => { + try { + const svgRect = svg.node().getBoundingClientRect(); + const allCircles = g.selectAll('circle').nodes(); + if (allCircles.length === 0) { hideRecenterButton(); return; } + let anyVisible = false; + for (const c of allCircles) { + const bbox = c.getBoundingClientRect(); + if ( + bbox.right > svgRect.left && + bbox.left < svgRect.right && + bbox.bottom > svgRect.top && + bbox.top < svgRect.bottom + ) { + anyVisible = true; + break; + } + } + if (!anyVisible) { + showRecenterButton(() => { + // Reset zoom/pan + svg.transition().duration(400).call(zoomBehavior.transform, d3.zoomIdentity); + }); + } else { + hideRecenterButton(); + } + } catch {} + }, 0); }); svg.call(zoomBehavior); if (prevTransform) { diff --git a/scripts/static/js/performance.js b/scripts/static/js/performance.js index a2f302946..4900e95ad 100644 --- a/scripts/static/js/performance.js +++ b/scripts/static/js/performance.js @@ -11,7 +11,7 @@ import { selectListNodeById } from './list.js'; if (!toggleDiv) { toggleDiv = document.createElement('div'); toggleDiv.id = 'perf-island-toggle'; - toggleDiv.style = 'display:flex;align-items:center;gap:0.7em;'; + toggleDiv.style = 'display:flex;align-items:center;gap:0.7em;margin-left:3em;'; toggleDiv.innerHTML = `