From 7f1f294fdfe4d3c948de8f198c67e11d93bd899a Mon Sep 17 00:00:00 2001
From: Asankhaya Sharma
Date: Thu, 21 Aug 2025 09:42:44 +0800
Subject: [PATCH] fix

---
 openevolve/_version.py               |   2 +-
 openevolve/llm/openai.py             |  28 +++-
 tests/test_model_parameter_demo.py   |  70 +++++++++++++++++++
 tests/test_openai_model_detection.py | 101 +++++++++++++++++++++++++++
 4 files changed, 198 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_model_parameter_demo.py
 create mode 100644 tests/test_openai_model_detection.py

diff --git a/openevolve/_version.py b/openevolve/_version.py
index 0b8c3430a..347654f34 100644
--- a/openevolve/_version.py
+++ b/openevolve/_version.py
@@ -1,3 +1,3 @@
 """Version information for openevolve package."""
 
-__version__ = "0.2.2"
+__version__ = "0.2.3"
diff --git a/openevolve/llm/openai.py b/openevolve/llm/openai.py
index d396dd1c4..bf5dde160 100644
--- a/openevolve/llm/openai.py
+++ b/openevolve/llm/openai.py
@@ -66,14 +66,38 @@ async def generate_with_context(
         formatted_messages.extend(messages)
 
         # Set up generation parameters
-        if self.api_base == "https://api.openai.com/v1" and str(self.model).lower().startswith("o"):
-            # For o-series models
+        # Define OpenAI reasoning models that require max_completion_tokens
+        # These models don't support temperature/top_p and use different parameters
+        OPENAI_REASONING_MODEL_PREFIXES = (
+            # O-series reasoning models
+            "o1-", "o1",  # o1, o1-mini, o1-preview
+            "o3-", "o3",  # o3, o3-mini, o3-pro
+            "o4-",  # o4-mini
+            # GPT-5 series are also reasoning models
+            "gpt-5-", "gpt-5"  # gpt-5, gpt-5-mini, gpt-5-nano
+        )
+
+        # Check if this is an OpenAI reasoning model
+        model_lower = str(self.model).lower()
+        is_openai_reasoning_model = (
+            self.api_base == "https://api.openai.com/v1" and
+            model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
+        )
+
+        if is_openai_reasoning_model:
+            # For OpenAI reasoning models
             params = {
                 "model": self.model,
                 "messages": formatted_messages,
                 "max_completion_tokens": kwargs.get("max_tokens", self.max_tokens),
             }
+            # Add optional reasoning parameters if provided
+            if "reasoning_effort" in kwargs:
+                params["reasoning_effort"] = kwargs["reasoning_effort"]
+            if "verbosity" in kwargs:
+                params["verbosity"] = kwargs["verbosity"]
         else:
+            # Standard parameters for all other models
             params = {
                 "model": self.model,
                 "messages": formatted_messages,
diff --git a/tests/test_model_parameter_demo.py b/tests/test_model_parameter_demo.py
new file mode 100644
index 000000000..142fe43e1
--- /dev/null
+++ b/tests/test_model_parameter_demo.py
@@ -0,0 +1,70 @@
+"""
+Demonstration of fixed OpenAI model parameter handling
+"""
+
+def demo_model_parameter_selection():
+    """Demonstrate how different models get different parameters"""
+
+    # Mock the logic from openai.py
+    OPENAI_REASONING_MODEL_PREFIXES = (
+        # O-series reasoning models
+        "o1-", "o1",  # o1, o1-mini, o1-preview
+        "o3-", "o3",  # o3, o3-mini, o3-pro
+        "o4-",  # o4-mini
+        # GPT-5 series are also reasoning models
+        "gpt-5-", "gpt-5"  # gpt-5, gpt-5-mini, gpt-5-nano
+    )
+
+    def get_params_for_model(model_name, api_base="https://api.openai.com/v1"):
+        """Show what parameters would be used for each model"""
+        model_lower = str(model_name).lower()
+        is_openai_reasoning_model = (
+            api_base == "https://api.openai.com/v1" and
+            model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
+        )
+
+        if is_openai_reasoning_model:
+            return {
+                "type": "reasoning_model",
+                "uses": "max_completion_tokens",
+                "supports": ["reasoning_effort", "verbosity"],
+                "excludes": ["temperature", "top_p"]
+            }
+        else:
+            return {
+                "type": "standard_model",
+                "uses": "max_tokens",
+                "supports": ["temperature", "top_p"],
+                "excludes": []
+            }
+
+    print("šŸ”§ OpenAI Model Parameter Selection Demo")
+    print("=" * 50)
+
+    test_models = [
+        # Reasoning models
+        ("o1-mini", "āœ… Reasoning"),
+        ("o1-preview", "āœ… Reasoning"),
+        ("o3-mini-2025-01-31", "āœ… Reasoning (with date)"),
+        ("gpt-5-nano", "āœ… Reasoning (GPT-5 series)"),
+
+        # Standard models
+        ("gpt-4o-mini", "āŒ Standard (not reasoning)"),
+        ("gpt-4o", "āŒ Standard"),
+        ("gpt-4-turbo", "āŒ Standard"),
+    ]
+
+    for model, description in test_models:
+        params = get_params_for_model(model)
+        print(f"\nšŸ“‹ Model: {model}")
+        print(f"   Type: {description}")
+        print(f"   Uses: {params['uses']}")
+        print(f"   Supports: {', '.join(params['supports'])}")
+        if params['excludes']:
+            print(f"   Excludes: {', '.join(params['excludes'])}")
+
+    print("\n" + "=" * 50)
+    print("āœ… Fix successful! No more false positives/negatives.")
+
+if __name__ == "__main__":
+    demo_model_parameter_selection()
\ No newline at end of file
diff --git a/tests/test_openai_model_detection.py b/tests/test_openai_model_detection.py
new file mode 100644
index 000000000..fb9b745f0
--- /dev/null
+++ b/tests/test_openai_model_detection.py
@@ -0,0 +1,101 @@
+"""
+Test OpenAI reasoning model detection logic
+"""
+
+import unittest
+from unittest.mock import MagicMock
+
+
+class TestOpenAIReasoningModelDetection(unittest.TestCase):
+    """Test that OpenAI reasoning models are correctly identified"""
+
+    def test_reasoning_model_detection(self):
+        """Test various model names to ensure correct reasoning model detection"""
+
+        # Define the same constants as in the code
+        OPENAI_REASONING_MODEL_PREFIXES = (
+            # O-series reasoning models
+            "o1-", "o1",  # o1, o1-mini, o1-preview
+            "o3-", "o3",  # o3, o3-mini, o3-pro
+            "o4-",  # o4-mini
+            # GPT-5 series are also reasoning models
+            "gpt-5-", "gpt-5"  # gpt-5, gpt-5-mini, gpt-5-nano
+        )
+
+        def is_reasoning_model(model_name, api_base="https://api.openai.com/v1"):
+            """Test function that mimics the logic in openai.py"""
+            model_lower = str(model_name).lower()
+            return (
+                api_base == "https://api.openai.com/v1" and
+                model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
+            )
+
+        # Test cases: (model_name, expected_result, description)
+        test_cases = [
+            # Reasoning models - should return True
+            ("o1", True, "Base o1 model"),
+            ("o1-mini", True, "o1-mini model"),
+            ("o1-preview", True, "o1-preview model"),
+            ("o1-mini-2025-01-31", True, "o1-mini with date"),
+            ("o3", True, "Base o3 model"),
+            ("o3-mini", True, "o3-mini model"),
+            ("o3-pro", True, "o3-pro model"),
+            ("o4-mini", True, "o4-mini model"),
+            ("gpt-5", True, "Base gpt-5 model"),
+            ("gpt-5-mini", True, "gpt-5-mini model"),
+            ("gpt-5-nano", True, "gpt-5-nano model"),
+
+            # Non-reasoning models - should return False
+            ("gpt-4o-mini", False, "gpt-4o-mini (not reasoning)"),
+            ("gpt-4o", False, "gpt-4o (not reasoning)"),
+            ("gpt-4", False, "gpt-4 (not reasoning)"),
+            ("gpt-3.5-turbo", False, "gpt-3.5-turbo (not reasoning)"),
+            ("claude-3", False, "Non-OpenAI model"),
+            ("gemini-pro", False, "Non-OpenAI model"),
+
+            # Edge cases
+            ("O1-MINI", True, "Uppercase o1-mini"),
+            ("GPT-5-MINI", True, "Uppercase gpt-5-mini"),
+        ]
+
+        for model_name, expected, description in test_cases:
+            with self.subTest(model=model_name, desc=description):
+                result = is_reasoning_model(model_name)
+                self.assertEqual(
+                    result,
+                    expected,
+                    f"Model '{model_name}' ({description}): expected {expected}, got {result}"
+                )
+
+    def test_non_openai_api_base(self):
+        """Test that non-OpenAI API bases don't trigger reasoning model logic"""
+        OPENAI_REASONING_MODEL_PREFIXES = (
+            "o1-", "o1", "o3-", "o3", "o4-", "gpt-5-", "gpt-5"
+        )
+
+        def is_reasoning_model(model_name, api_base):
+            model_lower = str(model_name).lower()
+            return (
+                api_base == "https://api.openai.com/v1" and
+                model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
+            )
+
+        # Even reasoning model names should return False for non-OpenAI APIs
+        test_cases = [
+            ("o1-mini", "https://api.anthropic.com/v1", False),
+            ("gpt-5", "https://generativelanguage.googleapis.com/v1beta/openai/", False),
+            ("o3-mini", "https://api.deepseek.com/v1", False),
+        ]
+
+        for model_name, api_base, expected in test_cases:
+            with self.subTest(model=model_name, api=api_base):
+                result = is_reasoning_model(model_name, api_base)
+                self.assertEqual(
+                    result,
+                    expected,
+                    f"Model '{model_name}' with API '{api_base}' should return {expected}"
+                )
+
+
+if __name__ == "__main__":
+    unittest.main()
\ No newline at end of file
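
Note: the openai.py hunk above selects the request payload by model family, so `reasoning_effort` and `verbosity` are forwarded only on the reasoning branch. Below is a minimal standalone sketch of that selection logic; `build_request_params` and its default argument values are hypothetical names introduced here for illustration, while the two branch bodies mirror the hunk and the behavior the new tests assert:

    # Standalone sketch of the payload selection added in openai.py.
    # build_request_params is a hypothetical helper name; the branch logic
    # follows the patch (reasoning models get max_completion_tokens plus
    # optional reasoning knobs; standard models keep sampling parameters).
    OPENAI_REASONING_MODEL_PREFIXES = (
        "o1-", "o1", "o3-", "o3", "o4-", "gpt-5-", "gpt-5"
    )

    def build_request_params(model, messages,
                             api_base="https://api.openai.com/v1",
                             max_tokens=4096, temperature=0.7, top_p=0.95,
                             **kwargs):
        model_lower = str(model).lower()
        is_openai_reasoning_model = (
            api_base == "https://api.openai.com/v1"
            and model_lower.startswith(OPENAI_REASONING_MODEL_PREFIXES)
        )
        if is_openai_reasoning_model:
            # Reasoning models: token cap via max_completion_tokens,
            # no temperature/top_p
            params = {
                "model": model,
                "messages": messages,
                "max_completion_tokens": kwargs.get("max_tokens", max_tokens),
            }
            # Optional knobs are forwarded only when explicitly provided
            if "reasoning_effort" in kwargs:
                params["reasoning_effort"] = kwargs["reasoning_effort"]
            if "verbosity" in kwargs:
                params["verbosity"] = kwargs["verbosity"]
        else:
            # Standard chat models keep the usual sampling parameters
            params = {
                "model": model,
                "messages": messages,
                "temperature": kwargs.get("temperature", temperature),
                "top_p": kwargs.get("top_p", top_p),
                "max_tokens": kwargs.get("max_tokens", max_tokens),
            }
        return params

    if __name__ == "__main__":
        msgs = [{"role": "user", "content": "hello"}]
        # Hits the reasoning branch: max_completion_tokens + reasoning_effort
        print(build_request_params("o3-mini", msgs, reasoning_effort="high"))
        # Stays on the standard branch: temperature/top_p/max_tokens
        print(build_request_params("gpt-4o", msgs))

Because the prefixes are matched with str.startswith on a tuple after lowercasing, dated variants such as "o3-mini-2025-01-31" and uppercase names like "O1-MINI" match, while lookalikes such as "gpt-4o" do not, which is exactly what tests/test_openai_model_detection.py exercises.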