Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion optillm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Version information
__version__ = "0.2.4"
__version__ = "0.2.5"

# Import from server module
from .server import (
Expand Down
12 changes: 9 additions & 3 deletions optillm/plugins/proxy_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ def run(system_prompt: str, initial_query: str, client, model: str,
{"role": "user", "content": initial_query}
]
)
return response.choices[0].message.content, response.usage.completion_tokens
# Return full response dict to preserve all usage information
response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
return response_dict, 0

# Create or reuse proxy client to maintain state (important for round-robin)
config_key = str(config) # Simple config-based cache key
Expand Down Expand Up @@ -128,7 +130,9 @@ def run(system_prompt: str, initial_query: str, client, model: str,
**(request_config or {})
)

return response.choices[0].message.content, response.usage.completion_tokens
# Return full response dict to preserve all usage information
response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
return response_dict, 0

except Exception as e:
logger.error(f"Proxy plugin error: {e}", exc_info=True)
Expand All @@ -141,4 +145,6 @@ def run(system_prompt: str, initial_query: str, client, model: str,
{"role": "user", "content": initial_query}
]
)
return response.choices[0].message.content, response.usage.completion_tokens
# Return full response dict to preserve all usage information
response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
return response_dict, 0
16 changes: 16 additions & 0 deletions optillm/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,22 @@ def proxy():

# Handle non-none approaches with n attempts
response, completion_tokens = execute_n_times(n, approaches, operation, system_prompt, initial_query, client, model, request_config, request_id)

# Check if the response is a full dict (like from proxy plugin or none approach)
if operation == 'SINGLE' and isinstance(response, dict) and 'choices' in response and 'usage' in response:
# This is a full response dict, return it directly
if conversation_logger and request_id:
conversation_logger.log_final_response(request_id, response)
conversation_logger.finalize_conversation(request_id)

if stream:
if request_id:
logger.info(f'Request {request_id}: Completed (streaming response)')
return Response(generate_streaming_response(extract_contents(response), model), content_type='text/event-stream')
else:
if request_id:
logger.info(f'Request {request_id}: Completed')
return jsonify(response), 200

except Exception as e:
# Log error to conversation logger if enabled
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "optillm"
version = "0.2.4"
version = "0.2.5"
description = "An optimizing inference proxy for LLMs."
readme = "README.md"
license = "Apache-2.0"
Expand Down
1 change: 1 addition & 0 deletions tests/test_ci_quick.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from optillm.plugins.deep_research import DeepResearcher
from optillm.plugins.longcepo import run_longcepo
from optillm.plugins.spl import run_spl
from optillm.plugins.proxy import client, config, approach_handler
print("✅ Plugin submodule imports working - no relative import errors")
except ImportError as e:
if "attempted relative import" in str(e):
Expand Down
83 changes: 80 additions & 3 deletions tests/test_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def test_plugin_module_imports():
'optillm.plugins.deep_research_plugin',
'optillm.plugins.deepthink_plugin',
'optillm.plugins.longcepo_plugin',
'optillm.plugins.spl_plugin'
'optillm.plugins.spl_plugin',
'optillm.plugins.proxy_plugin'
]

for module_name in plugin_modules:
Expand All @@ -51,7 +52,7 @@ def test_plugin_approach_detection():
load_plugins()

# Check if known plugins are loaded
expected_plugins = ["memory", "readurls", "privacy", "web_search", "deep_research", "deepthink", "longcepo", "spl"]
expected_plugins = ["memory", "readurls", "privacy", "web_search", "deep_research", "deepthink", "longcepo", "spl", "proxy"]
for plugin_name in expected_plugins:
assert plugin_name in plugin_approaches, f"Plugin {plugin_name} not loaded"

Expand Down Expand Up @@ -141,6 +142,63 @@ def test_spl_plugin():
assert run_spl is not None


def test_proxy_plugin():
    """Verify the proxy plugin exposes its interface and its submodules import."""
    import optillm.plugins.proxy_plugin as plugin

    # The plugin must expose a run() entry point and its registration slug.
    for attr in ('run', 'SLUG'):
        assert hasattr(plugin, attr)
    assert plugin.SLUG == "proxy"

    # Submodules of the proxy package should import without errors.
    from optillm.plugins.proxy import approach_handler, client, config
    for submodule in (client, config, approach_handler):
        assert submodule is not None


def test_proxy_plugin_token_counts():
    """Verify the proxy plugin returns the full response dict with complete token usage."""
    import optillm.plugins.proxy_plugin as plugin
    from unittest.mock import Mock, MagicMock

    expected_usage = {
        'prompt_tokens': 10,
        'completion_tokens': 5,
        'total_tokens': 15,
    }

    # Build a mock response carrying the full usage breakdown; MagicMock
    # provides model_dump so the plugin takes the dict-returning path.
    mock_response = MagicMock()
    mock_response.choices = [Mock(message=Mock(content="Test response"))]
    mock_response.usage = Mock(**expected_usage)
    mock_response.model_dump.return_value = {
        'choices': [{'message': {'content': 'Test response'}}],
        'usage': dict(expected_usage),
    }

    mock_client = Mock()
    mock_client.chat.completions.create.return_value = mock_response

    # Exercise the plugin entry point against the mocked client.
    result, _ = plugin.run(
        system_prompt="Test system",
        initial_query="Test query",
        client=mock_client,
        model="test-model",
    )

    # The plugin must surface the response as a dict with all token counts intact.
    assert isinstance(result, dict), "Result should be a dictionary"
    assert 'usage' in result, "Result should contain usage information"
    for key, value in expected_usage.items():
        assert key in result['usage'], f"Usage should contain {key}"
        assert result['usage'][key] == value


def test_plugin_subdirectory_imports():
"""Test all plugins with subdirectories can import their submodules"""
# Test deep_research
Expand All @@ -159,6 +217,12 @@ def test_plugin_subdirectory_imports():
# Test spl
from optillm.plugins.spl import run_spl
assert run_spl is not None

# Test proxy
from optillm.plugins.proxy import client, config, approach_handler
assert client is not None
assert config is not None
assert approach_handler is not None


def test_no_relative_import_errors():
Expand All @@ -170,7 +234,8 @@ def test_no_relative_import_errors():
'optillm.plugins.deepthink_plugin',
'optillm.plugins.deep_research_plugin',
'optillm.plugins.longcepo_plugin',
'optillm.plugins.spl_plugin'
'optillm.plugins.spl_plugin',
'optillm.plugins.proxy_plugin'
]

for plugin_name in plugins_with_subdirs:
Expand Down Expand Up @@ -256,6 +321,18 @@ def test_no_relative_import_errors():
except Exception as e:
print(f"❌ SPL plugin test failed: {e}")

try:
test_proxy_plugin()
print("✅ Proxy plugin test passed")
except Exception as e:
print(f"❌ Proxy plugin test failed: {e}")

try:
test_proxy_plugin_token_counts()
print("✅ Proxy plugin token counts test passed")
except Exception as e:
print(f"❌ Proxy plugin token counts test failed: {e}")

try:
test_plugin_subdirectory_imports()
print("✅ Plugin subdirectory imports test passed")
Expand Down