Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion optillm/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Version information
__version__ = "0.2.4"
__version__ = "0.2.5"

# Import from server module
from .server import (
Expand Down
12 changes: 9 additions & 3 deletions optillm/plugins/proxy_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,9 @@ def run(system_prompt: str, initial_query: str, client, model: str,
{"role": "user", "content": initial_query}
]
)
return response.choices[0].message.content, response.usage.completion_tokens
# Return full response dict to preserve all usage information
response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
return response_dict, 0

# Create or reuse proxy client to maintain state (important for round-robin)
config_key = str(config) # Simple config-based cache key
Expand Down Expand Up @@ -128,7 +130,9 @@ def run(system_prompt: str, initial_query: str, client, model: str,
**(request_config or {})
)

return response.choices[0].message.content, response.usage.completion_tokens
# Return full response dict to preserve all usage information
response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
return response_dict, 0

except Exception as e:
logger.error(f"Proxy plugin error: {e}", exc_info=True)
Expand All @@ -141,4 +145,6 @@ def run(system_prompt: str, initial_query: str, client, model: str,
{"role": "user", "content": initial_query}
]
)
return response.choices[0].message.content, response.usage.completion_tokens
# Return full response dict to preserve all usage information
response_dict = response.model_dump() if hasattr(response, 'model_dump') else response
return response_dict, 0
16 changes: 16 additions & 0 deletions optillm/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,22 @@ def proxy():

# Handle non-none approaches with n attempts
response, completion_tokens = execute_n_times(n, approaches, operation, system_prompt, initial_query, client, model, request_config, request_id)

# Check if the response is a full dict (like from proxy plugin or none approach)
if operation == 'SINGLE' and isinstance(response, dict) and 'choices' in response and 'usage' in response:
# This is a full response dict, return it directly
if conversation_logger and request_id:
conversation_logger.log_final_response(request_id, response)
conversation_logger.finalize_conversation(request_id)

if stream:
if request_id:
logger.info(f'Request {request_id}: Completed (streaming response)')
return Response(generate_streaming_response(extract_contents(response), model), content_type='text/event-stream')
else:
if request_id:
logger.info(f'Request {request_id}: Completed')
return jsonify(response), 200

except Exception as e:
# Log error to conversation logger if enabled
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "optillm"
version = "0.2.4"
version = "0.2.5"
description = "An optimizing inference proxy for LLMs."
readme = "README.md"
license = "Apache-2.0"
Expand Down
1 change: 1 addition & 0 deletions tests/test_ci_quick.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
from optillm.plugins.deep_research import DeepResearcher
from optillm.plugins.longcepo import run_longcepo
from optillm.plugins.spl import run_spl
from optillm.plugins.proxy import client, config, approach_handler
print("✅ Plugin submodule imports working - no relative import errors")
except ImportError as e:
if "attempted relative import" in str(e):
Expand Down
83 changes: 80 additions & 3 deletions tests/test_plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ def test_plugin_module_imports():
'optillm.plugins.deep_research_plugin',
'optillm.plugins.deepthink_plugin',
'optillm.plugins.longcepo_plugin',
'optillm.plugins.spl_plugin'
'optillm.plugins.spl_plugin',
'optillm.plugins.proxy_plugin'
]

for module_name in plugin_modules:
Expand All @@ -51,7 +52,7 @@ def test_plugin_approach_detection():
load_plugins()

# Check if known plugins are loaded
expected_plugins = ["memory", "readurls", "privacy", "web_search", "deep_research", "deepthink", "longcepo", "spl"]
expected_plugins = ["memory", "readurls", "privacy", "web_search", "deep_research", "deepthink", "longcepo", "spl", "proxy"]
for plugin_name in expected_plugins:
assert plugin_name in plugin_approaches, f"Plugin {plugin_name} not loaded"

Expand Down Expand Up @@ -141,6 +142,63 @@ def test_spl_plugin():
assert run_spl is not None


def test_proxy_plugin():
    """Verify the proxy plugin exposes its interface and its submodules import."""
    import optillm.plugins.proxy_plugin as plugin

    # The plugin must expose a run() entry point and its registration slug.
    for attr in ('run', 'SLUG'):
        assert hasattr(plugin, attr)
    assert plugin.SLUG == "proxy"

    # Submodules of the proxy package should import without errors.
    from optillm.plugins.proxy import approach_handler, client, config
    for submodule in (client, config, approach_handler):
        assert submodule is not None


def test_proxy_plugin_token_counts():
    """Verify the proxy plugin returns the full response dict with complete token usage."""
    import optillm.plugins.proxy_plugin as plugin
    from unittest.mock import Mock, MagicMock

    expected_usage = {
        'prompt_tokens': 10,
        'completion_tokens': 5,
        'total_tokens': 15,
    }

    # Build a mock response carrying the full usage breakdown; MagicMock
    # provides model_dump so the plugin takes the dict-returning path.
    mock_response = MagicMock()
    mock_response.choices = [Mock(message=Mock(content="Test response"))]
    mock_response.usage = Mock(**expected_usage)
    mock_response.model_dump.return_value = {
        'choices': [{'message': {'content': 'Test response'}}],
        'usage': dict(expected_usage),
    }

    mock_client = Mock()
    mock_client.chat.completions.create.return_value = mock_response

    # Exercise the plugin entry point against the mocked client.
    result, _ = plugin.run(
        system_prompt="Test system",
        initial_query="Test query",
        client=mock_client,
        model="test-model",
    )

    # The plugin must surface the response as a dict with all token counts intact.
    assert isinstance(result, dict), "Result should be a dictionary"
    assert 'usage' in result, "Result should contain usage information"
    for key, value in expected_usage.items():
        assert key in result['usage'], f"Usage should contain {key}"
        assert result['usage'][key] == value


def test_plugin_subdirectory_imports():
"""Test all plugins with subdirectories can import their submodules"""
# Test deep_research
Expand All @@ -159,6 +217,12 @@ def test_plugin_subdirectory_imports():
# Test spl
from optillm.plugins.spl import run_spl
assert run_spl is not None

# Test proxy
from optillm.plugins.proxy import client, config, approach_handler
assert client is not None
assert config is not None
assert approach_handler is not None


def test_no_relative_import_errors():
Expand All @@ -170,7 +234,8 @@ def test_no_relative_import_errors():
'optillm.plugins.deepthink_plugin',
'optillm.plugins.deep_research_plugin',
'optillm.plugins.longcepo_plugin',
'optillm.plugins.spl_plugin'
'optillm.plugins.spl_plugin',
'optillm.plugins.proxy_plugin'
]

for plugin_name in plugins_with_subdirs:
Expand Down Expand Up @@ -256,6 +321,18 @@ def test_no_relative_import_errors():
except Exception as e:
print(f"❌ SPL plugin test failed: {e}")

try:
test_proxy_plugin()
print("✅ Proxy plugin test passed")
except Exception as e:
print(f"❌ Proxy plugin test failed: {e}")

try:
test_proxy_plugin_token_counts()
print("✅ Proxy plugin token counts test passed")
except Exception as e:
print(f"❌ Proxy plugin token counts test failed: {e}")

try:
test_plugin_subdirectory_imports()
print("✅ Plugin subdirectory imports test passed")
Expand Down