diff --git a/Server/models.py b/Server/models.py index 7c56327c..4158d8be 100644 --- a/Server/models.py +++ b/Server/models.py @@ -8,6 +8,10 @@ class MCPResponse(BaseModel): message: str | None = None error: str | None = None data: Any | None = None + # Optional hint for clients about how to handle the response. + # Supported values: + # - "retry": Unity is temporarily reloading; call should be retried politely. + hint: str | None = None class UnityInstanceInfo(BaseModel): diff --git a/Server/plugin_hub.py b/Server/plugin_hub.py index 0cf40648..4ca92ae9 100644 --- a/Server/plugin_hub.py +++ b/Server/plugin_hub.py @@ -4,12 +4,14 @@ import asyncio import logging +import time import uuid from typing import Any, Dict, Optional from starlette.endpoints import WebSocketEndpoint from starlette.websockets import WebSocket +from config import config from plugin_registry import PluginRegistry logger = logging.getLogger("mcp-for-unity-server") @@ -204,19 +206,71 @@ async def _get_connection(cls, session_id: str) -> WebSocket: # ------------------------------------------------------------------ @classmethod async def _resolve_session_id(cls, unity_instance: Optional[str]) -> str: + """Resolve a project hash (Unity instance id) to an active plugin session. + + During Unity domain reloads the plugin's WebSocket session is torn down + and reconnected shortly afterwards. Instead of failing immediately when + no sessions are available, we wait for a bounded period for a plugin + to reconnect so in-flight MCP calls can succeed transparently. + """ if cls._registry is None: raise RuntimeError("Plugin registry not configured") - if unity_instance: - session_id = await cls._registry.get_session_id_by_hash(unity_instance) - if session_id: - return session_id + # Use the same defaults as the stdio transport reload handling so that + # HTTP/WebSocket and TCP behave consistently without per-project env. + max_retries = max(1, int(getattr(config, "reload_max_retries", 40))) + retry_ms = float(getattr(config, "reload_retry_ms", 250)) + sleep_seconds = max(0.05, retry_ms / 1000.0) + + async def _try_once() -> Optional[str]: + # Prefer a specific Unity instance if one was requested + if unity_instance: + session_id = await cls._registry.get_session_id_by_hash(unity_instance) + if session_id: + return session_id + + # Fallback: use the first available plugin session, if any + sessions = await cls._registry.list_sessions() + if not sessions: + return None + # Deterministic order: rely on insertion ordering + return next(iter(sessions.keys())) + + session_id = await _try_once() + deadline = time.monotonic() + (max_retries * sleep_seconds) + wait_started = None + + # If there is no active plugin yet (e.g., Unity starting up or reloading), + # wait politely for a session to appear before surfacing an error. + while session_id is None and time.monotonic() < deadline: + if wait_started is None: + wait_started = time.monotonic() + logger.debug( + "No plugin session available (instance=%s); waiting up to %.2fs", + unity_instance or "default", + deadline - wait_started, + ) + await asyncio.sleep(sleep_seconds) + session_id = await _try_once() + + if session_id is not None and wait_started is not None: + logger.debug( + "Plugin session restored after %.3fs (instance=%s)", + time.monotonic() - wait_started, + unity_instance or "default", + ) - sessions = await cls._registry.list_sessions() - if not sessions: + if session_id is None: + logger.warning( + "No Unity plugin reconnected within %.2fs (instance=%s)", + max_retries * sleep_seconds, + unity_instance or "default", + ) + # At this point we've given the plugin ample time to reconnect; surface + # a clear error so the client can prompt the user to open Unity. raise RuntimeError("No Unity plugins are currently connected") - # Deterministic order: rely on insertion ordering - return next(iter(sessions.keys())) + + return session_id @classmethod async def send_command_for_instance( diff --git a/Server/tests/integration/test_domain_reload_resilience.py b/Server/tests/integration/test_domain_reload_resilience.py new file mode 100644 index 00000000..dc5131ea --- /dev/null +++ b/Server/tests/integration/test_domain_reload_resilience.py @@ -0,0 +1,225 @@ +""" +Integration test for domain reload resilience. + +Tests that the MCP server can handle rapid-fire requests during Unity domain reloads +by waiting for the plugin to reconnect instead of failing immediately. +""" +import asyncio +import pytest +from unittest.mock import AsyncMock, patch +from datetime import datetime + +from .test_helpers import DummyContext + + +@pytest.mark.asyncio +async def test_plugin_hub_waits_for_reconnection_during_reload(): + """Test that PluginHub._resolve_session_id waits for plugin reconnection.""" + # Import after conftest stubs are set up + from plugin_hub import PluginHub + from plugin_registry import PluginRegistry, PluginSession + + # Create a mock registry + mock_registry = AsyncMock(spec=PluginRegistry) + + # Simulate plugin reconnection sequence: + # First 2 calls: no sessions (plugin disconnected) + # Third call: session appears (plugin reconnected) + call_count = [0] + + async def mock_list_sessions(): + call_count[0] += 1 + if call_count[0] <= 2: + # Plugin not yet reconnected + return {} + else: + # Plugin reconnected + now = datetime.now() + session = PluginSession( + session_id="test-session-123", + project_name="TestProject", + project_hash="abc123", + unity_version="2022.3.0f1", + registered_at=now, + connected_at=now + ) + return {"test-session-123": session} + + mock_registry.list_sessions = mock_list_sessions + + # Configure PluginHub with our mock while preserving the original state + original_registry = PluginHub._registry + original_lock = PluginHub._lock + PluginHub._registry = mock_registry + PluginHub._lock = asyncio.Lock() + + try: + # Call _resolve_session_id when no session is available + # It should wait and retry until the session appears + session_id = await PluginHub._resolve_session_id(unity_instance=None) + + # Should have retried and eventually found the session + assert session_id == "test-session-123" + assert call_count[0] >= 3 # Should have tried at least 3 times + + finally: + # Clean up: restore original PluginHub state + PluginHub._registry = original_registry + PluginHub._lock = original_lock + + +@pytest.mark.asyncio +async def test_plugin_hub_fails_after_timeout(): + """Test that PluginHub._resolve_session_id eventually times out if plugin never reconnects.""" + from plugin_hub import PluginHub + from plugin_registry import PluginRegistry + + # Create a mock registry that never returns sessions + mock_registry = AsyncMock(spec=PluginRegistry) + + async def mock_list_sessions(): + return {} # Never returns sessions + + mock_registry.list_sessions = mock_list_sessions + + # Configure PluginHub with our mock while preserving the original state + original_registry = PluginHub._registry + original_lock = PluginHub._lock + PluginHub._registry = mock_registry + PluginHub._lock = asyncio.Lock() + + # Temporarily override config for a short timeout + with patch('plugin_hub.config') as mock_config: + mock_config.reload_max_retries = 3 # Only 3 retries + mock_config.reload_retry_ms = 10 # 10ms between retries + + try: + # Should raise RuntimeError after timeout + with pytest.raises(RuntimeError, match="No Unity plugins are currently connected"): + await PluginHub._resolve_session_id(unity_instance=None) + finally: + # Clean up: restore original PluginHub state + PluginHub._registry = original_registry + PluginHub._lock = original_lock + + +@pytest.mark.asyncio +async def test_read_console_during_simulated_reload(monkeypatch): + """ + Simulate the stress test: create script (triggers reload) + rapid read_console calls. + + This test simulates what happens when: + 1. A script is created (triggering domain reload) + 2. Multiple read_console calls are made immediately + 3. The plugin disconnects and reconnects during those calls + """ + # Setup tools + from tools.read_console import read_console + + call_count = [0] + + async def fake_send_command(*args, **kwargs): + """Simulate successful command execution.""" + call_count[0] += 1 + return { + "success": True, + "message": f"Retrieved {call_count[0]} log entries.", + "data": ["MCP-FOR-UNITY: Auto-discovered 10 tools"] + } + + # Patch the async_send_command_with_retry directly + import tools.read_console + monkeypatch.setattr( + tools.read_console, + "async_send_command_with_retry", + fake_send_command + ) + + # Run multiple read_console calls rapidly (simulating the stress test) + results = [] + for i in range(5): + result = await read_console( + ctx=DummyContext(), + action="get", + types=["all"], + count=50, + format="plain", + include_stacktrace=False + ) + results.append(result) + + # All calls should succeed + assert len(results) == 5 + for i, result in enumerate(results): + assert result["success"] is True, f"Call {i+1} failed with result: {result}" + assert "data" in result + + # At least 5 calls should have been made + assert call_count[0] == 5 + + +@pytest.mark.asyncio +async def test_plugin_hub_respects_unity_instance_preference(): + """Test that _resolve_session_id prefers a specific Unity instance if requested.""" + from plugin_hub import PluginHub + from plugin_registry import PluginRegistry, PluginSession + + # Create a mock registry with two sessions + mock_registry = AsyncMock(spec=PluginRegistry) + + now = datetime.now() + session1 = PluginSession( + session_id="session-1", + project_name="Project1", + project_hash="hash1", + unity_version="2022.3.0f1", + registered_at=now, + connected_at=now + ) + session2 = PluginSession( + session_id="session-2", + project_name="Project2", + project_hash="hash2", + unity_version="2022.3.0f1", + registered_at=now, + connected_at=now + ) + + async def mock_list_sessions(): + return { + "session-1": session1, + "session-2": session2 + } + + async def mock_get_session_by_hash(project_hash): + if project_hash == "hash2": + return "session-2" + return None + + mock_registry.list_sessions = mock_list_sessions + mock_registry.get_session_id_by_hash = mock_get_session_by_hash + + # Configure PluginHub with our mock while preserving the original state + original_registry = PluginHub._registry + original_lock = PluginHub._lock + PluginHub._registry = mock_registry + PluginHub._lock = asyncio.Lock() + + try: + # Request specific Unity instance + session_id = await PluginHub._resolve_session_id(unity_instance="hash2") + + # Should return the requested instance + assert session_id == "session-2" + + # Request default (no specific instance) + session_id = await PluginHub._resolve_session_id(unity_instance=None) + + # Should return first available session + assert session_id in ["session-1", "session-2"] + + finally: + # Clean up: restore original PluginHub state + PluginHub._registry = original_registry + PluginHub._lock = original_lock + diff --git a/Server/unity_connection.py b/Server/unity_connection.py index cf832025..cefc5ed1 100644 --- a/Server/unity_connection.py +++ b/Server/unity_connection.py @@ -690,14 +690,28 @@ def get_unity_connection(instance_identifier: Optional[str] = None) -> UnityConn # Centralized retry helpers # ----------------------------- -def _is_reloading_response(resp: dict) -> bool: - """Return True if the Unity response indicates the editor is reloading.""" - if not isinstance(resp, dict): - return False - if resp.get("state") == "reloading": - return True - message_text = (resp.get("message") or resp.get("error") or "").lower() - return "reload" in message_text +def _is_reloading_response(resp: object) -> bool: + """Return True if the Unity response indicates the editor is reloading. + + Supports both raw dict payloads from Unity and MCPResponse objects returned + by preflight checks or transport helpers. + """ + # Structured MCPResponse from preflight/transport + if isinstance(resp, MCPResponse): + # Explicit "please retry" hint from preflight + if getattr(resp, "hint", None) == "retry": + return True + message_text = f"{resp.message or ''} {resp.error or ''}".lower() + return "reload" in message_text + + # Raw Unity payloads + if isinstance(resp, dict): + if resp.get("state") == "reloading": + return True + message_text = (resp.get("message") or resp.get("error") or "").lower() + return "reload" in message_text + + return False def send_command_with_retry( @@ -707,7 +721,7 @@ def send_command_with_retry( instance_id: Optional[str] = None, max_retries: int | None = None, retry_ms: int | None = None -) -> Dict[str, Any]: +) -> dict[str, Any] | MCPResponse: """Send a command to a Unity instance, waiting politely through Unity reloads. Args: @@ -718,7 +732,7 @@ def send_command_with_retry( retry_ms: Delay between retries in milliseconds Returns: - Response dictionary from Unity + Response dictionary or MCPResponse from Unity Uses config.reload_retry_ms and config.reload_max_retries by default. Preserves the structured failure if retries are exhausted. diff --git a/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Tools/DomainReloadResilienceTests.cs b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Tools/DomainReloadResilienceTests.cs new file mode 100644 index 00000000..332a46a3 --- /dev/null +++ b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Tools/DomainReloadResilienceTests.cs @@ -0,0 +1,267 @@ +using NUnit.Framework; +using UnityEngine; +using UnityEngine.TestTools; +using UnityEditor; +using System.Collections; +using System.IO; +using MCPForUnity.Editor.Tools; +using Newtonsoft.Json.Linq; + +namespace Tests.EditMode.Tools +{ + /// + /// Tests for domain reload resilience - ensuring MCP requests succeed even during Unity domain reloads. + /// + public class DomainReloadResilienceTests + { + private const string TempDir = "Assets/Temp/DomainReloadTests"; + + [SetUp] + public void Setup() + { + // Ensure temp directory exists + if (!AssetDatabase.IsValidFolder(TempDir)) + { + Directory.CreateDirectory(TempDir); + AssetDatabase.Refresh(); + } + } + + [TearDown] + public void TearDown() + { + // Clean up temp directory + if (AssetDatabase.IsValidFolder(TempDir)) + { + AssetDatabase.DeleteAsset(TempDir); + } + } + + /// + /// This test simulates the stress test scenario: + /// 1. Create a script (triggers domain reload) + /// 2. Make multiple rapid read_console calls + /// 3. Verify all calls succeed (no "No Unity plugins are currently connected" errors) + /// + /// Note: This test uses UnityTest coroutine to handle the async nature of domain reloads. + /// + [UnityTest] + public IEnumerator StressTest_CreateScriptAndReadConsoleMultipleTimes() + { + // Step 1: Create a script to trigger domain reload + var scriptPath = Path.Combine(TempDir, "StressTestScript.cs").Replace("\\", "/"); + var scriptContent = @"using UnityEngine; + +public class StressTestScript : MonoBehaviour +{ + void Start() { } +}"; + + // Write script file + File.WriteAllText(scriptPath, scriptContent); + AssetDatabase.Refresh(); + + Debug.Log("[DomainReloadTest] Script created, domain reload triggered"); + + // Wait a frame for the domain reload to start + yield return null; + + // Step 2: Make multiple rapid read_console calls + // These should succeed even during the reload window + int successCount = 0; + int totalCalls = 5; + + for (int i = 0; i < totalCalls; i++) + { + var request = new JObject + { + ["action"] = "get", + ["types"] = new JArray { "all" }, + ["count"] = 50, + ["format"] = "plain", + ["includeStacktrace"] = false + }; + + var result = ExecuteReadConsole(request); + + // Check if the call succeeded + if (result != null && result["success"]?.Value() == true) + { + successCount++; + Debug.Log($"[DomainReloadTest] read_console call {i+1}/{totalCalls} succeeded"); + } + else + { + var error = result?["error"]?.ToString() ?? "Unknown error"; + Debug.LogError($"[DomainReloadTest] read_console call {i+1}/{totalCalls} failed: {error}"); + } + + // Small delay between calls to simulate rapid-fire scenario + yield return WaitFrames(6); + } + + // Step 3: Verify all calls succeeded + Debug.Log($"[DomainReloadTest] {successCount}/{totalCalls} read_console calls succeeded"); + Assert.AreEqual(totalCalls, successCount, + $"Expected all {totalCalls} read_console calls to succeed during domain reload, but only {successCount} succeeded"); + } + + /// + /// Test that read_console works reliably after a domain reload completes. + /// + [Test] + public void ReadConsole_AfterDomainReload_Succeeds() + { + // This test assumes domain reload has already completed + // (Unity tests run after domain reload) + + var request = new JObject + { + ["action"] = "get", + ["types"] = new JArray { "error", "warning", "log" }, + ["count"] = 10, + ["format"] = "plain" + }; + + var result = ExecuteReadConsole(request); + + Assert.IsNotNull(result, "read_console should return a result"); + Assert.IsTrue(result["success"]?.Value() ?? false, + $"read_console should succeed: {result["error"]}"); + } + + /// + /// Test creating a script and immediately querying console logs. + /// This simulates a common AI workflow pattern. + /// + [UnityTest] + public IEnumerator CreateScript_ThenQueryConsole_Succeeds() + { + // Create a simple script + var scriptPath = Path.Combine(TempDir, "TestScript1.cs").Replace("\\", "/"); + var scriptContent = @"using UnityEngine; + +public class TestScript1 : MonoBehaviour +{ + void Start() + { + Debug.Log(""TestScript1 initialized""); + } +}"; + + File.WriteAllText(scriptPath, scriptContent); + AssetDatabase.Refresh(); + + Debug.Log("[DomainReloadTest] Script created"); + + // Wait a frame + yield return null; + + // Immediately try to read console + var request = new JObject + { + ["action"] = "get", + ["types"] = new JArray { "all" }, + ["count"] = 50, + ["format"] = "plain" + }; + + var result = ExecuteReadConsole(request); + + // Should succeed even if domain reload is happening + Assert.IsNotNull(result, "read_console should return a result"); + Assert.IsTrue(result["success"]?.Value() ?? false, + $"read_console should succeed even during/after script creation: {result["error"]}"); + } + + /// + /// Test rapid script creation followed by console reads. + /// This is an even more aggressive stress test. + /// + [UnityTest] + public IEnumerator RapidScriptCreation_WithConsoleReads_AllSucceed() + { + int scriptCount = 3; + int consoleReadsPerScript = 2; + int successCount = 0; + int totalExpectedReads = scriptCount * consoleReadsPerScript; + + for (int i = 0; i < scriptCount; i++) + { + // Create script + var scriptPath = Path.Combine(TempDir, $"RapidScript{i}.cs").Replace("\\", "/"); + var scriptContent = $@"using UnityEngine; + +public class RapidScript{i} : MonoBehaviour +{{ + void Start() {{ Debug.Log(""RapidScript{i}""); }} +}}"; + + File.WriteAllText(scriptPath, scriptContent); + AssetDatabase.Refresh(); + + Debug.Log($"[DomainReloadTest] Created script {i+1}/{scriptCount}"); + + // Immediately try console reads + for (int j = 0; j < consoleReadsPerScript; j++) + { + var request = new JObject + { + ["action"] = "get", + ["types"] = new JArray { "all" }, + ["count"] = 20, + ["format"] = "plain" + }; + + var result = ExecuteReadConsole(request); + + if (result != null && result["success"]?.Value() == true) + { + successCount++; + } + else + { + var error = result?["error"]?.ToString() ?? "Unknown error"; + Debug.LogError($"[DomainReloadTest] Console read failed: {error}"); + } + + yield return WaitFrames(3); + } + + // Brief wait between script creations + yield return WaitFrames(12); + } + + Debug.Log($"[DomainReloadTest] {successCount}/{totalExpectedReads} console reads succeeded"); + + // We expect at least 80% success rate (some may fail due to timing, but resilience should help most) + int minExpectedSuccess = (int)(totalExpectedReads * 0.8f); + Assert.GreaterOrEqual(successCount, minExpectedSuccess, + $"Expected at least {minExpectedSuccess} console reads to succeed, but only {successCount} succeeded"); + } + + private static JObject ExecuteReadConsole(JObject request) + { + var raw = ReadConsole.HandleCommand(request); + if (raw == null) + { + return new JObject + { + ["success"] = false, + ["error"] = "ReadConsole returned null" + }; + } + + return JObject.FromObject(raw); + } + + private static IEnumerator WaitFrames(int frameCount) + { + for (int i = 0; i < frameCount; i++) + { + yield return null; + } + } + } +} + diff --git a/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Tools/DomainReloadResilienceTests.cs.meta b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Tools/DomainReloadResilienceTests.cs.meta new file mode 100644 index 00000000..86ead2c8 --- /dev/null +++ b/TestProjects/UnityMCPTests/Assets/Tests/EditMode/Tools/DomainReloadResilienceTests.cs.meta @@ -0,0 +1,11 @@ +fileFormatVersion: 2 +guid: a3d4e0a7c0814f75a3cf7d1eee7bcdd1 +MonoImporter: + externalObjects: {} + serializedVersion: 2 + defaultReferences: [] + executionOrder: 0 + icon: {instanceID: 0} + userData: + assetBundleName: + assetBundleVariant: