Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ class MCPResponse(BaseModel):
message: str | None = None
error: str | None = None
data: Any | None = None
# Optional hint for clients about how to handle the response.
# Supported values:
# - "retry": Unity is temporarily reloading; call should be retried politely.
hint: str | None = None


class UnityInstanceInfo(BaseModel):
Expand Down
70 changes: 62 additions & 8 deletions Server/plugin_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@

import asyncio
import logging
import time
import uuid
from typing import Any, Dict, Optional

from starlette.endpoints import WebSocketEndpoint
from starlette.websockets import WebSocket

from config import config
from plugin_registry import PluginRegistry

logger = logging.getLogger("mcp-for-unity-server")
Expand Down Expand Up @@ -204,19 +206,71 @@ async def _get_connection(cls, session_id: str) -> WebSocket:
# ------------------------------------------------------------------
@classmethod
async def _resolve_session_id(cls, unity_instance: Optional[str]) -> str:
"""Resolve a project hash (Unity instance id) to an active plugin session.

During Unity domain reloads the plugin's WebSocket session is torn down
and reconnected shortly afterwards. Instead of failing immediately when
no sessions are available, we wait for a bounded period for a plugin
to reconnect so in-flight MCP calls can succeed transparently.
"""
if cls._registry is None:
raise RuntimeError("Plugin registry not configured")

if unity_instance:
session_id = await cls._registry.get_session_id_by_hash(unity_instance)
if session_id:
return session_id
# Use the same defaults as the stdio transport reload handling so that
# HTTP/WebSocket and TCP behave consistently without per-project env.
max_retries = max(1, int(getattr(config, "reload_max_retries", 40)))
retry_ms = float(getattr(config, "reload_retry_ms", 250))
sleep_seconds = max(0.05, retry_ms / 1000.0)

async def _try_once() -> Optional[str]:
# Prefer a specific Unity instance if one was requested
if unity_instance:
session_id = await cls._registry.get_session_id_by_hash(unity_instance)
if session_id:
return session_id

# Fallback: use the first available plugin session, if any
sessions = await cls._registry.list_sessions()
if not sessions:
return None
# Deterministic order: rely on insertion ordering
return next(iter(sessions.keys()))
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Wrong Unity instance selected when specific instance unavailable

When a specific unity_instance is requested but not found, _try_once falls through to the fallback logic and returns any available session instead of None. This causes commands intended for a specific Unity project to be routed to a different project. The function should return None immediately after get_session_id_by_hash returns None for a specific instance, allowing the retry loop to wait for the correct instance to reconnect rather than silently connecting to the wrong project.

Fix in Cursor Fix in Web


session_id = await _try_once()
deadline = time.monotonic() + (max_retries * sleep_seconds)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

logic: the deadline calculation should account for the time taken by the first _try_once() call

currently if _try_once() takes significant time, the retry loop gets less time than intended

Suggested change
deadline = time.monotonic() + (max_retries * sleep_seconds)
session_id = await _try_once()
wait_started_monotonic = time.monotonic()
deadline = wait_started_monotonic + (max_retries * sleep_seconds)
Prompt To Fix With AI
This is a comment left during a code review.
Path: Server/plugin_hub.py
Line: 240:240

Comment:
**logic:** the deadline calculation should account for the time taken by the first `_try_once()` call

currently if `_try_once()` takes significant time, the retry loop gets less time than intended

```suggestion
        session_id = await _try_once()
        wait_started_monotonic = time.monotonic()
        deadline = wait_started_monotonic + (max_retries * sleep_seconds)
```

How can I resolve this? If you propose a fix, please make it concise.

wait_started = None

# If there is no active plugin yet (e.g., Unity starting up or reloading),
# wait politely for a session to appear before surfacing an error.
while session_id is None and time.monotonic() < deadline:
if wait_started is None:
wait_started = time.monotonic()
logger.debug(
"No plugin session available (instance=%s); waiting up to %.2fs",
unity_instance or "default",
deadline - wait_started,
)
await asyncio.sleep(sleep_seconds)
session_id = await _try_once()

if session_id is not None and wait_started is not None:
logger.debug(
"Plugin session restored after %.3fs (instance=%s)",
time.monotonic() - wait_started,
unity_instance or "default",
)

sessions = await cls._registry.list_sessions()
if not sessions:
if session_id is None:
logger.warning(
"No Unity plugin reconnected within %.2fs (instance=%s)",
max_retries * sleep_seconds,
unity_instance or "default",
)
# At this point we've given the plugin ample time to reconnect; surface
# a clear error so the client can prompt the user to open Unity.
raise RuntimeError("No Unity plugins are currently connected")
# Deterministic order: rely on insertion ordering
return next(iter(sessions.keys()))

return session_id

@classmethod
async def send_command_for_instance(
Expand Down
225 changes: 225 additions & 0 deletions Server/tests/integration/test_domain_reload_resilience.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
"""
Integration test for domain reload resilience.

Tests that the MCP server can handle rapid-fire requests during Unity domain reloads
by waiting for the plugin to reconnect instead of failing immediately.
"""
import asyncio
import pytest
from unittest.mock import AsyncMock, patch
from datetime import datetime

from .test_helpers import DummyContext


@pytest.mark.asyncio
async def test_plugin_hub_waits_for_reconnection_during_reload():
"""Test that PluginHub._resolve_session_id waits for plugin reconnection."""
# Import after conftest stubs are set up
from plugin_hub import PluginHub
from plugin_registry import PluginRegistry, PluginSession

# Create a mock registry
mock_registry = AsyncMock(spec=PluginRegistry)

# Simulate plugin reconnection sequence:
# First 2 calls: no sessions (plugin disconnected)
# Third call: session appears (plugin reconnected)
call_count = [0]

async def mock_list_sessions():
call_count[0] += 1
if call_count[0] <= 2:
# Plugin not yet reconnected
return {}
else:
# Plugin reconnected
now = datetime.now()
session = PluginSession(
session_id="test-session-123",
project_name="TestProject",
project_hash="abc123",
unity_version="2022.3.0f1",
registered_at=now,
connected_at=now
)
return {"test-session-123": session}

mock_registry.list_sessions = mock_list_sessions

# Configure PluginHub with our mock while preserving the original state
original_registry = PluginHub._registry
original_lock = PluginHub._lock
PluginHub._registry = mock_registry
PluginHub._lock = asyncio.Lock()

try:
# Call _resolve_session_id when no session is available
# It should wait and retry until the session appears
session_id = await PluginHub._resolve_session_id(unity_instance=None)

# Should have retried and eventually found the session
assert session_id == "test-session-123"
assert call_count[0] >= 3 # Should have tried at least 3 times

finally:
# Clean up: restore original PluginHub state
PluginHub._registry = original_registry
PluginHub._lock = original_lock


@pytest.mark.asyncio
async def test_plugin_hub_fails_after_timeout():
"""Test that PluginHub._resolve_session_id eventually times out if plugin never reconnects."""
from plugin_hub import PluginHub
from plugin_registry import PluginRegistry

# Create a mock registry that never returns sessions
mock_registry = AsyncMock(spec=PluginRegistry)

async def mock_list_sessions():
return {} # Never returns sessions

mock_registry.list_sessions = mock_list_sessions

# Configure PluginHub with our mock while preserving the original state
original_registry = PluginHub._registry
original_lock = PluginHub._lock
PluginHub._registry = mock_registry
PluginHub._lock = asyncio.Lock()

# Temporarily override config for a short timeout
with patch('plugin_hub.config') as mock_config:
mock_config.reload_max_retries = 3 # Only 3 retries
mock_config.reload_retry_ms = 10 # 10ms between retries

try:
# Should raise RuntimeError after timeout
with pytest.raises(RuntimeError, match="No Unity plugins are currently connected"):
await PluginHub._resolve_session_id(unity_instance=None)
finally:
# Clean up: restore original PluginHub state
PluginHub._registry = original_registry
PluginHub._lock = original_lock


@pytest.mark.asyncio
async def test_read_console_during_simulated_reload(monkeypatch):
"""
Simulate the stress test: create script (triggers reload) + rapid read_console calls.

This test simulates what happens when:
1. A script is created (triggering domain reload)
2. Multiple read_console calls are made immediately
3. The plugin disconnects and reconnects during those calls
"""
# Setup tools
from tools.read_console import read_console

call_count = [0]

async def fake_send_command(*args, **kwargs):
"""Simulate successful command execution."""
call_count[0] += 1
return {
"success": True,
"message": f"Retrieved {call_count[0]} log entries.",
"data": ["<b><color=#2EA3FF>MCP-FOR-UNITY</color></b>: Auto-discovered 10 tools"]
}

# Patch the async_send_command_with_retry directly
import tools.read_console
monkeypatch.setattr(
tools.read_console,
"async_send_command_with_retry",
fake_send_command
)

# Run multiple read_console calls rapidly (simulating the stress test)
results = []
for i in range(5):
result = await read_console(
ctx=DummyContext(),
action="get",
types=["all"],
count=50,
format="plain",
include_stacktrace=False
)
results.append(result)

# All calls should succeed
assert len(results) == 5
for i, result in enumerate(results):
assert result["success"] is True, f"Call {i+1} failed with result: {result}"
assert "data" in result

# At least 5 calls should have been made
assert call_count[0] == 5


@pytest.mark.asyncio
async def test_plugin_hub_respects_unity_instance_preference():
"""Test that _resolve_session_id prefers a specific Unity instance if requested."""
from plugin_hub import PluginHub
from plugin_registry import PluginRegistry, PluginSession

# Create a mock registry with two sessions
mock_registry = AsyncMock(spec=PluginRegistry)

now = datetime.now()
session1 = PluginSession(
session_id="session-1",
project_name="Project1",
project_hash="hash1",
unity_version="2022.3.0f1",
registered_at=now,
connected_at=now
)
session2 = PluginSession(
session_id="session-2",
project_name="Project2",
project_hash="hash2",
unity_version="2022.3.0f1",
registered_at=now,
connected_at=now
)

async def mock_list_sessions():
return {
"session-1": session1,
"session-2": session2
}

async def mock_get_session_by_hash(project_hash):
if project_hash == "hash2":
return "session-2"
return None

mock_registry.list_sessions = mock_list_sessions
mock_registry.get_session_id_by_hash = mock_get_session_by_hash

# Configure PluginHub with our mock while preserving the original state
original_registry = PluginHub._registry
original_lock = PluginHub._lock
PluginHub._registry = mock_registry
PluginHub._lock = asyncio.Lock()

try:
# Request specific Unity instance
session_id = await PluginHub._resolve_session_id(unity_instance="hash2")

# Should return the requested instance
assert session_id == "session-2"

# Request default (no specific instance)
session_id = await PluginHub._resolve_session_id(unity_instance=None)

# Should return first available session
assert session_id in ["session-1", "session-2"]

finally:
# Clean up: restore original PluginHub state
PluginHub._registry = original_registry
PluginHub._lock = original_lock

34 changes: 24 additions & 10 deletions Server/unity_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,14 +690,28 @@ def get_unity_connection(instance_identifier: Optional[str] = None) -> UnityConn
# Centralized retry helpers
# -----------------------------

def _is_reloading_response(resp: dict) -> bool:
"""Return True if the Unity response indicates the editor is reloading."""
if not isinstance(resp, dict):
return False
if resp.get("state") == "reloading":
return True
message_text = (resp.get("message") or resp.get("error") or "").lower()
return "reload" in message_text
def _is_reloading_response(resp: object) -> bool:
"""Return True if the Unity response indicates the editor is reloading.

Supports both raw dict payloads from Unity and MCPResponse objects returned
by preflight checks or transport helpers.
"""
# Structured MCPResponse from preflight/transport
if isinstance(resp, MCPResponse):
# Explicit "please retry" hint from preflight
if getattr(resp, "hint", None) == "retry":
return True
message_text = f"{resp.message or ''} {resp.error or ''}".lower()
return "reload" in message_text

# Raw Unity payloads
if isinstance(resp, dict):
if resp.get("state") == "reloading":
return True
message_text = (resp.get("message") or resp.get("error") or "").lower()
return "reload" in message_text

return False


def send_command_with_retry(
Expand All @@ -707,7 +721,7 @@ def send_command_with_retry(
instance_id: Optional[str] = None,
max_retries: int | None = None,
retry_ms: int | None = None
) -> Dict[str, Any]:
) -> dict[str, Any] | MCPResponse:
"""Send a command to a Unity instance, waiting politely through Unity reloads.

Args:
Expand All @@ -718,7 +732,7 @@ def send_command_with_retry(
retry_ms: Delay between retries in milliseconds

Returns:
Response dictionary from Unity
Response dictionary or MCPResponse from Unity

Uses config.reload_retry_ms and config.reload_max_retries by default. Preserves the
structured failure if retries are exhausted.
Expand Down
Loading
Loading