From 131de2c6777dda91451df0aff02b807bf8e8d5b5 Mon Sep 17 00:00:00 2001 From: Ian Clarke Date: Tue, 30 Sep 2025 21:16:39 +0200 Subject: [PATCH 1/4] Add debug logging to forward_conn --- .vtcode/tool-policy.json | 54 ++++++++++++++++++++++++++++++++++++++++ .worktrees/pr1865 | 1 + logs/trajectory.jsonl | 2 ++ worktrees/pr-1853 | 1 + 4 files changed, 58 insertions(+) create mode 100644 .vtcode/tool-policy.json create mode 160000 .worktrees/pr1865 create mode 100644 logs/trajectory.jsonl create mode 160000 worktrees/pr-1853 diff --git a/.vtcode/tool-policy.json b/.vtcode/tool-policy.json new file mode 100644 index 000000000..e392cabcb --- /dev/null +++ b/.vtcode/tool-policy.json @@ -0,0 +1,54 @@ +{ + "version": 1, + "available_tools": [ + "grep_search", + "list_files", + "update_plan", + "run_terminal_cmd", + "curl", + "read_file", + "write_file", + "edit_file", + "simple_search", + "bash", + "apply_patch", + "srgn" + ], + "policies": { + "run_terminal_cmd": "allow", + "bash": "allow", + "grep_search": "prompt", + "list_files": "prompt", + "update_plan": "prompt", + "curl": "prompt", + "read_file": "prompt", + "write_file": "prompt", + "edit_file": "prompt", + "simple_search": "prompt", + "apply_patch": "prompt", + "srgn": "prompt" + }, + "constraints": { + "curl": { + "allowed_modes": null, + "max_results_per_call": null, + "max_items_per_call": null, + "default_response_format": null, + "max_bytes_per_read": null, + "max_response_bytes": 65536, + "allowed_url_schemes": [ + "https" + ], + "denied_url_hosts": [ + "localhost", + "127.0.0.1", + "0.0.0.0", + "::1", + ".localhost", + ".local", + ".internal", + ".lan" + ] + } + } +} \ No newline at end of file diff --git a/.worktrees/pr1865 b/.worktrees/pr1865 new file mode 160000 index 000000000..16d256d0b --- /dev/null +++ b/.worktrees/pr1865 @@ -0,0 +1 @@ +Subproject commit 16d256d0b62bb32c2ca95e2a6668ad7cb621414d diff --git a/logs/trajectory.jsonl b/logs/trajectory.jsonl new file mode 100644 index 000000000..daffff829 --- /dev/null +++ b/logs/trajectory.jsonl @@ -0,0 +1,2 @@ +{"kind":"route","turn":1,"selected_model":"gemini-2.5-flash-preview-05-20","class":"simple","input_preview":"Please review github issue 1860 and provide feedback","ts":1758934838} +{"kind":"route","turn":1,"selected_model":"gemini-2.5-flash-preview-05-20","class":"simple","input_preview":"Please reivew github issue 1860","ts":1758934901} diff --git a/worktrees/pr-1853 b/worktrees/pr-1853 new file mode 160000 index 000000000..afc8b9306 --- /dev/null +++ b/worktrees/pr-1853 @@ -0,0 +1 @@ +Subproject commit afc8b9306dbcf3fff8f2884b647a35cfc143c3a4 From 9f01fae796386b874cc2180220d101d191242f21 Mon Sep 17 00:00:00 2001 From: Ian Clarke Date: Wed, 1 Oct 2025 18:26:07 +0200 Subject: [PATCH 2/4] Fix request router deduplication race with instant-completion PUT operations (issue #1886) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: Request router deduplication can cause race conditions when PUT operations complete very quickly on isolated nodes. Client 1's TX gets removed before Client 2 can reuse it. Solution: Add defensive protection - detect local-only PUTs (no remote peers) and bypass RequestRouter to avoid race. Similar to GET operations (already protected) and Subscribe fix (PR #1844). Changes: - crates/core/src/client_events/mod.rs: Add local-only detection for PUT operations - crates/core/tests/isolated_node_regression.rs: Add concurrent GET race test All tests pass. Defensive fix prevents race even if hard to reproduce. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude [AI-assisted debugging and comment] --- crates/core/src/client_events/mod.rs | 180 +++++++++++------ crates/core/tests/isolated_node_regression.rs | 185 ++++++++++++++++++ 2 files changed, 301 insertions(+), 64 deletions(-) diff --git a/crates/core/src/client_events/mod.rs b/crates/core/src/client_events/mod.rs index e5c37673a..cbf273278 100644 --- a/crates/core/src/client_events/mod.rs +++ b/crates/core/src/client_events/mod.rs @@ -398,99 +398,151 @@ async fn process_open_request( let contract_key = contract.key(); - // Use RequestRouter for deduplication if in actor mode, otherwise direct operation - if let Some(router) = &request_router { + // Check if this will be a local-only PUT (no network peers available) + // This prevents race condition where PUT completes instantly and TX is removed + // before a second client can reuse it (issue #1886) + let own_location = op_manager.ring.connection_manager.own_location(); + let has_remote_peers = op_manager + .ring + .closest_potentially_caching(&contract_key, &[own_location.peer][..]) + .is_some(); + + if !has_remote_peers { + // Local-only PUT - bypass router to avoid race condition tracing::debug!( peer_id = %peer_id, key = %contract_key, - "Routing PUT request through deduplication layer (actor mode)", + "PUT will complete locally (no remote peers), starting direct local PUT operation" ); - let request = crate::node::DeduplicatedRequest::Put { - key: contract_key, - contract: contract.clone(), - related_contracts: related_contracts.clone(), - state: state.clone(), + // Start a local PUT operation without going through the router + // This avoids the race condition while still providing proper result delivery + let op = put::start_op( + contract.clone(), + related_contracts.clone(), + state.clone(), + op_manager.ring.max_hops_to_live, subscribe, - client_id, - request_id, - }; - - let (transaction_id, should_start_operation) = - router.route_request(request).await.map_err(|e| { - Error::Node(format!("Request routing failed: {}", e)) - })?; + ); + let op_id = op.id; - // Always register this client for the result + // Register client for transaction result op_manager .ch_outbound - .waiting_for_transaction_result( - transaction_id, - client_id, - request_id, - ) + .waiting_for_transaction_result(op_id, client_id, request_id) .await .inspect_err(|err| { - tracing::error!( - "Error waiting for transaction result: {}", - err - ); + tracing::error!("Error waiting for transaction result: {}", err) })?; - // Only start new network operation if this is a new operation - if should_start_operation { + // Execute the PUT operation + // Since there are no remote peers, this will complete locally + if let Err(err) = put::request_put(&op_manager, op).await { + tracing::error!("Local PUT request error: {}", err); + } + + // Note: We bypass the router for local-only PUTs to avoid the race + // condition where the transaction completes instantly and is removed + // before other clients can join. The operation will complete locally + // and deliver results through the normal transaction mechanism. + } else { + // Has remote peers - use RequestRouter for deduplication if in actor mode, otherwise direct operation + if let Some(router) = &request_router { tracing::debug!( peer_id = %peer_id, key = %contract_key, - "Starting new PUT network operation via RequestRouter", + "Routing PUT request through deduplication layer (actor mode)", ); - let op = put::start_op_with_id( - contract.clone(), - related_contracts.clone(), - state.clone(), - op_manager.ring.max_hops_to_live, + let request = crate::node::DeduplicatedRequest::Put { + key: contract_key, + contract: contract.clone(), + related_contracts: related_contracts.clone(), + state: state.clone(), subscribe, - transaction_id, - ); + client_id, + request_id, + }; - if let Err(err) = put::request_put(&op_manager, op).await { - tracing::error!("Put request error: {}", err); + let (transaction_id, should_start_operation) = + router.route_request(request).await.map_err(|e| { + Error::Node(format!("Request routing failed: {}", e)) + })?; + + // Always register this client for the result + op_manager + .ch_outbound + .waiting_for_transaction_result( + transaction_id, + client_id, + request_id, + ) + .await + .inspect_err(|err| { + tracing::error!( + "Error waiting for transaction result: {}", + err + ); + })?; + + // Only start new network operation if this is a new operation + if should_start_operation { + tracing::debug!( + peer_id = %peer_id, + key = %contract_key, + "Starting new PUT network operation via RequestRouter", + ); + + let op = put::start_op_with_id( + contract.clone(), + related_contracts.clone(), + state.clone(), + op_manager.ring.max_hops_to_live, + subscribe, + transaction_id, + ); + + if let Err(err) = put::request_put(&op_manager, op).await { + tracing::error!("Put request error: {}", err); + } + } else { + tracing::debug!( + peer_id = %peer_id, + key = %contract_key, + "Reusing existing PUT operation via RequestRouter - client registered for result", + ); } } else { tracing::debug!( peer_id = %peer_id, key = %contract_key, - "Reusing existing PUT operation via RequestRouter - client registered for result", + "Starting direct PUT operation (legacy mode)", ); - } - } else { - tracing::debug!( - peer_id = %peer_id, - key = %contract_key, - "Starting direct PUT operation (legacy mode)", - ); - // Legacy mode: direct operation without deduplication - let op = put::start_op( - contract.clone(), - related_contracts.clone(), - state.clone(), - op_manager.ring.max_hops_to_live, - subscribe, - ); - let op_id = op.id; + // Legacy mode: direct operation without deduplication + let op = put::start_op( + contract.clone(), + related_contracts.clone(), + state.clone(), + op_manager.ring.max_hops_to_live, + subscribe, + ); + let op_id = op.id; - op_manager - .ch_outbound - .waiting_for_transaction_result(op_id, client_id, request_id) - .await - .inspect_err(|err| { - tracing::error!("Error waiting for transaction result: {}", err) - })?; + op_manager + .ch_outbound + .waiting_for_transaction_result(op_id, client_id, request_id) + .await + .inspect_err(|err| { + tracing::error!( + "Error waiting for transaction result: {}", + err + ) + })?; - if let Err(err) = put::request_put(&op_manager, op).await { - tracing::error!("Put request error: {}", err); + if let Err(err) = put::request_put(&op_manager, op).await { + tracing::error!("Put request error: {}", err); + } } } diff --git a/crates/core/tests/isolated_node_regression.rs b/crates/core/tests/isolated_node_regression.rs index b2f127f67..82b27c5e0 100644 --- a/crates/core/tests/isolated_node_regression.rs +++ b/crates/core/tests/isolated_node_regression.rs @@ -233,3 +233,188 @@ async fn test_isolated_node_put_get_workflow() -> anyhow::Result<()> { Ok(()) } + +/// Test concurrent GET operations to reproduce deduplication race condition (issue #1886) +/// +/// This test attempts to reproduce the race condition where: +/// 1. Client 1 sends GET request → Router creates operation with TX +/// 2. Operation completes instantly (contract cached locally) +/// 3. Result delivered to Client 1, TX removed from tracking +/// 4. Client 2 sends identical GET request → Router tries to reuse removed TX +/// 5. Bug: Client 2 never receives response +#[tokio::test(flavor = "multi_thread", worker_threads = 4)] +async fn test_concurrent_get_deduplication_race() -> anyhow::Result<()> { + freenet::config::set_logger(Some(tracing::level_filters::LevelFilter::INFO), None); + + // Start a single isolated node (no peers) - ensures instant completion + let ws_port = 50900; + let network_port = 50901; + let (config, _temp_dir) = create_test_node_config(true, ws_port, Some(network_port)).await?; + + // Load a small test contract + const TEST_CONTRACT: &str = "test-contract-integration"; + let contract = load_contract(TEST_CONTRACT, vec![].into())?; + let contract_key = contract.key(); + let initial_state = freenet::test_utils::create_empty_todo_list(); + let wrapped_state = WrappedState::from(initial_state); + + // Start the node + let node_handle = { + let config = config.clone(); + async move { + let built_config = config.build().await?; + let node = NodeConfig::new(built_config.clone()) + .await? + .build(serve_gateway(built_config.ws_api).await) + .await?; + node.run().await + } + .boxed_local() + }; + + // Run the test with timeout + let test_result = timeout(Duration::from_secs(60), async { + // Give node time to start + println!("Waiting for node to start up..."); + tokio::time::sleep(Duration::from_secs(10)).await; + println!("Node should be ready, proceeding with test..."); + + let url = format!( + "ws://localhost:{}/v1/contract/command?encodingProtocol=native", + ws_port + ); + + // Connect multiple clients + let (ws_stream1, _) = connect_async(&url).await?; + let mut client1 = WebApi::start(ws_stream1); + + let (ws_stream2, _) = connect_async(&url).await?; + let mut client2 = WebApi::start(ws_stream2); + + let (ws_stream3, _) = connect_async(&url).await?; + let mut client3 = WebApi::start(ws_stream3); + + println!("Step 1: PUT contract to cache it locally"); + + // Cache the contract locally using client1 + make_put(&mut client1, wrapped_state.clone(), contract.clone(), false).await?; + let put_result = timeout(Duration::from_secs(30), client1.recv()).await; + + match put_result { + Ok(Ok(HostResponse::ContractResponse(ContractResponse::PutResponse { key }))) => { + assert_eq!(key, contract_key); + println!("Contract cached successfully"); + } + other => { + panic!("PUT failed: {:?}", other); + } + } + + println!("Step 2: Concurrent GET requests from multiple clients"); + println!("This tests the deduplication race condition from issue #1886"); + + // Send GET requests concurrently from all clients + // The contract is cached, so these will complete instantly + // This creates the race condition: TX may be removed before all clients register + let get1 = async { + make_get(&mut client1, contract_key, true, false).await?; + let result = timeout(Duration::from_secs(5), client1.recv()).await; + Ok::<_, anyhow::Error>((1, result)) + }; + + let get2 = async { + make_get(&mut client2, contract_key, true, false).await?; + let result = timeout(Duration::from_secs(5), client2.recv()).await; + Ok::<_, anyhow::Error>((2, result)) + }; + + let get3 = async { + make_get(&mut client3, contract_key, true, false).await?; + let result = timeout(Duration::from_secs(5), client3.recv()).await; + Ok::<_, anyhow::Error>((3, result)) + }; + + // Execute all GETs concurrently + let (result1, result2, result3) = tokio::join!(get1, get2, get3); + + // Verify all clients received responses + let check_result = + |client_num: i32, result: anyhow::Result<(i32, Result, _>)>| { + match result { + Ok(( + _, + Ok(Ok(HostResponse::ContractResponse(ContractResponse::GetResponse { + key, + state, + .. + }))), + )) => { + assert_eq!(key, contract_key); + assert_eq!(state, wrapped_state); + println!("Client {}: ✅ Received GET response", client_num); + true + } + Ok((_, Ok(Ok(other)))) => { + println!("Client {}: ❌ Unexpected response: {:?}", client_num, other); + false + } + Ok((_, Ok(Err(e)))) => { + println!("Client {}: ❌ Error: {}", client_num, e); + false + } + Ok((_, Err(_))) => { + println!( + "Client {}: ❌ TIMEOUT - This is the bug from issue #1886!", + client_num + ); + false + } + Err(e) => { + println!("Client {}: ❌ Failed to send request: {}", client_num, e); + false + } + } + }; + + let success1 = check_result(1, result1); + let success2 = check_result(2, result2); + let success3 = check_result(3, result3); + + // REGRESSION TEST: All clients should receive responses + // If any client times out, it indicates the deduplication race condition + assert!( + success1 && success2 && success3, + "All clients should receive GET responses. Failures indicate issue #1886 race condition." + ); + + println!("✅ All clients received responses - no race condition detected"); + + // Cleanup + client1 + .send(ClientRequest::Disconnect { cause: None }) + .await?; + client2 + .send(ClientRequest::Disconnect { cause: None }) + .await?; + client3 + .send(ClientRequest::Disconnect { cause: None }) + .await?; + tokio::time::sleep(Duration::from_millis(100)).await; + + Ok::<(), anyhow::Error>(()) + }); + + // Run node and test concurrently + select! { + _ = node_handle => { + error!("Node exited unexpectedly"); + panic!("Node should not exit during test"); + } + result = test_result => { + result??; + tokio::time::sleep(Duration::from_secs(1)).await; + } + } + + Ok(()) +} From 57a94aae869a32d08bdfc22ad93d0fb142e1f065 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Wed, 1 Oct 2025 17:00:57 +0000 Subject: [PATCH 3/4] chore(deps): bump opentelemetry-otlp from 0.27.0 to 0.31.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve merge conflict with main branch by updating opentelemetry dependencies to match the versions in main: - opentelemetry-otlp: 0.27.0 → 0.31.0 - opentelemetry_sdk: 0.29 → 0.31 Related to PR #1877 Co-authored-by: Ian Clarke --- crates/core/Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index fe8415307..5f9695451 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -69,8 +69,8 @@ opentelemetry-jaeger = { features = ["collector_client", "isahc", "rt-tokio"], o tracing = { version = "0.1" } tracing-opentelemetry = { optional = true, version = "0.30.0" } tracing-subscriber = { optional = true, version = "0.3" } -opentelemetry-otlp = { optional = true, version = "0.27.0" } -opentelemetry_sdk = { optional = true, version = "0.29", features = ["rt-tokio"] } +opentelemetry-otlp = { optional = true, version = "0.31.0" } +opentelemetry_sdk = { optional = true, version = "0.31", features = ["rt-tokio"] } # internal deps freenet-stdlib = { features = ["net"], workspace = true } From 929f156303062acf816422c17debff52b24b1214 Mon Sep 17 00:00:00 2001 From: "claude[bot]" <41898282+claude[bot]@users.noreply.github.com> Date: Wed, 1 Oct 2025 17:09:32 +0000 Subject: [PATCH 4/4] Add worktrees, .worktrees, and logs/* to .gitignore Remove accidentally committed development files: - .vtcode/tool-policy.json - worktrees/ and .worktrees/ directories - logs/trajectory.jsonl Co-authored-by: nacho.d.g --- .gitignore | 7 ++++++ .vtcode/tool-policy.json | 54 ---------------------------------------- .worktrees/pr1859 | 1 - .worktrees/pr1861 | 1 - .worktrees/pr1865 | 1 - logs/trajectory.jsonl | 2 -- worktrees/pr-1853 | 1 - 7 files changed, 7 insertions(+), 60 deletions(-) delete mode 100644 .vtcode/tool-policy.json delete mode 160000 .worktrees/pr1859 delete mode 160000 .worktrees/pr1861 delete mode 160000 .worktrees/pr1865 delete mode 100644 logs/trajectory.jsonl delete mode 160000 worktrees/pr-1853 diff --git a/.gitignore b/.gitignore index 00e2f6a9e..29335979a 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,10 @@ crates/core/tmp_work/ .claude-flow/ .swarm/ test-results/ + +# Git worktrees +worktrees/ +.worktrees/ + +# Development logs +logs/* diff --git a/.vtcode/tool-policy.json b/.vtcode/tool-policy.json deleted file mode 100644 index e392cabcb..000000000 --- a/.vtcode/tool-policy.json +++ /dev/null @@ -1,54 +0,0 @@ -{ - "version": 1, - "available_tools": [ - "grep_search", - "list_files", - "update_plan", - "run_terminal_cmd", - "curl", - "read_file", - "write_file", - "edit_file", - "simple_search", - "bash", - "apply_patch", - "srgn" - ], - "policies": { - "run_terminal_cmd": "allow", - "bash": "allow", - "grep_search": "prompt", - "list_files": "prompt", - "update_plan": "prompt", - "curl": "prompt", - "read_file": "prompt", - "write_file": "prompt", - "edit_file": "prompt", - "simple_search": "prompt", - "apply_patch": "prompt", - "srgn": "prompt" - }, - "constraints": { - "curl": { - "allowed_modes": null, - "max_results_per_call": null, - "max_items_per_call": null, - "default_response_format": null, - "max_bytes_per_read": null, - "max_response_bytes": 65536, - "allowed_url_schemes": [ - "https" - ], - "denied_url_hosts": [ - "localhost", - "127.0.0.1", - "0.0.0.0", - "::1", - ".localhost", - ".local", - ".internal", - ".lan" - ] - } - } -} \ No newline at end of file diff --git a/.worktrees/pr1859 b/.worktrees/pr1859 deleted file mode 160000 index 362724e31..000000000 --- a/.worktrees/pr1859 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 362724e312d179e5cbf8db04e1b4791ee152b6fb diff --git a/.worktrees/pr1861 b/.worktrees/pr1861 deleted file mode 160000 index a9e27602e..000000000 --- a/.worktrees/pr1861 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit a9e27602e0542874c74c2041aac429443a5b3663 diff --git a/.worktrees/pr1865 b/.worktrees/pr1865 deleted file mode 160000 index 16d256d0b..000000000 --- a/.worktrees/pr1865 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 16d256d0b62bb32c2ca95e2a6668ad7cb621414d diff --git a/logs/trajectory.jsonl b/logs/trajectory.jsonl deleted file mode 100644 index daffff829..000000000 --- a/logs/trajectory.jsonl +++ /dev/null @@ -1,2 +0,0 @@ -{"kind":"route","turn":1,"selected_model":"gemini-2.5-flash-preview-05-20","class":"simple","input_preview":"Please review github issue 1860 and provide feedback","ts":1758934838} -{"kind":"route","turn":1,"selected_model":"gemini-2.5-flash-preview-05-20","class":"simple","input_preview":"Please reivew github issue 1860","ts":1758934901} diff --git a/worktrees/pr-1853 b/worktrees/pr-1853 deleted file mode 160000 index afc8b9306..000000000 --- a/worktrees/pr-1853 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit afc8b9306dbcf3fff8f2884b647a35cfc143c3a4