From 6dce4963064a0983cf7051fce0bae36e5ce75353 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Thu, 16 Apr 2026 16:36:50 +0800 Subject: [PATCH 1/7] fix(tracer): prevent stale ctx.tracing crash on HTTPS keepalive connections --- apisix/tracer.lua | 20 +++++++-- t/node/tracer.t | 105 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 121 insertions(+), 4 deletions(-) create mode 100644 t/node/tracer.t diff --git a/apisix/tracer.lua b/apisix/tracer.lua index ca47730c5e60..fd326db48f20 100644 --- a/apisix/tracer.lua +++ b/apisix/tracer.lua @@ -39,7 +39,7 @@ function _M.start(ctx, name, kind) end local tracing = ctx.tracing - if not tracing then + if not tracing or not tracing.spans then tracing = tablepool.fetch("tracing", 0, 8) tracing.spans = tablepool.fetch("tracing_spans", 20, 0) ctx.tracing = tracing @@ -77,10 +77,22 @@ function _M.release(ctx) return end - for _, sp in ipairs(tracing.spans) do - sp:release() + -- Clear the reference first so that any error during pool release does not + -- leave a dangling pointer in ctx. This is especially important for HTTPS + -- keepalive connections where ngx.ctx is shared across requests: if this + -- line were placed after the tablepool.release() calls and something went + -- wrong before reaching it, the next request on the same connection would + -- inherit a stale tracing table (zeroed by the pool) and crash in span.new. + ctx.tracing = nil + + local spans = tracing.spans + if spans then + for _, sp in ipairs(spans) do + sp:release() + end + tablepool.release("tracing_spans", spans) + tracing.spans = nil end - tablepool.release("tracing_spans", tracing.spans) tablepool.release("tracing", tracing) end diff --git a/t/node/tracer.t b/t/node/tracer.t new file mode 100644 index 000000000000..ed44193cf0ff --- /dev/null +++ b/t/node/tracer.t @@ -0,0 +1,105 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +use t::APISIX 'no_plan'; + +repeat_each(1); +log_level('info'); +no_root_location(); +no_shuffle(); + +add_block_preprocessor(sub { + my ($block) = @_; + + if (!$block->extra_yaml_config) { + my $extra_yaml_config = <<_EOC_; +apisix: + tracing: true +_EOC_ + $block->set_value("extra_yaml_config", $extra_yaml_config); + } + + if (!$block->request) { + $block->set_value("request", "GET /t"); + } + + if (!defined $block->response_body) { + $block->set_value("response_body", "passed\n"); + } +}); + +run_tests; + +__DATA__ + +=== TEST 1: set SSL cert for test.com +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin") + local ssl_cert = t.read_file("t/certs/apisix.crt") + local ssl_key = t.read_file("t/certs/apisix.key") + local core = require("apisix.core") + local data = {cert = ssl_cert, key = ssl_key, sni = "test.com"} + local code, body = t.test('/apisix/admin/ssls/1', + ngx.HTTP_PUT, + core.json.encode(data), + [[{ + "value": { + "sni": "test.com" + }, + "key": "/apisix/ssls/1" + }]] + ) + ngx.status = code + ngx.say(body) + } + } + + + +=== TEST 2: set route +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/opentracing" + }]] + ) + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } + + + +=== TEST 3: consecutive HTTPS keepalive requests do not crash when tracing is enabled +--- exec +curl -s -k https://test.com:1994/opentracing https://test.com:1994/opentracing +--- response_body +opentracing +opentracing From 415b5709b81665ec6e5f482a7960ebae95d860e8 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Thu, 16 Apr 2026 17:11:36 +0800 Subject: [PATCH 2/7] chore: remove long comments --- apisix/tracer.lua | 6 ------ 1 file changed, 6 deletions(-) diff --git a/apisix/tracer.lua b/apisix/tracer.lua index fd326db48f20..869dc211d3da 100644 --- a/apisix/tracer.lua +++ b/apisix/tracer.lua @@ -77,12 +77,6 @@ function _M.release(ctx) return end - -- Clear the reference first so that any error during pool release does not - -- leave a dangling pointer in ctx. This is especially important for HTTPS - -- keepalive connections where ngx.ctx is shared across requests: if this - -- line were placed after the tablepool.release() calls and something went - -- wrong before reaching it, the next request on the same connection would - -- inherit a stale tracing table (zeroed by the pool) and crash in span.new. ctx.tracing = nil local spans = tracing.spans From 51d05e702326d47f63a308d935f7f3ececbe8249 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Fri, 17 Apr 2026 11:01:41 +0800 Subject: [PATCH 3/7] fix: remove expensive op dereference table --- apisix/tracer.lua | 3 --- 1 file changed, 3 deletions(-) diff --git a/apisix/tracer.lua b/apisix/tracer.lua index 869dc211d3da..398911bf5897 100644 --- a/apisix/tracer.lua +++ b/apisix/tracer.lua @@ -77,15 +77,12 @@ function _M.release(ctx) return end - ctx.tracing = nil - local spans = tracing.spans if spans then for _, sp in ipairs(spans) do sp:release() end tablepool.release("tracing_spans", spans) - tracing.spans = nil end tablepool.release("tracing", tracing) end From 90f0a9109968a44aa37f78af3f9a9297c5f55f96 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Tue, 21 Apr 2026 22:39:26 +0800 Subject: [PATCH 4/7] fix: set ctx.tracing to nil and add test case for http2 --- apisix/tracer.lua | 1 + t/node/tracer.t | 9 +++++++++ 2 files changed, 10 insertions(+) diff --git a/apisix/tracer.lua b/apisix/tracer.lua index 398911bf5897..d01aa53de4c6 100644 --- a/apisix/tracer.lua +++ b/apisix/tracer.lua @@ -85,6 +85,7 @@ function _M.release(ctx) tablepool.release("tracing_spans", spans) end tablepool.release("tracing", tracing) + ctx.tracing = nil end diff --git a/t/node/tracer.t b/t/node/tracer.t index ed44193cf0ff..6b15c0edbed7 100644 --- a/t/node/tracer.t +++ b/t/node/tracer.t @@ -103,3 +103,12 @@ curl -s -k https://test.com:1994/opentracing https://test.com:1994/opentracing --- response_body opentracing opentracing + + + +=== TEST 4: concurrent HTTP/2 requests do not crash when tracing is enabled +--- exec +curl -s -k --http2 --parallel https://test.com:1994/opentracing https://test.com:1994/opentracing +--- response_body +opentracing +opentracing From ab3dc791d607cc3b8b541ff9c02ca03750ecfb40 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Fri, 24 Apr 2026 01:33:15 +0800 Subject: [PATCH 5/7] fix(tracer): isolate per-request tracing from SSL-phase ctx using rawget/rawset --- apisix/init.lua | 3 ++ apisix/tracer.lua | 20 ++++++------- t/lib/test_otel.lua | 46 ++++++++++++++++++++++++++++++ t/node/tracer.t | 11 +------ t/plugin/opentelemetry6.t | 60 ++++++++++++++++++++++++++++++++++----- 5 files changed, 112 insertions(+), 28 deletions(-) diff --git a/apisix/init.lua b/apisix/init.lua index 639709cd3228..f88af75db38d 100644 --- a/apisix/init.lua +++ b/apisix/init.lua @@ -222,6 +222,7 @@ function _M.ssl_client_hello_phase() core.log.error("failed to match any SSL certificate by SNI: ", sni) span:set_status(tracer.status.ERROR, "no matched SSL") span:finish(ngx_ctx) + tracer.release(ngx_ctx) ngx_exit(-1) end @@ -230,6 +231,7 @@ function _M.ssl_client_hello_phase() core.log.error("failed to set ssl protocols: ", err) span:set_status(tracer.status.ERROR, "failed set protocols") span:finish(ngx_ctx) + tracer.release(ngx_ctx) ngx_exit(-1) end @@ -237,6 +239,7 @@ function _M.ssl_client_hello_phase() -- so that we can't get real SNI without recording it in ngx.ctx during client_hello phase ngx.ctx.client_hello_sni = sni span:finish(ngx_ctx) + tracer.release(ngx_ctx) end diff --git a/apisix/tracer.lua b/apisix/tracer.lua index d01aa53de4c6..4ccc371c173c 100644 --- a/apisix/tracer.lua +++ b/apisix/tracer.lua @@ -38,11 +38,11 @@ function _M.start(ctx, name, kind) return noop_span end - local tracing = ctx.tracing - if not tracing or not tracing.spans then + local tracing = rawget(ctx, "tracing") + if not tracing then tracing = tablepool.fetch("tracing", 0, 8) tracing.spans = tablepool.fetch("tracing_spans", 20, 0) - ctx.tracing = tracing + rawset(ctx, "tracing", tracing) -- create a dummy root span as the invisible parent of all top-level spans span.new(ctx, "root", nil) end @@ -72,20 +72,18 @@ end function _M.release(ctx) - local tracing = ctx.tracing + local tracing = rawget(ctx, "tracing") if not tracing then return end - local spans = tracing.spans - if spans then - for _, sp in ipairs(spans) do - sp:release() - end - tablepool.release("tracing_spans", spans) + ngx.log(ngx.DEBUG, "span_count:", #tracing.spans) + for _, sp in ipairs(tracing.spans) do + sp:release() end + tablepool.release("tracing_spans", tracing.spans) tablepool.release("tracing", tracing) - ctx.tracing = nil + rawset(ctx, "tracing", nil) end diff --git a/t/lib/test_otel.lua b/t/lib/test_otel.lua index 0bbbf1645c01..bb96d03ddf9b 100644 --- a/t/lib/test_otel.lua +++ b/t/lib/test_otel.lua @@ -132,4 +132,50 @@ function _M.verify_tree(filepath, expected_tree) end +function _M.verify_isolated_traces(filepath, root_name, count) + local spans_by_id, err = parse_spans(filepath) + if not spans_by_id then + return false, err + end + + local traces = {} + for _, span in pairs(spans_by_id) do + if not traces[span.traceId] then + traces[span.traceId] = {} + end + table.insert(traces[span.traceId], span.name) + end + + local matching = {} + for trace_id, names in pairs(traces) do + for _, name in ipairs(names) do + if name == root_name then + table.insert(matching, { id = trace_id, names = names }) + break + end + end + end + + if #matching ~= count then + return false, string.format( + "expected %d traces with span '%s', got %d", + count, root_name, #matching) + end + + for _, trace in ipairs(matching) do + local seen = {} + for _, name in ipairs(trace.names) do + if seen[name] then + return false, string.format( + "trace %s has duplicate span '%s': cross-stream contamination detected", + trace.id, name) + end + seen[name] = true + end + end + + return true +end + + return _M diff --git a/t/node/tracer.t b/t/node/tracer.t index 6b15c0edbed7..061c94b6cce3 100644 --- a/t/node/tracer.t +++ b/t/node/tracer.t @@ -17,7 +17,7 @@ use t::APISIX 'no_plan'; repeat_each(1); -log_level('info'); +log_level('debug'); no_root_location(); no_shuffle(); @@ -103,12 +103,3 @@ curl -s -k https://test.com:1994/opentracing https://test.com:1994/opentracing --- response_body opentracing opentracing - - - -=== TEST 4: concurrent HTTP/2 requests do not crash when tracing is enabled ---- exec -curl -s -k --http2 --parallel https://test.com:1994/opentracing https://test.com:1994/opentracing ---- response_body -opentracing -opentracing diff --git a/t/plugin/opentelemetry6.t b/t/plugin/opentelemetry6.t index 7504af34a647..9f68f88092ef 100644 --- a/t/plugin/opentelemetry6.t +++ b/t/plugin/opentelemetry6.t @@ -218,13 +218,6 @@ opentracing ["http.status_code"] = "200", }, children = { - { - name = "ssl_client_hello_phase", - kind = 2, - children = { - { name = "sni_radixtree_match", kind = 1 }, - } - }, { name = "apisix.phase.access", kind = 2, @@ -248,3 +241,56 @@ opentracing end } } + + + +=== TEST 7: clear file +--- exec +echo '' > ci/pod/otelcol-contrib/data-otlp.json +--- response_body eval +qr// + + + +=== TEST 8: trigger two concurrent HTTP/2 requests on the same TLS connection +--- init_by_lua_block + require "resty.core" + apisix = require("apisix") + core = require("apisix.core") + apisix.http_init() + + local utils = require("apisix.core.utils") + utils.dns_parse = function (domain) + if domain == "test1.com" then + return {address = "127.0.0.2"} + end + error("unknown domain: " .. domain) + end +--- exec +curl -sk --http2 --parallel --resolve "test.com:1994:127.0.0.1" https://test.com:1994/opentracing https://test.com:1994/opentracing +--- wait: 5 +--- response_body +opentracing +opentracing + + + +=== TEST 9: verify each HTTP/2 stream has its own isolated span set +--- config + location /t { + content_by_lua_block { + local otel = require("lib.test_otel") + + local ok, err = otel.verify_isolated_traces( + "ci/pod/otelcol-contrib/data-otlp.json", + "GET /opentracing", + 2 + ) + + if not ok then + ngx.say("FAIL:\n" .. err) + else + ngx.say("passed") + end + } + } From 2bf1f5a79afa55521edd89390492aec0fb6c447b Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Fri, 24 Apr 2026 01:35:21 +0800 Subject: [PATCH 6/7] fix: remove unused log --- apisix/tracer.lua | 1 - 1 file changed, 1 deletion(-) diff --git a/apisix/tracer.lua b/apisix/tracer.lua index 4ccc371c173c..2b8825039a01 100644 --- a/apisix/tracer.lua +++ b/apisix/tracer.lua @@ -77,7 +77,6 @@ function _M.release(ctx) return end - ngx.log(ngx.DEBUG, "span_count:", #tracing.spans) for _, sp in ipairs(tracing.spans) do sp:release() end From 7d335861efe60bb12845b3e5d91913b703543e89 Mon Sep 17 00:00:00 2001 From: janiussyafiq Date: Fri, 24 Apr 2026 23:54:27 +0800 Subject: [PATCH 7/7] chore: fix lint --- t/lib/test_otel.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/lib/test_otel.lua b/t/lib/test_otel.lua index bb96d03ddf9b..8deab1b9e79e 100644 --- a/t/lib/test_otel.lua +++ b/t/lib/test_otel.lua @@ -160,8 +160,8 @@ function _M.verify_isolated_traces(filepath, root_name, count) return false, string.format( "expected %d traces with span '%s', got %d", count, root_name, #matching) - end - + end + for _, trace in ipairs(matching) do local seen = {} for _, name in ipairs(trace.names) do