diff --git a/apisix/plugins/ai-rate-limiting.lua b/apisix/plugins/ai-rate-limiting.lua
index 8c7eea51aee9..cdf2d9fb140d 100644
--- a/apisix/plugins/ai-rate-limiting.lua
+++ b/apisix/plugins/ai-rate-limiting.lua
@@ -18,6 +18,12 @@ local require = require
 local setmetatable = setmetatable
 local ipairs = ipairs
 local type = type
+local pairs = pairs
+local pcall = pcall
+local load = load
+local tostring = tostring
+local math_floor = math.floor
+local math_huge = math.huge
 local core = require("apisix.core")
 local limit_count = require("apisix.plugins.limit-count.init")
 
@@ -61,10 +67,19 @@ local schema = {
         show_limit_quota_header = {type = "boolean", default = true},
         limit_strategy = {
             type = "string",
-            enum = {"total_tokens", "prompt_tokens", "completion_tokens"},
+            enum = {"total_tokens", "prompt_tokens", "completion_tokens", "expression"},
             default = "total_tokens",
             description = "The strategy to limit the tokens"
         },
+        cost_expr = {
+            type = "string",
+            minLength = 1,
+            description = "Lua arithmetic expression for dynamic token cost calculation. "
+                .. "Variables are injected from the LLM API raw usage response fields. "
+                .. "Missing variables default to 0. "
+                .. "Only valid when limit_strategy is 'expression'. "
+                .. "Example: input_tokens + cache_creation_input_tokens + output_tokens",
+        },
         instances = {
             type = "array",
             items = instance_limit_schema,
@@ -136,8 +151,114 @@ local limit_conf_cache = core.lrucache.new({
 })
 
 
+-- Pure math helpers exposed to cost expressions. A private copy is used so
+-- user-supplied code never receives the shared global math table, and the
+-- stateful random()/randomseed() functions are left out.
+local safe_math = {}
+for name, value in pairs(math) do
+    if name ~= "random" and name ~= "randomseed" then
+        safe_math[name] = value
+    end
+end
+
+-- names a cost expression can resolve besides the numeric usage fields
+local expr_safe_env = {
+    math = safe_math,
+    abs = math.abs,
+    ceil = math.ceil,
+    floor = math.floor,
+    max = math.max,
+    min = math.min,
+}
+
+-- compiled evaluators keyed by expression text; the table is simply reset
+-- once it holds MAX_COMPILED_EXPRS entries so it cannot grow without bound
+local MAX_COMPILED_EXPRS = 256
+local compiled_exprs = {}
+local compiled_count = 0
+
+
+-- Compile `expr_str` once and return an evaluator `f(raw)` that yields
+-- `ok, value_or_error` (pcall convention) for a raw usage table.
+-- Returns nil and a message when the text is not a plain arithmetic
+-- expression or fails to parse.
+local function compile_cost_expr(expr_str)
+    local evaluator = compiled_exprs[expr_str]
+    if evaluator then
+        return evaluator
+    end
+
+    -- identifiers, numbers, arithmetic/comparison operators, parentheses and
+    -- commas only: no string literals, method calls or table constructors
+    if expr_str:find("[^%w_%s%.%+%-%*/%%%^%(%),<>=~]") then
+        return nil, "only arithmetic expressions are allowed"
+    end
+    -- an anonymous function is the only way to smuggle statements (loops,
+    -- assignments) into an expression, so the keyword is rejected
+    if expr_str:find("%f[%w_]function%f[^%w_]") then
+        return nil, "'function' is not allowed"
+    end
+
+    local usage -- raw usage table of the evaluation in progress
+    local env = setmetatable({}, {
+        __index = function(_, k)
+            local v = expr_safe_env[k]
+            if v ~= nil then
+                return v
+            end
+            v = usage and usage[k]
+            if type(v) == "number" then
+                return v
+            end
+            -- missing or non-numeric usage fields read as 0
+            return 0
+        end
+    })
+
+    local fn, err = load("return " .. expr_str, "cost_expr", "t", env)
+    if not fn then
+        return nil, err
+    end
+
+    evaluator = function(raw)
+        usage = raw
+        local ok, result = pcall(fn)
+        usage = nil
+        return ok, result
+    end
+
+    if compiled_count >= MAX_COMPILED_EXPRS then
+        compiled_exprs = {}
+        compiled_count = 0
+    end
+    compiled_exprs[expr_str] = evaluator
+    compiled_count = compiled_count + 1
+    return evaluator
+end
+
+
 function _M.check_schema(conf)
-    return core.schema.check(schema, conf)
+    local ok, err = core.schema.check(schema, conf)
+    if not ok then
+        return false, err
+    end
+    if conf.limit_strategy == "expression" then
+        if not conf.cost_expr or conf.cost_expr == "" then
+            return false, "cost_expr is required when limit_strategy is 'expression'"
+        end
+        local evaluator, compile_err = compile_cost_expr(conf.cost_expr)
+        if not evaluator then
+            return false, "invalid cost_expr: " .. compile_err
+        end
+        -- dry run with no usage fields (every variable reads as 0) so that
+        -- e.g. a call to an unknown helper is rejected at configuration time
+        -- instead of failing on every request in the log phase
+        local run_ok, run_err = evaluator({})
+        if not run_ok then
+            return false, "invalid cost_expr: " .. tostring(run_err)
+        end
+    end
+    return true
 end
 
 
@@ -264,7 +385,45 @@ function _M.check_instance_status(conf, ctx, instance_name)
 end
 
 
+-- Evaluate the configured cost expression against the raw usage table.
+-- Returns a non-negative integer cost, or nil and an error message.
+local function eval_cost_expr(conf_cost_expr, raw)
+    local evaluator, err = compile_cost_expr(conf_cost_expr)
+    if not evaluator then
+        return nil, "failed to compile cost_expr: " .. err
+    end
+    local ok, result = evaluator(raw)
+    if not ok then
+        return nil, "failed to evaluate cost_expr: " .. tostring(result)
+    end
+    if type(result) ~= "number" then
+        return nil, "cost_expr must return a number, got: " .. type(result)
+    end
+    if result ~= result or result == math_huge or result == -math_huge then
+        return nil, "cost_expr returned non-finite value"
+    end
+    -- negative results are clamped to 0
+    if result < 0 then
+        result = 0
+    end
+    -- round half up to a whole number
+    return math_floor(result + 0.5)
+end
+
 local function get_token_usage(conf, ctx)
+    if conf.limit_strategy == "expression" then
+        local raw = ctx.llm_raw_usage
+        if not raw then
+            return
+        end
+        local result, err = eval_cost_expr(conf.cost_expr, raw)
+        if not result then
+            core.log.error(err)
+            return
+        end
+        return result
+    end
+
     local usage = ctx.ai_token_usage
     if not usage then
         return
@@ -288,6 +447,10 @@ function _M.log(conf, ctx)
         core.log.error("failed to get token usage for llm service")
         return
     end
+    if used_tokens == 0 then
+        core.log.info("token usage is 0, skip rate limiting")
+        return
+    end
 
     core.log.info("instance name: ", instance_name, " used tokens: ", used_tokens)
 
diff --git a/t/plugin/ai-rate-limiting-expression.t b/t/plugin/ai-rate-limiting-expression.t
new file mode 100644
index 000000000000..a0f818406f55
--- /dev/null
+++ b/t/plugin/ai-rate-limiting-expression.t
@@ -0,0 +1,620 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+BEGIN {
+    $ENV{TEST_ENABLE_CONTROL_API_V1} = "0";
+}
+
+use t::APISIX 'no_plan';
+
+log_level("info");
+repeat_each(1);
+no_long_string();
+no_shuffle();  # keep file order: each request block relies on the route set by the block before it
+no_root_location();
+
+add_block_preprocessor(sub {  # per-block defaults: "GET /t" request and a mock Anthropic upstream
+    my ($block) = @_;
+
+    if (!defined $block->request) {
+        $block->set_value("request", "GET /t");
+    }
+
+    my $http_config = $block->http_config // <<_EOC_;
+    server {
+        server_name anthropic;
+        listen 16725;
+
+        default_type 'application/json';
+
+        location /v1/messages {
+            content_by_lua_block {
+                local json = require("cjson.safe")
+                local ngx = ngx
+
+                ngx.req.read_body()
+                local body = ngx.req.get_body_data()
+                body = json.decode(body)
+
+                if not body or not body.messages then
+                    ngx.status = 400
+                    ngx.say('{"type":"error","error":{"type":"invalid_request_error","message":"missing messages"}}')
+                    return
+                end
+
+                local api_key = ngx.req.get_headers()["x-api-key"]
+                if api_key ~= "test-key" then
+                    ngx.status = 401
+                    ngx.say('{"type":"error","error":{"type":"authentication_error","message":"invalid x-api-key"}}')
+                    return
+                end
+
+                if body.stream then
+                    ngx.header["Content-Type"] = "text/event-stream"
+
+                    -- message_start with input_tokens and cache tokens
+                    local message_start = json.encode({
+                        type = "message_start",
+                        message = {
+                            id = "msg_test123",
+                            type = "message",
+                            role = "assistant",
+                            model = body.model or "claude-sonnet-4-20250514",
+                            content = {},
+                            usage = {
+                                input_tokens = 50,
+                                output_tokens = 0,
+                                cache_creation_input_tokens = 100,
+                                cache_read_input_tokens = 200,
+                            },
+                        },
+                    })
+                    ngx.say("event: message_start")
+                    ngx.say("data: " .. message_start)
+                    ngx.say("")
+
+                    -- content_block_start
+                    ngx.say("event: content_block_start")
+                    ngx.say('data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}}')
+                    ngx.say("")
+
+                    -- content_block_delta
+                    ngx.say("event: content_block_delta")
+                    ngx.say('data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello from Claude!"}}')
+                    ngx.say("")
+
+                    -- content_block_stop
+                    ngx.say("event: content_block_stop")
+                    ngx.say('data: {"type":"content_block_stop","index":0}')
+                    ngx.say("")
+
+                    -- message_delta with output_tokens
+                    local message_delta = json.encode({
+                        type = "message_delta",
+                        delta = { stop_reason = "end_turn" },
+                        usage = {
+                            output_tokens = 30,
+                        },
+                    })
+                    ngx.say("event: message_delta")
+                    ngx.say("data: " .. message_delta)
+                    ngx.say("")
+
+                    -- message_stop
+                    ngx.say("event: message_stop")
+                    ngx.say("data: {}")
+                    ngx.say("")
+                else
+                    ngx.status = 200
+                    ngx.say(json.encode({
+                        id = "msg_test456",
+                        type = "message",
+                        role = "assistant",
+                        model = body.model or "claude-sonnet-4-20250514",
+                        content = {{
+                            type = "text",
+                            text = "Hello from Claude!",
+                        }},
+                        stop_reason = "end_turn",
+                        usage = {
+                            input_tokens = 50,
+                            output_tokens = 30,
+                            cache_creation_input_tokens = 100,
+                            cache_read_input_tokens = 200,
+                        },
+                    }))
+                end
+            }
+        }
+    }
+_EOC_
+
+    $block->set_value("http_config", $http_config);
+});
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: schema validation - expression strategy requires cost_expr
+--- config
+    location /t {
+        content_by_lua_block {
+            local plugin = require("apisix.plugins.ai-rate-limiting")
+            local configs = {
+                -- expression without cost_expr
+                {
+                    limit = 100,
+                    time_window = 60,
+                    limit_strategy = "expression",
+                },
+                -- expression with empty cost_expr
+                {
+                    limit = 100,
+                    time_window = 60,
+                    limit_strategy = "expression",
+                    cost_expr = "",
+                },
+                -- expression with invalid cost_expr syntax
+                {
+                    limit = 100,
+                    time_window = 60,
+                    limit_strategy = "expression",
+                    cost_expr = "invalid $$$ syntax %%%",
+                },
+                -- valid expression
+                {
+                    limit = 100,
+                    time_window = 60,
+                    limit_strategy = "expression",
+                    cost_expr = "input_tokens + output_tokens",
+                },
+                -- valid complex expression
+                {
+                    limit = 100,
+                    time_window = 60,
+                    limit_strategy = "expression",
+                    cost_expr = "(input_tokens - cache_read_input_tokens) + cache_creation_input_tokens * 1.25 + output_tokens",
+                },
+            }
+            for i, conf in ipairs(configs) do
+                local ok, err = plugin.check_schema(conf)
+                if ok then
+                    ngx.say("config " .. i .. ": valid")
+                else
+                    ngx.say("config " .. i .. ": invalid")
+                end
+            end
+        }
+    }
+--- response_body
+config 1: invalid
+config 2: invalid
+config 3: invalid
+config 4: valid
+config 5: valid
+
+
+
+=== TEST 2: set route with expression rate limiting (non-streaming, native Anthropic)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 500,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens + cache_creation_input_tokens + output_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 3: non-streaming request - expression counts input_tokens + cache_creation + output_tokens
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 499",
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 319",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 4: set route with expression rate limiting (streaming, native Anthropic)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 500,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens + cache_creation_input_tokens + output_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 5: streaming request - verify token usage accumulation and rate limiting
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"stream":true,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"stream":true,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 499",
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 319",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 6: set route with cache-aware ITPM expression (excludes cache_read_input_tokens)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 100,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens + cache_creation_input_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 7: cache-aware ITPM - cost=150 exceeds limit=100 after first request, second rejected
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- error_code eval
+[200, 503]
+--- no_error_log
+[error]
+
+
+
+=== TEST 8: set route with weighted expression (cache_read costs 10%, cache_creation costs 125%)
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 1000,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens + cache_read_input_tokens * 0.1 + cache_creation_input_tokens * 1.25 + output_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 9: weighted expression - two requests (cost = 50 + 200*0.1 + 100*1.25 + 30 = 225 each)
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 999",
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 774",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 10: expression with missing variables defaults to 0
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 500,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens + nonexistent_field + output_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 11: missing variable defaults to 0 - cost = 50 + 0 + 30 = 80 per request
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 499",
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 419",
+]
+--- no_error_log
+[error]
+
+
+
+=== TEST 12: set route with expression that can yield negative cost
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                 ngx.HTTP_PUT,
+                 [[{
+                    "uri": "/v1/messages",
+                    "plugins": {
+                        "ai-proxy": {
+                            "provider": "anthropic",
+                            "auth": {
+                                "header": {
+                                    "x-api-key": "test-key",
+                                    "anthropic-version": "2023-06-01"
+                                }
+                            },
+                            "options": {
+                                "model": "claude-sonnet-4-20250514"
+                            },
+                            "override": {
+                                "endpoint": "http://localhost:16725"
+                            },
+                            "ssl_verify": false
+                        },
+                        "ai-rate-limiting": {
+                            "limit": 100,
+                            "time_window": 60,
+                            "limit_strategy": "expression",
+                            "cost_expr": "input_tokens - cache_read_input_tokens"
+                        }
+                    },
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": {
+                            "canbeanything.com": 1
+                        }
+                    }
+                }]]
+            )
+
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 13: negative expression result clamped to 0 - cost = 50 - 200 = -150, clamped to 0
+--- pipelined_requests eval
+[
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+    "POST /v1/messages\n" . '{"model":"claude-sonnet-4-20250514","max_tokens":1024,"messages":[{"role":"user","content":"Hello"}]}',
+]
+--- response_headers_like eval
+[
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 99",
+    "X-AI-RateLimit-Remaining-ai-proxy-anthropic: 99",
+]
+--- no_error_log
+[error]