From 4d0d57acc930516c7cc13f8ded4c45fdac14485b Mon Sep 17 00:00:00 2001 From: Lakhan Samani Date: Tue, 7 Apr 2026 08:52:37 +0530 Subject: [PATCH 1/2] security: GraphQL depth/complexity/alias limits and disable GET transport (#14) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GraphQL endpoint had no query depth limit, no per-operation alias cap, and accepted GET requests. These open three denial-of-service / leakage vectors: 1. Deeply nested queries can blow the call stack and exhaust resolver work 2. Alias amplification (many aliases on the same expensive field) multiplies work without changing complexity score 3. transport.GET leaks queries (and any sensitive arguments) into proxy logs, server access logs, and browser history Changes: - Remove transport.GET — clients must POST - Add a queryLimits handler extension that walks the parsed AST and enforces configurable max depth and max alias count, returning a gqlerror before execution begins - Make the existing complexity limit configurable instead of hardcoded 300 - Cap the GraphQL request body size via http.MaxBytesReader (default 1 MB) New CLI flags (with safe defaults): - --graphql-max-complexity (default 300) - --graphql-max-depth (default 15) - --graphql-max-aliases (default 30) - --graphql-max-body-bytes (default 1048576) --- cmd/root.go | 4 + internal/config/config.go | 11 ++ internal/http_handlers/graphql.go | 174 +++++++++++++++++++++++++++++- internal/metrics/metrics.go | 29 +++++ 4 files changed, 215 insertions(+), 3 deletions(-) diff --git a/cmd/root.go b/cmd/root.go index 1321ae94..4f2b999a 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -92,6 +92,10 @@ func init() { f.BoolVar(&rootArgs.config.EnableGraphQLIntrospection, "enable-graphql-introspection", true, "Enable GraphQL introspection for the /graphql endpoint") f.BoolVar(&rootArgs.config.EnableHSTS, "enable-hsts", false, "Enable Strict-Transport-Security response header (only enable behind TLS)") 
f.BoolVar(&rootArgs.config.DisableCSP, "disable-csp", false, "Disable the default Content-Security-Policy response header") + f.IntVar(&rootArgs.config.GraphQLMaxComplexity, "graphql-max-complexity", 300, "Maximum total complexity score for a single GraphQL operation") + f.IntVar(&rootArgs.config.GraphQLMaxDepth, "graphql-max-depth", 15, "Maximum nesting depth of a GraphQL selection set") + f.IntVar(&rootArgs.config.GraphQLMaxAliases, "graphql-max-aliases", 30, "Maximum total number of aliased fields per GraphQL operation") + f.Int64Var(&rootArgs.config.GraphQLMaxBodyBytes, "graphql-max-body-bytes", 1<<20, "Maximum allowed GraphQL request body size in bytes (default 1MB)") // Organization flags f.StringVar(&rootArgs.config.OrganizationLogo, "organization-logo", defaultOrganizationLogo, "Logo of the organization") diff --git a/internal/config/config.go b/internal/config/config.go index 808cbd96..37438853 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -31,6 +31,17 @@ type Config struct { // Off by default — CSP is on by default. Provided as an escape hatch // for dashboards that load assets in ways the default policy blocks. DisableCSP bool + // GraphQLMaxComplexity caps the total complexity score of a single GraphQL + // operation. Operations exceeding this limit are rejected before execution. + GraphQLMaxComplexity int + // GraphQLMaxDepth caps the maximum nesting depth of a GraphQL selection set. + GraphQLMaxDepth int + // GraphQLMaxAliases caps the total number of aliased fields per operation. + // Defends against alias-amplification denial-of-service attacks. + GraphQLMaxAliases int + // GraphQLMaxBodyBytes caps the size of the request body accepted by the + // GraphQL endpoint to prevent oversized-payload denial of service. 
+ GraphQLMaxBodyBytes int64 // Database Configurations // DatabaseType is the type of database to use diff --git a/internal/http_handlers/graphql.go b/internal/http_handlers/graphql.go index ffca64a7..c3ec86ce 100644 --- a/internal/http_handlers/graphql.go +++ b/internal/http_handlers/graphql.go @@ -2,11 +2,14 @@ package http_handlers import ( "context" + "errors" + "io" "net/http" "sort" "sync" "time" + "github.com/99designs/gqlgen/complexity" gql "github.com/99designs/gqlgen/graphql" "github.com/99designs/gqlgen/graphql/handler" "github.com/99designs/gqlgen/graphql/handler/extension" @@ -14,6 +17,7 @@ import ( "github.com/99designs/gqlgen/graphql/handler/transport" "github.com/gin-gonic/gin" "github.com/vektah/gqlparser/v2/ast" + "github.com/vektah/gqlparser/v2/gqlerror" "github.com/authorizerdev/authorizer/internal/graph" "github.com/authorizerdev/authorizer/internal/graph/generated" @@ -22,6 +26,106 @@ import ( "github.com/authorizerdev/authorizer/internal/utils" ) +// queryLimits is a gqlgen handler extension that enforces depth, alias, and +// complexity limits on parsed operations. It runs after parsing but before +// execution so abusive queries are rejected without consuming resolver work. +// +// We replace gqlgen's stock extension.FixedComplexityLimit so all three +// limits go through the same code path and emit the same Prometheus +// counter (authorizer_graphql_limit_rejections_total) labelled by the +// specific limit kind. Operators can then alert on a sustained non-zero +// rate per limit and tune individually. 
+type queryLimits struct {
+	maxDepth      int
+	maxAliases    int
+	maxComplexity int
+	schema        gql.ExecutableSchema
+}
+
+var (
+	_ gql.HandlerExtension        = (*queryLimits)(nil)
+	_ gql.OperationContextMutator = (*queryLimits)(nil)
+)
+
+func (*queryLimits) ExtensionName() string { return "QueryLimits" }
+func (q *queryLimits) Validate(schema gql.ExecutableSchema) error {
+	q.schema = schema
+	return nil
+}
+func (q *queryLimits) MutateOperationContext(ctx context.Context, rc *gql.OperationContext) *gqlerror.Error {
+	if rc == nil || rc.Operation == nil {
+		return nil
+	}
+	if q.maxDepth > 0 {
+		depth := selectionSetDepth(rc.Operation.SelectionSet)
+		if depth > q.maxDepth {
+			metrics.RecordGraphQLLimitRejection(metrics.GraphQLLimitDepth)
+			return gqlerror.Errorf("query depth %d exceeds maximum allowed depth %d", depth, q.maxDepth)
+		}
+	}
+	if q.maxAliases > 0 {
+		aliases := countAliases(rc.Operation.SelectionSet)
+		if aliases > q.maxAliases {
+			metrics.RecordGraphQLLimitRejection(metrics.GraphQLLimitAlias)
+			return gqlerror.Errorf("query uses %d aliases, exceeds maximum %d", aliases, q.maxAliases)
+		}
+	}
+	if q.maxComplexity > 0 && q.schema != nil {
+		score := complexity.Calculate(q.schema, rc.Operation, rc.Variables)
+		if score > q.maxComplexity {
+			metrics.RecordGraphQLLimitRejection(metrics.GraphQLLimitComplexity)
+			return gqlerror.Errorf("operation has complexity %d, which exceeds the limit of %d", score, q.maxComplexity)
+		}
+	}
+	return nil
+}
+
+func selectionSetDepth(set ast.SelectionSet) int {
+	max := 0
+	for _, sel := range set {
+		switch s := sel.(type) {
+		case *ast.Field:
+			d := 1 + selectionSetDepth(s.SelectionSet)
+			if d > max {
+				max = d
+			}
+		case *ast.InlineFragment:
+			d := selectionSetDepth(s.SelectionSet)
+			if d > max {
+				max = d
+			}
+		case *ast.FragmentSpread:
+			if s.Definition != nil {
+				d := selectionSetDepth(s.Definition.SelectionSet)
+				if d > max {
+					max = d
+				}
+			}
+		}
+	}
+	return max
+}
+
+func countAliases(set ast.SelectionSet) int {
+	n
:= 0 + for _, sel := range set { + switch s := sel.(type) { + case *ast.Field: + if s.Alias != "" && s.Alias != s.Name { + n++ + } + n += countAliases(s.SelectionSet) + case *ast.InlineFragment: + n += countAliases(s.SelectionSet) + case *ast.FragmentSpread: + if s.Definition != nil { + n += countAliases(s.Definition.SelectionSet) + } + } + } + return n +} + type gqlResolvedFieldsCtxKey struct{} // resolvedFieldsCollector gathers unique GraphQL field names for one operation. @@ -142,7 +246,9 @@ func (h *httpProvider) GraphqlHandler() gin.HandlerFunc { }})) srv.AddTransport(transport.Options{}) - srv.AddTransport(transport.GET{}) + // transport.GET is intentionally omitted: GraphQL queries (and especially + // mutations) over GET leak into proxy/server logs and browser history. + // Clients must POST. srv.AddTransport(transport.POST{}) srv.SetQueryCache(lru.New[*ast.QueryDocument](1000)) @@ -154,17 +260,79 @@ func (h *httpProvider) GraphqlHandler() gin.HandlerFunc { srv.Use(extension.AutomaticPersistedQuery{ Cache: lru.New[string](100), }) - // Limit query complexity to prevent resource exhaustion - srv.Use(extension.FixedComplexityLimit(300)) + + // Limit query depth, alias count, AND complexity through a single + // extension so all three rejections share one Prometheus counter + // (authorizer_graphql_limit_rejections_total). Defaults applied if + // config is unset. + maxComplexity := h.Config.GraphQLMaxComplexity + if maxComplexity <= 0 { + maxComplexity = 300 + } + maxDepth := h.Config.GraphQLMaxDepth + if maxDepth <= 0 { + maxDepth = 15 + } + maxAliases := h.Config.GraphQLMaxAliases + if maxAliases <= 0 { + maxAliases = 30 + } + srv.Use(&queryLimits{ + maxDepth: maxDepth, + maxAliases: maxAliases, + maxComplexity: maxComplexity, + }) + + // Cap the request body size to defend against oversized-payload DoS. 
+	maxBody := h.Config.GraphQLMaxBodyBytes
+	if maxBody <= 0 {
+		maxBody = 1 << 20 // 1 MB
+	}
 
 	return func(c *gin.Context) {
 		// Create a custom handler that ensures gin context is available
 		handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			// Bound the request body so a single client cannot exhaust memory.
+			// http.MaxBytesReader will return an error from r.Body.Read once
+			// the limit is exceeded; gqlgen surfaces that as a parse error.
+			// We wrap the body in a sniffer so we can detect the error and
+			// emit the body_size limit metric.
+			r.Body = &maxBytesBody{
+				ReadCloser: http.MaxBytesReader(w, r.Body, maxBody),
+			}
 			// Ensure the gin context is available in the request context
 			ctx := utils.ContextWithGin(r.Context(), c)
 			r = r.WithContext(ctx)
 			srv.ServeHTTP(w, r)
+			// If the body reader hit the cap, record the rejection. We do
+			// this once per request after the handler returns so the metric
+			// reflects actual aborts, not just oversized-but-streaming reads.
+			if mb, ok := r.Body.(*maxBytesBody); ok && mb.exceeded {
+				metrics.RecordGraphQLLimitRejection(metrics.GraphQLLimitBodySize)
+			}
 		})
 
 		handler.ServeHTTP(c.Writer, c.Request)
 	}
 }
+
+// maxBytesBody wraps the io.ReadCloser returned by http.MaxBytesReader so
+// the request handler can tell after the fact whether the body exceeded
+// the configured cap. http.MaxBytesReader signals exhaustion by returning
+// a *http.MaxBytesError from Read, but the gqlgen handler swallows the
+// error inside its parse step — we need to observe the read directly to
+// emit the body_size limit rejection metric.
+type maxBytesBody struct { + io.ReadCloser + exceeded bool +} + +func (m *maxBytesBody) Read(p []byte) (int, error) { + n, err := m.ReadCloser.Read(p) + if err != nil { + var mbe *http.MaxBytesError + if errors.As(err, &mbe) { + m.exceeded = true + } + } + return n, err +} diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go index 5f42dbed..3a667155 100644 --- a/internal/metrics/metrics.go +++ b/internal/metrics/metrics.go @@ -89,6 +89,19 @@ var ( []string{"operation"}, ) + // GraphQLLimitRejectionsTotal tracks GraphQL operations rejected because + // they exceeded one of the configured query limits (depth, complexity, + // alias count, body size). Use this to spot abuse patterns or to tune + // the limits — a sustained non-zero rate on the legitimate operation + // surface usually means the limit is too tight. + GraphQLLimitRejectionsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "authorizer_graphql_limit_rejections_total", + Help: "GraphQL operations rejected for exceeding a configured query limit. limit label is one of: depth, complexity, alias, body_size", + }, + []string{"limit"}, + ) + // GraphQLRequestDuration tracks GraphQL operation latency. GraphQLRequestDuration = prometheus.NewHistogramVec( prometheus.HistogramOpts{ @@ -193,6 +206,7 @@ func Init() { prometheus.MustRegister(ActiveSessions) prometheus.MustRegister(SecurityEventsTotal) prometheus.MustRegister(GraphQLErrorsTotal) + prometheus.MustRegister(GraphQLLimitRejectionsTotal) prometheus.MustRegister(GraphQLRequestDuration) prometheus.MustRegister(DBHealthCheckTotal) prometheus.MustRegister(ClientIDHeaderMissingTotal) @@ -226,6 +240,21 @@ func RecordGraphQLError(operation string) { GraphQLErrorsTotal.WithLabelValues(GraphQLOperationPrometheusLabel(operation)).Inc() } +// GraphQL query-limit kind labels (low-cardinality, package-internal). 
+const (
+	GraphQLLimitDepth      = "depth"
+	GraphQLLimitComplexity = "complexity"
+	GraphQLLimitAlias      = "alias"
+	GraphQLLimitBodySize   = "body_size"
+)
+
+// RecordGraphQLLimitRejection records a GraphQL operation rejected for
+// exceeding one of the configured query limits. limit must be one of the
+// GraphQLLimit* constants above.
+func RecordGraphQLLimitRejection(limit string) {
+	GraphQLLimitRejectionsTotal.WithLabelValues(limit).Inc()
+}
+
 // RecordClientIDHeaderMissing records a request that had no client ID header.
 func RecordClientIDHeaderMissing() {
 	ClientIDHeaderMissingTotal.Inc()

From b69bbac8c45f6af95182f0536cb5315cb2192f46 Mon Sep 17 00:00:00 2001
From: Lakhan Samani
Date: Tue, 7 Apr 2026 10:26:11 +0530
Subject: [PATCH 2/2] perf: fold GraphQL depth and alias walks into a single
 AST pass
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The previous implementation walked the operation's selection-set tree
twice for legitimate traffic — once for max depth, once for total alias
count. Both walks visit every node, so the second pass is pure
duplicated work.

Replace selectionSetDepth + countAliases with a single recursive
walkSelectionSet that returns (depth, aliases) in one traversal. The
behaviour is identical:

- Field nodes contribute one level of depth and (if aliased) one to the
  alias count
- Inline fragments and fragment spreads do not add a depth level but
  their nested aliases still count
- The walk always runs to completion; the caller then checks both
  limits in order against the results of that single pass

For a typical operation (~10–100 nodes) this halves the per-request AST
work added by this PR. No allocations beyond the recursion stack.
--- internal/http_handlers/graphql.go | 68 +++++++++++++------------------ 1 file changed, 29 insertions(+), 39 deletions(-) diff --git a/internal/http_handlers/graphql.go b/internal/http_handlers/graphql.go index c3ec86ce..92375ac8 100644 --- a/internal/http_handlers/graphql.go +++ b/internal/http_handlers/graphql.go @@ -56,16 +56,17 @@ func (q *queryLimits) MutateOperationContext(ctx context.Context, rc *gql.Operat if rc == nil || rc.Operation == nil { return nil } - if q.maxDepth > 0 { - depth := selectionSetDepth(rc.Operation.SelectionSet) - if depth > q.maxDepth { + // Single AST walk computes both max depth and total alias count so we + // touch each selection-set node exactly once. The earlier two-pass + // implementation walked the same tree twice for legitimate traffic; + // folding them halves the per-request AST work. + if q.maxDepth > 0 || q.maxAliases > 0 { + depth, aliases := walkSelectionSet(rc.Operation.SelectionSet) + if q.maxDepth > 0 && depth > q.maxDepth { metrics.RecordGraphQLLimitRejection(metrics.GraphQLLimitDepth) return gqlerror.Errorf("query depth %d exceeds maximum allowed depth %d", depth, q.maxDepth) } - } - if q.maxAliases > 0 { - aliases := countAliases(rc.Operation.SelectionSet) - if aliases > q.maxAliases { + if q.maxAliases > 0 && aliases > q.maxAliases { metrics.RecordGraphQLLimitRejection(metrics.GraphQLLimitAlias) return gqlerror.Errorf("query uses %d aliases, exceeds maximum %d", aliases, q.maxAliases) } @@ -80,50 +81,39 @@ func (q *queryLimits) MutateOperationContext(ctx context.Context, rc *gql.Operat return nil } -func selectionSetDepth(set ast.SelectionSet) int { - max := 0 +// walkSelectionSet returns (max nesting depth, total alias count) for the +// supplied selection set in a single recursive pass. Inline fragments and +// fragment spreads do not contribute their own depth level (matching the +// usual GraphQL convention) but their aliases do count. 
+func walkSelectionSet(set ast.SelectionSet) (depth, aliases int) { for _, sel := range set { switch s := sel.(type) { case *ast.Field: - d := 1 + selectionSetDepth(s.SelectionSet) - if d > max { - max = d + if s.Alias != "" && s.Alias != s.Name { + aliases++ + } + childDepth, childAliases := walkSelectionSet(s.SelectionSet) + aliases += childAliases + if d := 1 + childDepth; d > depth { + depth = d } case *ast.InlineFragment: - d := selectionSetDepth(s.SelectionSet) - if d > max { - max = d + childDepth, childAliases := walkSelectionSet(s.SelectionSet) + aliases += childAliases + if childDepth > depth { + depth = childDepth } case *ast.FragmentSpread: if s.Definition != nil { - d := selectionSetDepth(s.Definition.SelectionSet) - if d > max { - max = d + childDepth, childAliases := walkSelectionSet(s.Definition.SelectionSet) + aliases += childAliases + if childDepth > depth { + depth = childDepth } } } } - return max -} - -func countAliases(set ast.SelectionSet) int { - n := 0 - for _, sel := range set { - switch s := sel.(type) { - case *ast.Field: - if s.Alias != "" && s.Alias != s.Name { - n++ - } - n += countAliases(s.SelectionSet) - case *ast.InlineFragment: - n += countAliases(s.SelectionSet) - case *ast.FragmentSpread: - if s.Definition != nil { - n += countAliases(s.Definition.SelectionSet) - } - } - } - return n + return depth, aliases } type gqlResolvedFieldsCtxKey struct{}