diff --git a/cmd/root.go b/cmd/root.go
index 1321ae94..4f2b999a 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -92,6 +92,10 @@ func init() {
 	f.BoolVar(&rootArgs.config.EnableGraphQLIntrospection, "enable-graphql-introspection", true, "Enable GraphQL introspection for the /graphql endpoint")
 	f.BoolVar(&rootArgs.config.EnableHSTS, "enable-hsts", false, "Enable Strict-Transport-Security response header (only enable behind TLS)")
 	f.BoolVar(&rootArgs.config.DisableCSP, "disable-csp", false, "Disable the default Content-Security-Policy response header")
+	f.IntVar(&rootArgs.config.GraphQLMaxComplexity, "graphql-max-complexity", 300, "Maximum total complexity score for a single GraphQL operation")
+	f.IntVar(&rootArgs.config.GraphQLMaxDepth, "graphql-max-depth", 15, "Maximum nesting depth of a GraphQL selection set")
+	f.IntVar(&rootArgs.config.GraphQLMaxAliases, "graphql-max-aliases", 30, "Maximum total number of aliased fields per GraphQL operation")
+	f.Int64Var(&rootArgs.config.GraphQLMaxBodyBytes, "graphql-max-body-bytes", 1<<20, "Maximum allowed GraphQL request body size in bytes (default 1MB)")
 
 	// Organization flags
 	f.StringVar(&rootArgs.config.OrganizationLogo, "organization-logo", defaultOrganizationLogo, "Logo of the organization")
diff --git a/internal/config/config.go b/internal/config/config.go
index 808cbd96..37438853 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -31,6 +31,17 @@ type Config struct {
 	// Off by default — CSP is on by default. Provided as an escape hatch
 	// for dashboards that load assets in ways the default policy blocks.
 	DisableCSP bool
+	// GraphQLMaxComplexity caps the total complexity score of a single GraphQL
+	// operation. Operations exceeding this limit are rejected before execution.
+	GraphQLMaxComplexity int
+	// GraphQLMaxDepth caps the maximum nesting depth of a GraphQL selection set.
+	GraphQLMaxDepth int
+	// GraphQLMaxAliases caps the total number of aliased fields per operation.
+	// Defends against alias-amplification denial-of-service attacks.
+	GraphQLMaxAliases int
+	// GraphQLMaxBodyBytes caps the size of the request body accepted by the
+	// GraphQL endpoint to prevent oversized-payload denial of service.
+	GraphQLMaxBodyBytes int64
 
 	// Database Configurations
 	// DatabaseType is the type of database to use
diff --git a/internal/http_handlers/graphql.go b/internal/http_handlers/graphql.go
index ffca64a7..92375ac8 100644
--- a/internal/http_handlers/graphql.go
+++ b/internal/http_handlers/graphql.go
@@ -2,11 +2,14 @@ package http_handlers
 
 import (
 	"context"
+	"errors"
+	"io"
 	"net/http"
 	"sort"
 	"sync"
 	"time"
 
+	"github.com/99designs/gqlgen/complexity"
 	gql "github.com/99designs/gqlgen/graphql"
 	"github.com/99designs/gqlgen/graphql/handler"
 	"github.com/99designs/gqlgen/graphql/handler/extension"
@@ -14,6 +17,7 @@ import (
 	"github.com/99designs/gqlgen/graphql/handler/transport"
 	"github.com/gin-gonic/gin"
 	"github.com/vektah/gqlparser/v2/ast"
+	"github.com/vektah/gqlparser/v2/gqlerror"
 
 	"github.com/authorizerdev/authorizer/internal/graph"
 	"github.com/authorizerdev/authorizer/internal/graph/generated"
@@ -22,6 +26,96 @@ import (
 	"github.com/authorizerdev/authorizer/internal/utils"
 )
 
+// queryLimits is a gqlgen handler extension that enforces depth, alias, and
+// complexity limits on parsed operations. It runs after parsing but before
+// execution so abusive queries are rejected without consuming resolver work.
+//
+// We replace gqlgen's stock extension.FixedComplexityLimit so all three
+// limits go through the same code path and emit the same Prometheus
+// counter (authorizer_graphql_limit_rejections_total) labelled by the
+// specific limit kind. Operators can then alert on a sustained non-zero
+// rate per limit and tune individually.
+type queryLimits struct {
+	maxDepth      int
+	maxAliases    int
+	maxComplexity int
+	schema        gql.ExecutableSchema
+}
+
+var (
+	_ gql.HandlerExtension        = (*queryLimits)(nil)
+	_ gql.OperationContextMutator = (*queryLimits)(nil)
+)
+
+func (*queryLimits) ExtensionName() string { return "QueryLimits" }
+func (q *queryLimits) Validate(schema gql.ExecutableSchema) error {
+	q.schema = schema
+	return nil
+}
+func (q *queryLimits) MutateOperationContext(ctx context.Context, rc *gql.OperationContext) *gqlerror.Error {
+	if rc == nil || rc.Operation == nil {
+		return nil
+	}
+	// Single AST walk computes both max depth and total alias count so we
+	// touch each selection-set node exactly once. The earlier two-pass
+	// implementation walked the same tree twice for legitimate traffic;
+	// folding them halves the per-request AST work.
+	if q.maxDepth > 0 || q.maxAliases > 0 {
+		depth, aliases := walkSelectionSet(rc.Operation.SelectionSet)
+		if q.maxDepth > 0 && depth > q.maxDepth {
+			metrics.RecordGraphQLLimitRejection(metrics.GraphQLLimitDepth)
+			return gqlerror.Errorf("query depth %d exceeds maximum allowed depth %d", depth, q.maxDepth)
+		}
+		if q.maxAliases > 0 && aliases > q.maxAliases {
+			metrics.RecordGraphQLLimitRejection(metrics.GraphQLLimitAlias)
+			return gqlerror.Errorf("query uses %d aliases, exceeds maximum %d", aliases, q.maxAliases)
+		}
+	}
+	if q.maxComplexity > 0 && q.schema != nil {
+		score := complexity.Calculate(q.schema, rc.Operation, rc.Variables)
+		if score > q.maxComplexity {
+			metrics.RecordGraphQLLimitRejection(metrics.GraphQLLimitComplexity)
+			return gqlerror.Errorf("operation has complexity %d, which exceeds the limit of %d", score, q.maxComplexity)
+		}
+	}
+	return nil
+}
+
+// walkSelectionSet returns (max nesting depth, total alias count) for the
+// supplied selection set in a single recursive pass. Inline fragments and
+// fragment spreads do not contribute their own depth level (matching the
+// usual GraphQL convention) but their aliases do count.
+func walkSelectionSet(set ast.SelectionSet) (depth, aliases int) {
+	for _, sel := range set {
+		switch s := sel.(type) {
+		case *ast.Field:
+			if s.Alias != "" && s.Alias != s.Name {
+				aliases++
+			}
+			childDepth, childAliases := walkSelectionSet(s.SelectionSet)
+			aliases += childAliases
+			if d := 1 + childDepth; d > depth {
+				depth = d
+			}
+		case *ast.InlineFragment:
+			childDepth, childAliases := walkSelectionSet(s.SelectionSet)
+			aliases += childAliases
+			if childDepth > depth {
+				depth = childDepth
+			}
+		case *ast.FragmentSpread:
+			if s.Definition != nil {
+				childDepth, childAliases := walkSelectionSet(s.Definition.SelectionSet)
+				aliases += childAliases
+				if childDepth > depth {
+					depth = childDepth
+				}
+			}
+		}
+	}
+	return depth, aliases
+}
+
 type gqlResolvedFieldsCtxKey struct{}
 
 // resolvedFieldsCollector gathers unique GraphQL field names for one operation.
@@ -142,7 +236,9 @@ func (h *httpProvider) GraphqlHandler() gin.HandlerFunc {
 	}}))
 
 	srv.AddTransport(transport.Options{})
-	srv.AddTransport(transport.GET{})
+	// transport.GET is intentionally omitted: GraphQL queries (and especially
+	// mutations) over GET leak into proxy/server logs and browser history.
+	// Clients must POST.
 	srv.AddTransport(transport.POST{})
 
 	srv.SetQueryCache(lru.New[*ast.QueryDocument](1000))
@@ -154,17 +250,79 @@
 	srv.Use(extension.AutomaticPersistedQuery{
 		Cache: lru.New[string](100),
 	})
-	// Limit query complexity to prevent resource exhaustion
-	srv.Use(extension.FixedComplexityLimit(300))
+
+	// Limit query depth, alias count, AND complexity through a single
+	// extension so all three rejections share one Prometheus counter
+	// (authorizer_graphql_limit_rejections_total). Defaults applied if
+	// config is unset.
+	maxComplexity := h.Config.GraphQLMaxComplexity
+	if maxComplexity <= 0 {
+		maxComplexity = 300
+	}
+	maxDepth := h.Config.GraphQLMaxDepth
+	if maxDepth <= 0 {
+		maxDepth = 15
+	}
+	maxAliases := h.Config.GraphQLMaxAliases
+	if maxAliases <= 0 {
+		maxAliases = 30
+	}
+	srv.Use(&queryLimits{
+		maxDepth:      maxDepth,
+		maxAliases:    maxAliases,
+		maxComplexity: maxComplexity,
+	})
+
+	// Cap the request body size to defend against oversized-payload DoS.
+	maxBody := h.Config.GraphQLMaxBodyBytes
+	if maxBody <= 0 {
+		maxBody = 1 << 20 // 1 MB
+	}
 
 	return func(c *gin.Context) {
 		// Create a custom handler that ensures gin context is available
 		handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			// Bound the request body so a single client cannot exhaust memory.
+			// http.MaxBytesReader will return an error from r.Body.Read once
+			// the limit is exceeded; gqlgen surfaces that as a parse error.
+			// We wrap the body in a sniffer so we can detect the error and
+			// emit the body_size limit metric.
+			r.Body = &maxBytesBody{
+				ReadCloser: http.MaxBytesReader(w, r.Body, maxBody),
+			}
 			// Ensure the gin context is available in the request context
 			ctx := utils.ContextWithGin(r.Context(), c)
 			r = r.WithContext(ctx)
 			srv.ServeHTTP(w, r)
+			// If the body reader hit the cap, record the rejection. We do
+			// this once per request after the handler returns so the metric
+			// reflects actual aborts, not just oversized-but-streaming reads.
+			if mb, ok := r.Body.(*maxBytesBody); ok && mb.exceeded {
+				metrics.RecordGraphQLLimitRejection(metrics.GraphQLLimitBodySize)
+			}
 		})
 		handler.ServeHTTP(c.Writer, c.Request)
 	}
 }
+
+// maxBytesBody wraps the io.ReadCloser returned by http.MaxBytesReader so
+// the request handler can tell after the fact whether the body exceeded
+// the configured cap. http.MaxBytesReader reports exhaustion by returning a
+// *http.MaxBytesError from Read, but the gqlgen handler swallows the
+// error inside its parse step — we need to observe the read directly to
+// emit the body_size limit rejection metric.
+type maxBytesBody struct {
+	io.ReadCloser
+	exceeded bool
+}
+
+func (m *maxBytesBody) Read(p []byte) (int, error) {
+	n, err := m.ReadCloser.Read(p)
+	if err != nil {
+		var mbe *http.MaxBytesError
+		if errors.As(err, &mbe) {
+			m.exceeded = true
+		}
+	}
+	return n, err
+}
diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go
index 5f42dbed..3a667155 100644
--- a/internal/metrics/metrics.go
+++ b/internal/metrics/metrics.go
@@ -89,6 +89,19 @@ var (
 		[]string{"operation"},
 	)
 
+	// GraphQLLimitRejectionsTotal tracks GraphQL operations rejected because
+	// they exceeded one of the configured query limits (depth, complexity,
+	// alias count, body size). Use this to spot abuse patterns or to tune
+	// the limits — a sustained non-zero rate on the legitimate operation
+	// surface usually means the limit is too tight.
+	GraphQLLimitRejectionsTotal = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Name: "authorizer_graphql_limit_rejections_total",
+			Help: "GraphQL operations rejected for exceeding a configured query limit. limit label is one of: depth, complexity, alias, body_size",
+		},
+		[]string{"limit"},
+	)
+
 	// GraphQLRequestDuration tracks GraphQL operation latency.
 	GraphQLRequestDuration = prometheus.NewHistogramVec(
 		prometheus.HistogramOpts{
@@ -193,6 +206,7 @@ func Init() {
 	prometheus.MustRegister(ActiveSessions)
 	prometheus.MustRegister(SecurityEventsTotal)
 	prometheus.MustRegister(GraphQLErrorsTotal)
+	prometheus.MustRegister(GraphQLLimitRejectionsTotal)
 	prometheus.MustRegister(GraphQLRequestDuration)
 	prometheus.MustRegister(DBHealthCheckTotal)
 	prometheus.MustRegister(ClientIDHeaderMissingTotal)
@@ -226,6 +240,21 @@ func RecordGraphQLError(operation string) {
 	GraphQLErrorsTotal.WithLabelValues(GraphQLOperationPrometheusLabel(operation)).Inc()
 }
 
+// GraphQL query-limit kind labels (low-cardinality, package-internal).
+const (
+	GraphQLLimitDepth      = "depth"
+	GraphQLLimitComplexity = "complexity"
+	GraphQLLimitAlias      = "alias"
+	GraphQLLimitBodySize   = "body_size"
+)
+
+// RecordGraphQLLimitRejection records a GraphQL operation rejected for
+// exceeding one of the configured query limits. limit must be one of the
+// GraphQLLimit* constants above.
+func RecordGraphQLLimitRejection(limit string) {
+	GraphQLLimitRejectionsTotal.WithLabelValues(limit).Inc()
+}
+
 // RecordClientIDHeaderMissing records a request that had no client ID header.
 func RecordClientIDHeaderMissing() {
 	ClientIDHeaderMissingTotal.Inc()