Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

querier: remove query_timeout and engine timeout from querier conf #10302

Merged
merged 10 commits into from
Aug 25, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
* [9813](https://github.com/grafana/loki/pull/9813) **jeschkies**: Enable Protobuf encoding via content negotiation between querier and query frontend.
* [10281](https://github.com/grafana/loki/pull/10281) **dannykopping**: Track effectiveness of hedged requests.
* [10140](https://github.com/grafana/loki/pull/10140) **dannykopping**: Dynamic client-side throttling to avoid object storage rate-limits (GCS only)
* [10302](https://github.com/grafana/loki/pull/10302) **ashwanthgoli**: Remove query_timeout and engine timeout from querier configuration
ashwanthgoli marked this conversation as resolved.
Show resolved Hide resolved

##### Fixes

Expand Down
4 changes: 0 additions & 4 deletions docs/sources/configure/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -538,10 +538,6 @@ Configures the `querier`. Only appropriate when running all modules or just the
[query_ingesters_within: <duration> | default = 3h]

engine:
# Deprecated: Use querier.query-timeout instead. Timeout for query execution.
# CLI flag: -querier.engine.timeout
[timeout: <duration> | default = 5m]

# The maximum amount of time to look back for log lines. Used only for instant
# log queries.
# CLI flag: -querier.engine.max-lookback-period
Expand Down
5 changes: 5 additions & 0 deletions docs/sources/setup/upgrade/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,11 @@ If you have a use-case that relies on strict parsing where you expect the parser
logfmt parser doesn't include standalone keys(keys without a value) in the resulting label set anymore.
You can use `--keep-empty` flag to retain them.

#### `query_timeout` and engine `timeout` are removed from querier configuration
`query_timeout` and `engine:timeout` can no longer be configured in the querier section, these configs were deprecated in 2.7.0 release.
You can instead configure `query_timeout` in [Limits Config](/docs/loki/latest/configuration/#limits_config) or use the following argument `-querier.query-timeout`.
CLI flag `-querier.engine.timeout` to configure the engine timeout is also removed.

### Jsonnet

##### Deprecated PodDisruptionBudget definition has been removed
Expand Down
10 changes: 0 additions & 10 deletions pkg/logql/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ import (
)

const (
DefaultEngineTimeout = 5 * time.Minute
DefaultBlockedQueryMessage = "blocked by policy"
)

Expand Down Expand Up @@ -114,10 +113,6 @@ type Querier interface {

// EngineOpts is the list of options to use with the LogQL query engine.
type EngineOpts struct {
// TODO: remove this after next release.
// Timeout for queries execution
Timeout time.Duration `yaml:"timeout" doc:"deprecated"`

// MaxLookBackPeriod is the maximum amount of time to look back for log lines.
// only used for instant log queries.
MaxLookBackPeriod time.Duration `yaml:"max_look_back_period"`
Expand All @@ -127,8 +122,6 @@ type EngineOpts struct {
}

func (opts *EngineOpts) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
// TODO: remove this configuration after next release.
f.DurationVar(&opts.Timeout, prefix+".engine.timeout", DefaultEngineTimeout, "Use querier.query-timeout instead. Timeout for query execution.")
f.DurationVar(&opts.MaxLookBackPeriod, prefix+".engine.max-lookback-period", 30*time.Second, "The maximum amount of time to look back for log lines. Used only for instant log queries.")
// Log executing query by default
opts.LogExecutingQuery = true
Expand All @@ -142,7 +135,6 @@ func (opts *EngineOpts) applyDefault() {

// Engine is the LogQL engine.
type Engine struct {
Timeout time.Duration
logger log.Logger
evaluator Evaluator
limits Limits
Expand All @@ -151,7 +143,6 @@ type Engine struct {

// NewEngine creates a new LogQL Engine.
func NewEngine(opts EngineOpts, q Querier, l Limits, logger log.Logger) *Engine {
queryTimeout := opts.Timeout
opts.applyDefault()
if logger == nil {
logger = log.NewNopLogger()
Expand All @@ -160,7 +151,6 @@ func NewEngine(opts EngineOpts, q Querier, l Limits, logger log.Logger) *Engine
logger: logger,
evaluator: NewDefaultEvaluator(q, opts.MaxLookBackPeriod),
limits: l,
Timeout: queryTimeout,
opts: opts,
}
}
Expand Down
59 changes: 0 additions & 59 deletions pkg/loki/loki.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
"github.com/grafana/loki/pkg/distributor"
"github.com/grafana/loki/pkg/ingester"
ingester_client "github.com/grafana/loki/pkg/ingester/client"
"github.com/grafana/loki/pkg/logql"
"github.com/grafana/loki/pkg/loki/common"
"github.com/grafana/loki/pkg/lokifrontend"
"github.com/grafana/loki/pkg/querier"
Expand Down Expand Up @@ -255,10 +254,6 @@ func (c *Config) Validate() error {
return err
}

if err := AdjustForTimeoutsMigration(c); err != nil {
return err
}

// Honor the legacy scalable deployment topology
if c.LegacyReadTarget {
if c.isModuleEnabled(Backend) {
Expand All @@ -269,60 +264,6 @@ func (c *Config) Validate() error {
return nil
}

// AdjustForTimeoutsMigration will adjust Loki timeouts configuration to be in accordance with the next major release.
//
// We're preparing to unify the querier:engine:timeout and querier:query_timeout into a single timeout named limits_config:query_timeout.
// The migration encompasses of:
// - If limits_config:query_timeout is explicitly configured, use it everywhere as it is a new configuration and by
// configuring it, users are expressing that they're willing of using it.
// - If none are explicitly configured, use the default engine:timeout everywhere as it is longer than the default limits_config:query_timeout
// and otherwise users would start to experience shorter timeouts without expecting it.
// - If only the querier:engine:timeout was explicitly configured, warn the user and use it everywhere.
func AdjustForTimeoutsMigration(c *Config) error {
engineTimeoutIsDefault := c.Querier.Engine.Timeout == logql.DefaultEngineTimeout
globalTimeoutIsDefault := c.LimitsConfig.QueryTimeout.String() == validation.DefaultPerTenantQueryTimeout
if engineTimeoutIsDefault && globalTimeoutIsDefault {
if err := c.LimitsConfig.QueryTimeout.Set(c.Querier.Engine.Timeout.String()); err != nil {
return fmt.Errorf("couldn't set global query_timeout as the engine timeout value: %w", err)
}
level.Warn(util_log.Logger).Log("msg",
fmt.Sprintf(
"global timeout not configured, using default engine timeout (%q). This behavior will change in the next major to always use the default global timeout (%q).",
c.Querier.Engine.Timeout.String(),
c.LimitsConfig.QueryTimeout.String(),
),
)
return nil
}

if !globalTimeoutIsDefault && !engineTimeoutIsDefault {
level.Warn(util_log.Logger).Log("msg",
fmt.Sprintf(
"using configured global timeout (%q) as the default (can be overridden per-tenant in the limits_config). Configured engine timeout (%q) is deprecated and will be ignored.",
c.LimitsConfig.QueryTimeout.String(),
c.Querier.Engine.Timeout.String(),
),
)
return nil
}

if globalTimeoutIsDefault && !engineTimeoutIsDefault {
if err := c.LimitsConfig.QueryTimeout.Set(c.Querier.Engine.Timeout.String()); err != nil {
return fmt.Errorf("couldn't set global query_timeout as the engine timeout value: %w", err)
}
level.Warn(util_log.Logger).Log("msg",
fmt.Sprintf(
"using configured engine timeout (%q) as the default (can be overridden per-tenant in the limits_config). Be aware that engine timeout (%q) is deprecated and will be removed in the next major version.",
c.Querier.Engine.Timeout.String(),
c.LimitsConfig.QueryTimeout.String(),
),
)
return nil
}

return nil
}

func (c *Config) isModuleEnabled(m string) bool {
return util.StringsContain(c.Target, m)
}
Expand Down
7 changes: 0 additions & 7 deletions pkg/querier/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"net/http"
"strconv"
"strings"
"time"

"github.com/go-kit/log"
Expand Down Expand Up @@ -558,12 +557,6 @@ func WrapQuerySpanAndTimeout(call string, q *QuerierAPI) middleware.Interface {

timeoutCapture := func(id string) time.Duration { return q.limits.QueryTimeout(ctx, id) }
timeout := util_validation.SmallestPositiveNonZeroDurationPerTenant(tenants, timeoutCapture)
// TODO: remove this clause once we remove the deprecated query-timeout flag.
if q.cfg.QueryTimeout != 0 { // querier YAML configuration is still configured.
level.Warn(log).Log("msg", "deprecated querier:query_timeout YAML configuration identified. Please migrate to limits:query_timeout instead.", "call", "WrapQuerySpanAndTimeout", "org_id", strings.Join(tenants, ","))
timeout = q.cfg.QueryTimeout
}

newCtx, cancel := context.WithTimeout(ctx, timeout)
defer cancel()

Expand Down
51 changes: 3 additions & 48 deletions pkg/querier/http_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,8 @@ func TestQueryWrapperMiddleware(t *testing.T) {

t.Run("request timeout is the shortest one", func(t *testing.T) {
defaultLimits := defaultLimitsTestConfig()
defaultLimits.QueryTimeout = model.Duration(time.Millisecond * 10)

limits, err := validation.NewOverrides(defaultLimits, nil)
require.NoError(t, err)
api := NewQuerierAPI(mockQuerierConfig(), nil, limits, log.NewNopLogger())
Expand All @@ -234,7 +236,6 @@ func TestQueryWrapperMiddleware(t *testing.T) {
deadline: shortestTimeout,
}

api.cfg.QueryTimeout = time.Millisecond * 10
midl := WrapQuerySpanAndTimeout("mycall", api).Wrap(connSimulator)

req, err := http.NewRequest("GET", "/loki/api/v1/label", nil)
Expand All @@ -261,53 +262,7 @@ func TestQueryWrapperMiddleware(t *testing.T) {
require.True(t, connSimulator.didTimeout)
})

t.Run("old querier:query_timeout is configured to supersede all others", func(t *testing.T) {
defaultLimits := defaultLimitsTestConfig()
defaultLimits.QueryTimeout = model.Duration(shortestTimeout)
limits, err := validation.NewOverrides(defaultLimits, nil)
require.NoError(t, err)
api := NewQuerierAPI(mockQuerierConfig(), nil, limits, log.NewNopLogger())

// configure old querier:query_timeout parameter.
// although it is longer than the limits timeout, it should supersede it.
api.cfg.QueryTimeout = time.Millisecond * 100

// although limits:query_timeout is shorter than querier:query_timeout,
// limits:query_timeout should be ignored.
// here we configure it to sleep for 100ms and we want it to timeout at the 100ms.
connSimulator := &slowConnectionSimulator{
sleepFor: api.cfg.QueryTimeout,
deadline: time.Millisecond * 200,
}

midl := WrapQuerySpanAndTimeout("mycall", api).Wrap(connSimulator)

req, err := http.NewRequest("GET", "/loki/api/v1/label", nil)
ctx, cancelFunc := context.WithTimeout(user.InjectOrgID(req.Context(), "fake"), time.Millisecond*200)
defer cancelFunc()
req = req.WithContext(ctx)
require.NoError(t, err)

rr := httptest.NewRecorder()
srv := http.HandlerFunc(midl.ServeHTTP)

srv.ServeHTTP(rr, req)
require.Equal(t, http.StatusOK, rr.Code)

select {
case <-ctx.Done():
require.FailNow(t, fmt.Sprintf("should timeout in %s", api.cfg.QueryTimeout))
case <-time.After(shortestTimeout):
// didn't use the limits timeout (i.e: shortest one), exactly what we want.
break
case <-time.After(api.cfg.QueryTimeout):
require.FailNow(t, fmt.Sprintf("should timeout in %s", api.cfg.QueryTimeout))
}

require.True(t, connSimulator.didTimeout)
})

t.Run("new limits query timeout is configured to supersede all others", func(t *testing.T) {
t.Run("apply limits query timeout", func(t *testing.T) {
defaultLimits := defaultLimitsTestConfig()
defaultLimits.QueryTimeout = model.Duration(shortestTimeout)

Expand Down
28 changes: 0 additions & 28 deletions pkg/querier/querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ import (
"github.com/grafana/loki/pkg/storage/stores/index/stats"
"github.com/grafana/loki/pkg/storage/stores/indexshipper/compactor/deletion"
listutil "github.com/grafana/loki/pkg/util"
util_log "github.com/grafana/loki/pkg/util/log"
"github.com/grafana/loki/pkg/util/spanlogger"
util_validation "github.com/grafana/loki/pkg/util/validation"
)
Expand Down Expand Up @@ -58,7 +57,6 @@ type Config struct {
QueryStoreOnly bool `yaml:"query_store_only"`
QueryIngesterOnly bool `yaml:"query_ingester_only"`
MultiTenantQueriesEnabled bool `yaml:"multi_tenant_queries_enabled"`
QueryTimeout time.Duration `yaml:"query_timeout" doc:"hidden"`
PerRequestLimitsEnabled bool `yaml:"per_request_limits_enabled"`
}

Expand Down Expand Up @@ -380,11 +378,6 @@ func (q *SingleTenantQuerier) Label(ctx context.Context, req *logproto.LabelRequ

// Enforce the query timeout while querying backends
queryTimeout := q.limits.QueryTimeout(ctx, userID)
// TODO: remove this clause once we remove the deprecated query-timeout flag.
if q.cfg.QueryTimeout != 0 { // querier YAML configuration.
level.Warn(util_log.Logger).Log("msg", "deprecated querier:query_timeout YAML configuration identified. Please migrate to limits:query_timeout instead.", "err", err, "call", "Label")
queryTimeout = q.cfg.QueryTimeout
}
ctx, cancel := context.WithDeadline(ctx, time.Now().Add(queryTimeout))
defer cancel()

Expand Down Expand Up @@ -474,11 +467,6 @@ func (q *SingleTenantQuerier) Tail(ctx context.Context, req *logproto.TailReques
return nil, errors.Wrap(err, "failed to load tenant")
}
queryTimeout := q.limits.QueryTimeout(tailCtx, tenantID)
// TODO: remove this clause once we remove the deprecated query-timeout flag.
if q.cfg.QueryTimeout != 0 { // querier YAML configuration.
level.Warn(util_log.Logger).Log("msg", "deprecated querier:query_timeout YAML configuration identified. Please migrate to limits:query_timeout instead.", "call", "SingleTenantQuerier/Tail")
queryTimeout = q.cfg.QueryTimeout
}
queryCtx, cancelQuery := context.WithDeadline(ctx, time.Now().Add(queryTimeout))
defer cancelQuery()

Expand Down Expand Up @@ -523,11 +511,6 @@ func (q *SingleTenantQuerier) Series(ctx context.Context, req *logproto.SeriesRe

// Enforce the query timeout while querying backends
queryTimeout := q.limits.QueryTimeout(ctx, userID)
// TODO: remove this clause once we remove the deprecated query-timeout flag.
if q.cfg.QueryTimeout != 0 { // querier YAML configuration.
level.Warn(util_log.Logger).Log("msg", "deprecated querier:query_timeout YAML configuration identified. Please migrate to limits:query_timeout instead.", "call", "SingleTenantQuerier/Series")
queryTimeout = q.cfg.QueryTimeout
}
ctx, cancel := context.WithDeadline(ctx, time.Now().Add(queryTimeout))
defer cancel()

Expand Down Expand Up @@ -752,11 +735,6 @@ func (q *SingleTenantQuerier) IndexStats(ctx context.Context, req *loghttp.Range

// Enforce the query timeout while querying backends
queryTimeout := q.limits.QueryTimeout(ctx, userID)
// TODO: remove this clause once we remove the deprecated query-timeout flag.
if q.cfg.QueryTimeout != 0 { // querier YAML configuration.
level.Warn(util_log.Logger).Log("msg", "deprecated querier:query_timeout YAML configuration identified. Please migrate to limits:query_timeout instead.", "call", "SingleTenantQuerier/IndexStats")
queryTimeout = q.cfg.QueryTimeout
}
ctx, cancel := context.WithDeadline(ctx, time.Now().Add(queryTimeout))
defer cancel()

Expand Down Expand Up @@ -785,12 +763,6 @@ func (q *SingleTenantQuerier) Volume(ctx context.Context, req *logproto.VolumeRe

// Enforce the query timeout while querying backends
queryTimeout := q.limits.QueryTimeout(ctx, userID)

// TODO: remove this clause once we remove the deprecated query-timeout flag.
if q.cfg.QueryTimeout != 0 { // querier YAML configuration.
level.Warn(util_log.Logger).Log("msg", "deprecated querier:query_timeout YAML configuration identified. Please migrate to limits:query_timeout instead.", "call", "SingleTenantQuerier/Volume")
queryTimeout = q.cfg.QueryTimeout
}
ctx, cancel := context.WithDeadline(ctx, time.Now().Add(queryTimeout))
defer cancel()

Expand Down
1 change: 0 additions & 1 deletion pkg/querier/queryrange/roundtrip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ var (
},
}
testEngineOpts = logql.EngineOpts{
Timeout: 30 * time.Second,
MaxLookBackPeriod: 30 * time.Second,
LogExecutingQuery: false,
}
Expand Down