Fix queries and storage block operations synchronisation #699
Force-pushed from b1567dc to c42dfe5.
In addition, testing revealed one more bug I can easily reproduce on
Fixed a data race spotted by the race detector (stack traces attached).
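The class of bug the race detector flags can be sketched outside the phlaredb code: a map written concurrently from many goroutines without synchronisation. This is a minimal, hypothetical example of the locking fix, not the actual phlare code:

```go
package main

import (
	"fmt"
	"sync"
)

// counter guards its map with a mutex. Without the Lock/Unlock pair,
// `go run -race` reports the concurrent map writes as a data race.
type counter struct {
	mu sync.Mutex
	m  map[string]int
}

func (c *counter) inc(key string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.m[key]++
}

// countConcurrent increments the same key from n goroutines and
// returns the final count, which is deterministic only because
// every write is serialised by the mutex.
func countConcurrent(n int) int {
	c := &counter{m: make(map[string]int)}
	var wg sync.WaitGroup
	for i := 0; i < n; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.inc("requests")
		}()
	}
	wg.Wait()
	return c.m["requests"]
}

func main() {
	fmt.Println(countConcurrent(100)) // 100
}
```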
Force-pushed from 4a2bed1 to 576b20b.
I'm going to leave my stress test running overnight to make sure the fix does not introduce new bugs: 50 clients with "real" profiles, 5 parallel queries (last hour; the response data is not verified). Phlare is compiled with the race detector enabled and is running with these options:
It's worth running a test like this periodically, I believe.
```go
s.lock.RLock()
defer s.lock.RUnlock()
```

```go
buffer := parquet.NewBuffer(
```
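The read-lock pattern in the excerpt above lets any number of queries proceed concurrently, while a flush takes the write lock and waits for in-flight readers to drain. A self-contained sketch of that scheme, with illustrative names rather than the actual phlaredb types:

```go
package main

import (
	"fmt"
	"sync"
)

// store mimics the profileStore locking scheme: queries take the read
// lock and may run in parallel; cutRowGroup takes the write lock and
// blocks until all active readers have finished.
type store struct {
	lock sync.RWMutex
	rows []int
}

// query reads the in-memory rows under the read lock.
func (s *store) query() int {
	s.lock.RLock()
	defer s.lock.RUnlock()
	sum := 0
	for _, r := range s.rows {
		sum += r
	}
	return sum
}

// cutRowGroup flushes and resets the in-memory rows under the
// exclusive write lock, so no query ever observes a half-reset slice.
func (s *store) cutRowGroup() {
	s.lock.Lock()
	defer s.lock.Unlock()
	s.rows = s.rows[:0]
}

func main() {
	s := &store{rows: []int{1, 2, 3}}
	fmt.Println(s.query()) // 6
	s.cutRowGroup()
	fmt.Println(s.query()) // 0
}
```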
Is it not possible to re-use it anymore?
I'm afraid I failed to find where we reuse the dedup slice or its parts: we use zero values at head initialization:
Line 158 in d303348
```go
func NewHead(phlarectx context.Context, cfg Config, limiter TenantLimiter) (*Head, error) {
```
and the Init function does not seem to try to reuse allocated objects by any means:
phlare/pkg/phlaredb/deduplicating_slice.go
Lines 61 to 77 in d303348
```go
func (s *deduplicatingSlice[M, K, H, P]) Init(path string, cfg *ParquetConfig, metrics *headMetrics) error {
	s.cfg = cfg
	s.metrics = metrics
	file, err := os.OpenFile(filepath.Join(path, s.persister.Name()+block.ParquetSuffix), os.O_RDWR|os.O_CREATE|os.O_EXCL, 0o644)
	if err != nil {
		return err
	}
	s.file = file
	// TODO: Reuse parquet.Writer beyond life time of the head.
	s.writer = parquet.NewGenericWriter[P](file, s.persister.Schema(),
		parquet.ColumnPageBuffers(parquet.NewFileBufferPool(os.TempDir(), "phlaredb-parquet-buffers*")),
		parquet.CreatedBy("github.com/grafana/phlare/", build.Version, build.Revision),
	)
	s.lookup = make(map[K]int64)
	return nil
}
```
The buffer itself is not used anywhere but in Flush.
I should say that if we want to flush the head without blocking writes to the db, we have to sacrifice some memory. A pool could help there, but judging by the GC rate, the objects would very likely be removed from it before the next flush. Given that I don't find this allocation particularly harmful (it is almost unnoticeable in the heap profile), the performance improvement would be small to unnoticeable, so I'm not very concerned about it.
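The pool trade-off described above can be sketched with `sync.Pool`. This is a hypothetical illustration, not the phlare implementation: `Get` reuses a buffer only if one survived garbage collection, and with flushes far apart the pooled object is usually collected first, so the win over plain allocation is small.

```go
package main

import (
	"bytes"
	"fmt"
	"sync"
)

// bufPool hands out reusable buffers. The runtime may drop pooled
// objects at any GC cycle, so New must always be able to allocate
// a fresh one.
var bufPool = sync.Pool{
	New: func() any { return new(bytes.Buffer) },
}

// flush writes data through a pooled buffer and returns the number of
// bytes staged, returning the buffer to the pool when done.
func flush(data []byte) int {
	buf := bufPool.Get().(*bytes.Buffer)
	defer func() {
		buf.Reset() // clear contents before returning to the pool
		bufPool.Put(buf)
	}()
	buf.Write(data)
	return buf.Len()
}

func main() {
	fmt.Println(flush([]byte("profile-row-group"))) // 17
}
```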
Nice work, I'd love to see how this impacts our tail latency.
LGTM 👏
Also fixed one more bug in the block querier – it turns out the recent change had not fixed the race:
…are#699)
* Fix ingest/flush race
* Allow concurrent reads in cutRowGroup
* profileStore synchronization
* alloc dedup slice buffer
* Fix block eviction synchronisation
Resolves #687