Skip to content

Commit

Permalink
vfs, table_cache: fadvise FADV_RANDOM on sstable files
Browse files Browse the repository at this point in the history
This change calls fadvise with FADV_RANDOM on sstable file descriptors
to ensure that readahead is disabled. This reduces wasted I/Os when
reading from sstables, since sstable reads, especially for short to
medium range scans, do not read large enough contiguous blocks to
take advantage of readahead. Instead, readahead ends up reducing
user-visible I/O performance.

See cockroachdb#198 .
  • Loading branch information
itsbilal committed Aug 12, 2019
1 parent eb27354 commit 1804676
Show file tree
Hide file tree
Showing 10 changed files with 74 additions and 10 deletions.
8 changes: 6 additions & 2 deletions error_test.go
Expand Up @@ -60,15 +60,19 @@ func (fs *errorFS) Link(oldname, newname string) error {
return fs.fs.Link(oldname, newname)
}

func (fs *errorFS) Open(name string) (vfs.File, error) {
func (fs *errorFS) Open(name string, opts ...vfs.OpenOption) (vfs.File, error) {
if err := fs.maybeError(); err != nil {
return nil, err
}
f, err := fs.fs.Open(name)
if err != nil {
return nil, err
}
return errorFile{f, fs}, nil
ef := errorFile{f, fs}
for _, opt := range opts {
opt.Apply(ef)
}
return ef, nil
}

func (fs *errorFS) OpenDir(name string) (vfs.File, error) {
Expand Down
1 change: 1 addition & 0 deletions go.mod
Expand Up @@ -11,4 +11,5 @@ require (
github.com/spf13/pflag v1.0.3 // indirect
github.com/stretchr/testify v1.2.2
golang.org/x/exp v0.0.0-20190426190305-956cc1757749
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa // indirect
)
2 changes: 2 additions & 0 deletions go.sum
Expand Up @@ -28,5 +28,7 @@ golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa h1:KIDDMLT1O0Nr7TSxp8xM5tJcdn8tgyAONntO829og1M=
golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
2 changes: 1 addition & 1 deletion table_cache.go
Expand Up @@ -327,7 +327,7 @@ type tableCacheNode struct {

func (n *tableCacheNode) load(c *tableCacheShard) {
// Try opening the fileTypeTable first.
f, err := c.fs.Open(dbFilename(c.dirname, fileTypeTable, n.meta.fileNum))
f, err := c.fs.Open(dbFilename(c.dirname, fileTypeTable, n.meta.fileNum), &vfs.RandomReadsOption{})
if err != nil {
n.err = err
close(n.loaded)
Expand Down
4 changes: 2 additions & 2 deletions table_cache_test.go
Expand Up @@ -41,13 +41,13 @@ type tableCacheTestFS struct {
closeCounts map[string]int
}

func (fs *tableCacheTestFS) Open(name string) (vfs.File, error) {
func (fs *tableCacheTestFS) Open(name string, opts ...vfs.OpenOption) (vfs.File, error) {
fs.mu.Lock()
if fs.openCounts != nil {
fs.openCounts[name]++
}
fs.mu.Unlock()
f, err := fs.FS.Open(name)
f, err := fs.FS.Open(name, opts...)
if err != nil {
return nil, err
}
Expand Down
2 changes: 2 additions & 0 deletions vendor/modules.txt
Expand Up @@ -21,3 +21,5 @@ github.com/stretchr/testify/require
github.com/stretchr/testify/assert
# golang.org/x/exp v0.0.0-20190426190305-956cc1757749
golang.org/x/exp/rand
# golang.org/x/sys v0.0.0-20190804053845-51ab0e2deafa
golang.org/x/sys
11 changes: 11 additions & 0 deletions vfs/fadvise_generic.go
@@ -0,0 +1,11 @@
// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

// +build !linux

package vfs

// fadviseRandom is the variant compiled on platforms other than Linux. It
// does nothing with the descriptor f and always returns nil.
func fadviseRandom(f uintptr) error {
return nil
}
14 changes: 14 additions & 0 deletions vfs/fadvise_linux.go
@@ -0,0 +1,14 @@
// Copyright 2019 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

// +build linux

package vfs

import "golang.org/x/sys/unix"

// fadviseRandom disables readahead on the file descriptor f by issuing
// Fadvise with FADV_RANDOM. The advice is given with offset 0 and length 0,
// and the error from the underlying call is returned unchanged.
func fadviseRandom(f uintptr) error {
	fd := int(f)
	return unix.Fadvise(fd, 0, 0, unix.FADV_RANDOM)
}
4 changes: 3 additions & 1 deletion vfs/mem_fs.go
Expand Up @@ -40,6 +40,8 @@ type memFS struct {
root *memNode
}

var _ FS = &memFS{}

func (y *memFS) String() string {
y.mu.Lock()
defer y.mu.Unlock()
Expand Down Expand Up @@ -190,7 +192,7 @@ func (y *memFS) open(fullname string, allowEmptyName bool) (File, error) {
return ret, nil
}

func (y *memFS) Open(fullname string) (File, error) {
// Open opens the named in-memory file by delegating to y.open with
// allowEmptyName set to false. opts is accepted so that memFS matches the FS
// interface, but it is not used here: no option is applied to the returned
// file.
func (y *memFS) Open(fullname string, opts ...OpenOption) (File, error) {
return y.open(fullname, false /* allowEmptyName */)
}

Expand Down
36 changes: 32 additions & 4 deletions vfs/vfs.go
Expand Up @@ -23,6 +23,13 @@ type File interface {
Sync() error
}

// OpenOption provides an interface to do work on file handles in the Open()
// call.
type OpenOption interface {
// Apply is called on the file handle after it's opened.
Apply(File)
}

// FS is a namespace for files.
//
// The names are filepath names: they may be / separated or \ separated,
Expand All @@ -35,8 +42,8 @@ type FS interface {
// Link creates newname as a hard link to the oldname file.
Link(oldname, newname string) error

// Open opens the named file for reading.
Open(name string) (File, error)
// Open opens the named file for reading. opts provides OpenOptions that an
// implementation may apply to the opened file handle.
Open(name string, opts ...OpenOption) (File, error)

// OpenDir opens the named directory for syncing.
OpenDir(name string) (File, error)
Expand Down Expand Up @@ -95,8 +102,15 @@ func (defaultFS) Link(oldname, newname string) error {
return os.Link(oldname, newname)
}

func (defaultFS) Open(name string) (File, error) {
return os.OpenFile(name, os.O_RDONLY|syscall.O_CLOEXEC, 0)
// Open opens the named file read-only with O_CLOEXEC set. If the open
// succeeds, every option in opts is applied to the resulting *os.File, in
// order, before it is returned. If the open fails, no option is applied and
// the error is returned with a nil File.
func (defaultFS) Open(name string, opts ...OpenOption) (File, error) {
	const flags = os.O_RDONLY | syscall.O_CLOEXEC
	f, err := os.OpenFile(name, flags, 0)
	if err != nil {
		return nil, err
	}
	for i := range opts {
		opts[i].Apply(f)
	}
	return f, nil
}

func (defaultFS) OpenDir(name string) (File, error) {
Expand Down Expand Up @@ -127,3 +141,17 @@ func (defaultFS) List(dir string) ([]string, error) {
func (defaultFS) Stat(name string) (os.FileInfo, error) {
return os.Stat(name)
}

// RandomReadsOption is an OpenOption that optimizes the opened file handle
// for random reads by calling fadvise() with POSIX_FADV_RANDOM on Linux
// systems to disable readahead. It only has an effect on handles that are an
// *os.File, such as those returned by defaultFS.
type RandomReadsOption struct{}

var _ OpenOption = (*RandomReadsOption)(nil)

// Apply implements the OpenOption interface. Handles that are not an
// *os.File are left untouched.
func (RandomReadsOption) Apply(f File) {
	osFile, isOSFile := f.(*os.File)
	if !isOSFile {
		return
	}
	// Apply has no way to report a failure, and the call is only a readahead
	// hint, so the error is deliberately discarded.
	_ = fadviseRandom(osFile.Fd())
}

0 comments on commit 1804676

Please sign in to comment.