Skip to content

Commit

Permalink
fuse: Increase MAX_KERNEL_WRITE to 1 MiB & enable CAP_MAX_PAGES
Browse files Browse the repository at this point in the history
Kernel 4.20 allows writes & reads up to 1 MiB (before: 128 kiB)
via CAP_MAX_PAGES & MaxPages.

Instead of exposing MaxPages in the API, we follow what libfuse
does, and calculate MaxPages from MaxWrite (rounding up).

Contrary to what libfuse does, we also set max_read to the same
value as MaxWrite. This prevents reads getting larger than writes
due to the rounding-up for MaxPages, which is unexpected. This
also changes the default behavior of go-fuse, which was 64 kiB
writes, but 128 kiB for reads. Now it is 128 kiB for both.

The tests are implemented in the fs package because it's
easier there. They also test MaxReadAhead.

Tested on Linux 4.19.0 and Linux 6.1.7 via all.bash,
and on 6.1.7 also via the gocryptfs test suite.

Supersedes #347

Change-Id: I5a1d4ee91945155c367888da7a90814a24a9ee6e
  • Loading branch information
rfjakob committed Jan 27, 2023
1 parent 915cf54 commit 265a392
Show file tree
Hide file tree
Showing 4 changed files with 315 additions and 9 deletions.
259 changes: 259 additions & 0 deletions fs/maxwrite_test.go
@@ -0,0 +1,259 @@
// Copyright 2022 the Go-FUSE Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package fs

import (
"context"
"fmt"
"io/ioutil"
"strconv"
"strings"
"sync"
"syscall"
"testing"

"golang.org/x/sys/unix"

"github.com/hanwen/go-fuse/v2/fuse"
)

// maxWriteTestRoot is the root node of the test filesystem. It also holds
// the statistics that the file handles update on every READ and WRITE.
type maxWriteTestRoot struct {
	Inode

	// Mutex guards the counters below.
	sync.Mutex
	// largestRead and largestWrite are the largest observed request
	// sizes (in bytes) since the last resetStats call.
	largestRead, largestWrite int
}

// VM_READAHEAD is the readahead size in bytes (128 kiB) that TestMaxWrite
// expects to find on the mount when MaxReadAhead is left at 0.
// Kernel reference:
// https://github.com/torvalds/linux/blob/e2ae0d4a6b0ba461542f0fd0ba0b828658013e9f/include/linux/pagemap.h#L999
const VM_READAHEAD = 131072

var _ NodeOnAdder = (*maxWriteTestRoot)(nil)

// OnAdd populates the root directory with a single child named "file",
// backed by a maxWriteTestNode that points back at this root.
func (n *maxWriteTestRoot) OnAdd(ctx context.Context) {
	node := &maxWriteTestNode{maxWriteTestRoot: n}
	child := n.Inode.NewInode(ctx, node, StableAttr{})
	n.Inode.AddChild("file", child, false)
}

// resetStats zeroes both "largest observed" counters while holding the lock.
func (n *maxWriteTestRoot) resetStats() {
	n.Lock()
	defer n.Unlock()
	n.largestRead, n.largestWrite = 0, 0
}

// maxWriteTestNode is the node that maxWriteTestRoot.OnAdd adds to the
// mount under the name "file".
type maxWriteTestNode struct {
Inode

// maxWriteTestRoot is the root node holding the read/write size
// counters; it is handed on to every file handle created by Open.
maxWriteTestRoot *maxWriteTestRoot
}

var _ NodeGetattrer = (*maxWriteTestNode)(nil)

// Getattr reports a fixed file size of 1 GiB and leaves all other
// attributes untouched.
func (n *maxWriteTestNode) Getattr(ctx context.Context, f FileHandle, out *fuse.AttrOut) syscall.Errno {
	const oneGiB = 1 << 30
	out.Size = oneGiB
	return 0
}

var _ NodeOpener = (*maxWriteTestNode)(nil)

// Open returns a fresh maxWriteTestFH that shares the root's counters.
// No FUSE open flags are set.
func (n *maxWriteTestNode) Open(ctx context.Context, flags uint32) (fh FileHandle, fuseFlags uint32, errno syscall.Errno) {
	handle := &maxWriteTestFH{maxWriteTestRoot: n.maxWriteTestRoot}
	return handle, 0, OK
}

// maxWriteTestFH is the file handle returned by maxWriteTestNode.Open. Its
// Read and Write methods record the largest request size they have seen.
type maxWriteTestFH struct {
// maxWriteTestRoot is the root node whose counters are updated.
maxWriteTestRoot *maxWriteTestRoot
}

var _ FileReader = (*maxWriteTestFH)(nil)

// Read records the size of the request buffer if it is the largest seen so
// far and then hands the buffer back unchanged as the read result.
func (fh *maxWriteTestFH) Read(ctx context.Context, data []byte, off int64) (fuse.ReadResult, syscall.Errno) {
	root := fh.maxWriteTestRoot
	size := len(data)

	root.Lock()
	if size > root.largestRead {
		root.largestRead = size
	}
	root.Unlock()

	return fuse.ReadResultData(data), 0
}

var _ FileWriter = (*maxWriteTestFH)(nil)

// Write records the size of the request if it is the largest seen so far
// and reports all of the data as written. The payload itself is discarded.
func (fh *maxWriteTestFH) Write(ctx context.Context, data []byte, off int64) (written uint32, errno syscall.Errno) {
	root := fh.maxWriteTestRoot
	size := len(data)

	root.Lock()
	if size > root.largestWrite {
		root.largestWrite = size
	}
	root.Unlock()

	return uint32(size), 0
}

// TestMaxWrite mounts the test filesystem once per combination of the
// MaxWrite and MaxReadAhead options (max_read follows MaxWrite) and checks
// that the request sizes arriving from the kernel are the expected ones.
func TestMaxWrite(t *testing.T) {
	const kiB = 1024
	// Request size limit in Linux v4.19 and older; also the size the test
	// expects when MaxWrite is left at 0.
	const limit128k = 128 * kiB

	cases := []fuse.MountOptions{
		{MaxWrite: 4 * kiB}, // one page = lower limit in all Linux versions
		{MaxWrite: 8 * kiB},
		{MaxWrite: 9999},        // deliberately unaligned
		{MaxWrite: 64 * kiB},    // former go-fuse default for writes
		{MaxWrite: limit128k},   // upper limit in Linux v4.19 and older
		{MaxWrite: 1024 * kiB},  // 1 MiB = upper limit in Linux v4.20+
	}
	// Cycle through readahead values: 4, 8, 16, 32, 64, 128 kiB.
	for ra := 4 * kiB; ra <= limit128k; ra *= 2 {
		cases = append(cases, fuse.MountOptions{MaxWrite: limit128k, MaxReadAhead: ra})
	}
	cases = append(cases,
		fuse.MountOptions{},                       // both at default
		fuse.MountOptions{MaxReadAhead: 4 * kiB}, // default MaxWrite
	)

	for _, tc := range cases {
		name := fmt.Sprintf("MaxWr%d.MaxRa%d", tc.MaxWrite, tc.MaxReadAhead)
		t.Run(name, func(t *testing.T) {
			root := &maxWriteTestRoot{}
			root.resetStats()

			mntDir, srv, clean := testMount(t, root, &Options{MountOptions: tc})
			defer clean()

			// observed returns a consistent snapshot of both counters.
			observed := func() (read, write int) {
				root.Lock()
				defer root.Unlock()
				return root.largestRead, root.largestWrite
			}

			// The readahead setting is visible in sysfs; 0 means kernel default.
			wantReadAhead := tc.MaxReadAhead
			if wantReadAhead == 0 {
				wantReadAhead = VM_READAHEAD
			}
			if have := bdiReadahead(mntDir); have != wantReadAhead {
				t.Errorf("Readahead mismatch: have=bdiReadahead=%d want=%d", have, wantReadAhead)
			}

			wantSize := tc.MaxWrite
			switch {
			case srv.KernelSettings().Flags&fuse.CAP_MAX_PAGES == 0 && wantSize > limit128k:
				// Kernel 4.19 and lower don't have CAP_MAX_PAGES and limit to 128 kiB.
				wantSize = limit128k
			case tc.MaxWrite == 0:
				wantSize = limit128k
			}

			// 2 MiB is more than the kernel supports in one request, so the
			// imposed limits show up in the actual request sizes.
			payload := make([]byte, 2*1024*kiB)
			path := mntDir + "/file"

			// Direct I/O
			fdDirect, err := syscall.Open(path, syscall.O_RDWR|syscall.O_DIRECT, 0600)
			if err != nil {
				t.Fatal(err)
			}
			defer syscall.Close(fdDirect)

			if _, err := syscall.Pwrite(fdDirect, payload, 0); err != nil {
				t.Errorf("write failed: %v", err)
			}
			if _, have := observed(); have != wantSize {
				t.Errorf("Direct I/O largestWrite: have=%d, want=%d", have, wantSize)
			}

			if _, err := syscall.Pread(fdDirect, payload, 0); err != nil {
				t.Errorf("read failed: %v", err)
			}
			if have, _ := observed(); have != wantSize {
				t.Errorf("Direct I/O largestRead: have=%d, want=%d", have, wantSize)
			}

			root.resetStats()

			// Buffered I/O
			fdBuffered, err := syscall.Open(path, syscall.O_RDWR, 0600)
			if err != nil {
				t.Fatal(err)
			}
			defer syscall.Close(fdBuffered)

			// Buffered read
			if _, err := syscall.Pread(fdBuffered, payload, 0); err != nil {
				t.Errorf("read failed: %v", err)
			}
			// On Linux 4.19 this is exactly tc.MaxReadAhead, while 6.0 also
			// produces larger reads up to 128 kiB. Log only, expect nothing.
			bufferedRead, _ := observed()
			t.Logf("Buffered I/O largestRead: have=%d", bufferedRead)

			// Buffered write
			if _, err := syscall.Pwrite(fdBuffered, payload, 0); err != nil {
				t.Errorf("write failed: %v", err)
			}
			if _, have := observed(); have != wantSize {
				t.Errorf("Buffered I/O largestWrite: have=%d, want=%d", have, wantSize)
			}
		})
	}
}

// bdiReadahead returns the readahead size, in bytes, of the filesystem
// mounted at mnt. The value is read from
// /sys/class/bdi/MAJOR:MINOR/read_ahead_kb, where MAJOR:MINOR is the device
// number reported by stat(2) for mnt. Any failure panics.
func bdiReadahead(mnt string) int {
	var st syscall.Stat_t
	if err := syscall.Stat(mnt, &st); err != nil {
		panic(err)
	}

	major, minor := unix.Major(st.Dev), unix.Minor(st.Dev)
	sysfsPath := fmt.Sprintf("/sys/class/bdi/%d:%d/read_ahead_kb", major, minor)

	raw, err := ioutil.ReadFile(sysfsPath)
	if err != nil {
		panic(err)
	}

	kb, err := strconv.Atoi(strings.TrimSpace(string(raw)))
	if err != nil {
		panic(err)
	}
	// sysfs reports kiB; callers compare against byte values.
	return kb * 1024
}
38 changes: 34 additions & 4 deletions fuse/api.go
Expand Up @@ -153,12 +153,42 @@ type MountOptions struct {
// async I/O. Concurrency for synchronous I/O is not limited.
MaxBackground int

// Write size to use. If 0, use default. This number is
// capped at the kernel maximum.
// MaxWrite is the max size for read and write requests. If 0, use
// go-fuse default (currently 128 kiB).
// This number is internally capped at MAX_KERNEL_WRITE (higher values don't make
// sense).
//
// Non-direct-io reads are mostly served via kernel readahead, which is
// additionally subject to the MaxReadAhead limit.
//
// Implementation notes:
//
// There are four values the Linux kernel looks at when deciding the request size:
// * MaxWrite, passed via InitOut.MaxWrite. Limits the WRITE size.
// * max_read, passed via a string mount option. Limits the READ size.
// go-fuse sets max_read equal to MaxWrite.
// You can see the current max_read value in /proc/self/mounts .
// * MaxPages, passed via InitOut.MaxPages. In Linux 4.20 and later, the value
// can go up to 1 MiB and go-fuse calculates the MaxPages value according
// to MaxWrite, rounding up.
// On older kernels, the value is fixed at 128 kiB and the
// passed value is ignored. No request can be larger than MaxPages, so
// READ and WRITE are effectively capped at MaxPages.
// * MaxReadAhead, passed via InitOut.MaxReadAhead.
MaxWrite int

// Max read ahead to use. If 0, use default. This number is
// capped at the kernel maximum.
// MaxReadAhead is the max read ahead size to use. It controls how much data the
// kernel reads in advance to satisfy future read requests from applications.
// How much exactly is subject to clever heuristics in the kernel
// (see https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/readahead.c?h=v6.2-rc5#n375
// if you are brave) and hence also depends on the kernel version.
//
// If 0, use kernel default. This number is capped at the kernel maximum
// (128 kiB on Linux) and cannot be larger than MaxWrite.
//
// MaxReadAhead only affects buffered reads (=non-direct-io), but even then, the
// kernel can and does send larger reads to satisfy read requests from applications
// (up to MaxWrite or VM_READAHEAD_PAGES=128 kiB, whichever is less).
MaxReadAhead int

// If IgnoreSecurityLabels is set, all security related xattr
Expand Down
15 changes: 14 additions & 1 deletion fuse/opcode.go
Expand Up @@ -70,6 +70,12 @@ const (
_OP_NOTIFY_DELETE = uint32(104) // protocol version 18

_OPCODE_COUNT = uint32(105)

// Constants from Linux kernel fs/fuse/fuse_i.h
// Default MaxPages value in all kernel versions
_FUSE_DEFAULT_MAX_PAGES_PER_REQ = 32
// Upper MaxPages limit in Linux v4.20+ (v4.19 and older: 32)
_FUSE_MAX_MAX_PAGES = 256
)

////////////////////////////////////////////////////////////////
Expand All @@ -90,7 +96,7 @@ func doInit(server *Server, req *request) {
server.reqMu.Lock()
server.kernelSettings = *input
server.kernelSettings.Flags = input.Flags & (CAP_ASYNC_READ | CAP_BIG_WRITES | CAP_FILE_OPS |
CAP_READDIRPLUS | CAP_NO_OPEN_SUPPORT | CAP_PARALLEL_DIROPS)
CAP_READDIRPLUS | CAP_NO_OPEN_SUPPORT | CAP_PARALLEL_DIROPS | CAP_MAX_PAGES)

if server.opts.EnableLocks {
server.kernelSettings.Flags |= CAP_FLOCK_LOCKS | CAP_POSIX_LOCKS
Expand Down Expand Up @@ -123,6 +129,11 @@ func doInit(server *Server, req *request) {
if input.Minor >= 13 {
server.setSplice()
}

// maxPages is the maximum request size we want the kernel to use, in units of
// memory pages (usually 4kiB). Linux v4.19 and older ignore this and always use
// 128kiB.
maxPages := (server.opts.MaxWrite-1)/syscall.Getpagesize() + 1 // Round up
server.reqMu.Unlock()

out := (*InitOut)(req.outData())
Expand All @@ -134,6 +145,7 @@ func doInit(server *Server, req *request) {
MaxWrite: uint32(server.opts.MaxWrite),
CongestionThreshold: uint16(server.opts.MaxBackground * 3 / 4),
MaxBackground: uint16(server.opts.MaxBackground),
MaxPages: uint16(maxPages),
}

if server.opts.MaxReadAhead != 0 && uint32(server.opts.MaxReadAhead) < out.MaxReadAhead {
Expand Down Expand Up @@ -536,6 +548,7 @@ func getHandler(o uint32) *operationHandler {
return operationHandlers[o]
}

// maximum size of all input headers
var maxInputSize uintptr

func init() {
Expand Down

0 comments on commit 265a392

Please sign in to comment.