Skip to content

Commit

Permalink
fix request size too large when use direct io
Browse files Browse the repository at this point in the history
The fuse directio call path is as follows

fuse_read_fill+0xa8/0xb0
fuse_send_read+0x3f/0xb0
fuse_direct_io+0x34a/0x5f0
__fuse_direct_read+0x4e/0x70
fuse_file_read_iter+0x9e/0x140
new_sync_read+0xde/0x120
__vfs_read+0x27/0x40
vfs_read+0x94/0x190
ksys_read+0x4e/0xd0

under the direct io path, fuse initiates a request whose request size is determined
by the combination of the user-supplied buffer size and max_read mount parameters.

size_t nmax = write ? fc->max_write : fc->max_read;
size_t count = iov_iter_count(iter);
size_t nbytes = min(count, nmax);
nres = fuse_send_read(req, io, pos, nbytes, owner);

so we have a problem with the checking of the request size in the code, we always
compare the request size with MAX_BUFFER_SIZE, but in fact the maximum value of the
request size depends on max_read, in virtiofs scenario max_read is UINT_MAX by default,
and in fuse scenario it is possible to adjust max_read by the mounting parameter.

the current implementation of fuse-backend-rs uses a fixed buffer to store the fuse response
the default value of this buffer is as follows, but in fact, the kernel in the direct io path,
the size of the request may be larger than the length of this buffer, which leads to the buffer
is not enough to fill the read content, resulting in read failure. so here we limit the size of
max_read to the length of our buffer so that the fuse kernel will not send requests that exceed
the length of the buffer. in virtiofs scene max_read can't be adjusted, his default is UINT_MAX,
but we don't have to worry about it, because the buffer is allocated by the kernel driver, we
just use this buffer to fill the response, so we don't need to do any adjustment.

Signed-off-by: zyfjeff <zyfjeff@linux.alibaba.com>
  • Loading branch information
zyfjeff authored and eryugey committed Apr 11, 2024
1 parent 68df7ba commit de3231b
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 26 deletions.
6 changes: 0 additions & 6 deletions src/api/server/async_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -322,12 +322,6 @@ impl<F: AsyncFileSystem + Sync> Server<F> {
..
} = ctx.r.read_obj().map_err(Error::DecodeMessage)?;

if size > MAX_BUFFER_SIZE {
return ctx
.async_reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM))
.await;
}

let owner = if read_flags & READ_LOCKOWNER != 0 {
Some(lock_owner)
} else {
Expand Down
19 changes: 0 additions & 19 deletions src/api/server/sync_io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,10 +436,6 @@ impl<F: FileSystem + Sync> Server<F> {
..
} = ctx.r.read_obj().map_err(Error::DecodeMessage)?;

if size > MAX_BUFFER_SIZE {
return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM));
}

let owner = if read_flags & READ_LOCKOWNER != 0 {
Some(lock_owner)
} else {
Expand Down Expand Up @@ -495,10 +491,6 @@ impl<F: FileSystem + Sync> Server<F> {
..
} = ctx.r.read_obj().map_err(Error::DecodeMessage)?;

if size > MAX_BUFFER_SIZE {
return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM));
}

let owner = if fuse_flags & WRITE_LOCKOWNER != 0 {
Some(lock_owner)
} else {
Expand Down Expand Up @@ -618,9 +610,6 @@ impl<F: FileSystem + Sync> Server<F> {

pub(super) fn getxattr<S: BitmapSlice>(&self, mut ctx: SrvContext<'_, F, S>) -> Result<usize> {
let GetxattrIn { size, .. } = ctx.r.read_obj().map_err(Error::DecodeMessage)?;
if size > MAX_BUFFER_SIZE {
return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM));
}

let buf =
ServerUtil::get_message_body(&mut ctx.r, &ctx.in_header, size_of::<GetxattrIn>())?;
Expand All @@ -647,10 +636,6 @@ impl<F: FileSystem + Sync> Server<F> {
pub(super) fn listxattr<S: BitmapSlice>(&self, mut ctx: SrvContext<'_, F, S>) -> Result<usize> {
let GetxattrIn { size, .. } = ctx.r.read_obj().map_err(Error::DecodeMessage)?;

if size > MAX_BUFFER_SIZE {
return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM));
}

match self.fs.listxattr(ctx.context(), ctx.nodeid(), size) {
Ok(ListxattrReply::Names(val)) => ctx.reply_ok(None::<u8>, Some(&val)),
Ok(ListxattrReply::Count(count)) => {
Expand Down Expand Up @@ -820,10 +805,6 @@ impl<F: FileSystem + Sync> Server<F> {
fh, offset, size, ..
} = ctx.r.read_obj().map_err(Error::DecodeMessage)?;

if size > MAX_BUFFER_SIZE {
return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM));
}

let available_bytes = ctx.w.available_bytes();
if available_bytes < size as usize {
return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM));
Expand Down
15 changes: 14 additions & 1 deletion src/transport/fusedev/linux_session.rs
Original file line number Diff line number Diff line change
Expand Up @@ -434,12 +434,25 @@ fn fuse_kern_mount(
let meta = mountpoint
.metadata()
.map_err(|e| SessionFailure(format!("stat {mountpoint:?}: {e}")))?;
// the current implementation of fuse-backend-rs uses a fixed buffer to store the fuse response,
// the default value of this buffer is as follows, but in fact, the kernel in the direct io path,
// the size of the request may be larger than the length of this buffer (this is determined by
// the max_read option to determine the maximum size of kernel requests, the default value is
// a very large number), which leads to the buffer is not enough to fill the read content,
// resulting in read failure. so here we limit the size of max_read to the length of our buffer,
// so that the fuse kernel will not send requests that exceed the length of the buffer.
// in virtiofs scene max_read can't be adjusted, his default is UINT_MAX, but we don't have to
// worry about it, because the buffer is allocated by the kernel driver, we just use this buffer
// to fill the response, so we don't need to do any adjustment.
let max_read = FUSE_KERN_BUF_PAGES * pagesize() + FUSE_HEADER_SIZE;

let mut opts = format!(
"default_permissions,fd={},rootmode={:o},user_id={},group_id={}",
"default_permissions,fd={},rootmode={:o},user_id={},group_id={},max_read={}",
file.as_raw_fd(),
meta.permissions().mode() & libc::S_IFMT,
getuid(),
getgid(),
max_read
);
if allow_other {
opts.push_str(",allow_other");
Expand Down

0 comments on commit de3231b

Please sign in to comment.