Skip to content

Commit

Permalink
Improve system call wrappers
Browse files Browse the repository at this point in the history
This change improves copy_file_range(), sendfile(), splice(), openpty(),
closefrom(), close_range(), fadvise() and posix_fadvise() in addition to
writing tests that confirm things like errno and seeking behavior across
platforms. We now polyfill behavior less aggressively for some of these
functions when platform support isn't available. Please see:

https://justine.lol/cosmopolitan/functions.html
  • Loading branch information
jart committed Sep 19, 2022
1 parent 224c12f commit c7a8cd2
Show file tree
Hide file tree
Showing 89 changed files with 1,151 additions and 414 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ o/$(MODE): \
libc/disclaimer.inc \
rx:build/bootstrap \
rx:o/third_party/gcc \
/proc/self/status \
/proc/stat \
rw:/dev/null \
w:o/stack.log \
/etc/hosts \
Expand Down
1 change: 0 additions & 1 deletion examples/script.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
#include "libc/errno.h"
#include "libc/fmt/conv.h"
#include "libc/intrin/bswap.h"
#include "libc/intrin/kprintf.h"
#include "libc/log/bsd.h"
#include "libc/macros.internal.h"
#include "libc/mem/mem.h"
Expand Down
93 changes: 93 additions & 0 deletions libc/calls/_ptsname.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2022 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/calls/syscall_support-sysv.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/fmt/itoa.h"
#include "libc/str/str.h"
#include "libc/sysv/consts/termios.h"
#include "libc/sysv/errfuns.h"

// ioctl request codes used by _ptsname() below. They are only declared
// here; their values are defined elsewhere in the project.

// passed on FreeBSD together with a `struct fiodgname_arg`
extern const unsigned FIODGNAME;
// passed on NetBSD together with a `struct ptmget`
extern const unsigned TIOCPTSNAME;
// passed on XNU together with a plain character buffer
extern const unsigned TIOCPTYGNAME;

// Argument block handed to the FIODGNAME ioctl by _ptsname() on FreeBSD.
// NOTE(review): field order/types presumably mirror the kernel's own
// definition and must not be rearranged — confirm against FreeBSD headers.
struct fiodgname_arg {
// capacity in bytes of the buffer that `buf` points to
int len;
// destination buffer; _ptsname() points it just past its "/dev/" prefix
void *buf;
};

// Argument block handed to the TIOCPTSNAME ioctl by _ptsname() on NetBSD.
// _ptsname() also borrows the `sn` member as its scratch path buffer on
// the other platforms.
// NOTE(review): layout presumably mirrors NetBSD's `struct ptmget`; the
// meanings given for cfd/sfd/cn below are inferred from the names — confirm.
struct ptmget {
// presumably the controlling-side file descriptor (unused in this file)
int cfd;
// presumably the subordinate-side file descriptor (unused in this file)
int sfd;
// presumably the controlling-side device name (unused in this file)
char cn[1024];
// name buffer that _ptsname() reads its result from
char sn[1024];
};

int _ptsname(int fd, char *buf, size_t size) {
int pty;
size_t n;
struct ptmget t;

if (_isptmaster(fd)) {
return -1;
}

t.sn[0] = '/';
t.sn[1] = 'd';
t.sn[2] = 'e';
t.sn[3] = 'v';
t.sn[4] = '/';
t.sn[5] = 0;

if (IsLinux()) {
if (sys_ioctl(fd, TIOCGPTN, &pty)) return -1;
t.sn[5] = 'p';
t.sn[6] = 't';
t.sn[7] = 's';
t.sn[8] = '/';
FormatInt32(t.sn + 9, pty);
} else if (IsXnu()) {
if (sys_ioctl(fd, TIOCPTYGNAME, t.sn)) {
return -1;
}
} else if (IsFreebsd()) {
struct fiodgname_arg fgn = {sizeof(t.sn) - 5, t.sn + 5};
if (sys_ioctl(fd, FIODGNAME, &fgn) == -1) {
if (errno == EINVAL) {
errno = ERANGE;
}
return -1;
}
} else if (IsNetbsd()) {
if (sys_ioctl(fd, TIOCPTSNAME, &t)) {
return -1;
}
} else {
return enosys();
}

if ((n = strlen(t.sn)) < size) {
memcpy(buf, t.sn, n + 1);
return 0;
} else {
return erange();
}
}
1 change: 1 addition & 0 deletions libc/calls/calls.mk
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ o/$(MODE)/libc/calls/ntcontext2linux.o: private \

# we always want -O3 because:
# it makes the code size smaller too
o/$(MODE)/libc/calls/termios2host.o \
o/$(MODE)/libc/calls/sigenter-freebsd.o \
o/$(MODE)/libc/calls/sigenter-netbsd.o \
o/$(MODE)/libc/calls/sigenter-openbsd.o \
Expand Down
36 changes: 15 additions & 21 deletions libc/calls/close_range.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/calls/calls.h"
#include "libc/intrin/strace.internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/errno.h"
#include "libc/limits.h"
#include "libc/dce.h"
#include "libc/intrin/strace.internal.h"
#include "libc/sysv/errfuns.h"

/**
* Closes inclusive range of file descriptors, e.g.
Expand All @@ -32,33 +32,27 @@
* }
* }
*
* This is supported on Linux 5.9+, FreeBSD, and OpenBSD. On FreeBSD,
* `flags` must be zero. On OpenBSD, we call closefrom(int) so `last`
* should be `-1` in order to get OpenBSD support, otherwise `ENOSYS`
* will be returned. We also polyfill closefrom on FreeBSD since it's
* available on older kernels.
* The following flags are available:
*
* On Linux, the following flags are supported:
* - `CLOSE_RANGE_UNSHARE` (Linux-only)
* - `CLOSE_RANGE_CLOEXEC` (Linux-only)
*
* - CLOSE_RANGE_UNSHARE
* - CLOSE_RANGE_CLOEXEC
* This is only supported on Linux 5.9+ and FreeBSD 13+. Consider using
* closefrom() which will work on OpenBSD too.
*
* @return 0 on success, or -1 w/ errno
* @error ENOSYS if not Linux 5.9+ / FreeBSD / OpenBSD
* @error EBADF on OpenBSD if `first` is greater than highest fd
* @error EINVAL if flags are bad or first is greater than last
* @error EMFILE if a weird race condition happens on Linux
* @error EINTR possibly on OpenBSD
* @error ENOSYS if not Linux 5.9+ or FreeBSD 13+
* @error ENOMEM on Linux maybe
* @see closefrom()
*/
int close_range(unsigned int first, unsigned int last, unsigned int flags) {
int rc, err;
err = errno;
if ((rc = sys_close_range(first, last, flags)) == -1) {
if (errno == ENOSYS && first <= INT_MAX && last == UINT_MAX && !flags) {
errno = err;
rc = sys_closefrom(first);
}
int rc;
if (IsLinux() || IsFreebsd()) {
rc = sys_close_range(first, last, flags);
} else {
rc = enosys();
}
STRACE("close_range(%d, %d, %#x) → %d% m", first, last, flags, rc);
return rc;
Expand Down
26 changes: 14 additions & 12 deletions libc/calls/closefrom.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/calls/calls.h"
#include "libc/intrin/strace.internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/strace.internal.h"
#include "libc/limits.h"
#include "libc/sysv/errfuns.h"

Expand All @@ -34,26 +35,27 @@
* }
*
* @return 0 on success, or -1 w/ errno
* @error ENOSYS if not Linux 5.9+ / FreeBSD / OpenBSD
* @error EBADF if `first` is negative
* @error EBADF on OpenBSD if `first` is greater than highest fd
* @error EINVAL if flags are bad or first is greater than last
* @error EMFILE if a weird race condition happens on Linux
* @error ENOSYS if not Linux 5.9+, FreeBSD 8+, or OpenBSD
* @error EINTR possibly on OpenBSD
* @error ENOMEM on Linux maybe
* @note supported on Linux 5.9+, FreeBSD 8+, and OpenBSD
*/
int closefrom(int first) {
int rc, err;
if (first >= 0) {
err = errno;
if ((rc = sys_close_range(first, -1, 0)) == -1) {
if (errno == ENOSYS) {
errno = err;
rc = sys_closefrom(first);
}
}
} else {
if (IsNetbsd() || IsWindows() || IsMetal()) {
rc = enosys();
} else if (first < 0) {
// consistent with openbsd
// freebsd allows this but it's dangerous
// necessary on linux due to type signature
rc = ebadf();
} else if (IsLinux()) {
rc = sys_close_range(first, 0xffffffffu, 0);
} else {
rc = sys_closefrom(first);
}
STRACE("closefrom(%d) → %d% m", first, rc);
return rc;
Expand Down
115 changes: 115 additions & 0 deletions libc/calls/copy_file_range.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
│vi: set net ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi│
╞══════════════════════════════════════════════════════════════════════════════╡
│ Copyright 2022 Justine Alexandra Roberts Tunney │
│ │
│ Permission to use, copy, modify, and/or distribute this software for │
│ any purpose with or without fee is hereby granted, provided that the │
│ above copyright notice and this permission notice appear in all copies. │
│ │
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
│ PERFORMANCE OF THIS SOFTWARE. │
╚─────────────────────────────────────────────────────────────────────────────*/
#include "libc/calls/calls.h"
#include "libc/calls/struct/sigset.h"
#include "libc/calls/struct/sigset.internal.h"
#include "libc/calls/syscall-sysv.internal.h"
#include "libc/dce.h"
#include "libc/errno.h"
#include "libc/intrin/asan.internal.h"
#include "libc/intrin/describeflags.internal.h"
#include "libc/intrin/strace.internal.h"
#include "libc/sysv/consts/sig.h"
#include "libc/sysv/errfuns.h"
#include "libc/thread/thread.h"

// Cached answer to "can copy_file_range() be used on this system?".
// copy_file_range() triggers the probe through pthread_once() and then
// consults `ok` on every call.
static struct CopyFileRange {
// once-control passed to pthread_once() with copy_file_range_init()
pthread_once_t once;
// result of HasCopyFileRange(), stored by copy_file_range_init()
bool ok;
} g_copy_file_range;

/**
 * Probes whether copy_file_range() should be considered available.
 *
 * Each probe is a system call made with deliberately invalid arguments,
 * so the specific errno tells us whether the kernel recognized it. The
 * caller's errno is saved beforehand and restored afterwards.
 *
 * @return true if the platform's probe failed with the expected errno
 */
static bool HasCopyFileRange(void) {
  bool supported = false;
  int saved_errno = errno;
  if (IsLinux()) {
    // We modernize our detection by a few years for simplicity.
    // This system call is chosen since it's listed by pledge().
    // https://www.cygwin.com/bugzilla/show_bug.cgi?id=26338
    if (sys_close_range(-1, -2, 0) == -1) {
      supported = errno == EINVAL;
    }
  } else if (IsFreebsd()) {
    // bad descriptors yield EBADF only if the system call itself exists
    if (sys_copy_file_range(-1, 0, -1, 0, 0, 0) == -1) {
      supported = errno == EBADF;
    }
  }
  errno = saved_errno;
  return supported;
}

/**
 * Runs the support probe and caches its verdict.
 *
 * This is the routine copy_file_range() hands to pthread_once().
 */
static void copy_file_range_init(void) {
  bool supported = HasCopyFileRange();
  g_copy_file_range.ok = supported;
}

/**
 * Transfers data between files.
 *
 * If this system call is available (Linux c. 2018 or FreeBSD c. 2021)
 * and the file system supports it (e.g. ext4) and the source and dest
 * files are on the same file system, then this system call shall make
 * copies go about 2x faster.
 *
 * This implementation requires Linux 5.9+ even though the system call
 * was introduced in Linux 4.5. That's to ensure ENOSYS works reliably
 * due to a faulty backport, that happened in RHEL7. FreeBSD detection
 * on the other hand will work fine.
 *
 * @param infd is source file, which should be on same file system
 * @param opt_in_out_inoffset may be specified for pread() behavior
 * @param outfd should be a writable file, but not `O_APPEND`
 * @param opt_in_out_outoffset may be specified for pwrite() behavior
 * @param uptobytes is maximum number of bytes to transfer
 * @param flags is reserved for future use and must be zero
 * @return number of bytes transferred, or -1 w/ errno
 * @raise EXDEV if source and destination are on different filesystems
 * @raise EBADF if `infd` or `outfd` aren't open files or append-only
 * @raise EPERM if `outfd` refers to an immutable file on Linux
 * @raise EINVAL if ranges overlap or `flags` is non-zero
 * @raise EFBIG if `setrlimit(RLIMIT_FSIZE)` is exceeded
 * @raise EFAULT if the memory behind one of the pointers is bad
 * @raise ERANGE if overflow happens computing ranges
 * @raise ENOSPC if file system has run out of space
 * @raise ETXTBSY if source or dest is a swap file
 * @raise EINTR if a signal was delivered instead
 * @raise EISDIR if source or dest is a directory
 * @raise ENOSYS if not Linux 5.9+ or FreeBSD 13+
 * @raise EIO if a low-level i/o error happens
 * @see sendfile() for seekable → socket
 * @see splice() for fd ↔ pipe
 */
ssize_t copy_file_range(int infd, int64_t *opt_in_out_inoffset, int outfd,
                        int64_t *opt_in_out_outoffset, size_t uptobytes,
                        uint32_t flags) {
  ssize_t rc;
  // the support probe is run through pthread_once() and its result cached
  pthread_once(&g_copy_file_range.once, copy_file_range_init);
  if (!g_copy_file_range.ok) {
    rc = enosys();
  } else if (IsAsan() && ((opt_in_out_inoffset &&
                           !__asan_is_valid(opt_in_out_inoffset, 8)) ||
                          (opt_in_out_outoffset &&
                           !__asan_is_valid(opt_in_out_outoffset, 8)))) {
    // under ASAN, reject bad offset pointers before entering the kernel
    rc = efault();
  } else {
    rc = sys_copy_file_range(infd, opt_in_out_inoffset, outfd,
                             opt_in_out_outoffset, uptobytes, flags);
  }
  // the trailing %'ld directive prints the result, so `rc` must be the
  // final argument; omitting it makes the logger read a missing vararg
  STRACE("copy_file_range(%d, %s, %d, %s, %'zu, %#x) → %'ld% m", infd,
         DescribeInOutInt64(rc, opt_in_out_inoffset), outfd,
         DescribeInOutInt64(rc, opt_in_out_outoffset), uptobytes, flags, rc);
  return rc;
}
6 changes: 6 additions & 0 deletions libc/calls/fadvise-nt.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "libc/calls/syscall_support-nt.internal.h"
#include "libc/nt/createfile.h"
#include "libc/nt/enum/fileflagandattributes.h"
#include "libc/nt/enum/filetype.h"
#include "libc/nt/files.h"
#include "libc/nt/runtime.h"
#include "libc/sysv/consts/madv.h"
Expand All @@ -33,6 +34,7 @@ textwindows int sys_fadvise_nt(int fd, uint64_t offset, uint64_t len,
int rc, flags, mode;
uint32_t perm, share, attr;

if ((int64_t)len < 0) return einval();
if (!__isfdkind(fd, kFdFile)) return ebadf();
h1 = g_fds.p[fd].handle;
mode = g_fds.p[fd].mode;
Expand All @@ -57,6 +59,10 @@ textwindows int sys_fadvise_nt(int fd, uint64_t offset, uint64_t len,
return -1;
}

if (GetFileType(h1) == kNtFileTypePipe) {
return espipe();
}

// MSDN says only these are allowed, otherwise it returns EINVAL.
attr &= kNtFileFlagBackupSemantics | kNtFileFlagDeleteOnClose |
kNtFileFlagNoBuffering | kNtFileFlagOpenNoRecall |
Expand Down
Loading

0 comments on commit c7a8cd2

Please sign in to comment.