From 93ed838b9621e6492644f1a6c94827309e291318 Mon Sep 17 00:00:00 2001 From: Patrick Mooney Date: Wed, 9 Dec 2015 19:41:20 +0000 Subject: [PATCH] OS-4830 lxbrand convert select/poll to IKE Reviewed by: Jerry Jelinek Reviewed by: Robert Mustacchi --- usr/src/common/brand/lx/lx_signum.h | 28 + usr/src/lib/brand/lx/lx_brand/Makefile.com | 1 - .../lib/brand/lx/lx_brand/common/lx_brand.c | 16 +- .../brand/lx/lx_brand/common/poll_select.c | 289 ------- usr/src/lib/brand/lx/lx_brand/common/signal.c | 255 ------- usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h | 26 - .../lib/brand/lx/lx_brand/sys/lx_syscall.h | 5 - usr/src/uts/common/brand/lx/os/lx_signal.c | 50 ++ usr/src/uts/common/brand/lx/os/lx_syscall.c | 16 +- usr/src/uts/common/brand/lx/sys/lx_signal.h | 32 + usr/src/uts/common/brand/lx/sys/lx_syscalls.h | 4 + usr/src/uts/common/brand/lx/syscall/lx_poll.c | 714 ++++++++++++++++++ usr/src/uts/common/syscall/poll.c | 325 ++++---- usr/src/uts/intel/Makefile.files | 2 + 14 files changed, 1019 insertions(+), 744 deletions(-) delete mode 100644 usr/src/lib/brand/lx/lx_brand/common/poll_select.c create mode 100644 usr/src/uts/common/brand/lx/os/lx_signal.c create mode 100644 usr/src/uts/common/brand/lx/sys/lx_signal.h create mode 100644 usr/src/uts/common/brand/lx/syscall/lx_poll.c diff --git a/usr/src/common/brand/lx/lx_signum.h b/usr/src/common/brand/lx/lx_signum.h index 4db2baa5f5f9..b6c5f32731bb 100644 --- a/usr/src/common/brand/lx/lx_signum.h +++ b/usr/src/common/brand/lx/lx_signum.h @@ -79,6 +79,34 @@ extern int lx_ltos_signo(int, int); extern int lx_stol_status(int, int); extern int lx_stol_sigcode(int); +/* + * NOTE: Linux uses different definitions for 'sigset_t's and 'sigaction_t's + * depending on whether the definition is for user space or the kernel. + * + * The definitions below MUST correspond to the Linux kernel versions, + * as glibc will do the necessary translation from the Linux user + * versions. 
+ */ +#if defined(_LP64) +#define LX_NSIG_WORDS 1 +#define LX_WSHIFT 6 +#elif defined(_ILP32) +#define LX_NSIG_WORDS 2 +#define LX_WSHIFT 5 +#else +#error "LX only supports LP64 and ILP32" +#endif + +typedef struct { + ulong_t __bits[LX_NSIG_WORDS]; +} lx_sigset_t; + +#define LX_NBITS (sizeof (ulong_t) * NBBY) +#define lx_sigmask(n) (1UL << (((n) - 1) % LX_NBITS)) +#define lx_sigword(n) (((ulong_t)((n) - 1)) >> LX_WSHIFT) +#define lx_sigismember(s, n) (lx_sigmask(n) & (s)->__bits[lx_sigword(n)]) +#define lx_sigaddset(s, n) ((s)->__bits[lx_sigword(n)] |= lx_sigmask(n)) + #ifdef __cplusplus } #endif diff --git a/usr/src/lib/brand/lx/lx_brand/Makefile.com b/usr/src/lib/brand/lx/lx_brand/Makefile.com index c6c19cde69f3..f199626f70fe 100644 --- a/usr/src/lib/brand/lx/lx_brand/Makefile.com +++ b/usr/src/lib/brand/lx/lx_brand/Makefile.com @@ -46,7 +46,6 @@ COBJS = aio.o \ mount.o \ mount_nfs.o \ pgrp.o \ - poll_select.o \ priority.o \ ptrace.o \ sched.o \ diff --git a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c index 701123a0f734..ecb922675135 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c +++ b/usr/src/lib/brand/lx/lx_brand/common/lx_brand.c @@ -947,7 +947,7 @@ static lx_syscall_handler_t lx_handlers[] = { lx_stat64, /* 4: stat */ lx_fstat64, /* 5: fstat */ lx_lstat64, /* 6: lstat */ - lx_poll, /* 7: poll */ + NULL, /* 7: poll */ lx_lseek, /* 8: lseek */ lx_mmap, /* 9: mmap */ lx_mprotect, /* 10: mprotect */ @@ -963,7 +963,7 @@ static lx_syscall_handler_t lx_handlers[] = { NULL, /* 20: writev */ lx_access, /* 21: access */ NULL, /* 22: pipe */ - lx_select, /* 23: select */ + NULL, /* 23: select */ NULL, /* 24: sched_yield */ lx_remap, /* 25: mremap */ lx_msync, /* 26: msync */ @@ -1210,8 +1210,8 @@ static lx_syscall_handler_t lx_handlers[] = { lx_readlinkat, /* 267: readlinkat */ NULL, /* 268: fchmodat */ lx_faccessat, /* 269: faccessat */ - lx_pselect6, /* 270: pselect6 */ - lx_ppoll, /* 271: ppoll */ + 
NULL, /* 270: pselect6 */ + NULL, /* 271: ppoll */ NULL, /* 272: unshare */ NULL, /* 273: set_robust_list */ NULL, /* 274: get_robust_list */ @@ -1413,7 +1413,7 @@ static lx_syscall_handler_t lx_handlers[] = { lx_setfsgid16, /* 139: setfsgid16 */ lx_llseek, /* 140: llseek */ NULL, /* 141: getdents */ - lx_select, /* 142: select */ + NULL, /* 142: select */ lx_flock, /* 143: flock */ lx_msync, /* 144: msync */ NULL, /* 145: readv */ @@ -1439,7 +1439,7 @@ static lx_syscall_handler_t lx_handlers[] = { lx_getresuid16, /* 165: getresuid16 */ NULL, /* 166: vm86 */ lx_query_module, /* 167: query_module */ - lx_poll, /* 168: poll */ + NULL, /* 168: poll */ NULL, /* 169: nfsservctl */ NULL, /* 170: setresgid16 */ lx_getresgid16, /* 171: getresgid16 */ @@ -1579,8 +1579,8 @@ static lx_syscall_handler_t lx_handlers[] = { lx_readlinkat, /* 305: readlinkat */ NULL, /* 306: fchmodat */ lx_faccessat, /* 307: faccessat */ - lx_pselect6, /* 308: pselect6 */ - lx_ppoll, /* 309: ppoll */ + NULL, /* 308: pselect6 */ + NULL, /* 309: ppoll */ NULL, /* 310: unshare */ NULL, /* 311: set_robust_list */ NULL, /* 312: get_robust_list */ diff --git a/usr/src/lib/brand/lx/lx_brand/common/poll_select.c b/usr/src/lib/brand/lx/lx_brand/common/poll_select.c deleted file mode 100644 index 793ff3bba6ec..000000000000 --- a/usr/src/lib/brand/lx/lx_brand/common/poll_select.c +++ /dev/null @@ -1,289 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * Copyright (c) 2015, Joyent, Inc. All rights reserved. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(_ILP32) -extern int select_large_fdset(int nfds, fd_set *in0, fd_set *out0, fd_set *ex0, - struct timeval *tv); -#endif - -long -lx_select(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, - uintptr_t p5) -{ - int nfds = (int)p1; - fd_set *rfdsp = NULL; - fd_set *wfdsp = NULL; - fd_set *efdsp = NULL; - struct timeval tv, *tvp = NULL; - int fd_set_len = howmany(nfds, 8); - int r; - int res; - hrtime_t start = NULL, end; - - lx_debug("\tselect(%d, 0x%p, 0x%p, 0x%p, 0x%p)", p1, p2, p3, p4, p5); - - if (nfds > 0) { - if (p2 != NULL) { - rfdsp = malloc(fd_set_len); - if (rfdsp == NULL) { - res = -ENOMEM; - goto err; - } - if (uucopy((void *)p2, rfdsp, fd_set_len) != 0) { - res = -errno; - goto err; - } - } - if (p3 != NULL) { - wfdsp = malloc(fd_set_len); - if (wfdsp == NULL) { - res = -ENOMEM; - goto err; - } - if (uucopy((void *)p3, wfdsp, fd_set_len) != 0) { - res = -errno; - goto err; - } - } - if (p4 != NULL) { - efdsp = malloc(fd_set_len); - if (efdsp == NULL) { - res = -ENOMEM; - goto err; - } - if (uucopy((void *)p4, efdsp, fd_set_len) != 0) { - res = -errno; - goto err; - } - } - } - if (p5 != NULL) { - tvp = &tv; - if (uucopy((void *)p5, &tv, sizeof (tv)) != 0) { - res = -errno; - goto err; - } - start = gethrtime(); - } - -#if defined(_LP64) - r = select(nfds, rfdsp, wfdsp, efdsp, tvp); -#else - if (nfds >= FD_SETSIZE) - r = select_large_fdset(nfds, rfdsp, 
wfdsp, efdsp, tvp); - else - r = select(nfds, rfdsp, wfdsp, efdsp, tvp); -#endif - if (r < 0) { - res = -errno; - goto err; - } - - if (tvp != NULL) { - long long tv_total; - - /* - * Linux updates the timeval parameter for select() calls - * with the amount of time that left before the select - * would have timed out. - */ - end = gethrtime(); - tv_total = (tv.tv_sec * MICROSEC) + tv.tv_usec; - tv_total -= ((end - start) / (NANOSEC / MICROSEC)); - if (tv_total < 0) { - tv.tv_sec = 0; - tv.tv_usec = 0; - } else { - tv.tv_sec = tv_total / MICROSEC; - tv.tv_usec = tv_total % MICROSEC; - } - - if (uucopy(&tv, (void *)p5, sizeof (tv)) != 0) { - res = -errno; - goto err; - } - } - - if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0)) { - res = -errno; - goto err; - } - if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0)) { - res = -errno; - goto err; - } - if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0)) { - res = -errno; - goto err; - } - - res = r; - -err: - if (rfdsp != NULL) - free(rfdsp); - if (wfdsp != NULL) - free(wfdsp); - if (efdsp != NULL) - free(efdsp); - return (res); -} - -long -lx_poll(uintptr_t p1, uintptr_t p2, uintptr_t p3) -{ - struct pollfd *lfds = NULL; - struct pollfd *sfds = NULL; - nfds_t nfds = (nfds_t)p2; - int fds_size, i, rval, revents, res; - - /* - * Little emulation is needed if nfds == 0. - * If p1 happens to be NULL, it'll be dealt with later. - */ - if (nfds == 0) { - if ((rval = poll(NULL, 0, (int)p3)) < 0) - return (-errno); - - return (rval); - } - - /* - * Note: we are assuming that the Linux and Illumos pollfd - * structures are identical. Copy in the linux poll structure. 
- */ - fds_size = sizeof (struct pollfd) * nfds; - lfds = (struct pollfd *)malloc(fds_size); - if (lfds == NULL) { - res = -ENOMEM; - goto err; - } - if (uucopy((void *)p1, lfds, fds_size) != 0) { - res = -errno; - goto err; - } - - /* - * The poll system call modifies the poll structures passed in - * so we'll need to make an extra copy of them. - */ - sfds = (struct pollfd *)malloc(fds_size); - if (sfds == NULL) { - res = -ENOMEM; - goto err; - } - - /* Convert the Linux events bitmask into the Illumos equivalent. */ - for (i = 0; i < nfds; i++) { - /* - * If the caller is polling for an unsupported event, we - * have to bail out. - */ - if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) { - lx_unsupported("unsupported poll events requested: " - "events=0x%x", lfds[i].events); - res = -ENOTSUP; - goto err; - } - - sfds[i].fd = lfds[i].fd; - sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS; - if (lfds[i].events & LX_POLLWRNORM) - sfds[i].events |= POLLWRNORM; - if (lfds[i].events & LX_POLLWRBAND) - sfds[i].events |= POLLWRBAND; - if (lfds[i].events & LX_POLLRDHUP) - sfds[i].events |= POLLRDHUP; - sfds[i].revents = 0; - } - - lx_debug("\tpoll(0x%p, %u, %d)", sfds, nfds, (int)p3); - - if ((rval = poll(sfds, nfds, (int)p3)) < 0) { - res = -errno; - goto err; - } - - /* Convert the Illumos revents bitmask into the Linux equivalent */ - for (i = 0; i < nfds; i++) { - revents = sfds[i].revents & LX_POLL_COMMON_EVENTS; - if (sfds[i].revents & POLLWRBAND) - revents |= LX_POLLWRBAND; - if (sfds[i].revents & POLLRDHUP) - revents |= LX_POLLRDHUP; - - /* - * Be careful because on Illumos POLLOUT and POLLWRNORM - * are defined to the same values but on Linux they - * are not. 
- */ - if (sfds[i].revents & POLLOUT) { - if ((lfds[i].events & LX_POLLOUT) == 0) - revents &= ~LX_POLLOUT; - if (lfds[i].events & LX_POLLWRNORM) - revents |= LX_POLLWRNORM; - } - - lfds[i].revents = revents; - } - - /* Copy out the results */ - if (uucopy(lfds, (void *)p1, fds_size) != 0) { - res = -errno; - goto err; - } - - res = rval; - -err: - if (lfds != NULL) - free(lfds); - if (sfds != NULL) - free(sfds); - return (res); -} diff --git a/usr/src/lib/brand/lx/lx_brand/common/signal.c b/usr/src/lib/brand/lx/lx_brand/common/signal.c index 383b1d36ae51..80f1bf09a1fd 100644 --- a/usr/src/lib/brand/lx/lx_brand/common/signal.c +++ b/usr/src/lib/brand/lx/lx_brand/common/signal.c @@ -315,12 +315,6 @@ static int lx_no_abort_handler = 0; static void lx_sigdeliver(int, siginfo_t *, ucontext_t *, size_t, void (*)(), void (*)(), struct lx_sigaction *); -/* - * Cache result of process.max-file-descriptor to avoid calling getrctl() - * for each lx_ppoll(). - */ -static rlim_t maxfd = 0; - /* * stol_stack() and ltos_stack() convert between Illumos and Linux stack_t * structures. @@ -2261,255 +2255,6 @@ lx_siginit(void) return (0); } -/* - * This code strongly resembles lx_poll(), but is here to be able to take - * advantage of the Linux signal helper routines. 
- */ -long -lx_ppoll(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, uintptr_t p5) -{ - struct pollfd *lfds, *sfds; - nfds_t nfds = (nfds_t)p2; - timespec_t ts, *tsp = NULL; - int fds_size, i, rval, revents; - lx_sigset_t lxsig, *lxsigp = NULL; - sigset_t sigset, *sp = NULL; - rctlblk_t *rblk; - - lx_debug("\tppoll(0x%p, %d, 0x%p, 0x%p, %d)", p1, p2, p3, p4, p5); - - if (p3 != NULL) { - if (uucopy((void *)p3, &ts, sizeof (ts)) != 0) - return (-errno); - - tsp = &ts; - } - - if (p4 != NULL) { - if (uucopy((void *)p4, &lxsig, sizeof (lxsig)) != 0) - return (-errno); - - lxsigp = &lxsig; - if ((size_t)p5 != sizeof (lx_sigset_t)) - return (-EINVAL); - - if (lxsigp) { - if ((rval = ltos_sigset(lxsigp, &sigset)) != 0) - return (rval); - - sp = &sigset; - } - } - - /* - * Deal with the NULL fds[] case. - */ - if (nfds == 0 || p1 == NULL) { - if ((rval = ppoll(NULL, 0, tsp, sp)) < 0) - return (-errno); - - return (rval); - } - - if (maxfd == 0) { - if ((rblk = (rctlblk_t *)SAFE_ALLOCA(rctlblk_size())) == NULL) - return (-ENOMEM); - - if (getrctl("process.max-file-descriptor", NULL, rblk, - RCTL_FIRST) == -1) - return (-EINVAL); - - maxfd = rctlblk_get_value(rblk); - } - - if (nfds > maxfd) - return (-EINVAL); - - /* - * Note: we are assuming that the Linux and Illumos pollfd - * structures are identical. Copy in the Linux poll structure. - */ - fds_size = sizeof (struct pollfd) * nfds; - lfds = (struct pollfd *)SAFE_ALLOCA(fds_size); - if (lfds == NULL) - return (-ENOMEM); - if (uucopy((void *)p1, lfds, fds_size) != 0) - return (-errno); - - /* - * The poll system call modifies the poll structures passed in - * so we'll need to make an extra copy of them. - */ - sfds = (struct pollfd *)SAFE_ALLOCA(fds_size); - if (sfds == NULL) - return (-ENOMEM); - - /* Convert the Linux events bitmask into the Illumos equivalent. */ - for (i = 0; i < nfds; i++) { - /* - * If the caller is polling for an unsupported event, we - * have to bail out. 
- */ - if (lfds[i].events & ~LX_POLL_SUPPORTED_EVENTS) { - lx_unsupported("unsupported poll events requested: " - "events=0x%x", lfds[i].events); - return (-ENOTSUP); - } - - sfds[i].fd = lfds[i].fd; - sfds[i].events = lfds[i].events & LX_POLL_COMMON_EVENTS; - if (lfds[i].events & LX_POLLWRNORM) - sfds[i].events |= POLLWRNORM; - if (lfds[i].events & LX_POLLWRBAND) - sfds[i].events |= POLLWRBAND; - if (lfds[i].events & LX_POLLRDHUP) - sfds[i].events |= POLLRDHUP; - sfds[i].revents = 0; - } - - if ((rval = ppoll(sfds, nfds, tsp, sp)) < 0) - return (-errno); - - /* Convert the Illumos revents bitmask into the Linux equivalent */ - for (i = 0; i < nfds; i++) { - revents = sfds[i].revents & LX_POLL_COMMON_EVENTS; - if (sfds[i].revents & POLLWRBAND) - revents |= LX_POLLWRBAND; - if (sfds[i].revents & POLLRDHUP) - revents |= LX_POLLRDHUP; - - /* - * Be careful because on Illumos POLLOUT and POLLWRNORM - * are defined to the same values but on Linux they - * are not. - */ - if (sfds[i].revents & POLLOUT) { - if ((lfds[i].events & LX_POLLOUT) == 0) - revents &= ~LX_POLLOUT; - if (lfds[i].events & LX_POLLWRNORM) - revents |= LX_POLLWRNORM; - } - - lfds[i].revents = revents; - } - - /* Copy out the results */ - if (uucopy(lfds, (void *)p1, fds_size) != 0) - return (-errno); - - return (rval); -} - -/* - * This code stongly resemebles lx_select(), but is here to be able to take - * advantage of the Linux signal helper routines. 
- */ -long -lx_pselect6(uintptr_t p1, uintptr_t p2, uintptr_t p3, uintptr_t p4, - uintptr_t p5, uintptr_t p6) -{ - int nfds = (int)p1; - fd_set *rfdsp = NULL; - fd_set *wfdsp = NULL; - fd_set *efdsp = NULL; - timespec_t ts, *tsp = NULL; - int fd_set_len = howmany(nfds, 8); - int r; - sigset_t sigset, *sp = NULL; - - lx_debug("\tpselect6(%d, 0x%p, 0x%p, 0x%p, 0x%p, 0x%p)", - p1, p2, p3, p4, p4, p6); - - if (nfds > 0) { - if (p2 != NULL) { - rfdsp = SAFE_ALLOCA(fd_set_len); - if (rfdsp == NULL) - return (-ENOMEM); - if (uucopy((void *)p2, rfdsp, fd_set_len) != 0) - return (-errno); - } - if (p3 != NULL) { - wfdsp = SAFE_ALLOCA(fd_set_len); - if (wfdsp == NULL) - return (-ENOMEM); - if (uucopy((void *)p3, wfdsp, fd_set_len) != 0) - return (-errno); - } - if (p4 != NULL) { - efdsp = SAFE_ALLOCA(fd_set_len); - if (efdsp == NULL) - return (-ENOMEM); - if (uucopy((void *)p4, efdsp, fd_set_len) != 0) - return (-errno); - } - } - - if (p5 != NULL) { - if (uucopy((void *)p5, &ts, sizeof (ts)) != 0) - return (-errno); - - tsp = &ts; - } - - if (p6 != NULL) { - /* - * To force the number of arguments to be no more than six, - * Linux bundles both the sigset and the size into a structure - * that becomes the sixth argument. - */ - struct { - lx_sigset_t *addr; - size_t size; - } lx_sigset; - - if (uucopy((void *)p6, &lx_sigset, sizeof (lx_sigset)) != 0) - return (-errno); - - /* - * Yes, that's right: Linux forces a size to be passed only - * so it can check that it's the size of a sigset_t. - */ - if (lx_sigset.size != sizeof (lx_sigset_t)) - return (-EINVAL); - - /* - * This is where we check if the sigset is *really* NULL. 
- */ - if (lx_sigset.addr) { - if ((r = ltos_sigset(lx_sigset.addr, &sigset)) != 0) - return (r); - - sp = &sigset; - } - } - -#if defined(_LP64) - r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp); -#else - if (nfds >= FD_SETSIZE) - r = pselect_large_fdset(nfds, rfdsp, wfdsp, efdsp, tsp, sp); - else - r = pselect(nfds, rfdsp, wfdsp, efdsp, tsp, sp); -#endif - - if (r < 0) - return (-errno); - - /* - * For pselect6(), we don't honor the strange Linux select() semantics - * with respect to the timestruc parameter because glibc ignores it - * anyway -- just copy out the fd pointers and return. - */ - if ((rfdsp != NULL) && (uucopy(rfdsp, (void *)p2, fd_set_len) != 0)) - return (-errno); - if ((wfdsp != NULL) && (uucopy(wfdsp, (void *)p3, fd_set_len) != 0)) - return (-errno); - if ((efdsp != NULL) && (uucopy(efdsp, (void *)p4, fd_set_len) != 0)) - return (-errno); - - return (r); -} - /* * The first argument is the pid (Linux tgid) to send the signal to, second * argument is the signal to send (an lx signal), and third is the siginfo_t diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h index 64cdb205987c..9d775244102a 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_signal.h @@ -65,32 +65,6 @@ extern "C" { #if !defined(_ASM) -/* - * NOTE: Linux uses different definitions for sigset_ts and sigaction_ts - * depending on whether the definition is for user space or the kernel. - * - * The definitions below MUST correspond to the Linux kernel versions, - * as glibc will do the necessary translation from the Linux user - * versions. 
- */ -#if defined(_LP64) -#define LX_NSIG_WORDS 1 -#define LX_WSHIFT 6 -#else /* is _ILP32 */ -#define LX_NSIG_WORDS 2 -#define LX_WSHIFT 5 -#endif - -typedef struct { - ulong_t __bits[LX_NSIG_WORDS]; -} lx_sigset_t; - -#define LX_NBITS (sizeof (ulong_t) * NBBY) -#define lx_sigmask(n) (1UL << (((n) - 1) % LX_NBITS)) -#define lx_sigword(n) (((ulong_t)((n) - 1)) >> LX_WSHIFT) -#define lx_sigismember(s, n) (lx_sigmask(n) & (s)->__bits[lx_sigword(n)]) -#define lx_sigaddset(s, n) ((s)->__bits[lx_sigword(n)] |= lx_sigmask(n)) - typedef struct lx_sigaction { void (*lxsa_handler)(); int lxsa_flags; diff --git a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h index 9ff19002dbd9..a3f199decebb 100644 --- a/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h +++ b/usr/src/lib/brand/lx/lx_brand/sys/lx_syscall.h @@ -93,11 +93,6 @@ extern long lx_listen(int, int); extern long lx_shutdown(int, int); extern long lx_socketpair(int, int, int, int *); -extern long lx_select(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); -extern long lx_pselect6(uintptr_t, uintptr_t, uintptr_t, uintptr_t, - uintptr_t, uintptr_t); -extern long lx_poll(uintptr_t, uintptr_t, uintptr_t); -extern long lx_ppoll(uintptr_t, uintptr_t, uintptr_t, uintptr_t, uintptr_t); extern long lx_settimeofday(uintptr_t, uintptr_t); extern long lx_getrusage(uintptr_t, uintptr_t); extern long lx_mknod(uintptr_t, uintptr_t, uintptr_t); diff --git a/usr/src/uts/common/brand/lx/os/lx_signal.c b/usr/src/uts/common/brand/lx/os/lx_signal.c new file mode 100644 index 000000000000..53e0cecc144d --- /dev/null +++ b/usr/src/uts/common/brand/lx/os/lx_signal.c @@ -0,0 +1,50 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. 
+ * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#include +#include +#include +#include + +void +lx_ltos_sigset(lx_sigset_t *lsigp, k_sigset_t *ssigp) +{ + int lx_sig, sig; + + sigemptyset(ssigp); + for (lx_sig = 1; lx_sig <= LX_NSIG; lx_sig++) { + if (lx_sigismember(lsigp, lx_sig) && + ((sig = ltos_signo[lx_sig]) > 0)) + sigaddset(ssigp, sig); + } + + /* Emulate sigutok() restrictions */ + ssigp->__sigbits[0] &= (FILLSET0 & ~CANTMASK0); + ssigp->__sigbits[1] &= (FILLSET1 & ~CANTMASK1); + ssigp->__sigbits[2] &= (FILLSET2 & ~CANTMASK2); +} + +void +lx_stol_sigset(k_sigset_t *ssigp, lx_sigset_t *lsigp) +{ + int sig, lx_sig; + + bzero(lsigp, sizeof (lx_sigset_t)); + for (sig = 1; sig < NSIG; sig++) { + if (sigismember(ssigp, sig) && + ((lx_sig = stol_signo[sig]) > 0)) + lx_sigaddset(lsigp, lx_sig); + } +} diff --git a/usr/src/uts/common/brand/lx/os/lx_syscall.c b/usr/src/uts/common/brand/lx/os/lx_syscall.c index b47b82be2990..3d8dfa63961d 100644 --- a/usr/src/uts/common/brand/lx/os/lx_syscall.c +++ b/usr/src/uts/common/brand/lx/os/lx_syscall.c @@ -758,7 +758,7 @@ lx_sysent_t lx_sysent32[] = { {"setfsgid16", NULL, 0, 1}, /* 139 */ {"llseek", NULL, 0, 5}, /* 140 */ {"getdents", lx_getdents_32, 0, 3}, /* 141 */ - {"select", NULL, 0, 5}, /* 142 */ + {"select", lx_select, 0, 5}, /* 142 */ {"flock", NULL, 0, 2}, /* 143 */ {"msync", NULL, 0, 3}, /* 144 */ {"readv", lx_readv, 0, 3}, /* 145 */ @@ -784,7 +784,7 @@ lx_sysent_t lx_sysent32[] = { {"getresuid16", NULL, 0, 3}, /* 165 */ {"vm86", NULL, NOSYS_NO_EQUIV, 0}, /* 166 */ {"query_module", NULL, 0, 5}, /* 167 */ - {"poll", NULL, 0, 3}, /* 168 */ + {"poll", lx_poll, 0, 3}, /* 168 */ {"nfsservctl", NULL, NOSYS_KERNEL, 0}, /* 169 */ {"setresgid16", lx_setresgid16, 0, 3}, /* 170 */ {"getresgid16", NULL, 0, 3}, /* 171 */ @@ -928,8 +928,8 @@ 
lx_sysent_t lx_sysent32[] = { {"readlinkat", NULL, 0, 4}, /* 305 */ {"fchmodat", lx_fchmodat, 0, 3}, /* 306 */ {"faccessat", NULL, 0, 4}, /* 307 */ - {"pselect6", NULL, LX_SYS_EBPARG6, 6}, /* 308 */ - {"ppoll", NULL, 0, 5}, /* 309 */ + {"pselect6", lx_pselect, LX_SYS_EBPARG6, 6}, /* 308 */ + {"ppoll", lx_ppoll, 0, 5}, /* 309 */ {"unshare", NULL, NOSYS_NULL, 0}, /* 310 */ {"set_robust_list", lx_set_robust_list, 0, 2}, /* 311 */ {"get_robust_list", lx_get_robust_list, 0, 3}, /* 312 */ @@ -994,7 +994,7 @@ lx_sysent_t lx_sysent64[] = { {"stat", NULL, 0, 2}, /* 4 */ {"fstat", NULL, 0, 2}, /* 5 */ {"lstat", NULL, 0, 2}, /* 6 */ - {"poll", NULL, 0, 3}, /* 7 */ + {"poll", lx_poll, 0, 3}, /* 7 */ {"lseek", NULL, 0, 3}, /* 8 */ {"mmap", NULL, 0, 6}, /* 9 */ {"mprotect", NULL, 0, 3}, /* 10 */ @@ -1010,7 +1010,7 @@ lx_sysent_t lx_sysent64[] = { {"writev", lx_writev, 0, 3}, /* 20 */ {"access", NULL, 0, 2}, /* 21 */ {"pipe", lx_pipe, 0, 1}, /* 22 */ - {"select", NULL, 0, 5}, /* 23 */ + {"select", lx_select, 0, 5}, /* 23 */ {"sched_yield", lx_sched_yield, 0, 0}, /* 24 */ {"mremap", NULL, 0, 5}, /* 25 */ {"msync", NULL, 0, 3}, /* 26 */ @@ -1257,8 +1257,8 @@ lx_sysent_t lx_sysent64[] = { {"readlinkat", NULL, 0, 4}, /* 267 */ {"fchmodat", lx_fchmodat, 0, 3}, /* 268 */ {"faccessat", NULL, 0, 4}, /* 269 */ - {"pselect6", NULL, 0, 6}, /* 270 */ - {"ppoll", NULL, 0, 5}, /* 271 */ + {"pselect6", lx_pselect, 0, 6}, /* 270 */ + {"ppoll", lx_ppoll, 0, 5}, /* 271 */ {"unshare", NULL, NOSYS_NULL, 0}, /* 272 */ {"set_robust_list", lx_set_robust_list, 0, 2}, /* 273 */ {"get_robust_list", lx_get_robust_list, 0, 3}, /* 274 */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_signal.h b/usr/src/uts/common/brand/lx/sys/lx_signal.h new file mode 100644 index 000000000000..552c36238ba7 --- /dev/null +++ b/usr/src/uts/common/brand/lx/sys/lx_signal.h @@ -0,0 +1,32 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 
1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#ifndef _LX_SIGNAL_H +#define _LX_SIGNAL_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +extern void lx_ltos_sigset(lx_sigset_t *, k_sigset_t *); +extern void lx_stol_sigset(k_sigset_t *, lx_sigset_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* _LX_SIGNAL_H */ diff --git a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h index 7a32a1e1d86d..1cdd1a775d5a 100644 --- a/usr/src/uts/common/brand/lx/sys/lx_syscalls.h +++ b/usr/src/uts/common/brand/lx/sys/lx_syscalls.h @@ -89,8 +89,11 @@ extern long lx_open(); extern long lx_openat(); extern long lx_pipe(); extern long lx_pipe2(); +extern long lx_poll(); +extern long lx_ppoll(); extern long lx_prctl(); extern long lx_prlimit64(); +extern long lx_pselect(); extern long lx_ptrace(); extern long lx_read(); extern long lx_readv(); @@ -103,6 +106,7 @@ extern long lx_sched_rr_get_interval(); extern long lx_sched_setparam(); extern long lx_sched_setscheduler(); extern long lx_sched_yield(); +extern long lx_select(); extern long lx_send(); extern long lx_sendmsg(); extern long lx_sendto(); diff --git a/usr/src/uts/common/brand/lx/syscall/lx_poll.c b/usr/src/uts/common/brand/lx/syscall/lx_poll.c new file mode 100644 index 000000000000..e56aea98bf49 --- /dev/null +++ b/usr/src/uts/common/brand/lx/syscall/lx_poll.c @@ -0,0 +1,714 @@ +/* + * This file and its contents are supplied under the terms of the + * Common Development and Distribution License ("CDDL"), version 1.0. + * You may only use this file in accordance with the terms of version + * 1.0 of the CDDL. + * + * A full copy of the text of the CDDL should have accompanied this + * source. 
A copy of the CDDL is also available via the Internet at + * http://www.illumos.org/license/CDDL. + */ + +/* + * Copyright 2015 Joyent, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* From uts/common/syscall/poll.c */ +extern int poll_copyin(pollstate_t *, pollfd_t *, nfds_t); +extern int poll_common(pollstate_t *, pollfd_t *, nfds_t, timespec_t *, int *); + +/* + * These events are identical between Linux and SunOS + */ +#define LX_POLLIN 0x001 +#define LX_POLLPRI 0x002 +#define LX_POLLOUT 0x004 +#define LX_POLLERR 0x008 +#define LX_POLLHUP 0x010 +#define LX_POLLNVAL 0x020 +#define LX_POLLRDNORM 0x040 +#define LX_POLLRDBAND 0x080 + +#define LX_POLL_COMMON_EVENTS (LX_POLLIN | LX_POLLPRI | LX_POLLOUT | \ + LX_POLLERR | LX_POLLHUP | LX_POLLNVAL | LX_POLLRDNORM | LX_POLLRDBAND) + +/* + * These events differ between Linux and SunOS + */ +#define LX_POLLWRNORM 0x0100 +#define LX_POLLWRBAND 0x0200 +#define LX_POLLRDHUP 0x2000 + + +#define LX_POLL_SUPPORTED_EVENTS \ + (LX_POLL_COMMON_EVENTS | LX_POLLWRNORM | LX_POLLWRBAND | LX_POLLRDHUP) + + +static int +lx_poll_copyin(pollstate_t *ps, pollfd_t *fds, nfds_t nfds, short *oldevt) +{ + int i, error = 0; + pollfd_t *pollfdp; + + if ((error = poll_copyin(ps, fds, nfds)) != 0) { + return (error); + } + pollfdp = ps->ps_pollfd; + + /* Convert the Linux events bitmask into SunOS equivalent. */ + for (i = 0; i < nfds; i++) { + short lx_events = pollfdp[i].events; + short events; + + /* + * If the caller is polling for an unsupported event, we + * have to bail out. 
+ */ + if (lx_events & ~LX_POLL_SUPPORTED_EVENTS) { + return (ENOTSUP); + } + + events = lx_events & LX_POLL_COMMON_EVENTS; + if (lx_events & LX_POLLWRNORM) + events |= POLLWRNORM; + if (lx_events & LX_POLLWRBAND) + events |= POLLWRBAND; + if (lx_events & LX_POLLRDHUP) + events |= POLLRDHUP; + pollfdp[i].events = events; + oldevt[i] = lx_events; + } + return (0); +} + +static int +lx_poll_copyout(pollfd_t *pollfdp, pollfd_t *fds, nfds_t nfds, short *oldevt) +{ + int i; + + /* + * Convert SunOS revents bitmask into Linux equivalent and restore + * cached events field which was swizzled by lx_poll_copyin. + */ + for (i = 0; i < nfds; i++) { + short revents = pollfdp[i].revents; + short lx_revents = revents & LX_POLL_COMMON_EVENTS; + short orig_events = oldevt[i]; + + if (revents & POLLWRBAND) + lx_revents |= LX_POLLWRBAND; + if (revents & POLLRDHUP) + lx_revents |= LX_POLLRDHUP; + /* + * Because POLLOUT and POLLWRNORM are natively defined as the + * same value, care must be taken when translating them to + * Linux where they differ. + */ + if (revents & POLLOUT) { + if ((orig_events & LX_POLLOUT) == 0) + lx_revents &= ~LX_POLLOUT; + if (orig_events & LX_POLLWRNORM) + lx_revents |= LX_POLLWRNORM; + } + + pollfdp[i].revents = lx_revents; + pollfdp[i].events = orig_events; + } + + if (copyout(pollfdp, fds, sizeof (pollfd_t) * nfds) != 0) + return (EFAULT); + + return (0); +} + +static long +lx_poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp) +{ + kthread_t *t = curthread; + klwp_t *lwp = ttolwp(t); + proc_t *p = ttoproc(t); + pollstate_t *ps = NULL; + pollfd_t *pollfdp = NULL; + short *oldevt = NULL; + int error = 0, fdcnt = 0; + + /* + * Reset our signal mask, if requested. + */ + if (ksetp != NULL) { + mutex_enter(&p->p_lock); + schedctl_finish_sigblock(t); + lwp->lwp_sigoldmask = t->t_hold; + t->t_hold = *ksetp; + t->t_flag |= T_TOMASK; + /* + * Call cv_reltimedwait_sig() just to check for signals. 
+ * We will return immediately with either 0 or -1. + */ + if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0, + TR_CLOCK_TICK)) { + mutex_exit(&p->p_lock); + error = EINTR; + goto pollout; + } + mutex_exit(&p->p_lock); + } + + /* + * Initialize pollstate and copy in pollfd data if present. + */ + if (nfds != 0) { + if (nfds > p->p_fno_ctl) { + mutex_enter(&p->p_lock); + (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE], + p->p_rctls, p, RCA_SAFE); + mutex_exit(&p->p_lock); + error = EINVAL; + goto pollout; + } + + /* + * Need to allocate memory for pollstate before anything + * because the mutex and cv are created in this space + */ + ps = pollstate_create(); + if (ps->ps_pcache == NULL) + ps->ps_pcache = pcache_alloc(); + + /* + * Certain event types which are distinct on Linux are aliased + * against each other on illumos. In order to properly translate + * back into the Linux format, the original events of interest + * are stored in 'oldevt' for use during lx_poll_copyout. + */ + oldevt = kmem_alloc(nfds * sizeof (short), KM_SLEEP); + if ((error = lx_poll_copyin(ps, fds, nfds, oldevt)) != 0) + goto pollout; + pollfdp = ps->ps_pollfd; + } + + /* + * Perform the actual poll. + */ + error = poll_common(ps, fds, nfds, tsp, &fdcnt); + +pollout: + /* + * If we changed the signal mask but we received no signal then restore + * the signal mask. Otherwise psig() will deal with the signal mask. + */ + if (ksetp != NULL) { + mutex_enter(&p->p_lock); + if (lwp->lwp_cursig == 0) { + t->t_hold = lwp->lwp_sigoldmask; + t->t_flag &= ~T_TOMASK; + } + mutex_exit(&p->p_lock); + } + + /* + * Copy out the events and return the fdcnt to the user. 
+ */ + if (nfds != 0 && error == 0) { + error = lx_poll_copyout(pollfdp, fds, nfds, oldevt); + } + if (oldevt != NULL) { + kmem_free(oldevt, nfds * sizeof (short)); + } + if (error) { + return (set_errno(error)); + } + return (fdcnt); +} + +long +lx_poll(pollfd_t *fds, nfds_t nfds, int timeout) +{ + timespec_t ts, *tsp = NULL; + + if (timeout >= 0) { + ts.tv_sec = timeout / MILLISEC; + ts.tv_nsec = (timeout % MILLISEC) * MICROSEC; + tsp = &ts; + } + + return (lx_poll_common(fds, nfds, tsp, NULL)); +} + +long +lx_ppoll(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, lx_sigset_t *setp) +{ + timespec_t ts, *tsp = NULL; + k_sigset_t kset, *ksetp = NULL; + + /* + * Copy in timeout and sigmask. + */ + if (timeoutp != NULL) { + if (get_udatamodel() == DATAMODEL_NATIVE) { + if (copyin(timeoutp, &ts, sizeof (ts))) + return (set_errno(EFAULT)); + } else { + timespec32_t ts32; + + if (copyin(timeoutp, &ts32, sizeof (ts32))) + return (set_errno(EFAULT)); + TIMESPEC32_TO_TIMESPEC(&ts, &ts32) + } + + if (itimerspecfix(&ts)) + return (set_errno(EINVAL)); + tsp = &ts; + } + if (setp != NULL) { + lx_sigset_t lset; + + if (copyin(setp, &lset, sizeof (lset))) + return (set_errno(EFAULT)); + lx_ltos_sigset(&lset, &kset); + ksetp = &kset; + } + + return (lx_poll_common(fds, nfds, tsp, ksetp)); +} + +typedef struct lx_select_buf_s { + long *lsb_rfds; + long *lsb_wfds; + long *lsb_efds; + unsigned int lsb_size; +} lx_select_buf_t; + +/* + * Size (in bytes) of buffer appropriate for fd_set copyin/copyout. + * Linux uses buffers of 'long' to accomplish this. 
+ */
+#define	LX_FD_SET_BYTES		(sizeof (long))
+#define	LX_FD_SET_BITS		(8 * LX_FD_SET_BYTES)
+#define	LX_FD_SET_SIZE(nfds)	\
+	((((nfds) + (LX_FD_SET_BITS - 1)) / LX_FD_SET_BITS) * LX_FD_SET_BYTES)
+
+/*
+ * Copy in whichever of the three Linux fd_sets were supplied and expand them
+ * into the pollfd array cached in 'ps': one pollfd_t per descriptor number
+ * below nfds, with its fd set to -1 if the descriptor is not named in any
+ * set.  Returns 0 on success or EFAULT if a copyin fails.
+ */
+static int
+lx_select_copyin(pollstate_t *ps, lx_select_buf_t *sbuf, int nfds,
+    long *rfds, long *wfds, long *efds)
+{
+	int n;
+	long *in, *out, *ex;
+	long absent = 0;
+	pollfd_t *pfd;
+	nfds_t old_nfds;
+
+	/*
+	 * Just like pollsys and lx_poll, attempt to reuse ps_pollfd if it is
+	 * appropriately sized. See poll_copyin for more detail.
+	 */
+	old_nfds = ps->ps_nfds;
+	if (nfds != old_nfds) {
+		kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
+		pfd = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
+		ps->ps_pollfd = pfd;
+		ps->ps_nfds = nfds;
+	} else {
+		pfd = ps->ps_pollfd;
+	}
+
+	if (rfds != NULL) {
+		if (copyin(rfds, sbuf->lsb_rfds, sbuf->lsb_size) != 0) {
+			return (EFAULT);
+		}
+	}
+	if (wfds != NULL) {
+		if (copyin(wfds, sbuf->lsb_wfds, sbuf->lsb_size) != 0) {
+			return (EFAULT);
+		}
+	}
+	if (efds != NULL) {
+		if (copyin(efds, sbuf->lsb_efds, sbuf->lsb_size) != 0) {
+			return (EFAULT);
+		}
+	}
+
+	/*
+	 * For each fd, if any bits are set convert them into the appropriate
+	 * pollfd struct. (Derived from libc's select logic)
+	 *
+	 * A set which was not supplied is read from 'absent', a single zero
+	 * word, and its cursor is never advanced.
+	 */
+	in = (rfds != NULL) ? sbuf->lsb_rfds : &absent;
+	out = (wfds != NULL) ? sbuf->lsb_wfds : &absent;
+	ex = (efds != NULL) ? sbuf->lsb_efds : &absent;
+	for (n = 0; n < nfds; n += LX_FD_SET_BITS) {
+		unsigned long b, m, j;
+
+		b = (unsigned long)(*in | *out | *ex);
+		m = 1;
+		for (j = 0; j < LX_FD_SET_BITS; j++) {
+			int fd = n + j;
+
+			if (fd >= nfds)
+				return (0);
+			pfd->events = 0;
+			if (b & 1) {
+				pfd->fd = fd;
+				if (*in & m)
+					pfd->events |= POLLRDNORM;
+				if (*out & m)
+					pfd->events |= POLLWRNORM;
+				if (*ex & m)
+					pfd->events |= POLLRDBAND;
+			} else {
+				pfd->fd = -1;
+			}
+			pfd++;
+			b >>= 1;
+			m <<= 1;
+		}
+
+		if (rfds != NULL)
+			in++;
+		if (wfds != NULL)
+			out++;
+		if (efds != NULL)
+			ex++;
+	}
+	return (0);
+}
+
+/*
+ * Translate the poll results in 'pollfdp' back into the three Linux fd_sets
+ * and copy out those which the caller supplied.  On success *fdcnt is
+ * replaced with the total number of bits set across all three sets.
+ * Returns 0 on success, EBADF if any polled fd reported POLLNVAL, or EFAULT
+ * if a copyout fails.
+ */
+static int
+lx_select_copyout(pollfd_t *pollfdp, lx_select_buf_t *sbuf, int nfds,
+    long *rfds, long *wfds, long *efds, int *fdcnt)
+{
+	int n;
+	pollfd_t *pfd;
+	long rv = 0;
+
+	/*
+	 * If poll did not find any fds of interest, we can just zero out the
+	 * fd_set fields for copyout.
+	 */
+	if (*fdcnt == 0) {
+		if (rfds != NULL) {
+			bzero(sbuf->lsb_rfds, sbuf->lsb_size);
+		}
+		if (wfds != NULL) {
+			bzero(sbuf->lsb_wfds, sbuf->lsb_size);
+		}
+		if (efds != NULL) {
+			bzero(sbuf->lsb_efds, sbuf->lsb_size);
+		}
+		goto copyout;
+	}
+
+	/*
+	 * For each pollfd, convert any returned events into bits in the
+	 * appropriate fd_set word. (Derived from libc's select logic)
+	 */
+	pfd = pollfdp;
+	for (n = 0; n < nfds; n += LX_FD_SET_BITS) {
+		unsigned long m, j;
+		long in = 0, out = 0, ex = 0;
+
+		m = 1;
+		for (j = 0; j < LX_FD_SET_BITS; j++) {
+			if ((n + j) >= nfds)
+				break;
+			if (pfd->revents != 0) {
+				if (pfd->revents & POLLNVAL) {
+					return (EBADF);
+				}
+				if (pfd->revents & POLLRDNORM) {
+					in |= m;
+					rv++;
+				}
+				if (pfd->revents & POLLWRNORM) {
+					out |= m;
+					rv++;
+				}
+				if (pfd->revents & POLLRDBAND) {
+					ex |= m;
+					rv++;
+				}
+				/*
+				 * Only set this bit on return if we asked
+				 * about input conditions.
+				 */
+				if ((pfd->revents & (POLLHUP|POLLERR)) &&
+				    (pfd->events & POLLRDNORM)) {
+					if ((in & m) == 0) {
+						/* wasn't already set */
+						rv++;
+					}
+					in |= m;
+				}
+				/*
+				 * Only set this bit on return if we asked
+				 * about output conditions.
+				 */
+				if ((pfd->revents & (POLLHUP|POLLERR)) &&
+				    (pfd->events & POLLWRNORM)) {
+					if ((out & m) == 0) {
+						/* wasn't already set */
+						rv++;
+					}
+					out |= m;
+				}
+				/*
+				 * Only set this bit on return if we asked
+				 * about exceptional conditions.
+				 */
+				if ((pfd->revents & (POLLHUP|POLLERR)) &&
+				    (pfd->events & POLLRDBAND)) {
+					if ((ex & m) == 0) {
+						/* wasn't already set */
+						rv++;
+					}
+					ex |= m;
+				}
+			}
+			m <<= 1;
+			pfd++;
+		}
+		/*
+		 * 'n' counts fds while the buffers are arrays of words, so it
+		 * must be scaled down to find the word just accumulated.
+		 * Indexing by 'n' itself would write beyond the buffers for
+		 * every word after the first.
+		 */
+		if (rfds != NULL)
+			sbuf->lsb_rfds[n / LX_FD_SET_BITS] = in;
+		if (wfds != NULL)
+			sbuf->lsb_wfds[n / LX_FD_SET_BITS] = out;
+		if (efds != NULL)
+			sbuf->lsb_efds[n / LX_FD_SET_BITS] = ex;
+	}
+
+copyout:
+	if (rfds != NULL) {
+		if (copyout(sbuf->lsb_rfds, rfds, sbuf->lsb_size) != 0) {
+			return (EFAULT);
+		}
+	}
+	if (wfds != NULL) {
+		if (copyout(sbuf->lsb_wfds, wfds, sbuf->lsb_size) != 0) {
+			return (EFAULT);
+		}
+	}
+	if (efds != NULL) {
+		if (copyout(sbuf->lsb_efds, efds, sbuf->lsb_size) != 0) {
+			return (EFAULT);
+		}
+	}
+	*fdcnt = rv;
+	return (0);
+}
+
+
+static long
+lx_select_common(int nfds, long *rfds, long *wfds, long *efds,
+    timespec_t *tsp, k_sigset_t *ksetp)
+{
+	kthread_t *t = curthread;
+	klwp_t *lwp = ttolwp(t);
+	proc_t *p = ttoproc(t);
+	pollstate_t *ps = NULL;
+	pollfd_t *pollfdp = NULL, *fake_fds = NULL;
+	lx_select_buf_t sbuf = {0};
+	int error = 0, fdcnt = 0;
+
+	if (nfds < 0) {
+		return (set_errno(EINVAL));
+	}
+
+	/*
+	 * Reset our signal mask, if requested.
+	 */
+	if (ksetp != NULL) {
+		mutex_enter(&p->p_lock);
+		schedctl_finish_sigblock(t);
+		lwp->lwp_sigoldmask = t->t_hold;
+		t->t_hold = *ksetp;
+		t->t_flag |= T_TOMASK;
+		/*
+		 * Call cv_reltimedwait_sig() just to check for signals.
+		 * We will return immediately with either 0 or -1.
+		 */
+		if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
+		    TR_CLOCK_TICK)) {
+			mutex_exit(&p->p_lock);
+			error = EINTR;
+			goto out;
+		}
+		mutex_exit(&p->p_lock);
+	}
+
+	/*
+	 * Because poll caching uses the userspace pollfd_t pointer to verify
+	 * cache reuse validity, a simulated value must be supplied when
+	 * emulating Linux select(2). The first non-NULL pointer from
+	 * rfds/wfds/efds is used for this purpose.
+	 */
+	if (rfds != NULL) {
+		fake_fds = (pollfd_t *)rfds;
+	} else if (wfds != NULL) {
+		fake_fds = (pollfd_t *)wfds;
+	} else if (efds != NULL) {
+		fake_fds = (pollfd_t *)efds;
+	} else {
+		/*
+		 * All three fd_set pointers were NULL, whatever value was
+		 * supplied for nfds. Fall back to doing a simple timeout.
+		 */
+		nfds = 0;
+	}
+
+	/*
+	 * Initialize pollstate and copy in pollfd data if present.  Asking
+	 * for more fds than p_fno_ctl allows triggers the RLIMIT_NOFILE
+	 * rctl action and fails with EINVAL.
+	 */
+	if (nfds != 0) {
+		if (nfds > p->p_fno_ctl) {
+			mutex_enter(&p->p_lock);
+			(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
+			    p->p_rctls, p, RCA_SAFE);
+			mutex_exit(&p->p_lock);
+			error = EINVAL;
+			goto out;
+		}
+
+		/*
+		 * Need to allocate memory for pollstate before anything
+		 * because the mutex and cv are created in this space.
+		 */
+		ps = pollstate_create();
+		if (ps->ps_pcache == NULL)
+			ps->ps_pcache = pcache_alloc();
+
+		sbuf.lsb_size = LX_FD_SET_SIZE(nfds);
+		if (rfds != NULL)
+			sbuf.lsb_rfds = kmem_alloc(sbuf.lsb_size, KM_SLEEP);
+		if (wfds != NULL)
+			sbuf.lsb_wfds = kmem_alloc(sbuf.lsb_size, KM_SLEEP);
+		if (efds != NULL)
+			sbuf.lsb_efds = kmem_alloc(sbuf.lsb_size, KM_SLEEP);
+
+		error = lx_select_copyin(ps, &sbuf, nfds, rfds, wfds, efds);
+		if (error != 0) {
+			goto out;
+		}
+
+		pollfdp = ps->ps_pollfd;
+	}
+
+	/*
+	 * Perform the actual poll.  When nfds is 0, ps and fake_fds may both
+	 * still be NULL here.
+	 */
+	error = poll_common(ps, fake_fds, (nfds_t)nfds, tsp, &fdcnt);
+
+out:
+	/*
+	 * If we changed the signal mask but we received no signal then restore
+	 * the signal mask. Otherwise psig() will deal with the signal mask.
+	 */
+	if (ksetp != NULL) {
+		mutex_enter(&p->p_lock);
+		if (lwp->lwp_cursig == 0) {
+			t->t_hold = lwp->lwp_sigoldmask;
+			t->t_flag &= ~T_TOMASK;
+		}
+		mutex_exit(&p->p_lock);
+	}
+
+	/*
+	 * Copy out the events and return the fdcnt to the user.  The copyout
+	 * step is what converts fdcnt from a count of pollfds into the count
+	 * of fd_set bits.  Any fd_set buffers allocated above are freed
+	 * whether or not an error occurred.
+	 */
+	if (error == 0 && nfds != 0) {
+		error = lx_select_copyout(pollfdp, &sbuf, nfds, rfds, wfds,
+		    efds, &fdcnt);
+	}
+	if (sbuf.lsb_size != 0) {
+		if (sbuf.lsb_rfds != NULL)
+			kmem_free(sbuf.lsb_rfds, sbuf.lsb_size);
+		if (sbuf.lsb_wfds != NULL)
+			kmem_free(sbuf.lsb_wfds, sbuf.lsb_size);
+		if (sbuf.lsb_efds != NULL)
+			kmem_free(sbuf.lsb_efds, sbuf.lsb_size);
+	}
+	if (error) {
+		return (set_errno(error));
+	}
+	return (fdcnt);
+}
+
+long
+lx_select(int nfds, long *rfds, long *wfds, long *efds,
+    struct timeval *timeoutp)
+{
+	timespec_t ts, *tsp = NULL;
+
+	if (timeoutp != NULL) {	/* otherwise tsp is left NULL */
+		if (get_udatamodel() == DATAMODEL_NATIVE) {
+			struct timeval tv;
+
+			if (copyin(timeoutp, &tv, sizeof (tv)))
+				return (set_errno(EFAULT));
+			ts.tv_sec = tv.tv_sec;
+			ts.tv_nsec = tv.tv_usec * (NANOSEC / MICROSEC);
+		} else {
+			struct timeval32 tv32;
+
+			if (copyin(timeoutp, &tv32, sizeof (tv32)))
+				return (set_errno(EFAULT));
+			ts.tv_sec = tv32.tv_sec;
+			ts.tv_nsec = tv32.tv_usec * (NANOSEC / MICROSEC);
+		}
+
+		if (itimerspecfix(&ts))
+			return (set_errno(EINVAL));
+		tsp = &ts;
+	}
+
+	return (lx_select_common(nfds, rfds, wfds, efds, tsp, NULL));
+}
+
+long
+lx_pselect(int nfds, long *rfds, long *wfds, long *efds,
+    timespec_t *timeoutp, lx_sigset_t *setp)
+{
+	timespec_t ts, *tsp = NULL;
+	k_sigset_t kset, *ksetp = NULL;
+
+	/*
+	 * Copy in timeout and sigmask.
+	 */
+	if (timeoutp != NULL) {
+		if (get_udatamodel() == DATAMODEL_NATIVE) {
+			if (copyin(timeoutp, &ts, sizeof (ts)))
+				return (set_errno(EFAULT));
+		} else {
+			timespec32_t ts32;
+
+			if (copyin(timeoutp, &ts32, sizeof (ts32)))
+				return (set_errno(EFAULT));
+			TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
+		}
+
+		if (itimerspecfix(&ts))
+			return (set_errno(EINVAL));
+		tsp = &ts;
+	}
+	if (setp != NULL) {
+		lx_sigset_t lset;
+
+		if (copyin(setp, &lset, sizeof (lset)))
+			return (set_errno(EFAULT));
+		lx_ltos_sigset(&lset, &kset);
+		ksetp = &kset;
+	}
+
+	return (lx_select_common(nfds, rfds, wfds, efds, tsp, ksetp));
+}
diff --git a/usr/src/uts/common/syscall/poll.c b/usr/src/uts/common/syscall/poll.c
index cc125f127aa8..ba918759c8b5 100644
--- a/usr/src/uts/common/syscall/poll.c
+++ b/usr/src/uts/common/syscall/poll.c
@@ -317,20 +317,70 @@ polllock(pollhead_t *php, kmutex_t *lp)
 	return (0);
 }
 
-static int
-poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
+/*
+ * Size the pollfd buffer cached in 'ps' for 'nfds' entries and copy the
+ * caller's pollfd array into it.  Returns 0 on success, or EFAULT if the
+ * copyin fails or 'fds' is NULL.
+ */
+int
+poll_copyin(pollstate_t *ps, pollfd_t *fds, nfds_t nfds)
+{
+	pollfd_t *pollfdp;
+	nfds_t old_nfds;
+
+	/*
+	 * NOTE: for performance, buffers are saved across poll() calls.
+	 * The theory is that if a process polls heavily, it tends to poll
+	 * on the same set of descriptors. Therefore, we only reallocate
+	 * buffers when nfds changes. There is no hysteresis control,
+	 * because there is no data to suggest that this is necessary;
+	 * the penalty of reallocating is not *that* great in any event.
+	 */
+	old_nfds = ps->ps_nfds;
+	if (nfds != old_nfds) {
+		kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
+		pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
+		ps->ps_pollfd = pollfdp;
+		ps->ps_nfds = nfds;
+	}
+
+	pollfdp = ps->ps_pollfd;
+	if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
+		return (EFAULT);
+	}
+
+	if (fds == NULL) {
+		/*
+		 * If the process has page 0 mapped, then the copyin() above
+		 * will succeed even if fds is NULL. However, our cached
+		 * poll lists are keyed by the address of the passed-in fds
+		 * structure, and we use the value NULL to indicate an unused
+		 * poll cache list entry. As such, we elect not to support
+		 * NULL as a valid (user) memory address and fail the poll()
+		 * call.
+		 */
+		return (EFAULT);
+	}
+	return (0);
+}
+
+/*
+ * Poll the fds previously loaded into 'ps', or simply sleep for the timeout
+ * when nfds is 0 (in which case 'ps' is not used).  The number of ready fds
+ * is passed back through 'fdcnt', which callers zero beforehand, and the
+ * return value is 0 or an errno.  Signal mask handling and copyout of the
+ * results are the caller's responsibility.
+ */
+int
+poll_common(pollstate_t *ps, pollfd_t *fds, nfds_t nfds, timespec_t *tsp,
+    int *fdcnt)
 {
 	kthread_t *t = curthread;
-	klwp_t *lwp = ttolwp(t);
 	proc_t *p = ttoproc(t);
-	int fdcnt = 0;
-	int i;
 	hrtime_t deadline;	/* hrtime value when we want to return */
 	pollfd_t *pollfdp;
-	pollstate_t *ps;
 	pollcache_t *pcp;
 	int error = 0;
-	nfds_t old_nfds;
 	int cacheindex = 0;	/* which cache set is used */
 
 	/*
@@ -348,29 +386,7 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 	}
 
 	/*
-	 * Reset our signal mask, if requested.
-	 */
-	if (ksetp != NULL) {
-		mutex_enter(&p->p_lock);
-		schedctl_finish_sigblock(t);
-		lwp->lwp_sigoldmask = t->t_hold;
-		t->t_hold = *ksetp;
-		t->t_flag |= T_TOMASK;
-		/*
-		 * Call cv_reltimedwait_sig() just to check for signals.
-		 * We will return immediately with either 0 or -1.
-		 */
-		if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
-		    TR_CLOCK_TICK)) {
-			mutex_exit(&p->p_lock);
-			error = EINTR;
-			goto pollout;
-		}
-		mutex_exit(&p->p_lock);
-	}
-
-	/*
-	 * Check to see if this guy just wants to use poll() as a timeout.
+	 * Check to see if the caller just wants to use poll() as a timeout.
 	 * If yes then bypass all the other stuff and make him sleep.
 	 */
 	if (nfds == 0) {
@@ -385,66 +401,20 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 			    &t->t_delay_lock, deadline)) > 0)
 				continue;
 			mutex_exit(&t->t_delay_lock);
 			error = (error == 0) ? EINTR : 0;
 		}
-		goto pollout;
-	}
-
-	if (nfds > p->p_fno_ctl) {
-		mutex_enter(&p->p_lock);
-		(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
-		    p->p_rctls, p, RCA_SAFE);
-		mutex_exit(&p->p_lock);
-		error = EINVAL;
-		goto pollout;
-	}
-
-	/*
-	 * Need to allocate memory for pollstate before anything because
-	 * the mutex and cv are created in this space
-	 */
-	ps = pollstate_create();
-
-	if (ps->ps_pcache == NULL)
-		ps->ps_pcache = pcache_alloc();
-	pcp = ps->ps_pcache;
-
-	/*
-	 * NOTE: for performance, buffers are saved across poll() calls.
-	 * The theory is that if a process polls heavily, it tends to poll
-	 * on the same set of descriptors. Therefore, we only reallocate
-	 * buffers when nfds changes. There is no hysteresis control,
-	 * because there is no data to suggest that this is necessary;
-	 * the penalty of reallocating is not *that* great in any event.
-	 */
-	old_nfds = ps->ps_nfds;
-	if (nfds != old_nfds) {
-
-		kmem_free(ps->ps_pollfd, old_nfds * sizeof (pollfd_t));
-		pollfdp = kmem_alloc(nfds * sizeof (pollfd_t), KM_SLEEP);
-		ps->ps_pollfd = pollfdp;
-		ps->ps_nfds = nfds;
+		/*
+		 * The signal-to-EINTR translation must stay inside the block
+		 * above: when that block is skipped no wait took place and
+		 * 'error' still holds its initial 0, which means success.
+		 */
+		*fdcnt = 0;
+		return (error);
 	}
+	VERIFY(ps != NULL);
 
 	pollfdp = ps->ps_pollfd;
-	if (copyin(fds, pollfdp, nfds * sizeof (pollfd_t))) {
-		error = EFAULT;
-		goto pollout;
-	}
-
-	if (fds == NULL) {
-		/*
-		 * If the process has page 0 mapped, then the copyin() above
-		 * will succeed even if fds is NULL. However, our cached
-		 * poll lists are keyed by the address of the passed-in fds
-		 * structure, and we use the value NULL to indicate an unused
-		 * poll cache list entry. As such, we elect not to support
-		 * NULL as a valid (user) memory address and fail the poll()
-		 * call.
-		 */
-		error = EINVAL;
-		goto pollout;
-	}
+	VERIFY(pollfdp != NULL);
 
 	/*
 	 * If this thread polls for the first time, allocate ALL poll
@@ -460,10 +424,10 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 		/*
 		 * poll and cache this poll fd list in ps_pcacheset[0].
 		 */
-		error = pcacheset_cache_list(ps, fds, &fdcnt, cacheindex);
-		if (fdcnt || error) {
+		error = pcacheset_cache_list(ps, fds, fdcnt, cacheindex);
+		if (error || *fdcnt) {
 			mutex_exit(&ps->ps_lock);
-			goto pollout;
+			return (error);
 		}
 	} else {
 		pollcacheset_t *pcset = ps->ps_pcacheset;
@@ -488,11 +452,11 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 				 * the callee will guarantee the consistency
 				 * of cached poll list and cache content.
 				 */
-				error = pcacheset_resolve(ps, nfds, &fdcnt,
+				error = pcacheset_resolve(ps, nfds, fdcnt,
 				    cacheindex);
 				if (error) {
 					mutex_exit(&ps->ps_lock);
-					goto pollout;
+					return (error);
 				}
 				break;
 			}
@@ -509,11 +473,11 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 				 * found an unused entry. Use it to cache
 				 * this poll list.
 				 */
-				error = pcacheset_cache_list(ps, fds, &fdcnt,
+				error = pcacheset_cache_list(ps, fds, fdcnt,
 				    cacheindex);
-				if (fdcnt || error) {
+				if (error || *fdcnt) {
 					mutex_exit(&ps->ps_lock);
-					goto pollout;
+					return (error);
 				}
 				break;
 			}
@@ -527,10 +491,10 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 			cacheindex = pcacheset_replace(ps);
 			ASSERT(cacheindex < ps->ps_nsets);
 			pcset[cacheindex].pcs_usradr = (uintptr_t)fds;
-			error = pcacheset_resolve(ps, nfds, &fdcnt, cacheindex);
+			error = pcacheset_resolve(ps, nfds, fdcnt, cacheindex);
 			if (error) {
 				mutex_exit(&ps->ps_lock);
-				goto pollout;
+				return (error);
 			}
 		}
 	}
@@ -548,8 +512,8 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 	mutex_enter(&pcp->pc_lock);
 	for (;;) {
 		pcp->pc_flag = 0;
-		error = pcache_poll(pollfdp, ps, nfds, &fdcnt, cacheindex);
-		if (fdcnt || error) {
+		error = pcache_poll(pollfdp, ps, nfds, fdcnt, cacheindex);
+		if (error || *fdcnt) {
 			mutex_exit(&pcp->pc_lock);
 			mutex_exit(&ps->ps_lock);
 			break;
@@ -595,13 +559,116 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 		mutex_enter(&pcp->pc_lock);
 	}
 
+	return (error);
+}
+
+/*
+ * This is the system call trap that poll(),
+ * select() and pselect() are built upon.
+ * It is a private interface between libc and the kernel.
+ */
+int
+pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
+{
+	kthread_t *t = curthread;
+	klwp_t *lwp = ttolwp(t);
+	proc_t *p = ttoproc(t);
+	timespec_t ts;
+	timespec_t *tsp;
+	k_sigset_t kset;
+	pollstate_t *ps = NULL;
+	pollfd_t *pollfdp = NULL;
+	int error = 0, fdcnt = 0;
+
+	/*
+	 * Copy in timeout, read as a timespec32_t for non-native callers.
+	 */
+	if (timeoutp == NULL) {
+		tsp = NULL;
+	} else {
+		if (get_udatamodel() == DATAMODEL_NATIVE) {
+			if (copyin(timeoutp, &ts, sizeof (ts)))
+				return (set_errno(EFAULT));
+		} else {
+			timespec32_t ts32;
+
+			if (copyin(timeoutp, &ts32, sizeof (ts32)))
+				return (set_errno(EFAULT));
+			TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
+		}
+
+		if (itimerspecfix(&ts))
+			return (set_errno(EINVAL));
+		tsp = &ts;
+	}
+
+	/*
+	 * Copy in and reset signal mask, if requested (old mask is saved).
+	 */
+	if (setp != NULL) {
+		sigset_t set;
+
+		if (copyin(setp, &set, sizeof (set)))
+			return (set_errno(EFAULT));
+		sigutok(&set, &kset);
+
+		mutex_enter(&p->p_lock);
+		schedctl_finish_sigblock(t);
+		lwp->lwp_sigoldmask = t->t_hold;
+		t->t_hold = kset;
+		t->t_flag |= T_TOMASK;
+		/*
+		 * Call cv_reltimedwait_sig() just to check for signals.
+		 * We will return immediately with either 0 or -1.
+		 */
+		if (!cv_reltimedwait_sig(&t->t_delay_cv, &p->p_lock, 0,
+		    TR_CLOCK_TICK)) {
+			mutex_exit(&p->p_lock);
+			error = EINTR;
+			goto pollout;
+		}
+		mutex_exit(&p->p_lock);
+	}
+
+	/*
+	 * Initialize pollstate and copy in pollfd data if present.
+	 * If nfds == 0, we will skip all of the copying and check steps and
+	 * proceed directly into poll_common to process the supplied timeout.
+	 */
+	if (nfds != 0) {
+		if (nfds > p->p_fno_ctl) {
+			mutex_enter(&p->p_lock);
+			(void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE],
+			    p->p_rctls, p, RCA_SAFE);
+			mutex_exit(&p->p_lock);
+			error = EINVAL;
+			goto pollout;
+		}
+
+		/*
+		 * Need to allocate memory for pollstate before anything
+		 * because the mutex and cv are created in this space.
+		 */
+		ps = pollstate_create();
+		if (ps->ps_pcache == NULL)
+			ps->ps_pcache = pcache_alloc();
+
+		if ((error = poll_copyin(ps, fds, nfds)) != 0)
+			goto pollout;
+		pollfdp = ps->ps_pollfd;
+	}
+
+	/*
+	 * Perform the actual poll.  When nfds is 0, ps is still NULL here.
+	 */
+	error = poll_common(ps, fds, nfds, tsp, &fdcnt);
+
 pollout:
 	/*
-	 * If we changed the signal mask but we received
-	 * no signal then restore the signal mask.
-	 * Otherwise psig() will deal with the signal mask.
+	 * If we changed the signal mask but we received no signal then restore
+	 * the signal mask. Otherwise psig() will deal with the signal mask.
 	 */
-	if (ksetp != NULL) {
+	if (setp != NULL) {
 		mutex_enter(&p->p_lock);
 		if (lwp->lwp_cursig == 0) {
 			t->t_hold = lwp->lwp_sigoldmask;
@@ -612,12 +679,10 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 	if (error)
 		return (set_errno(error));
 
-
 	/*
 	 * Copy out the events and return the fdcnt to the user.
+	 */
-	if (nfds != 0 &&
-	    copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
+	if (nfds != 0 && copyout(pollfdp, fds, nfds * sizeof (pollfd_t)))
 		return (set_errno(EFAULT));
 
 #ifdef DEBUG
@@ -625,7 +690,7 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 	 * Another sanity check:
 	 */
 	if (fdcnt) {
-		int reventcnt = 0;
+		int i, reventcnt = 0;
 
 		for (i = 0; i < nfds; i++) {
 			if (pollfdp[i].fd < 0) {
@@ -638,6 +703,8 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 		}
 		ASSERT(fdcnt == reventcnt);
 	} else {
+		int i;
+
 		for (i = 0; i < nfds; i++) {
 			ASSERT(pollfdp[i].revents == 0);
 		}
@@ -647,52 +714,6 @@ poll_common(pollfd_t *fds, nfds_t nfds, timespec_t *tsp, k_sigset_t *ksetp)
 	return (fdcnt);
 }
 
-/*
- * This is the system call trap that poll(),
- * select() and pselect() are built upon.
- * It is a private interface between libc and the kernel.
- */
-int
-pollsys(pollfd_t *fds, nfds_t nfds, timespec_t *timeoutp, sigset_t *setp)
-{
-	timespec_t ts;
-	timespec_t *tsp;
-	sigset_t set;
-	k_sigset_t kset;
-	k_sigset_t *ksetp;
-	model_t datamodel = get_udatamodel();
-
-	if (timeoutp == NULL)
-		tsp = NULL;
-	else {
-		if (datamodel == DATAMODEL_NATIVE) {
-			if (copyin(timeoutp, &ts, sizeof (ts)))
-				return (set_errno(EFAULT));
-		} else {
-			timespec32_t ts32;
-
-			if (copyin(timeoutp, &ts32, sizeof (ts32)))
-				return (set_errno(EFAULT));
-			TIMESPEC32_TO_TIMESPEC(&ts, &ts32)
-		}
-
-		if (itimerspecfix(&ts))
-			return (set_errno(EINVAL));
-		tsp = &ts;
-	}
-
-	if (setp == NULL)
-		ksetp = NULL;
-	else {
-		if (copyin(setp, &set, sizeof (set)))
-			return (set_errno(EFAULT));
-		sigutok(&set, &kset);
-		ksetp = &kset;
-	}
-
-	return (poll_common(fds, nfds, tsp, ksetp));
-}
-
 /*
  * Clean up any state left around by poll(2). Called when a thread exits.
*/ diff --git a/usr/src/uts/intel/Makefile.files b/usr/src/uts/intel/Makefile.files index 1091c7ad735c..6ec848acf949 100644 --- a/usr/src/uts/intel/Makefile.files +++ b/usr/src/uts/intel/Makefile.files @@ -314,11 +314,13 @@ LX_BRAND_OBJS = \ lx_open.o \ lx_pid.o \ lx_pipe.o \ + lx_poll.o \ lx_prctl.o \ lx_ptrace.o \ lx_rlimit.o \ lx_rw.o \ lx_sched.o \ + lx_signal.o \ lx_signum.o \ lx_socket.o \ lx_sync.o \