/*
* Copyright (c) 2000-2020 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* This file contains Original Code and/or Modifications of Original Code
* as defined in and that are subject to the Apple Public Source License
* Version 2.0 (the 'License'). You may not use this file except in
* compliance with the License. The rights granted to you under the License
* may not be used to create, or enable the creation or redistribution of,
* unlawful or unlicensed copies of an Apple operating system, or to
* circumvent, violate, or enable the circumvention or violation of, any
* terms of an Apple operating system software license agreement.
*
* Please obtain a copy of the License at
* http://www.opensource.apple.com/apsl/ and read it before using this file.
*
* The Original Code and all software distributed under the License are
* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
* EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
* INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
* Please see the License for the specific language governing rights and
* limitations under the License.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_END@
*
*/
/*-
* Copyright (c) 1999,2000,2001 Jonathan Lemon <jlemon@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* @(#)kern_event.c 1.0 (3/31/2000)
*/
#include <stdint.h>
#include <machine/atomic.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/proc_internal.h>
#include <sys/kauth.h>
#include <sys/malloc.h>
#include <sys/unistd.h>
#include <sys/file_internal.h>
#include <sys/fcntl.h>
#include <sys/select.h>
#include <sys/queue.h>
#include <sys/event.h>
#include <sys/eventvar.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/stat.h>
#include <sys/syscall.h> // SYS_* constants
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/sysproto.h>
#include <sys/user.h>
#include <sys/vnode_internal.h>
#include <string.h>
#include <sys/proc_info.h>
#include <sys/codesign.h>
#include <sys/pthread_shims.h>
#include <sys/kdebug.h>
#include <os/base.h>
#include <pexpert/pexpert.h>
#include <kern/locks.h>
#include <kern/clock.h>
#include <kern/cpu_data.h>
#include <kern/policy_internal.h>
#include <kern/thread_call.h>
#include <kern/sched_prim.h>
#include <kern/waitq.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/assert.h>
#include <kern/ast.h>
#include <kern/thread.h>
#include <kern/kcdata.h>
#include <pthread/priority_private.h>
#include <pthread/workqueue_syscalls.h>
#include <pthread/workqueue_internal.h>
#include <libkern/libkern.h>
#include "net/net_str_id.h"
#include <mach/task.h>
#include <libkern/section_keywords.h>
#if CONFIG_MEMORYSTATUS
#include <sys/kern_memorystatus.h>
#endif
#if DEVELOPMENT || DEBUG
#define KEVENT_PANIC_ON_WORKLOOP_OWNERSHIP_LEAK (1U << 0)
#define KEVENT_PANIC_ON_NON_ENQUEUED_PROCESS (1U << 1)
TUNABLE(uint32_t, kevent_debug_flags, "kevent_debug", 0);
#endif
static LCK_GRP_DECLARE(kq_lck_grp, "kqueue");
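/*
 * Note: knotes store a packed pointer back to their owning kqueue (see
 * knote_get_kq()); these parameters describe that packing scheme.
 */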
SECURITY_READ_ONLY_EARLY(vm_packing_params_t) kn_kq_packing_params =
VM_PACKING_PARAMS(KNOTE_KQ_PACKED);
extern mach_port_name_t ipc_entry_name_mask(mach_port_name_t name); /* osfmk/ipc/ipc_entry.h */
extern int cansignal(struct proc *, kauth_cred_t, struct proc *, int); /* bsd/kern/kern_sig.c */
#define KEV_EVTID(code) BSDDBG_CODE(DBG_BSD_KEVENT, (code))
/*
* If you need accounting for KM_KQUEUE consider using
* KALLOC_HEAP_DEFINE to define a zone view.
*/
#define KM_KQUEUE KHEAP_DEFAULT
#define KQ_EVENT NO_EVENT64
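/* Wait event the kqueue itself sleeps on while waiting for events. */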
static int kqueue_select(struct fileproc *fp, int which, void *wq_link_id,
vfs_context_t ctx);
static int kqueue_close(struct fileglob *fg, vfs_context_t ctx);
static int kqueue_kqfilter(struct fileproc *fp, struct knote *kn,
struct kevent_qos_s *kev);
static int kqueue_drain(struct fileproc *fp, vfs_context_t ctx);
static const struct fileops kqueueops = {
.fo_type = DTYPE_KQUEUE,
.fo_read = fo_no_read,
.fo_write = fo_no_write,
.fo_ioctl = fo_no_ioctl,
.fo_select = kqueue_select,
.fo_close = kqueue_close,
.fo_drain = kqueue_drain,
.fo_kqfilter = kqueue_kqfilter,
};
static inline int kevent_modern_copyout(struct kevent_qos_s *, user_addr_t *);
static int kevent_register_wait_prepare(struct knote *kn, struct kevent_qos_s *kev, int result);
static void kevent_register_wait_block(struct turnstile *ts, thread_t handoff_thread,
thread_continue_t cont, struct _kevent_register *cont_args) __dead2;
static void kevent_register_wait_return(struct _kevent_register *cont_args) __dead2;
static void kevent_register_wait_cleanup(struct knote *kn);
static struct kqtailq *kqueue_get_suppressed_queue(kqueue_t kq, struct knote *kn);
static void kqueue_threadreq_initiate(struct kqueue *kq, workq_threadreq_t, kq_index_t qos, int flags);
static void kqworkq_unbind(proc_t p, workq_threadreq_t);
static thread_qos_t kqworkq_unbind_locked(struct kqworkq *kqwq, workq_threadreq_t, thread_t thread);
static workq_threadreq_t kqworkq_get_request(struct kqworkq *kqwq, kq_index_t qos_index);
static void kqworkloop_unbind(struct kqworkloop *kwql);
enum kqwl_unbind_locked_mode {
KQWL_OVERRIDE_DROP_IMMEDIATELY,
KQWL_OVERRIDE_DROP_DELAYED,
};
static void kqworkloop_unbind_locked(struct kqworkloop *kwql, thread_t thread,
enum kqwl_unbind_locked_mode how);
static void kqworkloop_unbind_delayed_override_drop(thread_t thread);
static kq_index_t kqworkloop_override(struct kqworkloop *kqwl);
static void kqworkloop_set_overcommit(struct kqworkloop *kqwl);
enum {
KQWL_UTQ_NONE,
/*
* The wakeup qos is the qos of QUEUED knotes.
*
* This QoS is accounted for with the events override in the
* kqr_override_index field. It is raised each time a new knote is queued at
 * a given QoS. The kqwl_wakeup_indexes field is a superset of the non-empty
* knote buckets and is recomputed after each event delivery.
*/
KQWL_UTQ_UPDATE_WAKEUP_QOS,
KQWL_UTQ_UPDATE_STAYACTIVE_QOS,
KQWL_UTQ_RECOMPUTE_WAKEUP_QOS,
KQWL_UTQ_UNBINDING, /* attempt to rebind */
KQWL_UTQ_PARKING,
/*
* The wakeup override is for suppressed knotes that have fired again at
* a higher QoS than the one for which they are suppressed already.
* This override is cleared when the knote suppressed list becomes empty.
*/
KQWL_UTQ_UPDATE_WAKEUP_OVERRIDE,
KQWL_UTQ_RESET_WAKEUP_OVERRIDE,
/*
* The QoS is the maximum QoS of an event enqueued on this workloop in
 * userland. It is copied from the single EVFILT_WORKLOOP knote with the
 * NOTE_WL_THREAD_REQUEST bit set that is allowed on this workloop. If there is no
* such knote, this QoS is 0.
*/
KQWL_UTQ_SET_QOS_INDEX,
KQWL_UTQ_REDRIVE_EVENTS,
};
static void kqworkloop_update_threads_qos(struct kqworkloop *kqwl, int op, kq_index_t qos);
static int kqworkloop_end_processing(struct kqworkloop *kqwl, int flags, int kevent_flags);
static struct knote *knote_alloc(void);
static void knote_free(struct knote *kn);
static int kq_add_knote(struct kqueue *kq, struct knote *kn,
struct knote_lock_ctx *knlc, struct proc *p);
static struct knote *kq_find_knote_and_kq_lock(struct kqueue *kq,
struct kevent_qos_s *kev, bool is_fd, struct proc *p);
static void knote_activate(kqueue_t kqu, struct knote *kn, int result);
static void knote_dequeue(kqueue_t kqu, struct knote *kn);
static void knote_apply_touch(kqueue_t kqu, struct knote *kn,
struct kevent_qos_s *kev, int result);
static void knote_suppress(kqueue_t kqu, struct knote *kn);
static void knote_unsuppress(kqueue_t kqu, struct knote *kn);
static void knote_drop(kqueue_t kqu, struct knote *kn, struct knote_lock_ctx *knlc);
// Both of these functions may dequeue the knote; it is up to the caller
// to enqueue the knote back.
static void knote_adjust_qos(struct kqueue *kq, struct knote *kn, int result);
static void knote_reset_priority(kqueue_t kqu, struct knote *kn, pthread_priority_t pp);
static ZONE_DECLARE(knote_zone, "knote zone",
sizeof(struct knote), ZC_CACHING | ZC_ZFREE_CLEARMEM);
static ZONE_DECLARE(kqfile_zone, "kqueue file zone",
sizeof(struct kqfile), ZC_ZFREE_CLEARMEM);
static ZONE_DECLARE(kqworkq_zone, "kqueue workq zone",
sizeof(struct kqworkq), ZC_ZFREE_CLEARMEM);
static ZONE_DECLARE(kqworkloop_zone, "kqueue workloop zone",
sizeof(struct kqworkloop), ZC_CACHING | ZC_ZFREE_CLEARMEM);
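/*
 * Simple knote hash: fold bits 8..15 of the value into the low bits, then
 * mask down to the hash table size (the mask is expected to be a power of
 * two minus one).
 */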
#define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask))
static int filt_no_attach(struct knote *kn, struct kevent_qos_s *kev);
static void filt_no_detach(struct knote *kn);
static int filt_bad_event(struct knote *kn, long hint);
static int filt_bad_touch(struct knote *kn, struct kevent_qos_s *kev);
static int filt_bad_process(struct knote *kn, struct kevent_qos_s *kev);
SECURITY_READ_ONLY_EARLY(static struct filterops) bad_filtops = {
.f_attach = filt_no_attach,
.f_detach = filt_no_detach,
.f_event = filt_bad_event,
.f_touch = filt_bad_touch,
.f_process = filt_bad_process,
};
#if CONFIG_MEMORYSTATUS
extern const struct filterops memorystatus_filtops;
#endif /* CONFIG_MEMORYSTATUS */
extern const struct filterops fs_filtops;
extern const struct filterops sig_filtops;
extern const struct filterops machport_filtops;
extern const struct filterops pipe_nfiltops;
extern const struct filterops pipe_rfiltops;
extern const struct filterops pipe_wfiltops;
extern const struct filterops ptsd_kqops;
extern const struct filterops ptmx_kqops;
extern const struct filterops soread_filtops;
extern const struct filterops sowrite_filtops;
extern const struct filterops sock_filtops;
extern const struct filterops soexcept_filtops;
extern const struct filterops spec_filtops;
extern const struct filterops bpfread_filtops;
extern const struct filterops necp_fd_rfiltops;
extern const struct filterops fsevent_filtops;
extern const struct filterops vnode_filtops;
extern const struct filterops tty_filtops;
const static struct filterops file_filtops;
const static struct filterops kqread_filtops;
const static struct filterops proc_filtops;
const static struct filterops timer_filtops;
const static struct filterops user_filtops;
const static struct filterops workloop_filtops;
/*
*
* Rules for adding new filters to the system:
* Public filters:
* - Add a new "EVFILT_" option value to bsd/sys/event.h (typically a negative value)
* in the exported section of the header
* - Update the EVFILT_SYSCOUNT value to reflect the new addition
* - Add a filterops to the sysfilt_ops array. Public filters should be added at the end
* of the Public Filters section in the array.
* Private filters:
* - Add a new "EVFILT_" value to bsd/sys/event.h (typically a positive value)
* in the XNU_KERNEL_PRIVATE section of the header
* - Update the EVFILTID_MAX value to reflect the new addition
* - Add a filterops to the sysfilt_ops. Private filters should be added at the end of
* the Private filters section of the array.
*/
static_assert(EVFILTID_MAX < UINT8_MAX, "kn_filtid expects this to be true");
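/*
 * Public EVFILT_* values are negative, so ~EVFILT_x (== -EVFILT_x - 1) maps
 * them to small non-negative indices: EVFILT_READ (-1) lands in slot 0,
 * EVFILT_WRITE (-2) in slot 1, and so on. Private EVFILTID_* values are
 * positive and index the array directly.
 */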
static const struct filterops * const sysfilt_ops[EVFILTID_MAX] = {
/* Public Filters */
[~EVFILT_READ] = &file_filtops,
[~EVFILT_WRITE] = &file_filtops,
[~EVFILT_AIO] = &bad_filtops,
[~EVFILT_VNODE] = &file_filtops,
[~EVFILT_PROC] = &proc_filtops,
[~EVFILT_SIGNAL] = &sig_filtops,
[~EVFILT_TIMER] = &timer_filtops,
[~EVFILT_MACHPORT] = &machport_filtops,
[~EVFILT_FS] = &fs_filtops,
[~EVFILT_USER] = &user_filtops,
[~EVFILT_UNUSED_11] = &bad_filtops,
[~EVFILT_VM] = &bad_filtops,
[~EVFILT_SOCK] = &file_filtops,
#if CONFIG_MEMORYSTATUS
[~EVFILT_MEMORYSTATUS] = &memorystatus_filtops,
#else
[~EVFILT_MEMORYSTATUS] = &bad_filtops,
#endif
[~EVFILT_EXCEPT] = &file_filtops,
[~EVFILT_WORKLOOP] = &workloop_filtops,
/* Private filters */
[EVFILTID_KQREAD] = &kqread_filtops,
[EVFILTID_PIPE_N] = &pipe_nfiltops,
[EVFILTID_PIPE_R] = &pipe_rfiltops,
[EVFILTID_PIPE_W] = &pipe_wfiltops,
[EVFILTID_PTSD] = &ptsd_kqops,
[EVFILTID_SOREAD] = &soread_filtops,
[EVFILTID_SOWRITE] = &sowrite_filtops,
[EVFILTID_SCK] = &sock_filtops,
[EVFILTID_SOEXCEPT] = &soexcept_filtops,
[EVFILTID_SPEC] = &spec_filtops,
[EVFILTID_BPFREAD] = &bpfread_filtops,
[EVFILTID_NECP_FD] = &necp_fd_rfiltops,
[EVFILTID_FSEVENT] = &fsevent_filtops,
[EVFILTID_VN] = &vnode_filtops,
[EVFILTID_TTY] = &tty_filtops,
[EVFILTID_PTMX] = &ptmx_kqops,
/* fake filter for detached knotes, keep last */
[EVFILTID_DETACHED] = &bad_filtops,
};
/* waitq prepost callback */
void waitq_set__CALLING_PREPOST_HOOK__(waitq_set_prepost_hook_t *kq_hook);
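/*
 * Accessors for the workq thread request (kqr) state machine: a request
 * starts out WORKQ_TR_STATE_IDLE, moves through the intermediate
 * "requested" states, and ends up WORKQ_TR_STATE_BOUND once a servicer
 * thread is bound to it.
 */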
static inline bool
kqr_thread_bound(workq_threadreq_t kqr)
{
return kqr->tr_state == WORKQ_TR_STATE_BOUND;
}
static inline bool
kqr_thread_requested_pending(workq_threadreq_t kqr)
{
workq_tr_state_t tr_state = kqr->tr_state;
return tr_state > WORKQ_TR_STATE_IDLE && tr_state < WORKQ_TR_STATE_BOUND;
}
static inline bool
kqr_thread_requested(workq_threadreq_t kqr)
{
return kqr->tr_state != WORKQ_TR_STATE_IDLE;
}
static inline thread_t
kqr_thread_fast(workq_threadreq_t kqr)
{
assert(kqr_thread_bound(kqr));
return kqr->tr_thread;
}
static inline thread_t
kqr_thread(workq_threadreq_t kqr)
{
return kqr_thread_bound(kqr) ? kqr->tr_thread : THREAD_NULL;
}
static inline struct kqworkloop *
kqr_kqworkloop(workq_threadreq_t kqr)
{
if (kqr->tr_flags & WORKQ_TR_FLAG_WORKLOOP) {
return __container_of(kqr, struct kqworkloop, kqwl_request);
}
return NULL;
}
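/*
 * Resolves a thread request to the kqueue it belongs to: the owning
 * workloop when WORKQ_TR_FLAG_WORKLOOP is set, otherwise the process-wide
 * workq kqueue (which embeds one request per QoS bucket).
 */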
static inline kqueue_t
kqr_kqueue(proc_t p, workq_threadreq_t kqr)
{
kqueue_t kqu;
if (kqr->tr_flags & WORKQ_TR_FLAG_WORKLOOP) {
kqu.kqwl = kqr_kqworkloop(kqr);
} else {
kqu.kqwq = p->p_fd->fd_wqkqueue;
assert(kqr >= kqu.kqwq->kqwq_request &&
kqr < kqu.kqwq->kqwq_request + KQWQ_NBUCKETS);
}
return kqu;
}
/*
* kqueue/note lock implementations
*
* The kqueue lock guards the kq state, the state of its queues,
* and the kqueue-aware status and locks of individual knotes.
*
* The kqueue workq lock is used to protect state guarding the
* interaction of the kqueue with the workq. This state cannot
* be guarded by the kq lock - as it needs to be taken when we
* already have the waitq set lock held (during the waitq hook
* callback). It might be better to use the waitq lock itself
 * for this, but the IRQ requirements make that difficult.
*
* Knote flags, filter flags, and associated data are protected
* by the underlying object lock - and are only ever looked at
* by calling the filter to get a [consistent] snapshot of that
* data.
*/
static inline void
kqlock(kqueue_t kqu)
{
lck_spin_lock(&kqu.kq->kq_lock);
}
static inline void
kqlock_held(__assert_only kqueue_t kqu)
{
LCK_SPIN_ASSERT(&kqu.kq->kq_lock, LCK_ASSERT_OWNED);
}
static inline void
kqunlock(kqueue_t kqu)
{
lck_spin_unlock(&kqu.kq->kq_lock);
}
static inline void
knhash_lock(struct filedesc *fdp)
{
lck_mtx_lock(&fdp->fd_knhashlock);
}
static inline void
knhash_unlock(struct filedesc *fdp)
{
lck_mtx_unlock(&fdp->fd_knhashlock);
}
/* wait event for knote locks */
static inline event_t
knote_lock_wev(struct knote *kn)
{
return (event_t)(&kn->kn_hook);
}
/* wait event for kevent_register_wait_* */
static inline event64_t
knote_filt_wev64(struct knote *kn)
{
/* kdp_workloop_sync_wait_find_owner knows about this */
return CAST_EVENT64_T(kn);
}
/* wait event for knote_post/knote_drop */
static inline event64_t
knote_post_wev64(struct knote *kn)
{
return CAST_EVENT64_T(&kn->kn_kevent);
}
/*!
* @function knote_has_qos
*
* @brief
* Whether the knote has a regular QoS.
*
* @discussion
* kn_qos_override is:
* - 0 on kqfiles
* - THREAD_QOS_LAST for special buckets (stayactive, manager)
*
 * Other values mean the knote participates in QoS propagation.
*/
static inline bool
knote_has_qos(struct knote *kn)
{
return kn->kn_qos_override > 0 && kn->kn_qos_override < THREAD_QOS_LAST;
}
#pragma mark knote locks
/*
* Enum used by the knote_lock_* functions.
*
* KNOTE_KQ_LOCK_ALWAYS
* The function will always return with the kq lock held.
*
* KNOTE_KQ_LOCK_ON_SUCCESS
* The function will return with the kq lock held if it was successful
* (knote_lock() is the only function that can fail).
*
* KNOTE_KQ_LOCK_ON_FAILURE
* The function will return with the kq lock held if it was unsuccessful
* (knote_lock() is the only function that can fail).
*
* KNOTE_KQ_UNLOCK:
* The function returns with the kq unlocked.
*/
enum kqlocking {
KNOTE_KQ_LOCK_ALWAYS,
KNOTE_KQ_LOCK_ON_SUCCESS,
KNOTE_KQ_LOCK_ON_FAILURE,
KNOTE_KQ_UNLOCK,
};
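/*
 * Rough usage sketch of the modes above (not lifted from any single call
 * site):
 *
 *	kqlock(kq);
 *	if (knote_lock(kq, kn, &knlc, KNOTE_KQ_LOCK_ALWAYS)) {
 *		// knote lock held; kq lock held again per KNOTE_KQ_LOCK_ALWAYS
 *		knote_unlock(kq, kn, &knlc, KNOTE_KQ_UNLOCK);
 *	} else {
 *		// the knote was being dropped while we slept;
 *		// the knote lock was never acquired
 *	}
 */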
static struct knote_lock_ctx *
knote_lock_ctx_find(kqueue_t kqu, struct knote *kn)
{
struct knote_lock_ctx *ctx;
LIST_FOREACH(ctx, &kqu.kq->kq_knlocks, knlc_link) {
if (ctx->knlc_knote == kn) {
return ctx;
}
}
panic("knote lock context not found: %p", kn);
__builtin_trap();
}
/* slowpath of knote_lock() */
__attribute__((noinline))
static bool __result_use_check
knote_lock_slow(kqueue_t kqu, struct knote *kn,
struct knote_lock_ctx *knlc, int kqlocking)
{
struct knote_lock_ctx *owner_lc;
struct uthread *uth = current_uthread();
wait_result_t wr;
kqlock_held(kqu);
owner_lc = knote_lock_ctx_find(kqu, kn);
#if DEBUG || DEVELOPMENT
knlc->knlc_state = KNOTE_LOCK_CTX_WAITING;
#endif
owner_lc->knlc_waiters++;
/*
* Make our lock context visible to knote_unlock()
*/
uth->uu_knlock = knlc;
wr = lck_spin_sleep_with_inheritor(&kqu.kq->kq_lock, LCK_SLEEP_UNLOCK,
knote_lock_wev(kn), owner_lc->knlc_thread,
THREAD_UNINT | THREAD_WAIT_NOREPORT, TIMEOUT_WAIT_FOREVER);
if (wr == THREAD_RESTART) {
/*
 * We weren't woken up by knote_unlock() but by knote_unlock_cancel().
 * We need to clean up the state since no one else will.
*/
uth->uu_knlock = NULL;
#if DEBUG || DEVELOPMENT
assert(knlc->knlc_state == KNOTE_LOCK_CTX_WAITING);
knlc->knlc_state = KNOTE_LOCK_CTX_UNLOCKED;
#endif
if (kqlocking == KNOTE_KQ_LOCK_ALWAYS ||
kqlocking == KNOTE_KQ_LOCK_ON_FAILURE) {
kqlock(kqu);
}
return false;
} else {
if (kqlocking == KNOTE_KQ_LOCK_ALWAYS ||
kqlocking == KNOTE_KQ_LOCK_ON_SUCCESS) {
kqlock(kqu);
#if DEBUG || DEVELOPMENT
/*
* This state is set under the lock so we can't
* really assert this unless we hold the lock.
*/
assert(knlc->knlc_state == KNOTE_LOCK_CTX_LOCKED);
#endif
}
return true;
}
}
/*
* Attempts to take the "knote" lock.
*
* Called with the kqueue lock held.
*
 * Returns true if the knote lock was acquired, false if the knote was dropped.
*/
static bool __result_use_check
knote_lock(kqueue_t kqu, struct knote *kn, struct knote_lock_ctx *knlc,
enum kqlocking kqlocking)
{
kqlock_held(kqu);
#if DEBUG || DEVELOPMENT
assert(knlc->knlc_state == KNOTE_LOCK_CTX_UNLOCKED);
#endif
knlc->knlc_knote = kn;
knlc->knlc_thread = current_thread();
knlc->knlc_waiters = 0;
if (__improbable(kn->kn_status & KN_LOCKED)) {
return knote_lock_slow(kqu, kn, knlc, kqlocking);
}
/*
* When the knote will be dropped, the knote lock is taken before
* KN_DROPPING is set, and then the knote will be removed from any
* hash table that references it before the lock is canceled.
*/
assert((kn->kn_status & KN_DROPPING) == 0);
LIST_INSERT_HEAD(&kqu.kq->kq_knlocks, knlc, knlc_link);
kn->kn_status |= KN_LOCKED;
#if DEBUG || DEVELOPMENT
knlc->knlc_state = KNOTE_LOCK_CTX_LOCKED;
#endif
if (kqlocking == KNOTE_KQ_UNLOCK ||
kqlocking == KNOTE_KQ_LOCK_ON_FAILURE) {
kqunlock(kqu);
}
return true;
}
/*
* Unlocks a knote successfully locked with knote_lock().
*
* Called with the kqueue lock held.
*
* Returns with the kqueue lock held according to KNOTE_KQ_* mode.
*/
static void
knote_unlock(kqueue_t kqu, struct knote *kn,
struct knote_lock_ctx *knlc, enum kqlocking kqlocking)
{
kqlock_held(kqu);
assert(knlc->knlc_knote == kn);
assert(kn->kn_status & KN_LOCKED);
#if DEBUG || DEVELOPMENT
assert(knlc->knlc_state == KNOTE_LOCK_CTX_LOCKED);
#endif
LIST_REMOVE(knlc, knlc_link);
if (knlc->knlc_waiters) {
thread_t thread = THREAD_NULL;
wakeup_one_with_inheritor(knote_lock_wev(kn), THREAD_AWAKENED,
LCK_WAKE_DEFAULT, &thread);
/*
* knote_lock_slow() publishes the lock context of waiters
* in uthread::uu_knlock.
*
* Reach out and make this context the new owner.
*/
struct uthread *ut = get_bsdthread_info(thread);
struct knote_lock_ctx *next_owner_lc = ut->uu_knlock;
assert(next_owner_lc->knlc_knote == kn);
next_owner_lc->knlc_waiters = knlc->knlc_waiters - 1;
LIST_INSERT_HEAD(&kqu.kq->kq_knlocks, next_owner_lc, knlc_link);
#if DEBUG || DEVELOPMENT
next_owner_lc->knlc_state = KNOTE_LOCK_CTX_LOCKED;
#endif
ut->uu_knlock = NULL;
thread_deallocate_safe(thread);
} else {
kn->kn_status &= ~KN_LOCKED;
}
if ((kn->kn_status & KN_MERGE_QOS) && !(kn->kn_status & KN_POSTING)) {
/*
* No f_event() in flight anymore, we can leave QoS "Merge" mode
*
* See knote_adjust_qos()
*/
kn->kn_status &= ~KN_MERGE_QOS;
}
if (kqlocking == KNOTE_KQ_UNLOCK) {
kqunlock(kqu);
}
#if DEBUG || DEVELOPMENT
knlc->knlc_state = KNOTE_LOCK_CTX_UNLOCKED;
#endif
}
/*
 * Aborts all waiters for a knote lock, and unlocks the knote.
*
* Called with the kqueue lock held.
*
* Returns with the kqueue unlocked.
*/
static void
knote_unlock_cancel(struct kqueue *kq, struct knote *kn,
struct knote_lock_ctx *knlc)
{
kqlock_held(kq);
assert(knlc->knlc_knote == kn);
assert(kn->kn_status & KN_LOCKED);
assert(kn->kn_status & KN_DROPPING);
LIST_REMOVE(knlc, knlc_link);
kn->kn_status &= ~KN_LOCKED;
kqunlock(kq);
if (knlc->knlc_waiters) {
wakeup_all_with_inheritor(knote_lock_wev(kn), THREAD_RESTART);
}
#if DEBUG || DEVELOPMENT
knlc->knlc_state = KNOTE_LOCK_CTX_UNLOCKED;
#endif
}
/*
* Call the f_event hook of a given filter.
*
* Takes a use count to protect against concurrent drops.
*/
static void
knote_post(struct knote *kn, long hint)
{
struct kqueue *kq = knote_get_kq(kn);
int dropping, result;
kqlock(kq);
if (__improbable(kn->kn_status & (KN_DROPPING | KN_VANISHED))) {
return kqunlock(kq);
}
if (__improbable(kn->kn_status & KN_POSTING)) {
panic("KNOTE() called concurrently on knote %p", kn);
}
kn->kn_status |= KN_POSTING;
kqunlock(kq);
result = filter_call(knote_fops(kn), f_event(kn, hint));
kqlock(kq);
dropping = (kn->kn_status & KN_DROPPING);
if (!dropping && (result & FILTER_ACTIVE)) {
knote_activate(kq, kn, result);
}
if ((kn->kn_status & KN_LOCKED) == 0) {
/*
* There's no other f_* call in flight, we can leave QoS "Merge" mode.
*
* See knote_adjust_qos()
*/
kn->kn_status &= ~(KN_POSTING | KN_MERGE_QOS);
} else {
kn->kn_status &= ~KN_POSTING;
}
if (__improbable(dropping)) {
waitq_wakeup64_all((struct waitq *)&kq->kq_wqs, knote_post_wev64(kn),
THREAD_AWAKENED, WAITQ_ALL_PRIORITIES);
}
kqunlock(kq);
}
/*
* Called by knote_drop() to wait for the last f_event() caller to be done.
*
* - kq locked at entry
* - kq unlocked at exit
*/
static void
knote_wait_for_post(struct kqueue *kq, struct knote *kn)
{
wait_result_t wr = THREAD_NOT_WAITING;
kqlock_held(kq);
assert(kn->kn_status & KN_DROPPING);
if (kn->kn_status & KN_POSTING) {
wr = waitq_assert_wait64((struct waitq *)&kq->kq_wqs,
knote_post_wev64(kn), THREAD_UNINT | THREAD_WAIT_NOREPORT,
TIMEOUT_WAIT_FOREVER);
}
kqunlock(kq);
if (wr == THREAD_WAITING) {
thread_block(THREAD_CONTINUE_NULL);
}
}
#pragma mark knote helpers for filters
OS_ALWAYS_INLINE
void
knote_set_error(struct knote *kn, int error)
{
kn->kn_flags |= EV_ERROR;
kn->kn_sdata = error;
}
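/*
 * The low watermark is the minimum amount of data that must be pending
 * before the event fires: kn_sdata when the user asked for NOTE_LOWAT,
 * otherwise 1.
 */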
OS_ALWAYS_INLINE
int64_t
knote_low_watermark(const struct knote *kn)
{
return (kn->kn_sfflags & NOTE_LOWAT) ? kn->kn_sdata : 1;
}
/*!
* @function knote_fill_kevent_with_sdata
*
* @brief
* Fills in a kevent from the current content of a knote.
*
* @discussion
* This is meant to be called from filter's f_event hooks.
* The kevent data is filled with kn->kn_sdata.
*
* kn->kn_fflags is cleared if kn->kn_flags has EV_CLEAR set.
*
* Using knote_fill_kevent is typically preferred.
*/
OS_ALWAYS_INLINE
void
knote_fill_kevent_with_sdata(struct knote *kn, struct kevent_qos_s *kev)
{
#define knote_assert_aliases(name1, offs1, name2) \
static_assert(offsetof(struct kevent_qos_s, name1) + offs1 == \
offsetof(struct kevent_internal_s, name2), \
"kevent_qos_s::" #name1 " and kevent_internal_s::" #name2 "need to alias")
/*
 * All the code makes assumptions about this aliasing,
 * so make sure we fail the build if we ever break it.
*/
knote_assert_aliases(ident, 0, kei_ident);
#ifdef __LITTLE_ENDIAN__
knote_assert_aliases(filter, 0, kei_filter); // non trivial overlap
knote_assert_aliases(filter, 1, kei_filtid); // non trivial overlap
#else
knote_assert_aliases(filter, 0, kei_filtid); // non trivial overlap
knote_assert_aliases(filter, 1, kei_filter); // non trivial overlap
#endif
knote_assert_aliases(flags, 0, kei_flags);
knote_assert_aliases(qos, 0, kei_qos);
knote_assert_aliases(udata, 0, kei_udata);
knote_assert_aliases(fflags, 0, kei_fflags);
knote_assert_aliases(xflags, 0, kei_sfflags); // non trivial overlap
knote_assert_aliases(data, 0, kei_sdata); // non trivial overlap
knote_assert_aliases(ext, 0, kei_ext);
#undef knote_assert_aliases
/*
* Fix the differences between kevent_qos_s and kevent_internal_s:
* - xflags is where kn_sfflags lives, we need to zero it
* - fixup the high bits of `filter` where kn_filtid lives
*/
*kev = *(struct kevent_qos_s *)&kn->kn_kevent;
kev->xflags = 0;
kev->filter |= 0xff00;
if (kn->kn_flags & EV_CLEAR) {
kn->kn_fflags = 0;
}
}
/*!
* @function knote_fill_kevent
*
* @brief
* Fills in a kevent from the current content of a knote.
*
* @discussion
* This is meant to be called from filter's f_event hooks.
* The kevent data is filled with the passed in data.
*
* kn->kn_fflags is cleared if kn->kn_flags has EV_CLEAR set.
*/
OS_ALWAYS_INLINE
void
knote_fill_kevent(struct knote *kn, struct kevent_qos_s *kev, int64_t data)
{
knote_fill_kevent_with_sdata(kn, kev);
kev->filter = kn->kn_filter;
kev->data = data;
}
#pragma mark file_filtops
static int
filt_fileattach(struct knote *kn, struct kevent_qos_s *kev)
{
return fo_kqfilter(kn->kn_fp, kn, kev);
}
SECURITY_READ_ONLY_EARLY(static struct filterops) file_filtops = {
.f_isfd = 1,
.f_attach = filt_fileattach,
};
#pragma mark kqread_filtops
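/* Shorthand accessors that reach through a fileproc into its fileglob. */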
#define f_flag fp_glob->fg_flag
#define f_ops fp_glob->fg_ops
#define f_data fp_glob->fg_data
#define f_lflags fp_glob->fg_lflags
static void
filt_kqdetach(struct knote *kn)
{
struct kqfile *kqf = (struct kqfile *)kn->kn_fp->f_data;
struct kqueue *kq = &kqf->kqf_kqueue;
kqlock(kq);
KNOTE_DETACH(&kqf->kqf_sel.si_note, kn);
kqunlock(kq);
}
static int
filt_kqueue(struct knote *kn, __unused long hint)
{
struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
return kq->kq_count > 0;
}
static int
filt_kqtouch(struct knote *kn, struct kevent_qos_s *kev)
{
#pragma unused(kev)
struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
int res;
kqlock(kq);
res = (kq->kq_count > 0);
kqunlock(kq);
return res;
}
static int
filt_kqprocess(struct knote *kn, struct kevent_qos_s *kev)
{
struct kqueue *kq = (struct kqueue *)kn->kn_fp->f_data;
int res = 0;
kqlock(kq);
if (kq->kq_count) {
knote_fill_kevent(kn, kev, kq->kq_count);
res = 1;
}
kqunlock(kq);
return res;
}
SECURITY_READ_ONLY_EARLY(static struct filterops) kqread_filtops = {
.f_isfd = 1,
.f_detach = filt_kqdetach,
.f_event = filt_kqueue,
.f_touch = filt_kqtouch,
.f_process = filt_kqprocess,
};
#pragma mark proc_filtops
static int
filt_procattach(struct knote *kn, __unused struct kevent_qos_s *kev)
{
struct proc *p;
assert(PID_MAX < NOTE_PDATAMASK);
if ((kn->kn_sfflags & (NOTE_TRACK | NOTE_TRACKERR | NOTE_CHILD)) != 0) {
knote_set_error(kn, ENOTSUP);
return 0;