-
Notifications
You must be signed in to change notification settings - Fork 392
/
tracee.bpf.c
4171 lines (3429 loc) · 134 KB
/
tracee.bpf.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// +build ignore
// ^^ this is a golang build tag meant to exclude this C file from compilation by the CGO compiler
/*
Note: This file is licenced differently from the rest of the project
SPDX-License-Identifier: GPL-2.0
Copyright (C) Aqua Security inc.
*/
#ifndef CORE
/* In Linux 5.4 asm_inline was introduced, but it's not supported by clang.
* Redefine it to just asm to enable successful compilation.
* see https://github.com/iovisor/bcc/commit/2d1497cde1cc9835f759a707b42dea83bee378b8 for more details
* Note: types.h should be included before defining asm_inline or compilation might break
*/
#include <linux/types.h>
#ifdef asm_inline
#undef asm_inline
#define asm_inline asm
#endif
#include <uapi/linux/ptrace.h>
#include <uapi/linux/in.h>
#include <uapi/linux/in6.h>
#include <uapi/linux/uio.h>
#include <uapi/linux/un.h>
#include <uapi/linux/utsname.h>
#include <linux/binfmts.h>
#include <linux/cred.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm_types.h>
#include <linux/mount.h>
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
#include <linux/pid_namespace.h>
#include <linux/ipc_namespace.h>
#include <net/net_namespace.h>
#include <linux/utsname.h>
#include <linux/cgroup.h>
#include <linux/security.h>
#include <linux/socket.h>
#include <linux/version.h>
#define KBUILD_MODNAME "tracee"
#include <net/sock.h>
#include <net/inet_sock.h>
#include <net/ipv6.h>
#include <net/tcp_states.h>
#include <linux/ipv6.h>
#include <uapi/linux/bpf.h>
#include <linux/bpf.h>
#include <linux/kconfig.h>
#include <linux/version.h>
#include <linux/if_ether.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
#else
//CO:RE is enabled
#include <vmlinux.h>
#include "co_re_missing_definitions.h"
#endif
#undef container_of
#include <bpf_core_read.h>
#include <bpf_helpers.h>
#include <bpf_tracing.h>
#include <bpf_endian.h>
#if defined(bpf_target_x86)
#define PT_REGS_PARM6(ctx) ((ctx)->r9)
#elif defined(bpf_target_arm64)
#define PT_REGS_PARM6(x) (((PT_REGS_ARM64 *)(x))->regs[5])
#endif
#ifdef CORE
extern bool CONFIG_ARCH_HAS_SYSCALL_WRAPPER __kconfig;
#endif
#define MAX_PERCPU_BUFSIZE (1 << 15) // This value is actually set by the kernel as an upper bound
#define MAX_STRING_SIZE 4096 // Choosing this value to be the same as PATH_MAX
#define MAX_BYTES_ARR_SIZE 4096 // Max size of bytes array, arbitrarily chosen
#define MAX_STACK_ADDRESSES 1024 // Max amount of different stack trace addresses to buffer in the Map
#define MAX_STACK_DEPTH 20 // Max depth of each stack trace to track
#define MAX_STR_FILTER_SIZE 16 // Max string filter size should be bounded to the size of the compared values (comm, uts)
#define FILE_MAGIC_HDR_SIZE 32 // Number of bytes to save from a file's header (for magic_write event)
#define FILE_MAGIC_MASK 31 // Mask used to pass verifier when submitting magic_write event bytes
#define SUBMIT_BUF_IDX 0
#define STRING_BUF_IDX 1
#define FILE_BUF_IDX 2
#define MAX_BUFFERS 3
#define SEND_VFS_WRITE 1
#define SEND_MPROTECT 2
#define SEND_META_SIZE 20
#define ALERT_MMAP_W_X 1
#define ALERT_MPROT_X_ADD 2
#define ALERT_MPROT_W_ADD 3
#define ALERT_MPROT_W_REM 4
#define TAIL_VFS_WRITE 0
#define TAIL_VFS_WRITEV 1
#define TAIL_SEND_BIN 2
#define MAX_TAIL_CALL 3
#define NONE_T 0UL
#define INT_T 1UL
#define UINT_T 2UL
#define LONG_T 3UL
#define ULONG_T 4UL
#define OFF_T_T 5UL
#define MODE_T_T 6UL
#define DEV_T_T 7UL
#define SIZE_T_T 8UL
#define POINTER_T 9UL
#define STR_T 10UL
#define STR_ARR_T 11UL
#define SOCKADDR_T 12UL
#define ALERT_T 13UL
#define BYTES_T 14UL
#define U16_T 15UL
#define CRED_T 16UL
#define TYPE_MAX 255UL
#define TAG_NONE 0UL
#if defined(bpf_target_x86)
#define SYS_OPEN 2
#define SYS_MMAP 9
#define SYS_MPROTECT 10
#define SYS_RT_SIGRETURN 15
#define SYS_EXECVE 59
#define SYS_EXIT 60
#define SYS_EXIT_GROUP 231
#define SYS_OPENAT 257
#define SYS_EXECVEAT 322
#define SYSCALL_CONNECT 42
#define SYSCALL_ACCEPT 43
#define SYSCALL_ACCEPT4 288
#define SYSCALL_LISTEN 50
#define SYSCALL_BIND 49
#elif defined(bpf_target_arm64)
#define SYS_OPEN 1000 // undefined in arm64
#define SYS_MMAP 222
#define SYS_MPROTECT 226
#define SYS_RT_SIGRETURN 139
#define SYS_EXECVE 221
#define SYS_EXIT 93
#define SYS_EXIT_GROUP 94
#define SYS_OPENAT 56
#define SYS_EXECVEAT 281
#define SYSCALL_CONNECT 203
#define SYSCALL_ACCEPT 202
#define SYSCALL_ACCEPT4 242
#define SYSCALL_LISTEN 201
#define SYSCALL_BIND 200
#endif
#define RAW_SYS_ENTER 1000
#define RAW_SYS_EXIT 1001
#define SCHED_PROCESS_FORK 1002
#define SCHED_PROCESS_EXEC 1003
#define SCHED_PROCESS_EXIT 1004
#define DO_EXIT 1005
#define CAP_CAPABLE 1006
#define VFS_WRITE 1007
#define VFS_WRITEV 1008
#define MEM_PROT_ALERT 1009
#define COMMIT_CREDS 1010
#define SWITCH_TASK_NS 1011
#define MAGIC_WRITE 1012
#define CGROUP_ATTACH_TASK 1013
#define SECURITY_BPRM_CHECK 1014
#define SECURITY_FILE_OPEN 1015
#define SECURITY_INODE_UNLINK 1016
#define SECURITY_SOCKET_CREATE 1017
#define SECURITY_SOCKET_LISTEN 1018
#define SECURITY_SOCKET_CONNECT 1019
#define SECURITY_SOCKET_ACCEPT 1020
#define SECURITY_SOCKET_BIND 1021
#define SECURITY_SB_MOUNT 1022
#define SECURITY_BPF 1023
#define SECURITY_BPF_MAP 1024
#define SECURITY_KERNEL_READ_FILE 1025
#define MAX_EVENT_ID 1026
#define NET_PACKET 0
#define DEBUG_NET_SECURITY_BIND 1
#define DEBUG_NET_UDP_SENDMSG 2
#define DEBUG_NET_UDP_DISCONNECT 3
#define DEBUG_NET_UDP_DESTROY_SOCK 4
#define DEBUG_NET_UDPV6_DESTROY_SOCK 5
#define DEBUG_NET_INET_SOCK_SET_STATE 6
#define DEBUG_NET_TCP_CONNECT 7
#define CONFIG_SHOW_SYSCALL 1
#define CONFIG_EXEC_ENV 2
#define CONFIG_CAPTURE_FILES 3
#define CONFIG_EXTRACT_DYN_CODE 4
#define CONFIG_TRACEE_PID 5
#define CONFIG_CAPTURE_STACK_TRACES 6
#define CONFIG_UID_FILTER 7
#define CONFIG_MNT_NS_FILTER 8
#define CONFIG_PID_NS_FILTER 9
#define CONFIG_UTS_NS_FILTER 10
#define CONFIG_COMM_FILTER 11
#define CONFIG_PID_FILTER 12
#define CONFIG_CONT_FILTER 13
#define CONFIG_FOLLOW_FILTER 14
#define CONFIG_NEW_PID_FILTER 15
#define CONFIG_NEW_CONT_FILTER 16
#define CONFIG_DEBUG_NET 17
// get_config(CONFIG_XXX_FILTER) returns 0 if not enabled
#define FILTER_IN 1
#define FILTER_OUT 2
#define UID_LESS 0
#define UID_GREATER 1
#define PID_LESS 2
#define PID_GREATER 3
#define MNTNS_LESS 4
#define MNTNS_GREATER 5
#define PIDNS_LESS 6
#define PIDNS_GREATER 7
#define LESS_NOT_SET 0
#define GREATER_NOT_SET ULLONG_MAX
#define DEV_NULL_STR 0
#define CONT_ID_LEN 12
#ifndef CORE
#if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0)
// Use lower values on older kernels, where the instruction limit is 4096
#define MAX_STR_ARR_ELEM 40
#define MAX_ARGS_STR_ARR_ELEM 15
#define MAX_PATH_PREF_SIZE 64
#define MAX_PATH_COMPONENTS 20
#define MAX_BIN_CHUNKS 110
#else
// Otherwise, the sky is the limit (complexity limit of 1 million verified instructions)
#define MAX_STR_ARR_ELEM 128
#define MAX_ARGS_STR_ARR_ELEM 128
#define MAX_PATH_PREF_SIZE 128
#define MAX_PATH_COMPONENTS 48
#define MAX_BIN_CHUNKS 256
#endif
#else
// XXX: In the future, these values will be global volatile constants that
// can be set at runtime from userspace go code. This way we can dynamically
// set them based on kernel version. libbpfgo needs this feature first.
// For now setting the lower limit is the safest option.
#define MAX_STR_ARR_ELEM 40
#define MAX_ARGS_STR_ARR_ELEM 15
#define MAX_PATH_PREF_SIZE 64
#define MAX_PATH_COMPONENTS 20
#define MAX_BIN_CHUNKS 110
#endif
#ifndef CORE
#define READ_KERN(ptr) ({ typeof(ptr) _val; \
__builtin_memset(&_val, 0, sizeof(_val)); \
bpf_probe_read(&_val, sizeof(_val), &ptr); \
_val; \
})
#else
// Try using READ_KERN here, just don't embed them in each other
#define READ_KERN(ptr) ({ typeof(ptr) _val; \
__builtin_memset(&_val, 0, sizeof(_val)); \
bpf_core_read(&_val, sizeof(_val), &ptr); \
_val; \
})
#endif
#define BPF_MAP(_name, _type, _key_type, _value_type, _max_entries) \
struct bpf_map_def SEC("maps") _name = { \
.type = _type, \
.key_size = sizeof(_key_type), \
.value_size = sizeof(_value_type), \
.max_entries = _max_entries, \
};
#define BPF_HASH(_name, _key_type, _value_type) \
BPF_MAP(_name, BPF_MAP_TYPE_HASH, _key_type, _value_type, 10240);
#define BPF_LRU_HASH(_name, _key_type, _value_type) \
BPF_MAP(_name, BPF_MAP_TYPE_LRU_HASH, _key_type, _value_type, 10240);
#define BPF_ARRAY(_name, _value_type, _max_entries) \
BPF_MAP(_name, BPF_MAP_TYPE_ARRAY, u32, _value_type, _max_entries);
#define BPF_PERCPU_ARRAY(_name, _value_type, _max_entries) \
BPF_MAP(_name, BPF_MAP_TYPE_PERCPU_ARRAY, u32, _value_type, _max_entries);
#define BPF_PROG_ARRAY(_name, _max_entries) \
BPF_MAP(_name, BPF_MAP_TYPE_PROG_ARRAY, u32, u32, _max_entries);
#define BPF_PERF_OUTPUT(_name) \
BPF_MAP(_name, BPF_MAP_TYPE_PERF_EVENT_ARRAY, int, __u32, 1024);
// Stack Traces are slightly different
// in that the value is 1 big byte array
// of the stack addresses
#define BPF_STACK_TRACE(_name, _max_entries) \
struct bpf_map_def SEC("maps") _name = { \
.type = BPF_MAP_TYPE_STACK_TRACE, \
.key_size = sizeof(u32), \
.value_size = sizeof(size_t) * MAX_STACK_DEPTH, \
.max_entries = _max_entries, \
};
#ifdef RHEL_RELEASE_CODE
#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(8, 0))
#define RHEL_RELEASE_GT_8_0
#endif
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 18, 0)
#error Minimal required kernel version is 4.18
#endif
/*=============================== INTERNAL STRUCTS ===========================*/
// Per-event context submitted to userspace alongside every event's arguments.
// Filled by init_context(); the container id is padded so the struct stays aligned.
typedef struct event_context {
    u64 ts;                        // Timestamp (bpf_ktime_get_ns)
    u32 pid;                       // PID as in the userspace term (tgid in its pid namespace)
    u32 tid;                       // TID as in the userspace term (pid in its pid namespace)
    u32 ppid;                      // Parent PID as in the userspace term
    u32 host_pid;                  // PID in host pid namespace
    u32 host_tid;                  // TID in host pid namespace
    u32 host_ppid;                 // Parent PID in host pid namespace
    u32 uid;                       // Real UID (low 32 bits of bpf_get_current_uid_gid)
    u32 mnt_id;                    // Mount namespace inode number
    u32 pid_id;                    // Pid namespace inode number
    char comm[TASK_COMM_LEN];      // Task command name
    char uts_name[TASK_COMM_LEN];  // UTS namespace nodename (truncated to TASK_COMM_LEN)
    char cont_id[16];              // Container ID, padding to 16 to keep the context struct aligned
    u32 eventid;                   // Event identifier (syscall nr or one of the 1000+ ids above)
    s64 retval;                    // Return value of the traced function/syscall
    u32 stack_id;                  // Stack trace id in stack_addresses map (0 if unused)
    u8 argnum;                     // Number of arguments submitted after the context
} context_t;
// Saved function/syscall arguments, persisted between entry and return probes.
typedef struct args {
    unsigned long args[7]; // the last element of this array is used to save the function entry timestamp
} args_t;

// State for chunked binary submission (send_bin tail calls).
typedef struct bin_args {
    u8 type;                      // SEND_VFS_WRITE / SEND_MPROTECT
    u8 metadata[SEND_META_SIZE];  // Fixed-size metadata prepended to each chunk
    char *ptr;                    // Current source pointer to copy from
    loff_t start_off;             // Starting file offset of the data
    unsigned int full_size;       // Total number of bytes to send
    u8 iov_idx;                   // Current index into vec (writev case)
    u8 iov_len;                   // Number of iovec entries
    struct iovec *vec;            // iovec array (writev case), NULL otherwise
} bin_args_t;

// One per-cpu scratch buffer (see bufs map).
typedef struct simple_buf {
    u8 buf[MAX_PERCPU_BUFSIZE];
} buf_t;

// Fixed-size path prefix used to filter vfs_write events (file_filter map).
typedef struct path_filter {
    char path[MAX_PATH_PREF_SIZE];
} path_filter_t;

// Fixed-size string key for comm / uts-name filter maps.
typedef struct string_filter {
    char str[MAX_STR_FILTER_SIZE];
} string_filter_t;

// Truncated container id (CONT_ID_LEN chars + NUL).
typedef struct container_id {
    char id[CONT_ID_LEN+1];
} container_id_t;

// Memory-protection alert payload (mem_prot_alert event).
typedef struct alert {
    u64 ts;     // Timestamp
    u32 msg;    // Encoded message (one of the ALERT_* codes)
    u8 payload; // Non zero if payload is sent to userspace
} alert_t;

// Reduced copy of struct cred submitted with commit_creds events.
// For a good summary about capabilities, see https://lwn.net/Articles/636533/
typedef struct slim_cred {
    uid_t uid;           /* real UID of the task */
    gid_t gid;           /* real GID of the task */
    uid_t suid;          /* saved UID of the task */
    gid_t sgid;          /* saved GID of the task */
    uid_t euid;          /* effective UID of the task */
    gid_t egid;          /* effective GID of the task */
    uid_t fsuid;         /* UID for VFS ops */
    gid_t fsgid;         /* GID for VFS ops */
    u64 cap_inheritable; /* caps our children can inherit */
    u64 cap_permitted;   /* caps we're permitted */
    u64 cap_effective;   /* caps we can actually use */
    u64 cap_bset;        /* capability bounding set */
    u64 cap_ambient;     /* Ambient capability set */
} slim_cred_t;
// IPv4 connection tuple reported with socket events.
typedef struct network_connection_v4 {
    u32 local_address;
    u16 local_port;
    u32 remote_address;
    u16 remote_port;
} net_conn_v4_t;

// IPv6 connection tuple reported with socket events.
typedef struct network_connection_v6 {
    struct in6_addr local_address;
    u16 local_port;
    struct in6_addr remote_address;
    u16 remote_port;
    u32 flowinfo;
    u32 scope_id;
} net_conn_v6_t;

// Key of the network_map: identifies the local endpoint of a socket.
// IPv4 addresses are stored as IPv4-mapped in6_addr values.
typedef struct local_net_id {
    struct in6_addr address;
    u16 port;
    u16 protocol;
} local_net_id_t;

// Metadata header sent ahead of captured packet bytes (net_events output).
typedef struct net_packet {
    uint64_t ts;
    u32 event_id;
    u32 host_tid;
    char comm[TASK_COMM_LEN];
    u32 len;                             // Captured packet length in bytes
    struct in6_addr src_addr, dst_addr;
    __be16 src_port, dst_port;           // Network byte order
    u8 protocol;
} net_packet_t;

// Debug record emitted for the DEBUG_NET_* events when CONFIG_DEBUG_NET is set.
typedef struct net_debug {
    uint64_t ts;
    u32 event_id;
    u32 host_tid;
    char comm[TASK_COMM_LEN];
    struct in6_addr local_addr, remote_addr;
    __be16 local_port, remote_port;      // Network byte order
    u8 protocol;
    int old_state;                       // Previous TCP state (inet_sock_set_state)
    int new_state;                       // New TCP state (inet_sock_set_state)
    u64 sk_ptr;                          // Kernel address of the struct sock
} net_debug_t;

// Minimal process context stored per network identifier (network_map value).
typedef struct net_ctx {
    u32 host_tid;
    char comm[TASK_COMM_LEN];
} net_ctx_t;

// Process context extended with the bound local port (sock_ctx_map value).
typedef struct net_ctx_ext {
    u32 host_tid;
    char comm[TASK_COMM_LEN];
    __be16 local_port;
} net_ctx_ext_t;
/*================================ KERNEL STRUCTS =============================*/
#ifndef CORE
// Non-CO:RE builds: minimal local mirrors of kernel-internal structs that the
// uapi headers do not expose. Only the leading fields used by this program are
// declared (e.g. struct mount is needed by real_mount()'s container_of); their
// layout must match the kernel headers this file is compiled against.
struct mnt_namespace {
    atomic_t count;
    struct ns_common ns;
    // ...
};
struct mount {
    struct hlist_node mnt_hash;
    struct mount *mnt_parent;
    struct dentry *mnt_mountpoint;
    struct vfsmount mnt;
    // ...
};
#endif
/*=================================== MAPS =====================================*/
BPF_HASH(config_map, u32, u32);                         // Various configurations (CONFIG_* keys)
BPF_HASH(chosen_events_map, u32, u32);                  // Events chosen by the user
BPF_HASH(traced_pids_map, u32, u32);                    // Keep track of traced pids
BPF_HASH(new_pids_map, u32, u32);                       // Keep track of the processes of newly executed binaries
BPF_HASH(new_pidns_map, u32, u32);                      // Keep track of new pid namespaces
BPF_HASH(pid_to_cont_id_map, u32, container_id_t);      // Map pid to container id
BPF_HASH(args_map, u64, args_t);                        // Persist args info between function entry and return
BPF_HASH(ret_map, u64, u64);                            // Persist return value to be used in tail calls
BPF_HASH(inequality_filter, u32, u64);                  // Used to filter events by some uint field either by < or >
BPF_HASH(uid_filter, u32, u32);                         // Used to filter events by UID, for specific UIDs either by == or !=
BPF_HASH(pid_filter, u32, u32);                         // Used to filter events by PID
BPF_HASH(mnt_ns_filter, u64, u32);                      // Used to filter events by mount namespace id
BPF_HASH(pid_ns_filter, u64, u32);                      // Used to filter events by pid namespace id
BPF_HASH(uts_ns_filter, string_filter_t, u32);          // Used to filter events by uts namespace name
BPF_HASH(comm_filter, string_filter_t, u32);            // Used to filter events by command name
BPF_HASH(bin_args_map, u64, bin_args_t);                // Persist args for send_bin function
BPF_HASH(sys_32_to_64_map, u32, u32);                   // Map 32bit syscalls numbers to 64bit syscalls numbers
BPF_HASH(params_types_map, u32, u64);                   // Encoded parameters types for event
BPF_HASH(params_names_map, u32, u64);                   // Encoded parameters names for event
BPF_HASH(sockfd_map, u32, u32);                         // Persist sockfd from syscalls to be used in the corresponding lsm hooks
BPF_LRU_HASH(sock_ctx_map, u64, net_ctx_ext_t);         // Socket address to process context
BPF_LRU_HASH(network_map, local_net_id_t, net_ctx_t);   // Network identifier to process context
BPF_ARRAY(file_filter, path_filter_t, 3);               // Used to filter vfs_write events
BPF_ARRAY(string_store, path_filter_t, 1);              // Store strings from userspace
BPF_PERCPU_ARRAY(bufs, buf_t, MAX_BUFFERS);             // Percpu global buffer variables
BPF_PERCPU_ARRAY(bufs_off, u32, MAX_BUFFERS);           // Holds offsets to bufs respectively
BPF_PROG_ARRAY(prog_array, MAX_TAIL_CALL);              // Used to store programs for tail calls
BPF_PROG_ARRAY(sys_enter_tails, MAX_EVENT_ID);          // Used to store programs for tail calls
BPF_PROG_ARRAY(sys_exit_tails, MAX_EVENT_ID);           // Used to store programs for tail calls
BPF_STACK_TRACE(stack_addresses, MAX_STACK_ADDRESSES);  // Used to store stack traces
/*================================== EVENTS ====================================*/
BPF_PERF_OUTPUT(events);                                // Events submission
BPF_PERF_OUTPUT(file_writes);                           // File writes events submission
BPF_PERF_OUTPUT(net_events);                            // Network events submission
/*================== KERNEL VERSION DEPENDANT HELPER FUNCTIONS =================*/

// The get_*_ns_id helpers below read the namespace inode number (ns.inum),
// which uniquely identifies one namespace instance, from a task's nsproxy.

// Mount namespace id of the given nsproxy.
static __always_inline u32 get_mnt_ns_id(struct nsproxy *ns)
{
    struct mnt_namespace* mntns = READ_KERN(ns->mnt_ns);
    return READ_KERN(mntns->ns.inum);
}

// Pid namespace id (the namespace children of this task will be created in).
static __always_inline u32 get_pid_ns_id(struct nsproxy *ns)
{
    struct pid_namespace* pidns = READ_KERN(ns->pid_ns_for_children);
    return READ_KERN(pidns->ns.inum);
}

// UTS namespace id of the given nsproxy.
static __always_inline u32 get_uts_ns_id(struct nsproxy *ns)
{
    struct uts_namespace* uts_ns = READ_KERN(ns->uts_ns);
    return READ_KERN(uts_ns->ns.inum);
}

// IPC namespace id of the given nsproxy.
static __always_inline u32 get_ipc_ns_id(struct nsproxy *ns)
{
    struct ipc_namespace* ipc_ns = READ_KERN(ns->ipc_ns);
    return READ_KERN(ipc_ns->ns.inum);
}

// Network namespace id of the given nsproxy.
static __always_inline u32 get_net_ns_id(struct nsproxy *ns)
{
    struct net* net_ns = READ_KERN(ns->net_ns);
    return READ_KERN(net_ns ->ns.inum);
}

// Cgroup namespace id of the given nsproxy.
static __always_inline u32 get_cgroup_ns_id(struct nsproxy *ns)
{
    struct cgroup_namespace* cgroup_ns = READ_KERN(ns->cgroup_ns);
    return READ_KERN(cgroup_ns->ns.inum);
}
// Convenience wrappers: read task->nsproxy once and delegate to the
// corresponding get_*_ns_id helper above.

static __always_inline u32 get_task_mnt_ns_id(struct task_struct *task)
{
    return get_mnt_ns_id(READ_KERN(task->nsproxy));
}

static __always_inline u32 get_task_pid_ns_id(struct task_struct *task)
{
    return get_pid_ns_id(READ_KERN(task->nsproxy));
}

static __always_inline u32 get_task_uts_ns_id(struct task_struct *task)
{
    return get_uts_ns_id(READ_KERN(task->nsproxy));
}

static __always_inline u32 get_task_ipc_ns_id(struct task_struct *task)
{
    return get_ipc_ns_id(READ_KERN(task->nsproxy));
}

static __always_inline u32 get_task_net_ns_id(struct task_struct *task)
{
    return get_net_ns_id(READ_KERN(task->nsproxy));
}

static __always_inline u32 get_task_cgroup_ns_id(struct task_struct *task)
{
    return get_cgroup_ns_id(READ_KERN(task->nsproxy));
}
// Return the task's tid as seen from inside its own pid namespace
// (the pid number at the namespace's nesting level).
//
// Fix: the pre-4.19 branch must also be excluded for CO:RE builds
// (task->pids[] no longer exists there and the else-branch comment already
// says "and CO:RE"). The sibling get_task_ns_ppid() has the correct
// `&& !defined(CORE)` guard; this makes all three helpers consistent.
static __always_inline u32 get_task_ns_pid(struct task_struct *task)
{
    struct nsproxy *namespaceproxy = READ_KERN(task->nsproxy);
    struct pid_namespace *pid_ns_children = READ_KERN(namespaceproxy->pid_ns_for_children);
    unsigned int level = READ_KERN(pid_ns_children->level);
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) && !defined(RHEL_RELEASE_GT_8_0)) && !defined(CORE)
    // kernel 4.14-4.18:
    return READ_KERN(READ_KERN(task->pids[PIDTYPE_PID].pid)->numbers[level].nr);
#else
    // kernel 4.19 onwards, and CO:RE:
    struct pid *tpid = READ_KERN(task->thread_pid);
    return READ_KERN(tpid->numbers[level].nr);
#endif
}
// Return the task's tgid (userspace "pid") as seen from inside its own pid
// namespace, by resolving the thread group leader's pid at the namespace level.
//
// Fix: the pre-4.19 branch must also be excluded for CO:RE builds
// (group_leader->pids[] no longer exists there and the else-branch comment
// already says "and CO:RE"). The sibling get_task_ns_ppid() has the correct
// `&& !defined(CORE)` guard; this makes all three helpers consistent.
static __always_inline u32 get_task_ns_tgid(struct task_struct *task)
{
    struct nsproxy *namespaceproxy = READ_KERN(task->nsproxy);
    struct pid_namespace *pid_ns_children = READ_KERN(namespaceproxy->pid_ns_for_children);
    unsigned int level = READ_KERN(pid_ns_children->level);
    struct task_struct *group_leader = READ_KERN(task->group_leader);
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) && !defined(RHEL_RELEASE_GT_8_0)) && !defined(CORE)
    // kernel 4.14-4.18:
    return READ_KERN(READ_KERN(group_leader->pids[PIDTYPE_PID].pid)->numbers[level].nr);
#else
    // kernel 4.19 onwards, and CO:RE:
    struct pid *tpid = READ_KERN(group_leader->thread_pid);
    return READ_KERN(tpid->numbers[level].nr);
#endif
}
// Return the parent task's pid as seen from the parent's own pid namespace.
// Note the namespace level is taken from the *parent's* nsproxy.
static __always_inline u32 get_task_ns_ppid(struct task_struct *task)
{
    struct task_struct *real_parent = READ_KERN(task->real_parent);
    struct nsproxy *namespaceproxy = READ_KERN(real_parent->nsproxy);
    struct pid_namespace *pid_ns_children = READ_KERN(namespaceproxy->pid_ns_for_children);
    unsigned int level = READ_KERN(pid_ns_children->level);
#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 19, 0) && !defined(RHEL_RELEASE_GT_8_0)) && !defined(CORE)
    // kernel 4.14-4.18:
    return READ_KERN(READ_KERN(real_parent->pids[PIDTYPE_PID].pid)->numbers[level].nr);
#else
    // kernel 4.19 onwards, and CO:RE:
    struct pid *tpid = READ_KERN(real_parent->thread_pid);
    return READ_KERN(tpid->numbers[level].nr);
#endif
}
// Return a kernel pointer to the task's UTS nodename (hostname as seen
// inside the task's UTS namespace). Caller must copy it with bpf_probe_read*.
static __always_inline char * get_task_uts_name(struct task_struct *task)
{
    struct nsproxy *np = READ_KERN(task->nsproxy);
    struct uts_namespace *uts_ns = READ_KERN(np->uts_ns);
    return READ_KERN(uts_ns->name.nodename);
}

// Host-view pid of the task's real parent.
static __always_inline u32 get_task_ppid(struct task_struct *task)
{
    struct task_struct *parent = READ_KERN(task->real_parent);
    return READ_KERN(parent->pid);
}

// Host-view pid (kernel "pid", i.e. the userspace tid) of the task itself.
static __always_inline u32 get_task_host_pid(struct task_struct *task)
{
    return READ_KERN(task->pid);
}

// Flags (PF_* bits) of the task's real parent.
static __always_inline int get_task_parent_flags(struct task_struct *task)
{
    struct task_struct *parent = READ_KERN(task->real_parent);
    return READ_KERN(parent->flags);
}

// Filename of the binary being loaded by this linux_binprm.
static __always_inline const char * get_binprm_filename(struct linux_binprm *bprm)
{
    return READ_KERN(bprm->filename);
}

// Directory name of a cgroup (its kernfs node name), or NULL if it has none.
static __always_inline const char * get_cgroup_dirname(struct cgroup *cgrp)
{
    struct kernfs_node *kn = READ_KERN(cgrp->kn);
    if (kn == NULL)
        return NULL;
    return READ_KERN(kn->name);
}
// True if the task is a 32-bit process running on a 64-bit x86 kernel
// (TS_COMPAT bit set in thread_info.status). Always false on other arches.
static __always_inline bool is_x86_compat(struct task_struct *task)
{
#if defined(bpf_target_x86)
    return READ_KERN(task->thread_info.status) & TS_COMPAT;
#else
    return false;
#endif
}

// True if the task is a 32-bit process running on an arm64 kernel
// (_TIF_32BIT flag set). Always false on other arches.
static __always_inline bool is_arm64_compat(struct task_struct *task)
{
#if defined(bpf_target_arm64)
    return READ_KERN(task->thread_info.flags) & _TIF_32BIT;
#else
    return false;
#endif
}

// Arch-dispatching wrapper for the two compat checks above.
static __always_inline bool is_compat(struct task_struct *task)
{
#if defined(bpf_target_x86)
    return is_x86_compat(task);
#elif defined(bpf_target_arm64)
    return is_arm64_compat(task);
#else
    return false;
#endif
}

#if defined(bpf_target_x86)
// Locate the saved user-mode pt_regs at the top of the task's kernel stack
// (x86 only): one pt_regs below stack + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING.
static __always_inline struct pt_regs* get_task_pt_regs(struct task_struct *task)
{
    void* __ptr = READ_KERN(task->stack) + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;
    return ((struct pt_regs *)__ptr) - 1;
}
#endif
// Read the current syscall number from the saved user registers (x86 only):
// orig_ax holds the syscall nr on entry. For 32-bit (compat) tasks the number
// is translated to its 64-bit equivalent via sys_32_to_64_map, since 64-bit
// syscall numbers double as event ids; returns -1 if no mapping exists.
// On non-x86 targets this helper always returns 0.
static __always_inline int get_syscall_ev_id_from_regs()
{
#if defined(bpf_target_x86)
    struct task_struct *task = (struct task_struct *)bpf_get_current_task();
    struct pt_regs *real_ctx = get_task_pt_regs(task);
    int syscall_nr = READ_KERN(real_ctx->orig_ax);
    if (is_x86_compat(task)) {
        // Translate 32bit syscalls to 64bit syscalls (which also represent the event ids)
        u32 *id_64 = bpf_map_lookup_elem(&sys_32_to_64_map, &syscall_nr);
        if (id_64 == 0)
            return -1;
        syscall_nr = *id_64;
    }
    return syscall_nr;
#else
    return 0;
#endif
}
// VFS / mm accessors: thin READ_KERN wrappers that safely dereference one
// kernel field each, so call sites stay free of raw probe reads.

// Root dentry of a mounted filesystem.
static __always_inline struct dentry* get_mnt_root_ptr_from_vfsmnt(struct vfsmount *vfsmnt)
{
    return READ_KERN(vfsmnt->mnt_root);
}

// Parent dentry (used when walking a path upwards).
static __always_inline struct dentry* get_d_parent_ptr_from_dentry(struct dentry *dentry)
{
    return READ_KERN(dentry->d_parent);
}

// Name (qstr: pointer + length) of a dentry.
static __always_inline struct qstr get_d_name_from_dentry(struct dentry *dentry)
{
    return READ_KERN(dentry->d_name);
}

// Executable file backing a linux_binprm.
static __always_inline struct file* get_file_ptr_from_bprm(struct linux_binprm *bprm)
{
    return READ_KERN(bprm->file);
}

// mm_struct of a task (NULL for kernel threads).
static __always_inline struct mm_struct* get_mm_from_task(struct task_struct *task)
{
    return READ_KERN(task->mm);
}

// Userspace address range holding the process argv strings.
static __always_inline unsigned long get_arg_start_from_mm(struct mm_struct *mm)
{
    return READ_KERN(mm->arg_start);
}

static __always_inline unsigned long get_arg_end_from_mm(struct mm_struct *mm)
{
    return READ_KERN(mm->arg_end);
}

// Number of argv entries of the program being exec'ed.
static __always_inline int get_argc_from_bprm(struct linux_binprm *bprm)
{
    return READ_KERN(bprm->argc);
}

// Userspace address range holding the process environment strings.
static __always_inline unsigned long get_env_start_from_mm(struct mm_struct *mm)
{
    return READ_KERN(mm->env_start);
}

static __always_inline unsigned long get_env_end_from_mm(struct mm_struct *mm)
{
    return READ_KERN(mm->env_end);
}

// Number of environment entries of the program being exec'ed.
static __always_inline int get_envc_from_bprm(struct linux_binprm *bprm)
{
    return READ_KERN(bprm->envc);
}

// Device number of the filesystem (superblock) a file lives on.
static __always_inline dev_t get_dev_from_file(struct file *file)
{
    struct inode *f_inode = READ_KERN(file->f_inode);
    struct super_block *i_sb = READ_KERN(f_inode->i_sb);
    return READ_KERN(i_sb->s_dev);
}

// Inode number of a file.
static __always_inline unsigned long get_inode_nr_from_file(struct file *file)
{
    struct inode *f_inode = READ_KERN(file->f_inode);
    return READ_KERN(f_inode->i_ino);
}

// Inode mode (type + permission bits) of a file.
static __always_inline unsigned short get_inode_mode_from_file(struct file *file)
{
    struct inode *f_inode = READ_KERN(file->f_inode);
    return READ_KERN(f_inode->i_mode);
}

// Path (vfsmount + dentry) of a file, copied by value.
static __always_inline struct path get_path_from_file(struct file *file)
{
    return READ_KERN(file->f_path);
}

// VM_* protection/flags of a virtual memory area.
static __always_inline unsigned long get_vma_flags(struct vm_area_struct *vma)
{
    return READ_KERN(vma->vm_flags);
}

// Recover the containing struct mount from its embedded vfsmount member.
static inline struct mount *real_mount(struct vfsmount *mnt)
{
    return container_of(mnt, struct mount, mnt);
}
// Socket accessors: READ_KERN wrappers over inet_sock / sock / ipv6_pinfo
// fields. Port fields are in network byte order as stored by the kernel,
// except inet_num which the kernel keeps in host byte order.

// Bound local address (may differ from inet_saddr for multihomed hosts).
static __always_inline u32 get_inet_rcv_saddr(struct inet_sock *inet)
{
    return READ_KERN(inet->inet_rcv_saddr);
}

// Source address used for transmit.
static __always_inline u32 get_inet_saddr(struct inet_sock *inet)
{
    return READ_KERN(inet->inet_saddr);
}

// Destination (peer) address.
static __always_inline u32 get_inet_daddr(struct inet_sock *inet)
{
    return READ_KERN(inet->inet_daddr);
}

// Source port (network byte order).
static __always_inline u16 get_inet_sport(struct inet_sock *inet)
{
    return READ_KERN(inet->inet_sport);
}

// Local port (host byte order).
static __always_inline u16 get_inet_num(struct inet_sock *inet)
{
    return READ_KERN(inet->inet_num);
}

// Destination port (network byte order).
static __always_inline u16 get_inet_dport(struct inet_sock *inet)
{
    return READ_KERN(inet->inet_dport);
}

// The struct sock behind a BSD socket.
static __always_inline struct sock* get_socket_sock(struct socket *socket)
{
    return READ_KERN(socket->sk);
}

// Address family (AF_INET, AF_INET6, ...) of a sock.
static __always_inline u16 get_sock_family(struct sock *sock)
{
    return READ_KERN(sock->sk_family);
}

static __always_inline u16 get_sock_protocol(struct sock *sock)
{
#if (LINUX_VERSION_CODE < KERNEL_VERSION(5, 6, 0))
    /* kernel 4.18-5.5:
    this is a workaround for reading sk_protocol bit-field, because bpf_probe_read doesn't really support reading
    this type of fields. so we use the sk_gso_max_segs field and go 24 bits backwards (i.e. 3 bytes) because
    sk_type is 16 bits, and sk_protocol is 8 bits (i.e. 1 byte).
    note: we define protocol as u16 so it'll be compatible with newer kernels.
    */
    u16 protocol = 0;
    bpf_probe_read(&protocol, 1, (void *)(&sock->sk_gso_max_segs) - 3);
    return protocol;
#else
    // kernel 5.6 onwards: sk_protocol became a plain field and can be read directly
    return READ_KERN(sock->sk_protocol);
#endif
}

// Address family from a generic sockaddr.
static __always_inline u16 get_sockaddr_family(struct sockaddr *address)
{
    return READ_KERN(address->sa_family);
}

// Bound local IPv6 address of a sock.
static __always_inline struct in6_addr get_sock_v6_rcv_saddr(struct sock *sock)
{
    return READ_KERN(sock->sk_v6_rcv_saddr);
}

// Source IPv6 address from the per-socket IPv6 state.
static __always_inline struct in6_addr get_ipv6_pinfo_saddr(struct ipv6_pinfo *np)
{
    return READ_KERN(np->saddr);
}

// IPv6 flow label of the socket.
static __always_inline u32 get_ipv6_pinfo_flow_label(struct ipv6_pinfo *np)
{
    return READ_KERN(np->flow_label);
}

// Destination (peer) IPv6 address of a sock.
static __always_inline struct in6_addr get_sock_v6_daddr(struct sock *sock)
{
    return READ_KERN(sock->sk_v6_daddr);
}

// Interface index the socket is bound to (0 if unbound).
static __always_inline int get_sock_bound_dev_if(struct sock *sock)
{
    return READ_KERN(sock->sk_bound_dev_if);
}

// Read sk_state with an explicit probe read (byte-sized field next to
// bit-fields, so READ_KERN on the struct member is avoided here).
static __always_inline volatile unsigned char get_sock_state(struct sock *sock)
{
    volatile unsigned char sk_state_own_impl;
    bpf_probe_read((void *)&sk_state_own_impl, sizeof(sk_state_own_impl), (const void *)&sock->sk_state);
    return sk_state_own_impl;
}

// Pointer to the socket's IPv6 state (pinet6), read with an explicit probe read.
static __always_inline struct ipv6_pinfo* get_inet_pinet6(struct inet_sock *inet)
{
    struct ipv6_pinfo *pinet6_own_impl;
    bpf_probe_read(&pinet6_own_impl, sizeof(pinet6_own_impl), &inet->pinet6);
    return pinet6_own_impl;
}
/*============================== HELPER FUNCTIONS ==============================*/
// Test whether the NUL-terminated string 'prefix' is a prefix of 'str',
// examining at most n characters. Returns 1 only when the terminating NUL
// of 'prefix' is reached within the first n positions; returns 0 on any
// mismatch, and also when the prefix is n characters or longer ("too long"
// for the bound), matching the original filter semantics.
static __inline int has_prefix(char *prefix, char *str, int n)
{
    #pragma unroll
    for (int idx = 0; idx < n; idx++) {
        char pc = prefix[idx];
        if (pc == '\0')
            return 1;          // whole prefix consumed: it matches
        if (pc != str[idx])
            return 0;          // divergence before the prefix ended
    }
    // Exhausted the bound without seeing the prefix terminator.
    return 0;
}
// Populate an event context with the current task's identity: host and
// namespace-local pid/tid/ppid, uid, mount/pid namespace ids, comm, the UTS
// nodename, the container id (when one is mapped for this tid), and a
// timestamp. stack_id is zeroed; callers set it if stack capture is enabled.
// Always returns 0.
static __always_inline int init_context(context_t *context)
{
    struct task_struct *task;
    task = (struct task_struct *)bpf_get_current_task();
    u64 id = bpf_get_current_pid_tgid();
    // Low 32 bits: kernel pid (userspace tid); high 32 bits: tgid (userspace pid).
    context->host_tid = id;
    context->host_pid = id >> 32;
    context->host_ppid = get_task_ppid(task);
    context->tid = get_task_ns_pid(task);
    context->pid = get_task_ns_tgid(task);
    context->ppid = get_task_ns_ppid(task);
    context->mnt_id = get_task_mnt_ns_id(task);
    context->pid_id = get_task_pid_ns_id(task);
    context->uid = bpf_get_current_uid_gid();  // low 32 bits hold the uid
    bpf_get_current_comm(&context->comm, sizeof(context->comm));
    char * uts_name = get_task_uts_name(task);
    if (uts_name)
        bpf_probe_read_str(&context->uts_name, TASK_COMM_LEN, uts_name);
    // Attach the container id if this tid was previously associated with one.
    container_id_t *container_id = bpf_map_lookup_elem(&pid_to_cont_id_map, &context->host_tid);
    if (container_id != NULL) {
        __builtin_memcpy(context->cont_id, container_id->id, CONT_ID_LEN);
    }
    context->ts = bpf_ktime_get_ns();
    // Clean Stack Trace ID
    context->stack_id = 0;
    return 0;
}
// Look up a runtime configuration value (one of the CONFIG_* keys) in
// config_map. An absent key reads as 0, i.e. "not enabled".
static __always_inline int get_config(u32 key)
{
    u32 *value = bpf_map_lookup_elem(&config_map, &key);
    return (value == NULL) ? 0 : *value;
}
// returns 1 if you should trace based on uid, 0 if not
static __always_inline int uint_filter_matches(int filter_config, void *filter_map, u64 key, u32 less_idx, u32 greater_idx)
{
int config = get_config(filter_config);
if (!config)
return 1;
u8* equality = bpf_map_lookup_elem(filter_map, &key);
if (equality != NULL) {
return *equality;