-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Expand file tree
/
Copy pathcommon.h
More file actions
929 lines (810 loc) · 23.8 KB
/
common.h
File metadata and controls
929 lines (810 loc) · 23.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2016-2021 Authors of Cilium */
#ifndef __LIB_COMMON_H_
#define __LIB_COMMON_H_
#include <bpf/ctx/ctx.h>
#include <bpf/api.h>
#include <linux/if_ether.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/socket.h>
#include "eth.h"
#include "endian.h"
#include "mono.h"
#include "config.h"
/* NEEDS_TIMEOUT is currently defined unconditionally.
 *
 * FIXME: GH-3239 LRU logic is not handling timeouts gracefully enough.
 * The guard below is kept, commented out, as the conditional form that
 * would only define NEEDS_TIMEOUT when HAVE_LRU_HASH_MAP_TYPE is absent:
 *
 *   #ifndef HAVE_LRU_HASH_MAP_TYPE
 *   #define NEEDS_TIMEOUT 1
 *   #endif
 */
#define NEEDS_TIMEOUT 1
#ifndef AF_INET
#define AF_INET 2
#endif
#ifndef AF_INET6
#define AF_INET6 10
#endif
#ifndef IP_DF
#define IP_DF 0x4000
#endif
#ifndef EVENT_SOURCE
#define EVENT_SOURCE 0
#endif
#ifndef THIS_MTU
/* If not available, fall back to generically detected MTU instead of more
* fine-grained per-device MTU.
*/
# define THIS_MTU MTU
#endif
/* UDP port numbers of tunnel encapsulations, in host byte order:
 * 4789 is the IANA-assigned VXLAN port, 6081 the IANA-assigned Geneve port
 * and 8472 the legacy default VXLAN port of the Linux kernel.
 */
#define PORT_UDP_VXLAN 4789
#define PORT_UDP_GENEVE 6081
#define PORT_UDP_VXLAN_LINUX 8472
/* CONDITIONAL_PREALLOC evaluates to 0 when PREALLOCATE_MAPS is defined and to
 * BPF_F_NO_PREALLOC otherwise.
 * NOTE(review): presumably meant to be used as a map creation flag - confirm
 * against the map definitions that reference it.
 */
#ifdef PREALLOCATE_MAPS
#define CONDITIONAL_PREALLOC 0
#else
#define CONDITIONAL_PREALLOC BPF_F_NO_PREALLOC
#endif
/* ENABLE_ENCAP_HOST_REMAP is switched on only when IPv6 is enabled and IPsec
 * is not.
 *
 * TODO: ipsec v6 tunnel datapath still needs separate fixing
 */
#ifndef ENABLE_IPSEC
# ifdef ENABLE_IPV6
# define ENABLE_ENCAP_HOST_REMAP 1
# endif
#endif
/* XDP to SKB transferred meta data.
 * NOTE(review): how the value is transported between the two hooks is not
 * visible in this header.
 */
#define XFER_PKT_NO_SVC 1 /* Skip upper service handling. */
/* Call identifiers.
 * NOTE(review): presumably slot indices for tail calls - confirm against the
 * users of these constants.
 *
 * The *_TO_HOST_POLICY_ONLY names are aliases that share the number of the
 * matching *_TO_LXC_POLICY_ONLY entry. CILIUM_CALL_SIZE is one more than the
 * highest number assigned below (24) and must be bumped when adding entries.
 *
 * These are shared with test/bpf/check-complexity.sh, when modifying any of
 * the below, that script should also be updated.
 */
#define CILIUM_CALL_DROP_NOTIFY 1
#define CILIUM_CALL_ERROR_NOTIFY 2
#define CILIUM_CALL_SEND_ICMP6_ECHO_REPLY 3
#define CILIUM_CALL_HANDLE_ICMP6_NS 4
#define CILIUM_CALL_SEND_ICMP6_TIME_EXCEEDED 5
#define CILIUM_CALL_ARP 6
#define CILIUM_CALL_IPV4_FROM_LXC 7
#define CILIUM_CALL_NAT64 8
#define CILIUM_CALL_NAT46 9
#define CILIUM_CALL_IPV6_FROM_LXC 10
#define CILIUM_CALL_IPV4_TO_LXC_POLICY_ONLY 11
#define CILIUM_CALL_IPV4_TO_HOST_POLICY_ONLY CILIUM_CALL_IPV4_TO_LXC_POLICY_ONLY
#define CILIUM_CALL_IPV6_TO_LXC_POLICY_ONLY 12
#define CILIUM_CALL_IPV6_TO_HOST_POLICY_ONLY CILIUM_CALL_IPV6_TO_LXC_POLICY_ONLY
#define CILIUM_CALL_IPV4_TO_ENDPOINT 13
#define CILIUM_CALL_IPV6_TO_ENDPOINT 14
#define CILIUM_CALL_IPV4_NODEPORT_NAT 15
#define CILIUM_CALL_IPV6_NODEPORT_NAT 16
#define CILIUM_CALL_IPV4_NODEPORT_REVNAT 17
#define CILIUM_CALL_IPV6_NODEPORT_REVNAT 18
#define CILIUM_CALL_IPV4_ENCAP_NODEPORT_NAT 19
#define CILIUM_CALL_IPV4_NODEPORT_DSR 20
#define CILIUM_CALL_IPV6_NODEPORT_DSR 21
#define CILIUM_CALL_IPV4_FROM_HOST 22
#define CILIUM_CALL_IPV6_FROM_HOST 23
#define CILIUM_CALL_IPV6_ENCAP_NODEPORT_NAT 24
#define CILIUM_CALL_SIZE 25
/* MAC address held in a 64-bit integer. A MAC address is 48 bits wide, so two
 * bytes are unused; which bytes carry the address is not defined here.
 */
typedef __u64 mac_t;
/* IPv6 address, 16 bytes, exposed through three overlapping views:
 * four 32-bit words (p1..p4), two 64-bit words (d1, d2) and 16 raw bytes.
 * The union is packed, so it imposes no alignment on enclosing structures.
 */
union v6addr {
struct {
__u32 p1;
__u32 p2;
__u32 p3;
__u32 p4;
};
struct {
__u64 d1;
__u64 d2;
};
__u8 addr[16];
} __packed;
/* validate_ethertype() extracts the L3 protocol of the packet into *proto.
 *
 * On L2-less devices (ETH_HLEN == 0) the protocol is taken from the ctx and
 * the function always succeeds. Otherwise the Ethernet header must be fully
 * within the packet data and its h_proto is stored to *proto (as found in
 * the header, i.e. not byte-swapped).
 *
 * Returns true if *proto holds a usable EtherType, false if the packet is too
 * short or the frame is not Ethernet II. Note that in the latter case *proto
 * has already been written.
 */
static __always_inline bool validate_ethertype(struct __ctx_buff *ctx,
					       __u16 *proto)
{
	void *pkt = ctx_data(ctx);
	void *pkt_end = ctx_data_end(ctx);
	const struct ethhdr *l2 = pkt;

	if (ETH_HLEN == 0) {
		/* No L2 header present on this device, so rely on the
		 * protocol recorded in the ctx instead.
		 */
		*proto = ctx_get_protocol(ctx);
		return true;
	}

	/* The bounds check must precede any read through 'l2'. */
	if (pkt + ETH_HLEN > pkt_end)
		return false;

	*proto = l2->h_proto;

	/* Values below ETH_P_802_3_MIN are 802.3 length fields rather than
	 * EtherTypes; non-Ethernet II framing is unsupported.
	 */
	return bpf_ntohs(*proto) >= ETH_P_802_3_MIN;
}
/* Common implementation behind the revalidate_data*() macros.
 *
 * Optionally pulls eth_hlen + l3_len bytes of packet data, then re-reads the
 * data and data_end pointers from the ctx and verifies that at least that
 * many bytes are available.
 *
 * @ctx:       packet context
 * @data_:     out: start of packet data
 * @data_end_: out: end of packet data
 * @l3:        out: data + eth_hlen, i.e. start of the L3 header
 * @l3_len:    number of bytes required after the L2 header
 * @pull:      if true, ctx_pull_data() is invoked before reading the pointers
 * @eth_hlen:  length of the L2 header in bytes
 *
 * Returns true and fills in the three output pointers when the bounds check
 * passes. Returns false otherwise, leaving the outputs untouched.
 */
static __always_inline __maybe_unused bool
____revalidate_data_pull(struct __ctx_buff *ctx, void **data_, void **data_end_,
void **l3, const __u32 l3_len, const bool pull,
__u8 eth_hlen)
{
const __u64 tot_len = eth_hlen + l3_len;
void *data_end;
void *data;
/* Verifier workaround (invalid size of register spill).
 * NOTE(review): the original wording said "do this unconditionally", yet the
 * call is guarded by 'pull' - confirm which is intended. The return value of
 * ctx_pull_data() is deliberately not inspected here; the bounds check below
 * decides the outcome.
 */
if (pull)
ctx_pull_data(ctx, tot_len);
/* Pointers must be (re)loaded after the pull above. */
data_end = ctx_data_end(ctx);
data = ctx_data(ctx);
if (data + tot_len > data_end)
return false;
/* Verifier workaround: pointer arithmetic on pkt_end prohibited. */
*data_ = data;
*data_end_ = data_end;
*l3 = data + eth_hlen;
return true;
}
/* Same as ____revalidate_data_pull() with the L2 header length fixed to
 * ETH_HLEN. This is the helper the revalidate_data*() macros expand to.
 */
static __always_inline __maybe_unused bool
__revalidate_data_pull(struct __ctx_buff *ctx, void **data, void **data_end,
void **l3, const __u32 l3_len, const bool pull)
{
return ____revalidate_data_pull(ctx, data, data_end, l3, l3_len, pull,
ETH_HLEN);
}
/* revalidate_data_pull() initializes the provided pointers from the ctx and
 * ensures that the data is pulled in for access. Should be used the first
 * time that the ctx data is accessed, subsequent calls can be made to
 * revalidate_data() which is cheaper.
 * Returns true if 'ctx' is long enough for an IP header of the provided type,
 * false otherwise.
 *
 * In all macros below, 'ip' is a pointer to the caller's L3 header pointer
 * (e.g. &ip4); the required length is derived from the pointed-to header
 * type via sizeof(**(ip)). The argument is parenthesized so that arbitrary
 * expressions can be passed.
 */
#define revalidate_data_pull(ctx, data, data_end, ip)			\
	__revalidate_data_pull(ctx, data, data_end, (void **)(ip),	\
			       sizeof(**(ip)), true)
/* revalidate_data_maybe_pull() does the same as revalidate_data_pull()
 * except that the skb data pull is controlled by the "pull" argument.
 */
#define revalidate_data_maybe_pull(ctx, data, data_end, ip, pull)	\
	__revalidate_data_pull(ctx, data, data_end, (void **)(ip),	\
			       sizeof(**(ip)), pull)
/* revalidate_data() initializes the provided pointers from the ctx.
 * Returns true if 'ctx' is long enough for an IP header of the provided type,
 * false otherwise.
 */
#define revalidate_data(ctx, data, data_end, ip)			\
	__revalidate_data_pull(ctx, data, data_end, (void **)(ip),	\
			       sizeof(**(ip)), false)
/* revalidate_data_with_eth_hlen() is revalidate_data() with an explicit L2
 * header length instead of ETH_HLEN.
 */
#define revalidate_data_with_eth_hlen(ctx, data, data_end, ip, eth_len)	\
	____revalidate_data_pull(ctx, data, data_end, (void **)(ip),	\
				 sizeof(**(ip)), false, eth_len)
/* Macros for working with L3 cilium defined IPV6 addresses.
 *
 * BPF_V6_4() stores four 32-bit words into the p1..p4 members of 'dst'
 * (an object shaped like union v6addr). BPF_V6() produces those words by
 * expanding fetch_ipv6(), defined elsewhere, on its arguments; BPF_V6_1()
 * only exists to get that expansion done before BPF_V6_4() matches its
 * parameters.
 *
 * 'dst' and the word arguments are parenthesized so that expressions such as
 * '*ptr' or 'a ? b : c' are handled correctly.
 */
#define BPF_V6(dst, ...)	BPF_V6_1(dst, fetch_ipv6(__VA_ARGS__))
#define BPF_V6_1(dst, ...)	BPF_V6_4(dst, __VA_ARGS__)
#define BPF_V6_4(dst, a1, a2, a3, a4)		\
	({					\
		(dst).p1 = (a1);		\
		(dst).p2 = (a2);		\
		(dst).p3 = (a3);		\
		(dst).p4 = (a4);		\
	})
/* Address family selectors.
 * NOTE(review): presumably the values stored in endpoint_key.family - confirm
 * against the code filling in the key.
 */
#define ENDPOINT_KEY_IPV4 1
#define ENDPOINT_KEY_IPV6 2
/* Structure representing an IPv4 or IPv6 address, being used for:
 * - key as endpoints map
 * - key for tunnel endpoint map
 * - value for tunnel endpoint map
 *
 * The address union is 16 bytes wide: ip4 overlays the first four bytes of
 * ip6 and pad1..pad3 cover the remainder. The structure is packed, giving
 * 20 bytes in total.
 */
struct endpoint_key {
union {
struct {
__u32 ip4;
__u32 pad1;
__u32 pad2;
__u32 pad3;
};
union v6addr ip6;
};
__u8 family;
__u8 key;
__u16 pad5;
} __packed;
/* NOTE(review): presumably a bit for endpoint_info.flags - confirm. */
#define ENDPOINT_F_HOST 1 /* Special endpoint representing local host */
/* Value of endpoint map.
 *
 * The structure is not packed: on ABIs that align __u64 to 8 bytes the
 * compiler inserts 4 bytes of padding between 'flags' and 'mac'.
 */
struct endpoint_info {
__u32 ifindex;
__u16 unused; /* used to be sec_label, no longer used */
__u16 lxc_id;
__u32 flags;
mac_t mac;
mac_t node_mac;
__u32 pad[4];
};
/* Pair of 32-bit values describing an egress destination.
 * NOTE(review): both fields look like IPv4 addresses; their byte order is not
 * established by the plain __u32 type - confirm against the users.
 */
struct egress_info {
__u32 egress_ip;
__u32 tunnel_endpoint;
};
/* 64-bit identifier.
 * NOTE(review): presumably the lookup key matching struct edt_info - confirm.
 */
struct edt_id {
__u64 id;
};
/* Rate-limiting state. All members are 64-bit, so there is no implicit
 * padding; pad[] reserves 32 further bytes.
 * NOTE(review): units of bps and the clock used for t_last / t_horizon_drop
 * are not visible in this header.
 */
struct edt_info {
__u64 bps;
__u64 t_last;
__u64 t_horizon_drop;
__u64 pad[4];
};
/* Information about a remote endpoint: its security label, tunnel endpoint
 * and a key value.
 * The structure is not packed, so with 4-byte alignment of __u32 the single
 * 'key' byte is followed by 3 bytes of implicit tail padding.
 * NOTE(review): 'key' is presumably an encryption key index - confirm.
 */
struct remote_endpoint_info {
__u32 sec_label;
__u32 tunnel_endpoint;
__u8 key;
};
/* Policy lookup key, 8 bytes without implicit padding.
 * 'dport' is declared as plain __u16, so its byte order is not expressed in
 * the type. 'egress' is a one-bit direction flag; the remaining 7 bits of
 * that byte are padding.
 */
struct policy_key {
__u32 sec_label;
__u16 dport;
__u8 protocol;
__u8 egress:1,
pad:7;
};
/* Policy entry. 'proxy_port' is in network byte order (__be16), 'deny' is a
 * one-bit flag, and 'packets' / 'bytes' are 64-bit counters. The explicit
 * pad members fill the first 8 bytes so that the counters are naturally
 * aligned without compiler-inserted padding.
 */
struct policy_entry {
__be16 proxy_port;
__u8 deny:1,
pad:7;
__u8 pad0;
__u16 pad1;
__u16 pad2;
__u64 packets;
__u64 bytes;
};
/* Metrics key, 8 bytes: reason, a 2-bit direction and reserved space.
 * NOTE(review): 'dir' presumably takes the METRIC_* values; METRIC_SERVICE (3)
 * also fits into the two bits although the field comment lists only 1 and 2.
 */
struct metrics_key {
__u8 reason; /* 0: forwarded, >0 dropped */
__u8 dir:2, /* 1: ingress 2: egress */
pad:6;
__u16 reserved[3]; /* reserved for future extension */
};
/* Metrics value: a 64-bit event count and a 64-bit byte count. */
struct metrics_value {
__u64 count;
__u64 bytes;
};
/* Policy direction. */
enum {
POLICY_INGRESS = 1,
POLICY_EGRESS = 2,
};
/* Policy match kinds, numbered 0 to 4.
 * NOTE(review): precise matching semantics are defined by the policy lookup
 * code, not by this header.
 */
enum {
POLICY_MATCH_NONE = 0,
POLICY_MATCH_L3_ONLY = 1,
POLICY_MATCH_L3_L4 = 2,
POLICY_MATCH_L4_ONLY = 3,
POLICY_MATCH_ALL = 4,
};
/* Capture direction; uses the same numbering as the policy direction. */
enum {
CAPTURE_INGRESS = 1,
CAPTURE_EGRESS = 2,
};
/* Notification types. The enumerators carry no explicit values, so they are
 * numbered consecutively from 0 in declaration order; inserting or reordering
 * entries changes the numbering.
 * NOTE(review): presumably the value of the 'type' member declared by
 * NOTIFY_COMMON_HDR - confirm.
 */
enum {
CILIUM_NOTIFY_UNSPEC, /* 0 */
CILIUM_NOTIFY_DROP, /* 1 */
CILIUM_NOTIFY_DBG_MSG, /* 2 */
CILIUM_NOTIFY_DBG_CAPTURE, /* 3 */
CILIUM_NOTIFY_TRACE, /* 4 */
CILIUM_NOTIFY_POLICY_VERDICT, /* 5 */
CILIUM_NOTIFY_CAPTURE, /* 6 */
};
/* Member declarations shared by all notification structures (8 bytes).
 * Meant to be expanded inside a struct definition.
 */
#define NOTIFY_COMMON_HDR \
__u8 type; \
__u8 subtype; \
__u16 source; \
__u32 hash;
/* Common header followed by the packet capture members (16 bytes in total). */
#define NOTIFY_CAPTURE_HDR \
NOTIFY_COMMON_HDR \
__u32 len_orig; /* Length of original packet */ \
__u16 len_cap; /* Length of captured bytes */ \
__u16 version; /* Capture header version */
/* Designated initializers for the NOTIFY_COMMON_HDR members. Meant to be
 * expanded inside a struct initializer. Note that the expansion refers to a
 * variable named 'ctx', which must be in scope at the point of use.
 */
#define __notify_common_hdr(t, s) \
.type = (t), \
.subtype = (s), \
.source = EVENT_SOURCE, \
.hash = get_hash_recalc(ctx)
/* Designated initializers for the capture members of NOTIFY_CAPTURE_HDR. */
#define __notify_pktcap_hdr(o, c) \
.len_orig = (o), \
.len_cap = (c), \
.version = NOTIFY_CAPTURE_VER
/* Capture notifications version. Must be incremented when format changes. */
#define NOTIFY_CAPTURE_VER 1
/* Default trace payload length, 128 as an unsigned long long constant; can be
 * overridden by defining TRACE_PAYLOAD_LEN before including this header.
 */
#ifndef TRACE_PAYLOAD_LEN
#define TRACE_PAYLOAD_LEN 128ULL
#endif
/* Fallback for headers that do not provide BPF_F_PSEUDO_HDR (bit 4). */
#ifndef BPF_F_PSEUDO_HDR
# define BPF_F_PSEUDO_HDR (1ULL << 4)
#endif
/* IS_ERR() is true when x is negative (as all DROP_* codes are) or equal to
 * CTX_ACT_DROP. The argument is parenthesized so that compound expressions
 * such as 'a ? b : c' are evaluated as a whole. Note that x is expanded
 * twice, hence it must be free of side effects.
 */
#define IS_ERR(x) (unlikely(((x) < 0) || ((x) == CTX_ACT_DROP)))
/* Cilium IPSec code to indicate packet needs to be handled
 * by IPSec stack. Maps to CTX_ACT_OK.
 */
#define IPSEC_ENDPOINT CTX_ACT_OK
/* Return value to indicate that proxy redirection is required (bit 16). */
#define POLICY_ACT_PROXY_REDIRECT (1 << 16)
/* Cilium error codes, must NOT overlap with TC return codes.
 * These also serve as drop reasons for metrics,
 * where reason > 0 corresponds to -(DROP_*)
 *
 * The codes occupy the contiguous range -130 to -181; entries marked unused
 * keep their number reserved. NAT_PUNT_TO_STACK at the end is an alias of
 * DROP_NAT_NOT_NEEDED.
 *
 * These are shared with pkg/monitor/api/drop.go and api/v1/flow/flow.proto.
 * When modifying any of the below, those files should also be updated.
 */
#define DROP_UNUSED1 -130 /* unused */
#define DROP_UNUSED2 -131 /* unused */
#define DROP_INVALID_SIP -132
#define DROP_POLICY -133
#define DROP_INVALID -134
#define DROP_CT_INVALID_HDR -135
#define DROP_FRAG_NEEDED -136
#define DROP_CT_UNKNOWN_PROTO -137
#define DROP_UNUSED4 -138 /* unused */
#define DROP_UNKNOWN_L3 -139
#define DROP_MISSED_TAIL_CALL -140
#define DROP_WRITE_ERROR -141
#define DROP_UNKNOWN_L4 -142
#define DROP_UNKNOWN_ICMP_CODE -143
#define DROP_UNKNOWN_ICMP_TYPE -144
#define DROP_UNKNOWN_ICMP6_CODE -145
#define DROP_UNKNOWN_ICMP6_TYPE -146
#define DROP_NO_TUNNEL_KEY -147
#define DROP_UNUSED5 -148 /* unused */
#define DROP_UNUSED6 -149 /* unused */
#define DROP_UNKNOWN_TARGET -150
#define DROP_UNROUTABLE -151
#define DROP_UNUSED7 -152 /* unused */
#define DROP_CSUM_L3 -153
#define DROP_CSUM_L4 -154
#define DROP_CT_CREATE_FAILED -155
#define DROP_INVALID_EXTHDR -156
#define DROP_FRAG_NOSUPPORT -157
#define DROP_NO_SERVICE -158
#define DROP_UNUSED8 -159 /* unused */
#define DROP_NO_TUNNEL_ENDPOINT -160
#define DROP_UNUSED9 -161 /* unused */
#define DROP_EDT_HORIZON -162
#define DROP_UNKNOWN_CT -163
#define DROP_HOST_UNREACHABLE -164
#define DROP_NO_CONFIG -165
#define DROP_UNSUPPORTED_L2 -166
#define DROP_NAT_NO_MAPPING -167
#define DROP_NAT_UNSUPP_PROTO -168
#define DROP_NO_FIB -169
#define DROP_ENCAP_PROHIBITED -170
#define DROP_INVALID_IDENTITY -171
#define DROP_UNKNOWN_SENDER -172
#define DROP_NAT_NOT_NEEDED -173 /* Mapped as drop code, though drop not necessary. */
#define DROP_IS_CLUSTER_IP -174
#define DROP_FRAG_NOT_FOUND -175
#define DROP_FORBIDDEN_ICMP6 -176
#define DROP_NOT_IN_SRC_RANGE -177
#define DROP_PROXY_LOOKUP_FAILED -178
#define DROP_PROXY_SET_FAILED -179
#define DROP_PROXY_UNKNOWN_PROTO -180
#define DROP_POLICY_DENY -181
#define NAT_PUNT_TO_STACK DROP_NAT_NOT_NEEDED
/* Cilium metrics reasons for forwarding packets and other stats.
 * If a reason is larger than the values below, it is a drop reason and
 * its value corresponds to -(DROP_*), see above.
 *
 * The values 1 and 2 are not assigned here.
 *
 * These are shared with pkg/monitor/api/drop.go.
 * When modifying any of the below, that file should also be updated.
 */
#define REASON_FORWARDED 0
#define REASON_PLAINTEXT 3
#define REASON_DECRYPT 4
#define REASON_LB_NO_BACKEND_SLOT 5
#define REASON_LB_NO_BACKEND 6
#define REASON_LB_REVNAT_UPDATE 7
#define REASON_LB_REVNAT_STALE 8
#define REASON_FRAG_PACKET 9
#define REASON_FRAG_PACKET_UPDATE 10
#define REASON_MISSED_CUSTOM_CALL 11
/* Lookup scope for externalTrafficPolicy=Local; stored in the 'scope' member
 * of struct lb4_key / struct lb6_key.
 */
#define LB_LOOKUP_SCOPE_EXT 0
#define LB_LOOKUP_SCOPE_INT 1
/* Cilium metrics direction for dropping/forwarding packet */
#define METRIC_INGRESS 1
#define METRIC_EGRESS 2
#define METRIC_SERVICE 3
/* Magic ctx->mark identifies a packet's origin and encryption status.
 *
 * The upper 16 bits plus lower 8 bits (i.e. mask 0xFFFF00FF) contain the
 * packet's security identity. The lower/upper halves are swapped to recover
 * the identity.
 *
 * The 4 bits at 0x0F00 provide
 * - the magic marker values which indicate whether the packet is coming from
 *   an ingress or egress proxy, a local process and its current encryption
 *   status.
 *
 * The 4 bits at 0xF000 provide
 * - the key index to use for encryption when multiple keys are in-flight.
 *   In the IPsec case this becomes the SPI on the wire.
 */
#define MARK_MAGIC_HOST_MASK 0x0F00
#define MARK_MAGIC_PROXY_INGRESS 0x0A00
#define MARK_MAGIC_PROXY_EGRESS 0x0B00
#define MARK_MAGIC_HOST 0x0C00
#define MARK_MAGIC_DECRYPT 0x0D00
#define MARK_MAGIC_ENCRYPT 0x0E00
#define MARK_MAGIC_IDENTITY 0x0F00 /* mark carries identity */
#define MARK_MAGIC_TO_PROXY 0x0200
#define MARK_MAGIC_KEY_ID 0xF000
#define MARK_MAGIC_KEY_MASK 0xFF00
/* IPSec cannot be configured with NodePort BPF today, hence non-conflicting
 * overlap with MARK_MAGIC_KEY_ID.
 */
#define MARK_MAGIC_SNAT_DONE 0x1500
/* MARK_MAGIC_HEALTH_IPIP_DONE can overlap with MARK_MAGIC_SNAT_DONE with both
 * being mutually exclusive given the former is only used under DSR. Used to
 * push health probe packets to the ipip tunnel device & to avoid looping back.
 */
#define MARK_MAGIC_HEALTH_IPIP_DONE MARK_MAGIC_SNAT_DONE
/* MARK_MAGIC_HEALTH can overlap with MARK_MAGIC_DECRYPT with both being
 * mutually exclusive. Note, MARK_MAGIC_HEALTH is user-facing UAPI for LB!
 */
#define MARK_MAGIC_HEALTH MARK_MAGIC_DECRYPT
/* IPv4 option used to carry service addr and port for DSR. Lower 16bits set to
 * zero so that they can be OR'd with service port.
 *
 * Copy = 1 (option is copied to each fragment)
 * Class = 0 (control option)
 * Number = 26 (not used according to [1])
 * Len = 8 (option type (1) + option len (1) + addr (4) + port (2))
 *
 * The option type byte is therefore 0b1_00_11010 = 0x9a, followed by the
 * length byte 0x08.
 *
 * [1]: https://www.iana.org/assignments/ip-parameters/ip-parameters.xhtml
 */
#define DSR_IPV4_OPT_32 0x9a080000
#define DSR_IPV4_OPT_MASK 0xffff0000
#define DSR_IPV4_DPORT_MASK 0x0000ffff
/* IPv6 option type of Destination Option used to carry service IPv6 addr and
 * port for DSR.
 *
 * 0b00 - "skip over this option and continue processing the header"
 * 0 - "Option Data does not change en-route"
 * 11011 - Unassigned [1]
 *
 * [1]: https://www.iana.org/assignments/ipv6-parameters/ipv6-parameters.xhtml#ipv6-parameters-2
 */
#define DSR_IPV6_OPT_TYPE 0x1B
#define DSR_IPV6_OPT_LEN 0x14 /* to store ipv6 addr + port */
#define DSR_IPV6_EXT_LEN 0x2 /* = (sizeof(dsr_opt_v6) - 8) / 8 */
/* We cap key index at 4 bits because mark value is used to map ctx to key.
 * 15 is the largest value that fits into the MARK_MAGIC_KEY_ID nibble.
 */
#define MAX_KEY_INDEX 15
/* encrypt_key is the index into the encrypt map (packed, 4 bytes) */
struct encrypt_key {
__u32 ctx;
} __packed;
/* encrypt_config is the current encryption context on the node
 * (packed, 1 byte)
 */
struct encrypt_config {
__u8 encrypt_key;
} __packed;
/**
 * or_encrypt_key - build the encryption mark for a key index
 * @key: key index; only its low 4 bits are used
 *
 * Moves the low nibble of @key into bits 12-15 (the MARK_MAGIC_KEY_ID
 * position) and combines it with MARK_MAGIC_ENCRYPT.
 */
static __always_inline __u32 or_encrypt_key(__u8 key)
{
	const __u32 key_nibble = (__u32)key & 0x0F;

	return (key_nibble << 12) | MARK_MAGIC_ENCRYPT;
}
/*
 * ctx->tc_index uses
 *
 * cilium_host @egress
 * bpf_host -> bpf_lxc
 *
 * Every flag below is a distinct single bit, so flags can be combined with
 * bitwise OR.
 */
#define TC_INDEX_F_SKIP_INGRESS_PROXY 1
#define TC_INDEX_F_SKIP_EGRESS_PROXY 2
#define TC_INDEX_F_SKIP_NODEPORT 4
#define TC_INDEX_F_SKIP_RECIRCULATION 8
#define TC_INDEX_F_SKIP_HOST_FIREWALL 16
/* ctx_{load,store}_meta() usage:
 *
 * The five enumerators (values 0 to 4) name the available slots. Each
 * #define is another name for the slot declared directly above it;
 * "non-overlapping" states that the aliased uses of one slot are not expected
 * to be live at the same time.
 */
enum {
CB_SRC_LABEL,
#define CB_PORT CB_SRC_LABEL /* Alias, non-overlapping */
#define CB_HINT CB_SRC_LABEL /* Alias, non-overlapping */
#define CB_PROXY_MAGIC CB_SRC_LABEL /* Alias, non-overlapping */
#define CB_ENCRYPT_MAGIC CB_SRC_LABEL /* Alias, non-overlapping */
#define CB_DST_ENDPOINT_ID CB_SRC_LABEL /* Alias, non-overlapping */
CB_IFINDEX,
#define CB_ADDR_V4 CB_IFINDEX /* Alias, non-overlapping */
#define CB_ADDR_V6_1 CB_IFINDEX /* Alias, non-overlapping */
#define CB_ENCRYPT_IDENTITY CB_IFINDEX /* Alias, non-overlapping */
#define CB_IPCACHE_SRC_LABEL CB_IFINDEX /* Alias, non-overlapping */
CB_POLICY,
#define CB_ADDR_V6_2 CB_POLICY /* Alias, non-overlapping */
CB_NAT46_STATE,
#define CB_NAT CB_NAT46_STATE /* Alias, non-overlapping */
#define CB_ADDR_V6_3 CB_NAT46_STATE /* Alias, non-overlapping */
#define CB_FROM_HOST CB_NAT46_STATE /* Alias, non-overlapping */
CB_CT_STATE,
#define CB_ADDR_V6_4 CB_CT_STATE /* Alias, non-overlapping */
#define CB_ENCRYPT_DST CB_CT_STATE /* Alias, non-overlapping,
 * Not used by xfrm.
 */
#define CB_CUSTOM_CALLS CB_CT_STATE /* Alias, non-overlapping */
};
/* State values for NAT46, numbered from 0 in declaration order:
 * NAT46_CLEAR = 0, NAT64 = 1, NAT46 = 2.
 */
enum {
NAT46_CLEAR,
NAT64,
NAT46,
};
/* Tuple flags. TUPLE_F_OUT is 0, i.e. an outgoing flow is expressed by the
 * absence of the TUPLE_F_IN bit; RELATED and SERVICE are separate bits.
 */
#define TUPLE_F_OUT 0 /* Outgoing flow */
#define TUPLE_F_IN 1 /* Incoming flow */
#define TUPLE_F_RELATED 2 /* Flow represents related packets */
#define TUPLE_F_SERVICE 4 /* Flow represents packets to service */
/* Connection tracking directions. */
#define CT_EGRESS 0
#define CT_INGRESS 1
#define CT_SERVICE 2
/* Lower bound for NAT_MIN_EGRESS: NODEPORT_PORT_MIN_NAT with NodePort
 * enabled, EPHEMERAL_MIN otherwise. Both are defined outside this header.
 */
#ifdef ENABLE_NODEPORT
#define NAT_MIN_EGRESS NODEPORT_PORT_MIN_NAT
#else
#define NAT_MIN_EGRESS EPHEMERAL_MIN
#endif
/* Connection tracking states, numbered from 0 in declaration order
 * (CT_NEW = 0 ... CT_REOPENED = 4).
 */
enum {
CT_NEW,
CT_ESTABLISHED,
CT_REPLY,
CT_RELATED,
CT_REOPENED,
};
/* Service flags (lb{4,6}_service->flags).
 * 'flags' is a __u8 and all eight bits are assigned below; further flags
 * have to go into flags2.
 */
enum {
SVC_FLAG_EXTERNAL_IP = (1 << 0), /* External IPs */
SVC_FLAG_NODEPORT = (1 << 1), /* NodePort service */
SVC_FLAG_LOCAL_SCOPE = (1 << 2), /* externalTrafficPolicy=Local */
SVC_FLAG_HOSTPORT = (1 << 3), /* hostPort forwarding */
SVC_FLAG_AFFINITY = (1 << 4), /* sessionAffinity=clientIP */
SVC_FLAG_LOADBALANCER = (1 << 5), /* LoadBalancer service */
SVC_FLAG_ROUTABLE = (1 << 6), /* Not a surrogate/ClusterIP entry */
SVC_FLAG_SOURCE_RANGE = (1 << 7), /* Check LoadBalancer source range */
};
/* Service flags (lb{4,6}_service->flags2) */
enum {
SVC_FLAG_LOCALREDIRECT = (1 << 0), /* local redirect */
};
/* IPv6 connection tracking tuple. Packed, 38 bytes. Ports are in network
 * byte order (__be16). Mind the differing naming conventions of the address
 * and port members described below.
 */
struct ipv6_ct_tuple {
/* Address fields are reversed, i.e.,
 * these field names are correct for reply direction traffic.
 */
union v6addr daddr;
union v6addr saddr;
/* The order of dport+sport must not be changed!
 * These field names are correct for original direction traffic.
 */
__be16 dport;
__be16 sport;
__u8 nexthdr;
__u8 flags;
} __packed;
/* IPv4 connection tracking tuple. Packed, 14 bytes. Addresses and ports are
 * in network byte order (__be32 / __be16). Mind the differing naming
 * conventions of the address and port members described below.
 */
struct ipv4_ct_tuple {
/* Address fields are reversed, i.e.,
 * these field names are correct for reply direction traffic.
 */
__be32 daddr;
__be32 saddr;
/* The order of dport+sport must not be changed!
 * These field names are correct for original direction traffic.
 */
__be16 dport;
__be16 sport;
__u8 nexthdr;
__u8 flags;
} __packed;
/* Connection tracking entry: per-direction packet/byte counters, lifetime,
 * a 16-bit flag word (8 one-bit flags plus 8 reserved bits) and auxiliary
 * state. The structure is not packed; member order and offsets matter, see
 * the note on src_sec_id.
 */
struct ct_entry {
__u64 rx_packets;
__u64 rx_bytes;
__u64 tx_packets;
__u64 tx_bytes;
__u32 lifetime;
__u16 rx_closing:1,
tx_closing:1,
nat46:1,
lb_loopback:1,
seen_non_syn:1,
node_port:1,
proxy_redirect:1, /* Connection is redirected to a proxy */
dsr:1,
reserved:8;
__u16 rev_nat_index;
/* In the kernel ifindex is u32, so we need to check in cilium-agent
 * that ifindex of a NodePort device is <= MAX(u16).
 */
__u16 ifindex;
/* *x_flags_seen represents the OR of all TCP flags seen for the
 * transmit/receive direction of this entry.
 */
__u8 tx_flags_seen;
__u8 rx_flags_seen;
__u32 src_sec_id; /* Used from userspace proxies, do not change offset! */
/* last_*x_report is a timestamp of the last time a monitor
 * notification was sent for the transmit/receive direction.
 */
__u32 last_tx_report;
__u32 last_rx_report;
};
/* IPv6 service lookup key; IPv6 counterpart of struct lb4_key. 24 bytes with
 * explicit trailing padding.
 */
struct lb6_key {
union v6addr address; /* Service virtual IPv6 address */
__be16 dport; /* L4 port filter, if unset, all ports apply */
__u16 backend_slot; /* Backend iterator, 0 indicates the svc frontend */
__u8 proto; /* L4 protocol, currently not used (set to 0) */
__u8 scope; /* LB_LOOKUP_SCOPE_* for externalTrafficPolicy=Local */
__u8 pad[2];
};
/* See lb4_service comments; the layout is identical to struct lb4_service.
 * 'flags' holds SVC_FLAG_* bits of the first flag enum, 'flags2' those of the
 * second.
 */
struct lb6_service {
union {
__u32 backend_id; /* Backend ID in lb6_backends */
__u32 affinity_timeout; /* In seconds, only for svc frontend */
};
__u16 count;
__u16 rev_nat_index;
__u8 flags;
__u8 flags2;
__u8 pad[2];
};
/* See lb4_backend comments. IPv6 backend: address, port in network byte
 * order, protocol and one byte of explicit padding (20 bytes).
 */
struct lb6_backend {
union v6addr address;
__be16 port;
__u8 proto;
__u8 pad;
};
/* Wrapper holding a single lb6_backend as 'peer'. */
struct lb6_health {
struct lb6_backend peer;
};
/* IPv6 reverse NAT entry: address and port (network byte order).
 * Packed, 18 bytes.
 */
struct lb6_reverse_nat {
union v6addr address;
__be16 port;
} __packed;
/* Socket-cookie based IPv6 reverse NAT tuple: cookie, address and port
 * (network byte order), followed by explicit padding.
 */
struct ipv6_revnat_tuple {
__sock_cookie cookie;
union v6addr address;
__be16 port;
__u16 pad;
};
/* IPv6 reverse NAT entry: address, port (network byte order) and the
 * reverse NAT index.
 */
struct ipv6_revnat_entry {
union v6addr address;
__be16 port;
__u16 rev_nat_index;
};
/* IPv4 service lookup key. 12 bytes with explicit trailing padding. */
struct lb4_key {
__be32 address; /* Service virtual IPv4 address */
__be16 dport; /* L4 port filter, if unset, all ports apply */
__u16 backend_slot; /* Backend iterator, 0 indicates the svc frontend */
__u8 proto; /* L4 protocol, currently not used (set to 0) */
__u8 scope; /* LB_LOOKUP_SCOPE_* for externalTrafficPolicy=Local */
__u8 pad[2];
};
/* IPv4 service entry. The leading union is read as backend_id or as
 * affinity_timeout; the latter applies only to the service frontend entry.
 * 'flags' holds SVC_FLAG_* bits of the first flag enum, 'flags2' those of the
 * second. 12 bytes with explicit trailing padding.
 */
struct lb4_service {
union {
__u32 backend_id; /* Backend ID in lb4_backends */
__u32 affinity_timeout; /* In seconds, only for svc frontend */
};
/* For the service frontend, count denotes number of service backend
 * slots (otherwise zero).
 */
__u16 count;
__u16 rev_nat_index; /* Reverse NAT ID in lb4_reverse_nat */
__u8 flags;
__u8 flags2;
__u8 pad[2];
};
/* IPv4 backend, 8 bytes with one byte of explicit padding. */
struct lb4_backend {
__be32 address; /* Service endpoint IPv4 address */
__be16 port; /* L4 port filter */
__u8 proto; /* L4 protocol, currently not used (set to 0) */
__u8 pad;
};
/* Wrapper holding a single lb4_backend as 'peer'. */
struct lb4_health {
struct lb4_backend peer;
};
/* IPv4 reverse NAT entry: address and port, both in network byte order.
 * Packed, 6 bytes.
 */
struct lb4_reverse_nat {
__be32 address;
__be16 port;
} __packed;
/* Socket-cookie based IPv4 reverse NAT tuple: cookie, address and port
 * (network byte order), followed by explicit padding.
 */
struct ipv4_revnat_tuple {
__sock_cookie cookie;
__be32 address;
__be16 port;
__u16 pad;
};
/* IPv4 reverse NAT entry: address, port (network byte order) and the
 * reverse NAT index. 8 bytes.
 */
struct ipv4_revnat_entry {
__be32 address;
__be16 port;
__u16 rev_nat_index;
};
/* Client identifier for IPv4 session affinity: either the client IP or a
 * network namespace cookie. Packed.
 */
union lb4_affinity_client_id {
__u32 client_ip;
__net_cookie client_cookie;
} __packed;
/* IPv4 session affinity key. Packed, with explicit padding members.
 * NOTE(review): 'netns_cookie' presumably selects which member of client_id
 * is valid - confirm against the affinity lookup code.
 */
struct lb4_affinity_key {
union lb4_affinity_client_id client_id;
__u16 rev_nat_id;
__u8 netns_cookie:1,
reserved:7;
__u8 pad1;
__u32 pad2;
} __packed;
/* Client identifier for IPv6 session affinity: either the client IP or a
 * network namespace cookie. Packed.
 */
union lb6_affinity_client_id {
union v6addr client_ip;
__net_cookie client_cookie;
} __packed;
/* IPv6 session affinity key; same member layout after client_id as
 * struct lb4_affinity_key. Packed, with explicit padding members.
 * NOTE(review): 'netns_cookie' presumably selects which member of client_id
 * is valid - confirm against the affinity lookup code.
 */
struct lb6_affinity_key {
union lb6_affinity_client_id client_id;
__u16 rev_nat_id;
__u8 netns_cookie:1,
reserved:7;
__u8 pad1;
__u32 pad2;
} __packed;
/* Session affinity value: last-used timestamp and backend ID. Packed,
 * 16 bytes.
 * NOTE(review): the time unit of last_used is not visible in this header.
 */
struct lb_affinity_val {
__u64 last_used;
__u32 backend_id;
__u32 pad;
} __packed;
/* Pairing of backend ID and reverse NAT ID. Packed, 8 bytes. */
struct lb_affinity_match {
__u32 backend_id;
__u16 rev_nat_id;
__u16 pad;
} __packed;
/* Connection tracking state. The flag word holds four one-bit flags plus
 * 12 reserved bits.
 * NOTE(review): backend_id is 16 bits wide here whereas
 * lb4_service.backend_id is a __u32; confirm that backend IDs above 65535
 * cannot occur or are handled by the callers.
 */
struct ct_state {
__u16 rev_nat_index;
__u16 loopback:1,
node_port:1,
proxy_redirect:1, /* Connection is redirected to a proxy */
dsr:1,
reserved:12;
__be32 addr;
__be32 svc_addr;
__u32 src_sec_id;
__u16 ifindex;
__u16 backend_id; /* Backend ID in lb4_backends */
};
/* Number of bits occupied by everything in STRUCT that follows its leading
 * struct bpf_lpm_trie_key, i.e. 8 * (sizeof(STRUCT) - header size).
 */
#define SRC_RANGE_STATIC_PREFIX(STRUCT) \
(8 * (sizeof(STRUCT) - sizeof(struct bpf_lpm_trie_key)))
/* IPv4 source range key: LPM header followed by rev_nat_id, padding and the
 * address.
 */
struct lb4_src_range_key {
struct bpf_lpm_trie_key lpm_key;
__u16 rev_nat_id;
__u16 pad;
__u32 addr;
};
/* IPv6 source range key: LPM header followed by rev_nat_id, padding and the
 * address.
 */
struct lb6_src_range_key {
struct bpf_lpm_trie_key lpm_key;
__u16 rev_nat_id;
__u16 pad;
union v6addr addr;
};
/* redirect_ep() hands the packet over to the device given by ifindex.
 *
 * Without ENABLE_HOST_REDIRECT no redirect is attempted; CTX_ACT_OK is
 * returned so that tc processing terminates and the stack takes care of
 * forwarding, e.g. in the ipvlan case.
 *
 * With ENABLE_HOST_REDIRECT, redirect() is used when the caller requests the
 * CPU backlog queue (needs_backlog) or ENABLE_REDIRECT_FAST is not set; the
 * backlog is needed whenever a fast ingress -> ingress switch is impossible
 * and an ingress -> egress netns traversal (or vice versa) is required.
 * In the remaining case redirect_peer() is used.
 *
 * Returns the result of the redirect helper, or CTX_ACT_OK.
 */
static __always_inline int redirect_ep(struct __ctx_buff *ctx __maybe_unused,
				       int ifindex __maybe_unused,
				       bool needs_backlog __maybe_unused)
{
#ifndef ENABLE_HOST_REDIRECT
	return CTX_ACT_OK;
#else
	const bool fast_switch = !needs_backlog &&
				 is_defined(ENABLE_REDIRECT_FAST);

	if (!fast_switch)
		return redirect(ifindex, 0);

# ifdef ENCAP_IFINDEX
	/* When coming from overlay, we need to set packet type
	 * to HOST as otherwise we might get dropped in IP layer.
	 */
	ctx_change_type(ctx, PACKET_HOST);
# endif /* ENCAP_IFINDEX */
	return redirect_peer(ifindex, 0);
#endif /* ENABLE_HOST_REDIRECT */
}
/* LPM trie key for IPv4: LPM header followed by the 4 address bytes. */
struct lpm_v4_key {
struct bpf_lpm_trie_key lpm;
__u8 addr[4];
};
/* LPM trie key for IPv6: LPM header followed by the 16 address bytes. */
struct lpm_v6_key {
struct bpf_lpm_trie_key lpm;
__u8 addr[16];
};
/* Value type paired with the LPM keys above; currently a single byte. */
struct lpm_val {
/* Just dummy for now. */
__u8 flags;
};
#include "overloadable.h"
#endif /* __LIB_COMMON_H_ */