From 8a51fb6fe0dc10f80f34eb5855a679c43ab62f16 Mon Sep 17 00:00:00 2001 From: Fei Chen Date: Fri, 4 Aug 2023 04:27:10 -0700 Subject: [PATCH] log sampled invalid server ids Summary: While analyzing an increase in quic_cid_real0_pkts, I found it useful to know which server ids are invalid. This change samples an invalid server id in the bpf program via quic_stats and logs it in katranlb. Reviewed By: lima1756 Differential Revision: D48018555 fbshipit-source-id: 3bb120a1f823a9f91069e7a4d3fa598763f088ed --- katran/lib/BalancerStructs.h | 1 + katran/lib/KatranLb.cpp | 12 ++++++++++++ katran/lib/KatranLb.h | 6 ++++++ katran/lib/bpf/balancer_kern.c | 1 + katran/lib/bpf/balancer_structs.h | 1 + 5 files changed, 21 insertions(+) diff --git a/katran/lib/BalancerStructs.h b/katran/lib/BalancerStructs.h index 6394e82fa..ea0f0c120 100644 --- a/katran/lib/BalancerStructs.h +++ b/katran/lib/BalancerStructs.h @@ -117,6 +117,7 @@ struct lb_quic_packets_stats { uint64_t ch_routed; uint64_t cid_initial; uint64_t cid_invalid_server_id; + uint64_t cid_invalid_server_id_sample; uint64_t cid_routed; uint64_t cid_unknown_real_dropped; uint64_t cid_v0; diff --git a/katran/lib/KatranLb.cpp b/katran/lib/KatranLb.cpp index bbc01818b..56555ac2c 100644 --- a/katran/lib/KatranLb.cpp +++ b/katran/lib/KatranLb.cpp @@ -38,6 +38,7 @@ namespace katran { namespace { using EventId = monitoring::EventId; +constexpr int kMaxInvalidServerIds = 10000; } // namespace KatranLb::KatranLb( @@ -2058,6 +2059,17 @@ lb_quic_packets_stats KatranLb::getLbQuicPacketsStats() { sum_stat.ch_routed += stat.ch_routed; sum_stat.cid_initial += stat.cid_initial; sum_stat.cid_invalid_server_id += stat.cid_invalid_server_id; + if (stat.cid_invalid_server_id_sample && + (invalidServerIds_.find(stat.cid_invalid_server_id_sample) == + invalidServerIds_.end()) && + invalidServerIds_.size() < kMaxInvalidServerIds) { + LOG(ERROR) << "Invalid server id " + << stat.cid_invalid_server_id_sample << " in quic packet"; + 
invalidServerIds_.insert(stat.cid_invalid_server_id_sample); + if (invalidServerIds_.size() == kMaxInvalidServerIds) { + LOG(ERROR) << "Too many invalid server ids, will skip logging"; + } + } sum_stat.cid_routed += stat.cid_routed; sum_stat.cid_unknown_real_dropped += stat.cid_unknown_real_dropped; sum_stat.cid_v0 += stat.cid_v0; diff --git a/katran/lib/KatranLb.h b/katran/lib/KatranLb.h index 68cbae183..639c3012f 100644 --- a/katran/lib/KatranLb.h +++ b/katran/lib/KatranLb.h @@ -1156,6 +1156,12 @@ class KatranLb { * Callback to be notified when a real is added or deleted */ RealsIdCallback* realsIdCallback_{nullptr}; + + /** + * set of invalid server ids sampled from katran + */ + + std::unordered_set invalidServerIds_; }; } // namespace katran diff --git a/katran/lib/bpf/balancer_kern.c b/katran/lib/bpf/balancer_kern.c index ffbec0051..1469cdb08 100644 --- a/katran/lib/bpf/balancer_kern.c +++ b/katran/lib/bpf/balancer_kern.c @@ -776,6 +776,7 @@ process_packet(struct xdp_md* xdp, __u64 off, bool is_ipv6) { // pos 0 means the entry for the server id is not initialized. // fallback to ch quic_packets_stats->cid_invalid_server_id += 1; + quic_packets_stats->cid_invalid_server_id_sample = qpr.server_id; quic_packets_stats->ch_routed += 1; } else { pckt.real_index = key; diff --git a/katran/lib/bpf/balancer_structs.h b/katran/lib/bpf/balancer_structs.h index c4ca7b228..a53a9f7ca 100644 --- a/katran/lib/bpf/balancer_structs.h +++ b/katran/lib/bpf/balancer_structs.h @@ -143,6 +143,7 @@ struct lb_quic_packets_stats { __u64 ch_routed; __u64 cid_initial; __u64 cid_invalid_server_id; + __u64 cid_invalid_server_id_sample; __u64 cid_routed; __u64 cid_unknown_real_dropped; __u64 cid_v0;