Refactor challenge hashing, add sanity limit

This adds a sanity limit of 200 hash collision entries in the runtime lookup hashtables for ACME challenge responses. The hashtable of course only stores "live" challenge data that hasn't expired yet. In practice (there are some hash collision statistics involved), this limit *should* only kick in if 200 or more challenges are defined for a single domainname, because the hashtable's slot count is fairly oversized, and becomes increasingly even more oversized as more challenges per domainname are configured. The sanity limit is checked when adding a new challenge set, and will cause explicit rejection and thus failure of the gdnsdctl command that attempts to add excess challenges too quickly. This should prevent runaway/broken ACME automations from configuring insane amounts of challenges for a single domainname within an acme_challenge_ttl window, which could otherwise cause all kinds of performance degradation for parts of the daemon. Even if we didn't have this limit, at around 285 entries for the same domainname, they wouldn't all fit in our maximal 16KB TCP response size. Before this patch, when that happened the daemon simply stopped outputting additional TXT records once the 16KB limit was reached, allowing the remainder to eventually come into view for new queries as older ones expired. Explicit rejection seems cleaner from a user/tool-facing perspective than that anyway.
gdnsd · Oct 20, 2018 · 60bc88d · 60bc88d
1 parent 4ba8f15
commit 60bc88d
Showing 1 changed file with 150 additions and 47 deletions.
diff --git a/src/chal.c b/src/chal.c
@@ -32,6 +32,56 @@
 #include <ev.h>
 #include <urcu-qsbr.h>
 
+// General implementation notes:
+// --
+// A "challenge" (chal_t) is a singular challenge, i.e. one single response TXT
+// value configured for one single domainname.
+// --
+// A "challenge set" (cset_t) is a set of up to 100 challenges that were sent
+// to the daemon together in a single control socket transaction (single
+// gdnsdctl invocation).
+// --
+// Expiration is tracked and processed in terms of whole challenge sets, and
+// since the expiration TTL is a fixed configurable value for all csets, we can
+// store all the active sets in a linked list in entry-time order and process
+// in-order expiries off the oldest end of the list.  This linked list has
+// "oldest" and "newest" as the ends of the linked list, and a "next_newer"
+// pointer inside of each cset to link them up.
+// --
+// Separately from all of the above, there's a hashtable (chal_tbl, of type
+// chal_tbl_t) which is used by runtime lookups returning DNS response data.  A
+// fresh new hashtable is created every time a new cset is inserted or an old
+// one expires, and then it's RCU-swapped into place for runtime lookups
+// (before deletion of old csets, in the case of expiry/flush).
+// The hashtable is sized to the next power of two greater than or equal to
+// double the count of all individual challenges configured, and hashes on the
+// domainname the challenge is for.  It's legal and expected to configure
+// multiple simultaneous challenges for a single domainname, and these all go
+// into the same hashtable collision slot together, just like actual hash
+// collisions of distinct names.  The lookup-time code iterates all colliding
+// entries in the collision slot and outputs all exact matches.
+// ---
+// Because of the sizing and collision method here, we don't expect to have
+// long collision lists except in the case of true multi-output duplicates
+// (configuring many distinct responses for one actual domainname).
+// The more multi-output duplicates there are in the total set, the more
+// over-sized the hashtable becomes for the actual number of slots needed,
+// which makes hash duplicates with differing domainnames sharing a collision
+// slot even less-likely.
+// ---
+// Given the above, we define a sanity limit here of 200 entries per collision
+// slot, which should only be realistically triggerable with many entries for
+// an identical domainname.  At somewhere around 285 configured challenges for
+// a single domainname we'd run out of room in our hardcoded maximum 16KB
+// response sizes anyways.  When the sanity limit is reached by the addition of
+// a new cset, the cset is rejected (and gdnsdctl fails).
+// This is useful because it prevents scenarios where a runaway ACME automation
+// tool or script might inadvertently spam thousands or millions of challenges
+// into the daemon through the control socket in a short period of time, which
+// could slow down main-thread processing in general, and maybe even cause
+// slight performance impact to dnsio threads executing challenge queries.
+#define CHAL_COLLIDE_SANITY_MAX 200
+
 // Challenge payload TXT RR len, fully pre-encoded
 // 2 bytes type, 2 bytes class, 4 bytes ttl,
 // 2 bytes rdlen, 1 byte txt chunklen
@@ -75,10 +125,6 @@ struct cset_s_ {
 static cset_t* oldest = NULL;
 static cset_t* newest = NULL;
 
-// Sum of cset_t->count for all live sets in the list above, maintained
-// during insert/remove of cset_t, used to size hashtable
-static size_t chal_count = 0;
-
 // Global expiration timer, ticking towards "oldest" expire-time, if any
 // cset_t are active at all.
 static ev_timer expire_timer;
@@ -91,7 +137,7 @@ static ev_timer expire_timer;
 // isn't a great idea either.
 typedef struct {
     size_t count;
-    chal_t* chals[0];
+    const chal_t* chals[0];
 } chal_collide_t;
 
 // chal_tbl_t is a hashtable indexing into all the chal_t of all the current
@@ -106,38 +152,85 @@ typedef struct {
 // expiry).
 static chal_tbl_t* chal_tbl = NULL;
 
-static void chal_tbl_create_and_swap(cset_t* cset)
+F_NONNULL
+static void chal_tbl_destruct(chal_tbl_t* destructme)
+{
+    // Slots run 0..mask inclusive; free(NULL) is a no-op, so no emptiness test
+    for (size_t slot = 0; slot <= destructme->mask; slot++)
+        free(destructme->tbl[slot]);
+    free(destructme);
+}
+
+// Hash every challenge of a cset into a table that is still being constructed.
+// Returns true (failure) only when check is true and a slot is already full.
+F_NONNULL
+static bool chal_tbl_hash_cset(chal_tbl_t* ctbl, const cset_t* cset, const bool check)
+{
+    for (size_t i = 0; i < cset->count; i++) {
+        const chal_t* ch = &cset->chals[i];
+        chal_collide_t** slotptr = &ctbl->tbl[ch->dnhash & ctbl->mask];
+        const size_t old_ct = *slotptr ? (*slotptr)->count : 0;
+        // Reject before growing: a slot may hold at most CHAL_COLLIDE_SANITY_MAX
+        // entries, so an already-full slot (>=, not >) cannot take another one.
+        if (check && old_ct >= CHAL_COLLIDE_SANITY_MAX)
+            return true;
+        // An empty slot is NULL here; xrealloc is relied on to allocate it
+        *slotptr = xrealloc(*slotptr, sizeof(**slotptr) + (sizeof((*slotptr)->chals[0]) * (old_ct + 1U)));
+        (*slotptr)->chals[old_ct] = ch;
+        (*slotptr)->count = old_ct + 1U;
+    }
+
+    return false;
+}
+
+// Create a new chal_tbl from the csets linked from oldest_set plus optionally
+// one new cset ("adding").  Returns NULL if there is nothing to hash (e.g.
+// re-create after deleting the last cset), or if hashing "adding" fails the
+// collision sanity check.
+static chal_tbl_t* chal_tbl_create(const cset_t* oldest_set, const cset_t* adding)
 {
     chal_tbl_t* new_chal_tbl = NULL;
-    if (cset) {
-        const uint32_t mask = count2mask(chal_count << 1U);
+
+    // Calculate the total challenge count between all existing csets and the
+    // optional new one:
+    unsigned total_count = adding ? adding->count : 0;
+    const cset_t* iter_old = oldest_set;
+    while (iter_old) {
+        total_count += iter_old->count;
+        iter_old = iter_old->next_newer;
+    }
+
+    if (total_count) { // We have things to hash
+        const uint32_t mask = count2mask(total_count << 1U);
         new_chal_tbl = xcalloc(sizeof(*new_chal_tbl) + (sizeof(new_chal_tbl->tbl[0]) * (mask + 1U)));
         new_chal_tbl->mask = mask;
-        while (cset) {
-            for (size_t i = 0; i < cset->count; i++) {
-                chal_t* ch = &cset->chals[i];
-                chal_collide_t** slotptr = &new_chal_tbl->tbl[ch->dnhash & mask];
-                size_t old_ct = 0;
-                if (*slotptr)
-                    old_ct = (*slotptr)->count;
-                *slotptr = xrealloc(*slotptr, sizeof(**slotptr) + (sizeof((*slotptr)->chals[0]) * (old_ct + 1U)));
-                (*slotptr)->chals[old_ct] = ch;
-                (*slotptr)->count = old_ct + 1U;
+        iter_old = oldest_set;
+        while (iter_old) {
+            chal_tbl_hash_cset(new_chal_tbl, iter_old, false);
+            iter_old = iter_old->next_newer;
+        }
+        if (adding) {
+            // Ask the hasher to check size constraints when adding new csets;
+            // this can fail here, in which case we need to destruct our new
+            // table and return NULL.
+            if (chal_tbl_hash_cset(new_chal_tbl, adding, true)) {
+                chal_tbl_destruct(new_chal_tbl);
+                new_chal_tbl = NULL;
             }
-            cset = cset->next_newer;
         }
     }
 
+    return new_chal_tbl;
+}
+
+// Can swap in NULL with this, e.g. for flush
+static void chal_tbl_swap_and_free(chal_tbl_t* new_chal_tbl)
+{
     chal_tbl_t* old_chal_tbl = chal_tbl;
     rcu_assign_pointer(chal_tbl, new_chal_tbl);
     synchronize_rcu();
-    if (old_chal_tbl) {
-        for (size_t i = 0; i <= old_chal_tbl->mask; i++) {
-            if (old_chal_tbl->tbl[i])
-                free(old_chal_tbl->tbl[i]);
-        }
-        free(old_chal_tbl);
-    }
+    if (old_chal_tbl)
+        chal_tbl_destruct(old_chal_tbl);
 }
 
 F_NONNULL
@@ -148,16 +241,18 @@ static void cset_expire(struct ev_loop* loop, ev_timer* t, const int revents V_U
     const ev_tstamp cutoff = ev_now(loop) + TIME_FUDGE;
 
     // Skip past the to-be-expired without actually deleting them yet
-    cset_t* cset = oldest;
-    while (cset && cset->expiry <= cutoff) {
-        chal_count -= cset->count;
-        cset = cset->next_newer;
-    }
-
-    // Create new hashtable, RCU-swap, delete old hashtable
-    chal_tbl_create_and_swap(cset);
-
-    // Delete expired csets
+    cset_t* iter_old = oldest;
+    while (iter_old && iter_old->expiry <= cutoff)
+        iter_old = iter_old->next_newer;
+
+    // Create new hashtable, RCU-swap, delete old hashtable.  New may be NULL
+    // and implicitly empty, if iter_old is NULL because the above loop wants
+    // to expire everything.
+    chal_tbl_t* new_chal_tbl = chal_tbl_create(iter_old, NULL);
+    chal_tbl_swap_and_free(new_chal_tbl);
+
+    // Delete expired csets now that the RCU swap guaranteed no runtime references,
+    // and actually move the global "oldest" as we go
     while (oldest && oldest->expiry <= cutoff) {
         cset_t* nn = oldest->next_newer;
         free(oldest);
@@ -170,25 +265,24 @@ static void cset_expire(struct ev_loop* loop, ev_timer* t, const int revents V_U
         ev_timer_start(loop, t);
     } else {
         newest = NULL;
-        gdnsd_assert(!chal_count);
     }
 }
 
 void cset_flush(struct ev_loop* loop)
 {
-    // Create new empty hashtable, RCU-swap, delete old hashtable
-    chal_tbl_create_and_swap(NULL);
+    // RCU-swap a NULL in and delete old hashtable
+    chal_tbl_swap_and_free(NULL);
 
-    // Delete expired csets
+    // Delete all csets, as if they all expired, updating "oldest" as we go
+    // until it becomes NULL
     while (oldest) {
-        chal_count -= oldest->count;
         cset_t* nn = oldest->next_newer;
         free(oldest);
         oldest = nn;
     }
     newest = NULL;
-    gdnsd_assert(!chal_count);
 
+    // Kill expire timer, nothing to expire
     if (loop) {
         ev_timer* t = &expire_timer;
         ev_timer_stop(loop, t);
@@ -221,13 +315,12 @@ bool cset_create(struct ev_loop* loop, size_t ttl_remain, size_t count, size_t d
 
     cset_t* cset = xmalloc(sizeof(*cset) + (sizeof(cset->chals[0]) * count));
     cset->count = count;
-    chal_count += count;
     if (!ttl_remain || ttl_remain > gcfg->acme_challenge_ttl)
         ttl_remain = gcfg->acme_challenge_ttl;
     cset->expiry = ev_now(loop) + ttl_remain;
     cset->next_newer = NULL;
 
-    log_debug("Creating ACME DNS-01 challenge set with %zu items:", count);
+    log_debug("Attempting to create ACME DNS-01 challenge set with %zu items:", count);
 
     size_t didx = 0;
     for (size_t i = 0; i < count; i++) {
@@ -261,6 +354,15 @@ bool cset_create(struct ev_loop* loop, size_t ttl_remain, size_t count, size_t d
         return true;
     }
 
+    chal_tbl_t* new_chal_tbl = chal_tbl_create(oldest, cset);
+
+    if (!new_chal_tbl) {
+        log_err("Rejected acme-dns-01 challenge creation: collision sanity constraints exceeded, likely a runaway ACME automation script");
+        free(cset);
+        return true;
+    }
+
+    // Update linked list and deal with timer
     if (!oldest) {
         gdnsd_assert(!newest); // empty before this creation
         oldest = newest = cset;
@@ -274,7 +376,8 @@ bool cset_create(struct ev_loop* loop, size_t ttl_remain, size_t count, size_t d
         newest = cset;
     }
 
-    chal_tbl_create_and_swap(oldest);
+    // Swap the new hashtable in for runtime lookups
+    chal_tbl_swap_and_free(new_chal_tbl);
 
     return false;
 }
@@ -373,8 +476,8 @@ bool chal_respond(const unsigned qname_comp, const unsigned qtype, const uint8_t
         chal_collide_t* coll = t->tbl[qname_hash & t->mask];
         if (coll) {
             for (unsigned i = 0; i < coll->count; i++) {
-                chal_t* ch = coll->chals[i];
-                if (ch->dnhash == qname_hash && !dname_cmp(qname, ch->dname)) {
+                const chal_t* ch = coll->chals[i];
+                if (ch->dnhash == qname_hash && likely(!dname_cmp(qname, ch->dname))) {
                     matched = true;
                     if (qname_is_chal && qtype == DNS_TYPE_TXT) {
                         if ((*offset_p + 2U + CHAL_RR_LEN) > MAX_RESPONSE)