Skip to content

Commit e8b32b8

Browse files
committed
Prevent complex recursion during query requeing and connection cleanup
c-ares utilizes recursion for some operations, and some of these processes can have unintended side effects, such as if a callback is called that then recurses into the same function. This can cause strange cleanup conditions that lead to crashes. Try to disassociate queries with connections as early as possible and move cleaning up unneeded connections to its own scan rather than trying to detect each time a query is disassociated from a connection. Fix By: Brad House (@bradh352)
1 parent 47be750 commit e8b32b8

File tree

5 files changed

+77
-57
lines changed

5 files changed

+77
-57
lines changed

src/lib/ares__close_sockets.c

Lines changed: 49 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -80,41 +80,59 @@ void ares__close_sockets(struct server_state *server)
8080
}
8181
}
8282

83-
void ares__check_cleanup_conn(const ares_channel_t *channel,
84-
struct server_connection *conn)
83+
void ares__check_cleanup_conns(const ares_channel_t *channel)
8584
{
86-
ares_bool_t do_cleanup = ARES_FALSE;
85+
ares__slist_node_t *snode;
8786

88-
if (channel == NULL || conn == NULL) {
87+
if (channel == NULL) {
8988
return; /* LCOV_EXCL_LINE: DefensiveCoding */
9089
}
9190

92-
if (ares__llist_len(conn->queries_to_conn)) {
93-
return;
91+
/* Iterate across each server */
92+
for (snode = ares__slist_node_first(channel->servers); snode != NULL;
93+
snode = ares__slist_node_next(snode)) {
94+
95+
struct server_state *server = ares__slist_node_val(snode);
96+
ares__llist_node_t *cnode;
97+
98+
/* Iterate across each connection */
99+
cnode = ares__llist_node_first(server->connections);
100+
while (cnode != NULL) {
101+
ares__llist_node_t *next = ares__llist_node_next(cnode);
102+
struct server_connection *conn = ares__llist_node_val(cnode);
103+
ares_bool_t do_cleanup = ARES_FALSE;
104+
cnode = next;
105+
106+
/* Has connections, not eligible */
107+
if (ares__llist_len(conn->queries_to_conn)) {
108+
continue;
109+
}
110+
111+
/* If we are configured not to stay open, close it out */
112+
if (!(channel->flags & ARES_FLAG_STAYOPEN)) {
113+
do_cleanup = ARES_TRUE;
114+
}
115+
116+
/* If the associated server has failures, close it out. Resetting the
117+
* connection (and specifically the source port number) can help resolve
118+
* situations where packets are being dropped.
119+
*/
120+
if (conn->server->consec_failures > 0) {
121+
do_cleanup = ARES_TRUE;
122+
}
123+
124+
/* If the udp connection hit its max queries, always close it */
125+
if (!conn->is_tcp && channel->udp_max_queries > 0 &&
126+
conn->total_queries >= channel->udp_max_queries) {
127+
do_cleanup = ARES_TRUE;
128+
}
129+
130+
if (!do_cleanup) {
131+
continue;
132+
}
133+
134+
/* Clean it up */
135+
ares__close_connection(conn, ARES_SUCCESS);
136+
}
94137
}
95-
96-
/* If we are configured not to stay open, close it out */
97-
if (!(channel->flags & ARES_FLAG_STAYOPEN)) {
98-
do_cleanup = ARES_TRUE;
99-
}
100-
101-
/* If the associated server has failures, close it out. Resetting the
102-
* connection (and specifically the source port number) can help resolve
103-
* situations where packets are being dropped.
104-
*/
105-
if (conn->server->consec_failures > 0) {
106-
do_cleanup = ARES_TRUE;
107-
}
108-
109-
/* If the udp connection hit its max queries, always close it */
110-
if (!conn->is_tcp && channel->udp_max_queries > 0 &&
111-
conn->total_queries >= channel->udp_max_queries) {
112-
do_cleanup = ARES_TRUE;
113-
}
114-
115-
if (!do_cleanup) {
116-
return;
117-
}
118-
119-
ares__close_connection(conn, ARES_SUCCESS);
120138
}

src/lib/ares_cancel.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,28 +60,26 @@ void ares_cancel(ares_channel_t *channel)
6060
node = ares__llist_node_first(list_copy);
6161
while (node != NULL) {
6262
struct query *query;
63-
struct server_connection *conn;
6463

6564
/* Cache next since this node is being deleted */
6665
next = ares__llist_node_next(node);
6766

6867
query = ares__llist_node_claim(node);
69-
conn = query->conn;
7068
query->node_all_queries = NULL;
7169

7270
/* NOTE: its possible this may enqueue new queries */
7371
query->callback(query->arg, ARES_ECANCELLED, 0, NULL);
7472
ares__free_query(query);
7573

76-
/* See if the connection should be cleaned up */
77-
ares__check_cleanup_conn(channel, conn);
78-
7974
node = next;
8075
}
8176

8277
ares__llist_destroy(list_copy);
8378
}
8479

80+
/* See if the connections should be cleaned up */
81+
ares__check_cleanup_conns(channel);
82+
8583
ares_queue_notify_empty(channel);
8684

8785
done:

src/lib/ares_private.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -442,8 +442,7 @@ void ares__dnsrec_convert_cb(void *arg, ares_status_t status, size_t timeouts,
442442
void ares__close_connection(struct server_connection *conn,
443443
ares_status_t requeue_status);
444444
void ares__close_sockets(struct server_state *server);
445-
void ares__check_cleanup_conn(const ares_channel_t *channel,
446-
struct server_connection *conn);
445+
void ares__check_cleanup_conns(const ares_channel_t *channel);
447446
void ares__free_query(struct query *query);
448447

449448
ares_rand_state *ares__init_rand_state(void);

src/lib/ares_process.c

Lines changed: 23 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,17 @@ static void end_query(ares_channel_t *channel, struct server_state *server,
6969
struct query *query, ares_status_t status,
7070
const ares_dns_record_t *dnsrec);
7171

72+
73+
static void ares__query_disassociate_from_conn(struct query *query)
74+
{
75+
/* If its not part of a connection, it can't be tracked for timeouts either */
76+
ares__slist_node_destroy(query->node_queries_by_timeout);
77+
ares__llist_node_destroy(query->node_queries_to_conn);
78+
query->node_queries_by_timeout = NULL;
79+
query->node_queries_to_conn = NULL;
80+
query->conn = NULL;
81+
}
82+
7283
/* Invoke the server state callback after a success or failure */
7384
static void invoke_server_state_cb(const struct server_state *server,
7485
ares_bool_t success, int flags)
@@ -203,6 +214,8 @@ static void processfds(ares_channel_t *channel, fd_set *read_fds,
203214
/* Write last as the other 2 operations might have triggered writes */
204215
write_tcp_data(channel, write_fds, write_fd);
205216

217+
/* See if any connections should be cleaned up */
218+
ares__check_cleanup_conns(channel);
206219
ares__channel_unlock(channel);
207220
}
208221

@@ -396,8 +409,6 @@ static void read_tcp_data(ares_channel_t *channel,
396409
/* Since we processed the answer, clear the tag so space can be reclaimed */
397410
ares__buf_tag_clear(server->tcp_parser);
398411
}
399-
400-
ares__check_cleanup_conn(channel, conn);
401412
}
402413

403414
static int socket_list_append(ares_socket_t **socketlist, ares_socket_t fd,
@@ -521,8 +532,6 @@ static void read_udp_packets_fd(ares_channel_t *channel,
521532
/* Try to read again only if *we* set up the socket, otherwise it may be
522533
* a blocking socket and would cause recvfrom to hang. */
523534
} while (read_len >= 0 && channel->sock_funcs == NULL);
524-
525-
ares__check_cleanup_conn(channel, conn);
526535
}
527536

528537
static void read_packets(ares_channel_t *channel, fd_set *read_fds,
@@ -595,13 +604,15 @@ static void read_packets(ares_channel_t *channel, fd_set *read_fds,
595604
/* If any queries have timed out, note the timeout and move them on. */
596605
static void process_timeouts(ares_channel_t *channel, const ares_timeval_t *now)
597606
{
598-
ares__slist_node_t *node =
599-
ares__slist_node_first(channel->queries_by_timeout);
600-
while (node != NULL) {
607+
ares__slist_node_t *node;
608+
609+
/* Just keep popping off the first as this list will re-sort as things come
610+
* and go. We don't want to try to rely on 'next' as some operation might
611+
* cause a cleanup of that pointer and would become invalid */
612+
while ((node = ares__slist_node_first(channel->queries_by_timeout)) != NULL) {
601613
struct query *query = ares__slist_node_val(node);
602-
/* Node might be removed, cache next */
603-
ares__slist_node_t *next = ares__slist_node_next(node);
604614
struct server_connection *conn;
615+
605616
/* Since this is sorted, as soon as we hit a query that isn't timed out,
606617
* break */
607618
if (!ares__timedout(now, &query->timeout)) {
@@ -613,9 +624,6 @@ static void process_timeouts(ares_channel_t *channel, const ares_timeval_t *now)
613624
conn = query->conn;
614625
server_increment_failures(conn->server, query->using_tcp);
615626
ares__requeue_query(query, now, ARES_ETIMEOUT);
616-
ares__check_cleanup_conn(channel, conn);
617-
618-
node = next;
619627
}
620628
}
621629

@@ -798,6 +806,8 @@ ares_status_t ares__requeue_query(struct query *query,
798806
ares_channel_t *channel = query->channel;
799807
size_t max_tries = ares__slist_len(channel->servers) * channel->tries;
800808

809+
ares__query_disassociate_from_conn(query);
810+
801811
if (status != ARES_SUCCESS) {
802812
query->error_status = status;
803813
}
@@ -1030,8 +1040,6 @@ ares_status_t ares__send_query(struct query *query, const ares_timeval_t *now)
10301040
ares_status_t status;
10311041
ares_bool_t new_connection = ARES_FALSE;
10321042

1033-
query->conn = NULL;
1034-
10351043
/* Choose the server to send the query to */
10361044
if (channel->rotate) {
10371045
/* Pull random server */
@@ -1294,12 +1302,9 @@ static ares_bool_t same_address(const struct sockaddr *sa,
12941302
static void ares_detach_query(struct query *query)
12951303
{
12961304
/* Remove the query from all the lists in which it is linked */
1305+
ares__query_disassociate_from_conn(query);
12971306
ares__htable_szvp_remove(query->channel->queries_by_qid, query->qid);
1298-
ares__slist_node_destroy(query->node_queries_by_timeout);
1299-
ares__llist_node_destroy(query->node_queries_to_conn);
13001307
ares__llist_node_destroy(query->node_all_queries);
1301-
query->node_queries_by_timeout = NULL;
1302-
query->node_queries_to_conn = NULL;
13031308
query->node_all_queries = NULL;
13041309
}
13051310

test/ares-test-mock-et.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ TEST_P(MockUDPEventThreadTest, BadLoopbackServerNoTimeouts) {
9393
EXPECT_EQ(ARES_ECONNREFUSED, result[i].status_);
9494
EXPECT_EQ(0, result[i].timeouts_);
9595
#else
96-
EXPECT_TRUE(result[i].status_ == ARES_ECONNREFUSED || result[i].status_ == ARES_ETIMEOUT);
96+
EXPECT_TRUE(result[i].status_ == ARES_ECONNREFUSED || result[i].status_ == ARES_ETIMEOUT || result[i].status_ == ARES_ESERVFAIL);
9797
#endif
9898
}
9999
}

0 commit comments

Comments
 (0)