Skip to content

Commit

Permalink
Add "tcp_send_timeout" option to set a TCP send data timeout
Browse files Browse the repository at this point in the history
When using Stubby as a system DNS over TLS resolver with a Internet
connection that disconnects and reconnects from time to time there is often
a long waiting time (~20 minutes) after the connection reconnects before
DNS queries start to work again.

This is because in this particular case all the upstream TLS TCP
connections in Stubby are stuck waiting for upstream server response.
Which will never arrive since the host external IP address might have
changed and / or NAT router connection tracking entries for these TCP
connections might have been removed when the Internet connection
reconnected.

By default Linux tries to retransmit data on a TCP connection 15 times
before finally terminating it.
This takes 16 - 20 minutes, which is obviously a very long time to wait for
system DNS resolving to work again.
This is a real problem on weak mobile connections.

Thankfully, there is a "TCP_USER_TIMEOUT" per-socket option that allows
explicitly setting how long the network stack will wait in such cases.

Let's add a matching "tcp_send_timeout" option to getdns that allows
setting this option on outgoing TCP sockets.
For backward compatibility the code won't try to set it by default.

With this option set to, for example, 15 seconds Stubby recovers pretty
much instantly in such cases.

Signed-off-by: Maciej S. Szmigiero <mail@maciej.szmigiero.name>
  • Loading branch information
maciejsszmigiero committed Jul 12, 2020
1 parent fc4eb46 commit 606a88f
Show file tree
Hide file tree
Showing 8 changed files with 120 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Expand Up @@ -559,6 +559,8 @@ else ()
endif ()
endif ()

check_symbol_exists(TCP_USER_TIMEOUT "sys/socket.h;netinet/tcp.h" HAVE_DECL_TCP_USER_TIMEOUT)

# Main library
add_library(getdns_objects OBJECT
src/anchor.c
Expand Down
2 changes: 2 additions & 0 deletions cmake/include/cmakeconfig.h.in
Expand Up @@ -211,6 +211,8 @@

#cmakedefine USE_OSX_TCP_FASTOPEN 1

#cmakedefine HAVE_DECL_TCP_USER_TIMEOUT 1

#cmakedefine HAVE_NEW_UV_TIMER_CB 1

#cmakedefine HAVE_TARGET_ENDIANNESS
Expand Down
43 changes: 43 additions & 0 deletions src/context.c
Expand Up @@ -1435,6 +1435,7 @@ getdns_context_create_with_extended_memory_functions(

result->timeout = 5000;
result->idle_timeout = 0;
result->tcp_send_timeout = -1;
result->follow_redirects = GETDNS_REDIRECTS_FOLLOW;
result->dns_root_servers = NULL;
#if defined(HAVE_LIBUNBOUND) && !defined(HAVE_UB_CTX_SET_STUB)
Expand Down Expand Up @@ -2367,6 +2368,34 @@ getdns_context_set_idle_timeout(getdns_context *context, uint64_t timeout)
return GETDNS_RETURN_GOOD;
} /* getdns_context_set_timeout */

/*
* getdns_context_unset_tcp_send_timeout
*
*/
getdns_return_t
getdns_context_unset_tcp_send_timeout(getdns_context *context)
{
if (!context)
return GETDNS_RETURN_INVALID_PARAMETER;

context->tcp_send_timeout = -1;
return GETDNS_RETURN_GOOD;
}

/*
* getdns_context_set_tcp_send_timeout
*
*/
getdns_return_t
getdns_context_set_tcp_send_timeout(struct getdns_context *context,
uint32_t value)
{
if (!context || value > INT_MAX)
return GETDNS_RETURN_INVALID_PARAMETER;

context->tcp_send_timeout = value;
return GETDNS_RETURN_GOOD;
}

/*
* getdns_context_set_follow_redirects
Expand Down Expand Up @@ -3837,6 +3866,9 @@ _get_context_settings(const getdns_context* context)
(context->timeout > 0xFFFFFFFFull) ? 0xFFFFFFFF: (uint32_t) context->timeout)
|| getdns_dict_set_int(result, "idle_timeout",
(context->idle_timeout > 0xFFFFFFFFull) ? 0xFFFFFFFF : (uint32_t) context->idle_timeout)
|| ( context->tcp_send_timeout != -1
&& getdns_dict_set_int(result, "tcp_send_timeout",
context->tcp_send_timeout))
|| getdns_dict_set_int(result, "limit_outstanding_queries",
context->limit_outstanding_queries)
|| getdns_dict_set_int(result, "dnssec_allowed_skew",
Expand Down Expand Up @@ -4308,6 +4340,16 @@ CONTEXT_GETTER(timeout , uint64_t)
CONTEXT_GETTER(idle_timeout , uint64_t)
CONTEXT_GETTER(follow_redirects , getdns_redirects_t)

getdns_return_t
getdns_context_get_tcp_send_timeout(
const getdns_context *context, uint32_t* value)
{
if (!context || !value) return GETDNS_RETURN_INVALID_PARAMETER;
*value = context->tcp_send_timeout == -1 ? 0
: context->tcp_send_timeout;
return GETDNS_RETURN_GOOD;
}

getdns_return_t
getdns_context_get_dns_root_servers(
const getdns_context *context, getdns_list **value)
Expand Down Expand Up @@ -4647,6 +4689,7 @@ _getdns_context_config_setting(getdns_context *context,
CONTEXT_SETTING_INT(dns_transport)
CONTEXT_SETTING_ARRAY(dns_transport_list, transport_list)
CONTEXT_SETTING_INT(idle_timeout)
CONTEXT_SETTING_INT(tcp_send_timeout)
CONTEXT_SETTING_INT(limit_outstanding_queries)
CONTEXT_SETTING_INT(timeout)
CONTEXT_SETTING_INT(follow_redirects)
Expand Down
1 change: 1 addition & 0 deletions src/context.h
Expand Up @@ -325,6 +325,7 @@ struct getdns_context {
size_t namespace_count;
uint64_t timeout;
uint64_t idle_timeout;
int tcp_send_timeout; /* -1 is unset */
getdns_redirects_t follow_redirects;
getdns_list *dns_root_servers;

Expand Down
18 changes: 18 additions & 0 deletions src/getdns/getdns.h.in
Expand Up @@ -1514,6 +1514,24 @@ getdns_context_set_dns_transport_list(getdns_context *context,
getdns_return_t
getdns_context_set_idle_timeout(getdns_context *context, uint64_t timeout);

/**
* Set the number of milliseconds send data may remain unacknowledged by
* the peer in a TCP connection, if supported by the operation system.
* When not set (the default), the system default is left alone.
*
* @see getdns_context_get_tcp_send_timeout
* @see getdns_context_unset_tcp_send_timeout
* @param context The context to configure
* @param value The number of milliseconds the send data may remain
* unacknowledged by the peer in a TCP connection.
* @return GETDNS_RETURN_GOOD when successful.
* @return GETDNS_RETURN_INVALID_PARAMETER when context was NULL or the
* value was too high.
*/
getdns_return_t
getdns_context_set_tcp_send_timeout(getdns_context *context,
uint32_t value);

/**
* Limit the number of outstanding DNS queries. When more than limit requests
* are scheduled, they are kept on an internal queue, to be rescheduled when
Expand Down
28 changes: 28 additions & 0 deletions src/getdns/getdns_extra.h.in
Expand Up @@ -540,6 +540,18 @@ getdns_context_set_tls_query_padding_blocksize(getdns_context *context, uint16_t
getdns_return_t
getdns_context_unset_edns_maximum_udp_payload_size(getdns_context *context);

/**
* Configure context to use the system default setting for the time
* send data may remain unacknowledged by the peer in a TCP connection.
* @see getdns_context_set_tcp_send_timeout
* @see getdns_context_get_tcp_send_timeout
* @param context The context to configure
* @return GETDNS_RETURN_GOOD on success
* @return GETDNS_RETURN_INVALID_PARAMETER if context is null.
*/
getdns_return_t
getdns_context_unset_tcp_send_timeout(getdns_context *context);


typedef enum getdns_loglevel_type {
GETDNS_LOG_EMERG = 0,
Expand Down Expand Up @@ -992,6 +1004,22 @@ getdns_return_t
getdns_context_get_idle_timeout(
const getdns_context *context, uint64_t *timeout);

/**
* Get the number of milliseconds send data may remain unacknowledged by
* the peer in a TCP connection setting from context.
* @see getdns_context_set_tcp_send_timeout
* @see getdns_context_unset_tcp_send_timeout
* @param[in] context The context from which to get the setting
* @param[out] value The number of milliseconds the send data may remain
* unacknowledged by the peer in a TCP connection.
* When the value is unset, 0 is returned.
* @return GETDNS_RETURN_GOOD when successful
* @return GETDNS_RETURN_INVALID_PARAMETER when context or value was NULL.
*/
getdns_return_t
getdns_context_get_tcp_send_timeout(const getdns_context *context,
uint32_t *value);

/**
* Get the setting that says whether or not DNS queries follow redirects.
* @see getdns_context_set_follow_redirects
Expand Down
3 changes: 3 additions & 0 deletions src/libgetdns.symbols
Expand Up @@ -30,6 +30,7 @@ getdns_context_get_resolution_type
getdns_context_get_resolvconf
getdns_context_get_round_robin_upstreams
getdns_context_get_suffix
getdns_context_get_tcp_send_timeout
getdns_context_get_timeout
getdns_context_get_tls_authentication
getdns_context_get_tls_backoff_time
Expand Down Expand Up @@ -78,6 +79,7 @@ getdns_context_set_resolvconf
getdns_context_set_return_dnssec_status
getdns_context_set_round_robin_upstreams
getdns_context_set_suffix
getdns_context_set_tcp_send_timeout
getdns_context_set_timeout
getdns_context_set_tls_authentication
getdns_context_set_tls_backoff_time
Expand All @@ -98,6 +100,7 @@ getdns_context_set_update_callback
getdns_context_set_upstream_recursive_servers
getdns_context_set_use_threads
getdns_context_unset_edns_maximum_udp_payload_size
getdns_context_unset_tcp_send_timeout
getdns_convert_alabel_to_ulabel
getdns_convert_dns_name_to_fqdn
getdns_convert_fqdn_to_dns_name
Expand Down
25 changes: 23 additions & 2 deletions src/stub.c
Expand Up @@ -448,8 +448,27 @@ getdns_sock_nonblock(int sockfd)
#endif
}

/** best effort to set TCP send timeout */
static void
getdns_sock_tcp_send_timeout(getdns_upstream *upstream, int sockfd,
int send_timeout)
{
#if defined(HAVE_DECL_TCP_USER_TIMEOUT)
unsigned int val = send_timeout;
if (setsockopt(sockfd, IPPROTO_TCP, TCP_USER_TIMEOUT,
&val, sizeof(val)) != 0) {
_getdns_upstream_log(upstream,
GETDNS_LOG_UPSTREAM_STATS, GETDNS_LOG_WARNING,
"%-40s : Upstream : "
"Could not enable TCP send timeout\n",
upstream->addr_str);
}
#endif
}

static int
tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport)
tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
int send_timeout)
{
#if defined(TCP_FASTOPEN) || defined(TCP_FASTOPEN_CONNECT)
# ifdef USE_WINSOCK
Expand All @@ -468,6 +487,7 @@ tcp_connect(getdns_upstream *upstream, getdns_transport_list_t transport)
return -1;

getdns_sock_nonblock(fd);
getdns_sock_tcp_send_timeout(upstream, fd, send_timeout);
#ifdef USE_OSX_TCP_FASTOPEN
sa_endpoints_t endpoints;
endpoints.sae_srcif = 0;
Expand Down Expand Up @@ -2148,7 +2168,8 @@ upstream_connect(getdns_upstream *upstream, getdns_transport_list_t transport,
/* Use existing if available*/
if (upstream->fd != -1)
return upstream->fd;
fd = tcp_connect(upstream, transport);
fd = tcp_connect(upstream, transport,
dnsreq->context->tcp_send_timeout);
if (fd == -1) {
upstream_failed(upstream, 1);
return -1;
Expand Down

0 comments on commit 606a88f

Please sign in to comment.