Skip to content

Commit

Permalink
Merge pull request #14228 from dgquintas/dns_cooldown
Browse files Browse the repository at this point in the history
DNS cooldown mechanism
  • Loading branch information
dgquintas committed Feb 2, 2018
2 parents d2e9b98 + 8adf19c commit 4eca959
Show file tree
Hide file tree
Showing 10 changed files with 560 additions and 47 deletions.
28 changes: 28 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ add_dependencies(buildtests_c compression_test)
add_dependencies(buildtests_c concurrent_connectivity_test)
add_dependencies(buildtests_c connection_refused_test)
add_dependencies(buildtests_c dns_resolver_connectivity_test)
add_dependencies(buildtests_c dns_resolver_cooldown_test)
add_dependencies(buildtests_c dns_resolver_test)
if(_gRPC_PLATFORM_LINUX OR _gRPC_PLATFORM_MAC OR _gRPC_PLATFORM_POSIX)
add_dependencies(buildtests_c dualstack_socket_test)
Expand Down Expand Up @@ -5250,6 +5251,33 @@ target_link_libraries(dns_resolver_connectivity_test
endif (gRPC_BUILD_TESTS)
if (gRPC_BUILD_TESTS)

add_executable(dns_resolver_cooldown_test
test/core/client_channel/resolvers/dns_resolver_cooldown_test.cc
)


target_include_directories(dns_resolver_cooldown_test
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include
PRIVATE ${_gRPC_SSL_INCLUDE_DIR}
PRIVATE ${_gRPC_PROTOBUF_INCLUDE_DIR}
PRIVATE ${_gRPC_ZLIB_INCLUDE_DIR}
PRIVATE ${_gRPC_BENCHMARK_INCLUDE_DIR}
PRIVATE ${_gRPC_CARES_INCLUDE_DIR}
PRIVATE ${_gRPC_GFLAGS_INCLUDE_DIR}
)

target_link_libraries(dns_resolver_cooldown_test
${_gRPC_ALLTARGETS_LIBRARIES}
grpc_test_util
grpc
gpr_test_util
gpr
)

endif (gRPC_BUILD_TESTS)
if (gRPC_BUILD_TESTS)

add_executable(dns_resolver_test
test/core/client_channel/resolvers/dns_resolver_test.cc
)
Expand Down
36 changes: 36 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,7 @@ compression_test: $(BINDIR)/$(CONFIG)/compression_test
concurrent_connectivity_test: $(BINDIR)/$(CONFIG)/concurrent_connectivity_test
connection_refused_test: $(BINDIR)/$(CONFIG)/connection_refused_test
dns_resolver_connectivity_test: $(BINDIR)/$(CONFIG)/dns_resolver_connectivity_test
dns_resolver_cooldown_test: $(BINDIR)/$(CONFIG)/dns_resolver_cooldown_test
dns_resolver_test: $(BINDIR)/$(CONFIG)/dns_resolver_test
dualstack_socket_test: $(BINDIR)/$(CONFIG)/dualstack_socket_test
endpoint_pair_test: $(BINDIR)/$(CONFIG)/endpoint_pair_test
Expand Down Expand Up @@ -1386,6 +1387,7 @@ buildtests_c: privatelibs_c \
$(BINDIR)/$(CONFIG)/concurrent_connectivity_test \
$(BINDIR)/$(CONFIG)/connection_refused_test \
$(BINDIR)/$(CONFIG)/dns_resolver_connectivity_test \
$(BINDIR)/$(CONFIG)/dns_resolver_cooldown_test \
$(BINDIR)/$(CONFIG)/dns_resolver_test \
$(BINDIR)/$(CONFIG)/dualstack_socket_test \
$(BINDIR)/$(CONFIG)/endpoint_pair_test \
Expand Down Expand Up @@ -1840,6 +1842,8 @@ test_c: buildtests_c
$(Q) $(BINDIR)/$(CONFIG)/connection_refused_test || ( echo test connection_refused_test failed ; exit 1 )
$(E) "[RUN] Testing dns_resolver_connectivity_test"
$(Q) $(BINDIR)/$(CONFIG)/dns_resolver_connectivity_test || ( echo test dns_resolver_connectivity_test failed ; exit 1 )
$(E) "[RUN] Testing dns_resolver_cooldown_test"
$(Q) $(BINDIR)/$(CONFIG)/dns_resolver_cooldown_test || ( echo test dns_resolver_cooldown_test failed ; exit 1 )
$(E) "[RUN] Testing dns_resolver_test"
$(Q) $(BINDIR)/$(CONFIG)/dns_resolver_test || ( echo test dns_resolver_test failed ; exit 1 )
$(E) "[RUN] Testing dualstack_socket_test"
Expand Down Expand Up @@ -9927,6 +9931,38 @@ endif
endif


DNS_RESOLVER_COOLDOWN_TEST_SRC = \
test/core/client_channel/resolvers/dns_resolver_cooldown_test.cc \

DNS_RESOLVER_COOLDOWN_TEST_OBJS = $(addprefix $(OBJDIR)/$(CONFIG)/, $(addsuffix .o, $(basename $(DNS_RESOLVER_COOLDOWN_TEST_SRC))))
ifeq ($(NO_SECURE),true)

# You can't build secure targets if you don't have OpenSSL.

$(BINDIR)/$(CONFIG)/dns_resolver_cooldown_test: openssl_dep_error

else



$(BINDIR)/$(CONFIG)/dns_resolver_cooldown_test: $(DNS_RESOLVER_COOLDOWN_TEST_OBJS) $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a
$(E) "[LD] Linking $@"
$(Q) mkdir -p `dirname $@`
$(Q) $(LD) $(LDFLAGS) $(DNS_RESOLVER_COOLDOWN_TEST_OBJS) $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a $(LDLIBS) $(LDLIBS_SECURE) -o $(BINDIR)/$(CONFIG)/dns_resolver_cooldown_test

endif

$(OBJDIR)/$(CONFIG)/test/core/client_channel/resolvers/dns_resolver_cooldown_test.o: $(LIBDIR)/$(CONFIG)/libgrpc_test_util.a $(LIBDIR)/$(CONFIG)/libgrpc.a $(LIBDIR)/$(CONFIG)/libgpr_test_util.a $(LIBDIR)/$(CONFIG)/libgpr.a

deps_dns_resolver_cooldown_test: $(DNS_RESOLVER_COOLDOWN_TEST_OBJS:.o=.dep)

ifneq ($(NO_SECURE),true)
ifneq ($(NO_DEPS),true)
-include $(DNS_RESOLVER_COOLDOWN_TEST_OBJS:.o=.dep)
endif
endif


DNS_RESOLVER_TEST_SRC = \
test/core/client_channel/resolvers/dns_resolver_test.cc \

Expand Down
10 changes: 10 additions & 0 deletions build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1940,6 +1940,16 @@ targets:
- gpr
exclude_iomgrs:
- uv
- name: dns_resolver_cooldown_test
build: test
language: c
src:
- test/core/client_channel/resolvers/dns_resolver_cooldown_test.cc
deps:
- grpc_test_util
- grpc
- gpr_test_util
- gpr
- name: dns_resolver_test
build: test
language: c
Expand Down
3 changes: 3 additions & 0 deletions include/grpc/impl/codegen/grpc_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ typedef struct {
/** The time between the first and second connection attempts, in ms */
#define GRPC_ARG_INITIAL_RECONNECT_BACKOFF_MS \
"grpc.initial_reconnect_backoff_ms"
/** Minimum amount of time between DNS resolutions, in ms */
#define GRPC_ARG_DNS_MIN_TIME_BETWEEN_RESOLUTIONS_MS \
"grpc.dns_min_time_between_resolutions_ms"
/** The timeout used on servers for finishing handshaking on an incoming
connection. Defaults to 120 seconds. */
#define GRPC_ARG_SERVER_HANDSHAKE_TIMEOUT_MS "grpc.server_handshake_timeout_ms"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ typedef struct {
grpc_pollset_set* interested_parties;

/** Closures used by the combiner */
grpc_closure dns_ares_on_retry_timer_locked;
grpc_closure dns_ares_on_resolved_locked;
grpc_closure dns_ares_on_next_resolution_timer_closure;
grpc_closure dns_ares_on_resolved_closure;

/** Combiner guarding the rest of the state */
grpc_combiner* combiner;
Expand All @@ -85,12 +85,15 @@ typedef struct {
grpc_channel_args** target_result;
/** current (fully resolved) result */
grpc_channel_args* resolved_result;
/** retry timer */
bool have_retry_timer;
grpc_timer retry_timer;
/** next resolution timer */
bool have_next_resolution_timer;
grpc_timer next_resolution_timer;
/** retry backoff state */
grpc_core::ManualConstructor<grpc_core::BackOff> backoff;

/** min resolution period. Max one resolution will happen per period */
grpc_millis min_time_between_resolutions;
/** when was the last resolution? -1 if no resolution has happened yet */
grpc_millis last_resolution_timestamp;
/** currently resolving addresses */
grpc_lb_addresses* lb_addresses;
/** currently resolving service config */
Expand All @@ -100,6 +103,7 @@ typedef struct {
static void dns_ares_destroy(grpc_resolver* r);

static void dns_ares_start_resolving_locked(ares_dns_resolver* r);
static void dns_ares_maybe_start_resolving_locked(ares_dns_resolver* r);
static void dns_ares_maybe_finish_next_locked(ares_dns_resolver* r);

static void dns_ares_shutdown_locked(grpc_resolver* r);
Expand All @@ -114,8 +118,8 @@ static const grpc_resolver_vtable dns_ares_resolver_vtable = {

static void dns_ares_shutdown_locked(grpc_resolver* resolver) {
ares_dns_resolver* r = (ares_dns_resolver*)resolver;
if (r->have_retry_timer) {
grpc_timer_cancel(&r->retry_timer);
if (r->have_next_resolution_timer) {
grpc_timer_cancel(&r->next_resolution_timer);
}
if (r->pending_request != nullptr) {
grpc_cancel_ares_request(r->pending_request);
Expand All @@ -131,19 +135,20 @@ static void dns_ares_shutdown_locked(grpc_resolver* resolver) {
static void dns_ares_channel_saw_error_locked(grpc_resolver* resolver) {
ares_dns_resolver* r = (ares_dns_resolver*)resolver;
if (!r->resolving) {
dns_ares_start_resolving_locked(r);
dns_ares_maybe_start_resolving_locked(r);
}
}

static void dns_ares_on_retry_timer_locked(void* arg, grpc_error* error) {
static void dns_ares_on_next_resolution_timer_locked(void* arg,
grpc_error* error) {
ares_dns_resolver* r = (ares_dns_resolver*)arg;
r->have_retry_timer = false;
r->have_next_resolution_timer = false;
if (error == GRPC_ERROR_NONE) {
if (!r->resolving) {
dns_ares_start_resolving_locked(r);
}
}
GRPC_RESOLVER_UNREF(&r->base, "retry-timer");
GRPC_RESOLVER_UNREF(&r->base, "next_resolution_timer");
}

static bool value_in_json_array(grpc_json* array, const char* value) {
Expand Down Expand Up @@ -270,21 +275,22 @@ static void dns_ares_on_resolved_locked(void* arg, grpc_error* error) {
grpc_millis timeout = next_try - grpc_core::ExecCtx::Get()->Now();
gpr_log(GPR_INFO, "dns resolution failed (will retry): %s",
grpc_error_string(error));
GPR_ASSERT(!r->have_retry_timer);
r->have_retry_timer = true;
GRPC_RESOLVER_REF(&r->base, "retry-timer");
GPR_ASSERT(!r->have_next_resolution_timer);
r->have_next_resolution_timer = true;
GRPC_RESOLVER_REF(&r->base, "next_resolution_timer");
if (timeout > 0) {
gpr_log(GPR_DEBUG, "retrying in %" PRIdPTR " milliseconds", timeout);
} else {
gpr_log(GPR_DEBUG, "retrying immediately");
}
grpc_timer_init(&r->retry_timer, next_try,
&r->dns_ares_on_retry_timer_locked);
grpc_timer_init(&r->next_resolution_timer, next_try,
&r->dns_ares_on_next_resolution_timer_closure);
}
if (r->resolved_result != nullptr) {
grpc_channel_args_destroy(r->resolved_result);
}
r->resolved_result = result;
r->last_resolution_timestamp = grpc_core::ExecCtx::Get()->Now();
r->resolved_version++;
dns_ares_maybe_finish_next_locked(r);
GRPC_RESOLVER_UNREF(&r->base, "dns-resolving");
Expand All @@ -299,7 +305,7 @@ static void dns_ares_next_locked(grpc_resolver* resolver,
r->next_completion = on_complete;
r->target_result = target_result;
if (r->resolved_version == 0 && !r->resolving) {
dns_ares_start_resolving_locked(r);
dns_ares_maybe_start_resolving_locked(r);
} else {
dns_ares_maybe_finish_next_locked(r);
}
Expand All @@ -313,7 +319,7 @@ static void dns_ares_start_resolving_locked(ares_dns_resolver* r) {
r->service_config_json = nullptr;
r->pending_request = grpc_dns_lookup_ares(
r->dns_server, r->name_to_resolve, r->default_port, r->interested_parties,
&r->dns_ares_on_resolved_locked, &r->lb_addresses,
&r->dns_ares_on_resolved_closure, &r->lb_addresses,
true /* check_grpclb */,
r->request_service_config ? &r->service_config_json : nullptr);
}
Expand All @@ -331,6 +337,35 @@ static void dns_ares_maybe_finish_next_locked(ares_dns_resolver* r) {
}
}

static void dns_ares_maybe_start_resolving_locked(ares_dns_resolver* r) {
if (r->last_resolution_timestamp >= 0) {
const grpc_millis earliest_next_resolution =
r->last_resolution_timestamp + r->min_time_between_resolutions;
const grpc_millis ms_until_next_resolution =
earliest_next_resolution - grpc_core::ExecCtx::Get()->Now();
if (ms_until_next_resolution > 0) {
const grpc_millis last_resolution_ago =
grpc_core::ExecCtx::Get()->Now() - r->last_resolution_timestamp;
gpr_log(GPR_DEBUG,
"In cooldown from last resolution (from %" PRIdPTR
" ms ago). Will resolve again in %" PRIdPTR " ms",
last_resolution_ago, ms_until_next_resolution);
if (!r->have_next_resolution_timer) {
r->have_next_resolution_timer = true;
GRPC_RESOLVER_REF(&r->base, "next_resolution_timer_cooldown");
grpc_timer_init(&r->next_resolution_timer, ms_until_next_resolution,
&r->dns_ares_on_next_resolution_timer_closure);
}
// TODO(dgq): remove the following two lines once Pick First stops
// discarding subchannels after selecting.
++r->resolved_version;
dns_ares_maybe_finish_next_locked(r);
return;
}
}
dns_ares_start_resolving_locked(r);
}

static void dns_ares_destroy(grpc_resolver* gr) {
gpr_log(GPR_DEBUG, "dns_ares_destroy");
ares_dns_resolver* r = (ares_dns_resolver*)gr;
Expand Down Expand Up @@ -375,12 +410,17 @@ static grpc_resolver* dns_ares_create(grpc_resolver_args* args,
.set_jitter(GRPC_DNS_RECONNECT_JITTER)
.set_max_backoff(GRPC_DNS_RECONNECT_MAX_BACKOFF_SECONDS * 1000);
r->backoff.Init(grpc_core::BackOff(backoff_options));
GRPC_CLOSURE_INIT(&r->dns_ares_on_retry_timer_locked,
dns_ares_on_retry_timer_locked, r,
GRPC_CLOSURE_INIT(&r->dns_ares_on_next_resolution_timer_closure,
dns_ares_on_next_resolution_timer_locked, r,
grpc_combiner_scheduler(r->base.combiner));
GRPC_CLOSURE_INIT(&r->dns_ares_on_resolved_locked,
GRPC_CLOSURE_INIT(&r->dns_ares_on_resolved_closure,
dns_ares_on_resolved_locked, r,
grpc_combiner_scheduler(r->base.combiner));
const grpc_arg* period_arg = grpc_channel_args_find(
args->args, GRPC_ARG_DNS_MIN_TIME_BETWEEN_RESOLUTIONS_MS);
r->min_time_between_resolutions =
grpc_channel_arg_get_integer(period_arg, {1000, 0, INT_MAX});
r->last_resolution_timestamp = -1;
return &r->base;
}

Expand Down
Loading

0 comments on commit 4eca959

Please sign in to comment.