Skip to content

Commit

Permalink
Always schedule next_check within check_interval
Browse files Browse the repository at this point in the history
After naemon#259 we now keep the
next_check schedule over restarts if use_retained_scheduling_info is
enabled. However, after that patch, if the check_interval was lowered
it was possible that, after the restart, the next check of an object
would be more than one check_interval away.

This commit ensures that if the next_check is more than one
check_interval away, then we randomly schedule the next check, instead
of using the retention data.

This fixes MON-11295 (https://jira.op5.com/browse/MON-11295)

Signed-off-by: Jacob Hansen <jhansen@op5.com>
  • Loading branch information
jacobbaungard committed Oct 10, 2018
1 parent 859b20d commit de8f21b
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 4 deletions.
7 changes: 5 additions & 2 deletions src/naemon/checks_host.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,10 +65,13 @@ void checks_init_hosts(void)
* If use_retained_scheduling_info is enabled, we use the previously set
* next_check. If the check was missed, schedule it within the next
* interval length. If more than one check was missed, we schedule the check
* randomly instead.
* randomly instead. If the next_check is more than one check_interval in
* the future, we also schedule the next check randomly. This indicates
* that the check_interval has been lowered over restarts.
*/
if (use_retained_scheduling_info == TRUE &&
temp_host->next_check > current_time-get_host_check_interval_s(temp_host)) {
temp_host->next_check > current_time-get_host_check_interval_s(temp_host) &&
temp_host->next_check <= current_time+get_host_check_interval_s(temp_host)) {
if (temp_host->next_check < current_time) {
delay = ranged_urand(0, interval_length);
} else {
Expand Down
7 changes: 5 additions & 2 deletions src/naemon/checks_service.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,13 @@ void checks_init_services(void)
* If use_retained_scheduling_info is enabled, we use the previously set
* next_check. If the check was missed, schedule it within the next
* interval length. If more than one check was missed, we schedule the check
* randomly instead.
* randomly instead. If the next_check is more than one check_interval in
* the future, we also schedule the next check randomly. This indicates
* that the check_interval has been lowered over restarts.
*/
if (use_retained_scheduling_info == TRUE &&
temp_service->next_check > current_time-get_service_check_interval_s(temp_service)) {
temp_service->next_check > current_time-get_service_check_interval_s(temp_service) &&
temp_service->next_check <= current_time+get_service_check_interval_s(temp_service)) {
if (temp_service->next_check < current_time) {
delay = ranged_urand(0, interval_length);
} else {
Expand Down
56 changes: 56 additions & 0 deletions tests/test-check-scheduling.c
Original file line number Diff line number Diff line change
Expand Up @@ -745,6 +745,32 @@ START_TEST(host_retain_disabled_next_check)
END_TEST


/*
 * With use_retained_scheduling_info enabled, a retained next_check that
 * lies further ahead than the host's current check interval must not be
 * reused as-is: after re-initialisation the check has to be scheduled no
 * earlier than "now" and no later than one (current) check interval ahead.
 */
START_TEST(host_retain_always_within_check_interval)
{
	time_t current_time;
	time_t expected_max_next_check;

	use_retained_scheduling_info = TRUE;
	current_time = time(NULL);

	/* Host in a hard UP state whose next check was planned while
	 * check_interval was still 15.0 ... */
	hst->state_type = HARD_STATE;
	hst->current_state = STATE_UP;
	hst->check_interval = 15.0;
	hst->retry_interval = 1.0;
	hst->next_check = current_time + get_host_check_interval_s(hst);

	/* ... after which check_interval is lowered to 5.0, so the retained
	 * next_check now lies beyond one interval from current_time. */
	hst->check_interval = 5.0;
	expected_max_next_check = current_time + get_host_check_interval_s(hst);

	/* Re-running the host check initialisation stands in for a restart */
	checks_init_hosts();

	ck_assert(hst->next_check >= current_time);
	ck_assert(hst->next_check <= expected_max_next_check);
}
END_TEST


/* If use_retained_scheduling_info is enabled the next_check time should be
* retained over restarts
*/
Expand Down Expand Up @@ -844,6 +870,34 @@ START_TEST(service_retain_disabled_next_check)
END_TEST


/* If use_retained_scheduling_info is enabled but the next_check in the
 * retention data is more than one check_interval away, then we should
 * schedule the check randomly within one check_interval.
 */
START_TEST(service_retain_always_within_check_interval)
{
	time_t current_time = time(NULL);
	time_t expected_max_next_check;
	use_retained_scheduling_info = TRUE;

	svc->retry_interval = 1.0;
	svc->check_interval = 15.0;
	svc->current_state = STATE_UP;
	svc->state_type = HARD_STATE;

	/* The retained next_check is computed while check_interval is 15.0 */
	svc->next_check = current_time + get_service_check_interval_s(svc);

	/* Lower check_interval afterwards, so the retained next_check now lies
	 * more than one (current) interval ahead of current_time. */
	svc->check_interval = 5.0;
	expected_max_next_check = current_time + get_service_check_interval_s(svc);

	/* Simulates a restart */
	checks_init_services();

	/* The check must have been rescheduled into [now, now + interval] */
	ck_assert(svc->next_check >= current_time);
	ck_assert(svc->next_check <= expected_max_next_check);
}
END_TEST


Suite*
check_scheduling_suite(void)
{
Expand Down Expand Up @@ -880,10 +934,12 @@ check_scheduling_suite(void)
tcase_add_test(tc_retain, host_retain_missed_check);
tcase_add_test(tc_retain, host_retain_missed_multiple_checks);
tcase_add_test(tc_retain, host_retain_disabled_next_check);
tcase_add_test(tc_retain, host_retain_always_within_check_interval);
tcase_add_test(tc_retain, service_retain_next_check);
tcase_add_test(tc_retain, service_retain_missed_check);
tcase_add_test(tc_retain, service_retain_missed_multiple_checks);
tcase_add_test(tc_retain, service_retain_disabled_next_check);
tcase_add_test(tc_retain, service_retain_always_within_check_interval);
suite_add_tcase(s, tc_retain);

tcase_add_checked_fixture(tc_miscellaneous, setup, teardown);
Expand Down

0 comments on commit de8f21b

Please sign in to comment.