Skip to content

Commit

Permalink
eal: add keep alive monitoring
Browse files Browse the repository at this point in the history
Adds functions for detecting and reporting the live-ness of LCores,
the primary requirement of which is minimal overheads for the
core(s) being checked. Core failures are notified via an application
defined callback.

Signed-off-by: Remy Horton <remy.horton@intel.com>
  • Loading branch information
Remy Horton authored and Thomas Monjalon committed Nov 19, 2015
1 parent e6734d2 commit 75583b0
Show file tree
Hide file tree
Showing 10 changed files with 280 additions and 3 deletions.
5 changes: 5 additions & 0 deletions MAINTAINERS
Expand Up @@ -108,6 +108,11 @@ F: app/test/test_malloc.c
F: app/test/test_memory.c
F: app/test/test_memzone.c

Keep alive
M: Remy Horton <remy.horton@intel.com>
F: lib/librte_eal/common/include/rte_keepalive.h
F: lib/librte_eal/common/rte_keepalive.c

Secondary process
K: RTE_PROC_
F: doc/guides/prog_guide/multi_proc_support.rst
Expand Down
1 change: 1 addition & 0 deletions doc/api/doxy-api-index.md
Expand Up @@ -146,4 +146,5 @@ There are many libraries, so their headers may be grouped by topics:
[EAL config] (@ref rte_eal.h),
[common] (@ref rte_common.h),
[ABI compat] (@ref rte_compat.h),
[keepalive] (@ref rte_keepalive.h),
[version] (@ref rte_version.h)
2 changes: 2 additions & 0 deletions doc/guides/rel_notes/release_2_2.rst
Expand Up @@ -15,6 +15,8 @@ New Features
New function rte_ring_free() allows the user to free a ring
if it was created with rte_ring_create().

* **Added keepalive support to EAL.**

* **Added ethdev API to support IEEE1588.**

Added functions to read, write and adjust system time in the NIC.
Expand Down
1 change: 1 addition & 0 deletions lib/librte_eal/bsdapp/eal/Makefile
Expand Up @@ -80,6 +80,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += malloc_heap.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_BSDAPP) += rte_keepalive.c

CFLAGS_eal.o := -D_GNU_SOURCE
#CFLAGS_eal_thread.o := -D_GNU_SOURCE
Expand Down
6 changes: 5 additions & 1 deletion lib/librte_eal/bsdapp/eal/rte_eal_version.map
Expand Up @@ -130,5 +130,9 @@ DPDK_2.2 {
global:

rte_intr_cap_multiple;
rte_keepalive_create;
rte_keepalive_dispatch_pings;
rte_keepalive_mark_alive;
rte_keepalive_register_core;

} DPDK_2.1;
} DPDK_2.1;
2 changes: 1 addition & 1 deletion lib/librte_eal/common/Makefile
Expand Up @@ -40,7 +40,7 @@ INC += rte_string_fns.h rte_version.h
INC += rte_eal_memconfig.h rte_malloc_heap.h
INC += rte_hexdump.h rte_devargs.h rte_dev.h
INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h
INC += rte_malloc.h rte_time.h
INC += rte_malloc.h rte_keepalive.h rte_time.h

ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y)
INC += rte_warnings.h
Expand Down
146 changes: 146 additions & 0 deletions lib/librte_eal/common/include/rte_keepalive.h
@@ -0,0 +1,146 @@
/*-
* BSD LICENSE
*
* Copyright 2015 Intel Shannon Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/**
* @file rte_keepalive.h
* DPDK RTE LCore Keepalive Monitor.
*
**/

#ifndef _KEEPALIVE_H_
#define _KEEPALIVE_H_

#include <rte_memory.h>

#ifndef RTE_KEEPALIVE_MAXCORES
/**
* Number of cores to track.
* @note Must be larger than the highest core id. */
#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE
#endif


/**
 * Keepalive failure callback.
 *
 * Invoked by rte_keepalive_dispatch_pings(), in the context of whichever
 * thread calls that function, once for a monitored core when its state
 * moves from DEAD to GONE.
 *
 * @param data
 *   The application data pointer that was given to rte_keepalive_create().
 * @param id_core
 *   The id of the core that failed.
 */
typedef void (*rte_keepalive_failure_callback_t)(
void *data,
const int id_core);


/**
 * Keepalive state structure.
 *
 * Allocated and initialised by rte_keepalive_create(). All per-core arrays
 * are indexed by core id.
 * @internal
 */
struct rte_keepalive {
/**
 * Core Liveness.
 *
 * Written with ALIVE by rte_keepalive_mark_alive() on the monitored core,
 * and stepped ALIVE -> MISSING -> DEAD -> GONE by
 * rte_keepalive_dispatch_pings() for as long as the core does not mark
 * itself alive again.
 */
enum {
/* Core has reported in since the last dispatch pass. */
ALIVE = 1,
/*
 * Core has not reported since the last pass. This is the zero value,
 * presumably so that the zero-filled allocation made by
 * rte_keepalive_create() starts every core here -- confirm.
 */
MISSING = 0,
/* Core stayed silent for a further pass; failure callback is pending. */
DEAD = 2,
/* Failure callback has been issued; dispatch takes no further action. */
GONE = 3
} __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES];

/**
 * Last-seen-alive timestamps.
 * TSC value sampled by rte_keepalive_dispatch_pings() at the moment it
 * last found the core in the ALIVE state.
 */
uint64_t last_alive[RTE_KEEPALIVE_MAXCORES];

/**
* Cores to check.
* Indexed by core id, non-zero if the core should be checked.
*/
uint8_t active_cores[RTE_KEEPALIVE_MAXCORES];

/** Dead core handler. May be NULL, in which case no callback is made. */
rte_keepalive_failure_callback_t callback;

/**
* Dead core handler app data.
* Pointer is passed to dead core handler.
*/
void *callback_data;
/** TSC value sampled when rte_keepalive_create() built this structure. */
uint64_t tsc_initial;
/**
 * Set to rte_get_tsc_hz() / 1000 by rte_keepalive_create().
 * NOTE(review): that value is TSC ticks per millisecond (kHz), not MHz as
 * the field name suggests; the field is not read in the code visible here.
 */
uint64_t tsc_mhz;
};


/**
 * Initialise keepalive sub-system.
 *
 * Allocates a zeroed keepalive structure and records the failure callback
 * and its data pointer in it. No cores are monitored until they are added
 * with rte_keepalive_register_core().
 *
 * @param callback
 *   Function called upon detection of a dead core.
 * @param data
 *   Data pointer to be passed to function callback.
 * @return
 *   Keepalive structure on success, NULL on failure.
 */
struct rte_keepalive *rte_keepalive_create(
rte_keepalive_failure_callback_t callback,
void *data);


/**
 * Checks & handles keepalive state of monitored cores.
 *
 * Each call advances every registered core by one step: a core that marked
 * itself alive is reset to "missing" and timestamped; a core still missing
 * is logged and marked dead; a dead core is logged, reported through the
 * failure callback (if one was set) and then left alone on later calls.
 *
 * @param ptr_timer
 *   Triggering timer (unused).
 * @param ptr_data
 *   Data pointer (keepalive structure).
 */
void rte_keepalive_dispatch_pings(void *ptr_timer, void *ptr_data);


/**
 * Registers a core for keepalive checks.
 *
 * An id of RTE_KEEPALIVE_MAXCORES or above is silently ignored.
 *
 * @param keepcfg
 *   Keepalive structure pointer.
 * @param id_core
 *   ID number of core to register.
 */
void rte_keepalive_register_core(struct rte_keepalive *keepcfg,
const int id_core);


/**
 * Per-core keepalive heartbeat.
 *
 * Flags the calling lcore as ALIVE in the keepalive structure. It must be
 * called from within the main processing loop of every lcore that has been
 * registered for monitoring; the work done is a single array store.
 *
 * NOTE(review): rte_lcore_id() is not declared by the include visible in
 * this header, so includers presumably need <rte_lcore.h> first -- confirm.
 *
 * @param keepcfg
 *   Keepalive structure pointer.
 */
static inline void
rte_keepalive_mark_alive(struct rte_keepalive *keepcfg)
{
	const unsigned int self = rte_lcore_id();

	keepcfg->state_flags[self] = ALIVE;
}


#endif /* _KEEPALIVE_H_ */
113 changes: 113 additions & 0 deletions lib/librte_eal/common/rte_keepalive.c
@@ -0,0 +1,113 @@
/*-
* BSD LICENSE
*
* Copyright 2015 Intel Shannon Ltd. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <inttypes.h>

#include <rte_common.h>
#include <rte_cycles.h>
#include <rte_lcore.h>
#include <rte_log.h>
#include <rte_keepalive.h>
#include <rte_malloc.h>

static void
print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core)
{
RTE_LOG(INFO, EAL, "%sLast seen %" PRId64 "ms ago.\n",
msg,
((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000)
/ rte_get_tsc_hz()
);
}


/*
 * Advances the liveness state of every registered core by one step.
 *
 * ptr_timer - unused.
 * ptr_data  - the keepalive structure.
 *
 * ALIVE   -> MISSING, and the core's last-alive timestamp is refreshed.
 * MISSING -> DEAD, after logging that the core is missing.
 * DEAD    -> GONE, after which the death is logged and the failure
 *            callback (when set) is invoked with the core id.
 * GONE    -> no action.
 */
void
rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer,
	void *ptr_data)
{
	struct rte_keepalive *cfg = ptr_data;
	int core;

	for (core = 0; core < RTE_KEEPALIVE_MAXCORES; core++) {
		int state;

		/* Only cores that were registered are inspected. */
		if (!cfg->active_cores[core])
			continue;

		state = cfg->state_flags[core];
		if (state == ALIVE) {
			/* Re-arm: the core must mark itself alive again. */
			cfg->state_flags[core] = MISSING;
			cfg->last_alive[core] = rte_rdtsc();
		} else if (state == MISSING) {
			print_trace("Core MIA. ", cfg, core);
			cfg->state_flags[core] = DEAD;
		} else if (state == DEAD) {
			/* Move to GONE first so the callback fires only once. */
			cfg->state_flags[core] = GONE;
			print_trace("Core died. ", cfg, core);
			if (cfg->callback)
				cfg->callback(cfg->callback_data, core);
		}
		/* GONE: already reported, nothing further to do. */
	}
}


/*
 * Allocates and initialises a keepalive structure.
 *
 * callback - function to invoke when a core is found dead (stored as given).
 * data     - opaque pointer handed back to the callback.
 *
 * Returns the new structure, or NULL when the allocation fails.
 */
struct rte_keepalive *
rte_keepalive_create(rte_keepalive_failure_callback_t callback,
	void *data)
{
	struct rte_keepalive *cfg = rte_zmalloc("RTE_EAL_KEEPALIVE",
			sizeof(*cfg), RTE_CACHE_LINE_SIZE);

	if (cfg == NULL)
		return NULL;

	cfg->callback = callback;
	cfg->callback_data = data;
	cfg->tsc_initial = rte_rdtsc();
	cfg->tsc_mhz = rte_get_tsc_hz() / 1000;

	return cfg;
}


/*
 * Marks a core as one that rte_keepalive_dispatch_pings() should check.
 *
 * keepcfg - keepalive structure.
 * id_core - id of the core to register.
 *
 * Ids outside [0, RTE_KEEPALIVE_MAXCORES) are ignored. The lower bound
 * matters because id_core is signed: a negative value would otherwise
 * pass the upper-bound test and write before the start of active_cores[].
 */
void
rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core)
{
	if (id_core < 0 || id_core >= RTE_KEEPALIVE_MAXCORES)
		return;

	keepcfg->active_cores[id_core] = 1;
}
1 change: 1 addition & 0 deletions lib/librte_eal/linuxapp/eal/Makefile
Expand Up @@ -90,6 +90,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += eal_common_thread.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_malloc.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_elem.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += malloc_heap.c
SRCS-$(CONFIG_RTE_LIBRTE_EAL_LINUXAPP) += rte_keepalive.c

CFLAGS_eal.o := -D_GNU_SOURCE
CFLAGS_eal_interrupts.o := -D_GNU_SOURCE
Expand Down
6 changes: 5 additions & 1 deletion lib/librte_eal/linuxapp/eal/rte_eal_version.map
Expand Up @@ -133,5 +133,9 @@ DPDK_2.2 {
global:

rte_intr_cap_multiple;
rte_keepalive_create;
rte_keepalive_dispatch_pings;
rte_keepalive_mark_alive;
rte_keepalive_register_core;

} DPDK_2.1;
} DPDK_2.1;

0 comments on commit 75583b0

Please sign in to comment.